From 6d2e9b2520ed7fbcc85d3216bb24e44def69d353 Mon Sep 17 00:00:00 2001
From: jironglin
Date: Tue, 18 Aug 2020 14:02:07 +0000
Subject: [PATCH 1/9] add some avx512f intrinsics

---
 crates/core_arch/avx512f.md            |  107 +
 crates/core_arch/src/x86/avx512f.rs    | 5277 ++++++++++++++++--------
 crates/core_arch/src/x86_64/avx512f.rs |  552 +++
 3 files changed, 4216 insertions(+), 1720 deletions(-)
 create mode 100644 crates/core_arch/avx512f.md

diff --git a/crates/core_arch/avx512f.md b/crates/core_arch/avx512f.md
new file mode 100644
index 0000000000..567fd0e7ce
--- /dev/null
+++ b/crates/core_arch/avx512f.md
@@ -0,0 +1,107 @@
+["AVX512F"]
+
+ * [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236)
+ * [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236)
+ * [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236)
+ * [x] [`_mm512_kand`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kand&expand=5236)
+ * [x] [`_mm512_kor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kor&expand=5236)
+ * [x] [`_mm512_kxor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxor&expand=5236)
+ * [x] [`_kand_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
+ * [x] [`_kor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
+ * [x] [`_kxor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
+ * [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236)
+ * [x] [`_mm512_mask_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi64&expand=5236)
+ * [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236)
+ * [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236)
+ * [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236)
+ * [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236)
+ * [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236)
+ * [x] [`_mm512_mask_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi64&expand=5236)
+ * [x] [`_mm512_mask_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=5236)
+ * [x] [`_mm512_mask_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=5236)
+ * [x] [`_mm512_mask_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi32&expand=5236)
+ * [x] [`_mm512_mask_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi32&expand=5236)
+ * [x] [`_mm512_mask_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi64&expand=5236)
+ * [x] [`_mm512_mask_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi32&expand=5236)
+ * [x] [`_mm512_mask_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi64&expand=5236)
+ * [x] [`_mm512_mask_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi32&expand=5236)
+ * [x] [`_mm512_mask_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi32&expand=5236)
+ * [x] [`_mm512_mask_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi64&expand=5236)
+ * [x] [`_mm512_mask_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5236)
+ * [x] [`_mm512_mask_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi64&expand=5236)
+ * [x] [`_mm512_mask_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi32&expand=5236)
+ * [x] [`_mm512_mask_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi64&expand=5236)
+ * [x] [`_mm512_mask_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi32&expand=5236)
+ * [x] [`_mm512_mask_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi64&expand=5236)
+ * [x] [`_mm512_mask_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi32&expand=5236)
+ * [x] [`_mm512_mask_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi64&expand=5236)
+ * [x] [`_mm512_mask_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi32&expand=5236)
+ * [x] [`_mm512_mask_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi64&expand=5236)
+ * [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236)
+ * [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236)
+ * [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236)
+ * [x] [`_mm512_maskz_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi64&expand=5236)
+ * [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236)
+ * [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=5236)
+ * [x] [`_mm512_maskz_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi64&expand=5236)
+ * [x] [`_mm512_maskz_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi32&expand=5236)
+ * [x] [`_mm512_maskz_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi32&expand=5236)
+ * [x] [`_mm512_maskz_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi64&expand=5236)
+ * [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236)
+ * [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236)
+ * [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236)
+ * [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236)
+ * [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236)
+ * [x] [`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236)
+ * [x] [`_mm512_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi64&expand=5236)
+ * [x] [`_mm512_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=5236)
+ * [x] [`_mm512_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=5236)
+ * [x] [`_mm512_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi32&expand=5236)
+ * [x] [`_mm512_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi64&expand=5236)
+ * [x] [`_mm512_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi32&expand=5236)
+ * [x] [`_mm512_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi64&expand=5236)
+ * [x] [`_mm512_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi32&expand=5236)
+ * [x] [`_mm512_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi64&expand=5236)
+ * [x] [`_mm512_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi32&expand=5236)
+ * [x] [`_mm512_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi64&expand=5236)
+ * [x] [`_mm512_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi32&expand=5236)
+ * [x] [`_mm512_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi64&expand=5236)
+ * [x] [`_mm512_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5236)
+ * [x] [`_mm512_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi64&expand=5236)
+ * [x] [`_mm512_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi32&expand=5236)
+ * [x] [`_mm512_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi64&expand=5236)
+ * [x] [`_mm512_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi32&expand=5236)
+ * [x] [`_mm512_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi64&expand=5236)
+ * [x] [`_mm512_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi32&expand=5236)
+ * [x] [`_mm512_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi64&expand=5236)
+ * [x] [`_mm512_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi32&expand=5236)
+ * [x] [`_mm512_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi64&expand=5236)
+ * [x] [`_mm512_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi32&expand=5236)
+ * [x] [`_mm512_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi64&expand=5236)
+ * [x] [`_mm512_xor_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_si512&expand=5236)
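Nearly every intrinsic in this checklist comes as a plain/`_mask_`/`_maskz_` triple, and the difference between the writemask and zeromask forms is the main convention to keep straight when reading the diff below. A minimal sketch, not part of the patch (the helper name is made up; it assumes a nightly toolchain with `#![feature(stdsimd)]` running on an AVX512F-capable CPU):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn writemask_vs_zeromask() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0b0001);
    // Bit i of the mask selects lane i of the result.
    let k: __mmask16 = 0b0000_0000_0000_1111;

    // Writemask (`_mask_`): unselected lanes are copied from `src`.
    let merged: [i32; 16] = transmute(_mm512_mask_rol_epi32(src, k, a, 1));
    assert_eq!(merged[0], 0b0010); // selected lane: rotated
    assert_eq!(merged[15], -1); // unselected lane: taken from src

    // Zeromask (`_maskz_`): unselected lanes are zeroed instead.
    let zeroed: [i32; 16] = transmute(_mm512_maskz_rol_epi32(k, a, 1));
    assert_eq!(zeroed[0], 0b0010);
    assert_eq!(zeroed[15], 0);
}
```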

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 4744b435dd..dfcc7e7bc1 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -854,1646 +854,2897 @@ pub unsafe fn _mm512_mask_i64scatter_epi32(
     constify_imm8_gather!(scale, call);
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
 ///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_ps(
-    e0: f32,
-    e1: f32,
-    e2: f32,
-    e3: f32,
-    e4: f32,
-    e5: f32,
-    e6: f32,
-    e7: f32,
-    e8: f32,
-    e9: f32,
-    e10: f32,
-    e11: f32,
-    e12: f32,
-    e13: f32,
-    e14: f32,
-    e15: f32,
-) -> __m512 {
-    _mm512_setr_ps(
-        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
-    )
+#[cfg_attr(test, assert_instr(vprold))]
+pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprold(a.as_i32x16(), imm8 as i8))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_ps(
-    e0: f32,
-    e1: f32,
-    e2: f32,
-    e3: f32,
-    e4: f32,
-    e5: f32,
-    e6: f32,
-    e7: f32,
-    e8: f32,
-    e9: f32,
-    e10: f32,
-    e11: f32,
-    e12: f32,
-    e13: f32,
-    e14: f32,
-    e15: f32,
-) -> __m512 {
-    let r = f32x16::new(
-        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
-    );
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprold))]
+pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
 }

-/// Broadcast 64-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
-    transmute(f64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprold))]
+pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, rol, zero))
 }

-/// Broadcast 32-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
-    transmute(f32x16::splat(a))
+#[cfg_attr(test, assert_instr(vprord))]
+pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprord(a.as_i32x16(), imm8 as i8))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_epi32(
-    e15: i32,
-    e14: i32,
-    e13: i32,
-    e12: i32,
-    e11: i32,
-    e10: i32,
-    e9: i32,
-    e8: i32,
-    e7: i32,
-    e6: i32,
-    e5: i32,
-    e4: i32,
-    e3: i32,
-    e2: i32,
-    e1: i32,
-    e0: i32,
-) -> __m512i {
-    _mm512_setr_epi32(
-        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
-    )
+#[cfg_attr(test, assert_instr(vprord))]
+pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
 }

-/// Broadcast 32-bit integer `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
-    transmute(i32x16::splat(a))
+#[cfg_attr(test, assert_instr(vprord))]
+pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, ror, zero))
 }

-/// Broadcast 64-bit integer `a` to all elements of `dst`.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
-    transmute(i64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprolq))]
+pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprolq(a.as_i64x8(), imm8 as i8))
 }
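Unlike the shifts further down, a rotate never discards bits: whatever falls off one end re-enters at the other. A small sketch of the 32-bit forms added above (hypothetical test function, same nightly/AVX512F assumptions as before):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn rotate_wraps_bits() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    // Only the top bit is set; rotating left by one wraps it
    // around to bit 0 instead of shifting it out.
    let a = _mm512_set1_epi32(1 << 31);
    let rol: [i32; 16] = transmute(_mm512_rol_epi32(a, 1));
    assert_eq!(rol[0], 1);

    // Rotating right by one moves bit 0 up to bit 31.
    let b = _mm512_set1_epi32(1);
    let ror: [i32; 16] = transmute(_mm512_ror_epi32(b, 1));
    assert_eq!(ror[0], 1 << 31);
}
```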
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vprolq))]
+pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vprolq))]
+pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, rol, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vprorq))]
+pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprorq(a.as_i64x8(), imm8 as i8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vprorq))]
+pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vprorq))]
+pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, ror, zero))
 }
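The 64-bit variants follow the same pattern but operate on 8 lanes, so the mask narrows to `__mmask8`. A sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn rotate_epi64_masked() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let src = _mm512_set1_epi64(0);
    let a = _mm512_set1_epi64(1);
    // Only 8 lanes now, so the mask type is __mmask8.
    let k: __mmask8 = 0b0000_0001;

    let r: [i64; 8] = transmute(_mm512_mask_ror_epi64(src, k, a, 1));
    assert_eq!(r[0], 1i64 << 63); // bit 0 wrapped around to bit 63
    assert_eq!(r[1], 0); // copied from src
}
```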
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllid))]
+pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpsllid(a.as_i32x16(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsllid))]
+pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsllid))]
+pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrlid))]
+pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpsrlid(a.as_i32x16(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrlid))]
+pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrlid))]
+pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
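Where a rotate wraps bits around, these immediate shifts discard them and fill with zeros; note that `imm8` is taken as `u32` here rather than `u8`. A sketch of both 32-bit directions (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn immediate_shifts() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let a = _mm512_set1_epi32(0b1001);

    // Left shift: zeros come in at the bottom, high bits fall off.
    let sl: [i32; 16] = transmute(_mm512_slli_epi32(a, 2));
    assert_eq!(sl[0], 0b10_0100);

    // Logical right shift: zeros come in at the top, so the
    // low bit of 0b1001 is simply lost.
    let sr: [i32; 16] = transmute(_mm512_srli_epi32(a, 1));
    assert_eq!(sr[0], 0b100);
}
```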
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpslliq))]
+pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpslliq(a.as_i64x8(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpps(
-                a.as_f32x16(),
-                b.as_f32x16(),
-                $imm5,
-                neg_one,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpslliq))]
+pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpps(
-                a.as_f32x16(),
-                b.as_f32x16(),
-                $imm5,
-                m as i16,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpslliq))]
+pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrliq))]
+pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpsrliq(a.as_i64x8(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_mask_cmp_round_ps_mask(
-    m: __mmask16,
-    a: __m512,
-    b: __m512,
-    op: i32,
-    sae: i32,
-) -> __mmask16 {
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrliq))]
+pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpsrliq))]
+pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
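The 64-bit immediate shifts compose with a zeromask exactly like their 32-bit counterparts. A brief sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_srli_epi64_sketch() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let a = _mm512_set1_epi64(0xF0);
    // Keep the even lanes, zero the odd ones.
    let k: __mmask8 = 0b0101_0101;

    let r: [i64; 8] = transmute(_mm512_maskz_srli_epi64(k, a, 4));
    assert_eq!(r[0], 0x0F); // selected lane: shifted
    assert_eq!(r[1], 0); // unselected lane: zeroed
}
```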
-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
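The `sll`/`srl` forms take the shift amount from the low 64 bits of an `__m128i` and apply it uniformly to every lane; per Intel's pseudocode, a count at or above the element width zeroes the result rather than wrapping. A sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn count_shifts_epi32() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let a = _mm512_set1_epi32(0b1000);
    // The count lives in the low 64 bits of an __m128i.
    let count = _mm_set_epi64x(0, 3);

    let r: [i32; 16] = transmute(_mm512_srl_epi32(a, count));
    assert_eq!(r[0], 0b1);

    // A count of 32 or more shifts everything out.
    let big = _mm_set_epi64x(0, 32);
    let z: [i32; 16] = transmute(_mm512_srl_epi32(a, big));
    assert_eq!(z[0], 0);
}
```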
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
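The same count convention applies to the 64-bit lanes, and the writemask merge composes with it in the obvious way. A sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn masked_count_shift_epi64() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(3);
    let count = _mm_set_epi64x(0, 2);
    let k: __mmask8 = 0b0000_1111;

    let r: [i64; 8] = transmute(_mm512_mask_sll_epi64(src, k, a, count));
    assert_eq!(r[0], 12); // 3 << 2 in the selected lanes
    assert_eq!(r[7], -1); // src elsewhere
}
```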
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmppd(
-                a.as_f64x8(),
-                b.as_f64x8(),
-                $imm5,
-                neg_one,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmppd(
-                a.as_f64x8(),
-                b.as_f64x8(),
-                $imm5,
-                m as i8,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
call { - ($imm5:expr, $imm4:expr) => { - vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3, 4)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm512_mask_cmp_round_pd_mask( - m: __mmask8, - a: __m512d, - b: __m512d, - op: i32, - sae: i32, -) -> __mmask8 { - macro_rules! call { - ($imm5:expr, $imm4:expr) => { - vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsraid))] +pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { + transmute(vpsraid(a.as_i32x16(), imm8)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q) +#[cfg_attr(test, assert_instr(vpsraid))] +pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q) +#[cfg_attr(test, assert_instr(vpsraid))] +pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q) +#[cfg_attr(test, assert_instr(vpsraiq))] +pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { + transmute(vpsraiq(a.as_i64x8(), imm8)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q) +#[cfg_attr(test, assert_instr(vpsraiq))] +pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
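Taken together, the shift intrinsics in this patch come in three flavors that differ only in where the count lives: sra/srl/sll take one count for all lanes from the low 64 bits of a __m128i, srai/srli/slli take it as an immediate, and the srav/srlv/sllv functions below take a per-lane count. A scalar sketch of the distinction, with made-up values and counts kept below the lane width:

fn main() {
    let a: [i32; 4] = [-64, 64, -1, 8];
    // sra/srai style: one count applied to every lane.
    let uniform: Vec<i32> = a.iter().map(|&x| x >> 2).collect();
    assert_eq!(uniform, [-16, 16, -1, 2]);
    // srav style: each lane is shifted by its own count.
    let counts = [1u32, 2, 3, 4];
    let per_lane: Vec<i32> = a.iter().zip(&counts).map(|(&x, &c)| x >> c).collect();
    assert_eq!(per_lane, [-32, 16, -1, 0]);
}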
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! call { - ($imm5:expr) => { - vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsraiq))] +pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 { - macro_rules! call { - ($imm5:expr) => { - vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2, 3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! 
call { - ($imm5:expr, $imm4:expr) => { - vcmpss(a, b, $imm5, neg_one, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3, 4)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_round_ss_mask( - m: __mmask8, - a: __m128, - b: __m128, - op: i32, - sae: i32, -) -> __mmask8 { - macro_rules! call { - ($imm5:expr, $imm4:expr) => { - vcmpss(a, b, $imm5, m as i8, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! call { - ($imm5:expr) => { - vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). 
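One subtlety the scalar shift models above hide: for the variable arithmetic shifts (vpsravd/vpsravq), Intel documents that a count at or above the lane width fills the lane with sign bits rather than being taken modulo the width. Plain Rust >> panics on such a count, so a faithful scalar model spells the saturation out; a sketch:

fn main() {
    let a: [i32; 2] = [-7, 7];
    let counts: [u32; 2] = [40, 40]; // deliberately wider than 32 bits
    // Count >= 32 saturates the lane to all sign bits (x >> 31),
    // matching Intel's description of the variable arithmetic shifts.
    let r: Vec<i32> = a.iter().zip(&counts)
        .map(|(&x, &c)| if c >= 32 { x >> 31 } else { x >> c })
        .collect();
    assert_eq!(r, [-1, 0]);
}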
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 { - macro_rules! call { - ($imm5:expr) => { - vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2, 3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! call { - ($imm5:expr, $imm4:expr) => { - vcmpsd(a, b, $imm5, neg_one, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3, 4)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_round_sd_mask( - m: __mmask8, - a: __m128d, - b: __m128d, - op: i32, - sae: i32, -) -> __mmask8 { - macro_rules! 
call { - ($imm5:expr, $imm4:expr) => { - vcmpsd(a, b, $imm5, m as i8, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmplt_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, rol, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpgt_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, ror, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmple_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpge_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, rol, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector. +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpeq_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector. +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, ror, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
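The rolv/rorv intrinsics map directly onto Rust's rotate_left/rotate_right applied per lane; rotation counts are effectively taken modulo the lane width, and the two directions are inverses of each other. A scalar check:

fn main() {
    let a: [u32; 4] = [0x8000_0001, 1, 0xF000_0000, 7];
    let b: [u32; 4] = [1, 31, 4, 0];
    // Per-lane model of vprolvd:
    let rol: Vec<u32> = a.iter().zip(&b).map(|(&x, &n)| x.rotate_left(n)).collect();
    assert_eq!(rol, [3, 0x8000_0000, 0x0000_000F, 7]);
    // vprorvd undoes it:
    let ror: Vec<u32> = rol.iter().zip(&b).map(|(&x, &n)| x.rotate_right(n)).collect();
    assert_eq!(ror, a);
}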
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpneq_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op. +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 { - let neg_one = -1; - macro_rules! call { - ($imm3:expr) => { - vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_mask_cmp_epu32_mask( - m: __mmask16, - a: __m512i, - b: __m512i, - op: _MM_CMPINT_ENUM, -) -> __mmask16 { - macro_rules! 
call { - ($imm3:expr) => { - vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmplt_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
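A typical use of the per-lane logical shifts is bit-field unpacking: broadcast a packed word, shift each lane right by its field's offset, then mask to the field width. A scalar sketch of the pattern; the field layout here is made up for illustration:

fn main() {
    // Three fields packed into one word: bits [0..4), [4..10), [10..16).
    let packed = 0b101010_010110_1001u32;
    let lanes = [packed; 3]; // broadcast, as _mm512_set1_epi32 would
    let offsets = [0u32, 4, 10];
    let widths = [4u32, 6, 6];
    // srlv-style per-lane shift, then AND with each field's mask.
    let fields: Vec<u32> = lanes.iter().zip(offsets.iter().zip(&widths))
        .map(|(&x, (&o, &w))| (x >> o) & ((1u32 << w) - 1))
        .collect();
    assert_eq!(fields, [0b1001, 0b010110, 0b101010]);
}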
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpgt_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsllvq))] +pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsllvq))] +pub unsafe fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmple_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsllvq))] +pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi64&expand=5561) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpge_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub unsafe fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
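Unlike their arithmetic cousins, the variable logical shifts (vpsllvd/vpsllvq and vpsrlvd/vpsrlvq) zero any lane whose count is at least the lane width, per Intel's description of the instructions; Rust's << would panic on such a count, so the scalar model makes the flush explicit:

fn main() {
    let a: [u64; 4] = [1, 1, 0x8000_0000_0000_0000, 3];
    let counts: [u64; 4] = [8, 64, 63, 1];
    // Per-lane model of vpsllvq; a count >= 64 flushes the lane to zero.
    let r: Vec<u64> = a.iter().zip(&counts)
        .map(|(&x, &c)| if c >= 64 { 0 } else { x << c })
        .collect();
    assert_eq!(r, [256, 0, 0, 6]);
}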
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpeq_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector. +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, and, src.as_i32x16())) } -/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpneq_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, and, zero)) } -/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op. +/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 { - let neg_one = -1; - macro_rules! call { - ($imm3:expr) => { - vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpandq))] +pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_and(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_mask_cmp_epi32_mask( - m: __mmask16, - a: __m512i, - b: __m512i, - op: _MM_CMPINT_ENUM, -) -> __mmask16 { - macro_rules! call { - ($imm3:expr) => { - vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpandq))] +pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, and, src.as_i64x8())) } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=281) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +#[cfg_attr(test, assert_instr(vpandq))] +pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, and, zero)) } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmplt_epu64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) +#[cfg_attr(test, assert_instr(vpord))] +pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
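For the plain bitwise operations the epi32/epi64 distinction only matters once a mask is involved: the bit pattern of AND, OR, and XOR is lane-agnostic, but an __mmask16 selects sixteen 32-bit lanes while an __mmask8 selects eight 64-bit lanes. A scalar sketch of a zeromasked AND at 32-bit granularity, with made-up values:

fn main() {
    let a = [0xFFu32; 4];
    let b = [0x0Fu32; 4];
    let k = 0b0101u8; // keep lanes 0 and 2, zero lanes 1 and 3
    let r: Vec<u32> = a.iter().zip(&b).enumerate()
        .map(|(i, (&x, &y))| if (k >> i) & 1 == 1 { x & y } else { 0 })
        .collect();
    assert_eq!(r, [0x0F, 0, 0x0F, 0]);
}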
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmpgt_epu64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpord))] +pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let or = _mm512_or_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, or, src.as_i32x16())) } -/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) +#[cfg_attr(test, assert_instr(vpord))] +pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let or = _mm512_or_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, or, zero)) } -/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmple_epu64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vporq))] +pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_or(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, or, src.as_i64x8()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpge_epu64_mask(b, a) & m
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, or, zero))
 }

-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpeq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpneq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, xor, zero))
 }

-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
 }

-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epu64_mask(
-    m: __mmask8,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
 }

-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, xor, zero))
 }

-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmplt_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(kandw))]
+pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kandw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpgt_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(kandw))]
+pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kandw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(korw))]
+pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    korw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmple_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(korw))]
+pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
+    korw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(kxorw))]
+pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kxorw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpge_epi64_mask(b, a) & m
+#[cfg_attr(test, assert_instr(kxorw))]
+pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kxorw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
+
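The `k*` operations above work on the 16-bit mask registers themselves, which makes it cheap to combine the results of several comparisons before applying them. A small sketch, assuming a nightly toolchain with these AVX512F intrinsics (`lanes_lt_or_eq` is a made-up name):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lanes_lt_or_eq(a: __m512, b: __m512, c: __m512) -> __mmask16 {
    let lt = _mm512_cmplt_ps_mask(a, b); // lanes where a < b
    let eq = _mm512_cmpeq_ps_mask(a, c); // lanes where a == c
    _mm512_kor(lt, eq) // union of the two conditions
}
```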
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_set_ps(
+    e0: f32,
+    e1: f32,
+    e2: f32,
+    e3: f32,
+    e4: f32,
+    e5: f32,
+    e6: f32,
+    e7: f32,
+    e8: f32,
+    e9: f32,
+    e10: f32,
+    e11: f32,
+    e12: f32,
+    e13: f32,
+    e14: f32,
+    e15: f32,
+) -> __m512 {
+    _mm512_setr_ps(
+        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+    )
 }

-/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_setr_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpeq_epi64_mask(a, b) & m
+pub unsafe fn _mm512_setr_ps(
+    e0: f32,
+    e1: f32,
+    e2: f32,
+    e3: f32,
+    e4: f32,
+    e5: f32,
+    e6: f32,
+    e7: f32,
+    e8: f32,
+    e9: f32,
+    e10: f32,
+    e11: f32,
+    e12: f32,
+    e13: f32,
+    e14: f32,
+    e15: f32,
+) -> __m512 {
+    let r = f32x16::new(
+        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+    );
+    transmute(r)
 }

-/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+/// Broadcast 64-bit float `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
+    transmute(f64x8::splat(a))
 }

-/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+/// Broadcast 32-bit float `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpneq_epi64_mask(a, b) & m
+pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
+    transmute(f32x16::splat(a))
 }
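Note the argument order: `_mm512_set_ps` takes its arguments from the highest element down to element 0, while `_mm512_setr_ps` takes them in element (reversed) order, so the two calls below build the same vector. A sketch assuming AVX512F is available (`set_order_demo` is illustrative):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn set_order_demo() -> bool {
    let hi_to_lo = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let lo_to_hi = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // All 16 lanes compare equal.
    _mm512_cmpeq_ps_mask(hi_to_lo, lo_to_hi) == 0xffff
}
```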
-/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+/// Sets packed 32-bit integers in `dst` with the supplied values.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+pub unsafe fn _mm512_set_epi32(
+    e15: i32,
+    e14: i32,
+    e13: i32,
+    e12: i32,
+    e11: i32,
+    e10: i32,
+    e9: i32,
+    e8: i32,
+    e7: i32,
+    e6: i32,
+    e5: i32,
+    e4: i32,
+    e3: i32,
+    e2: i32,
+    e1: i32,
+    e0: i32,
+) -> __m512i {
+    _mm512_setr_epi32(
+        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+    )
 }

-/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+/// Broadcast 32-bit integer `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epi64_mask(
-    m: __mmask8,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
+    transmute(i32x16::splat(a))
 }

-/// Returns vector of type `__m512d` with undefined elements.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+/// Broadcast 64-bit integer `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-// This intrinsic has no corresponding instruction.
-pub unsafe fn _mm512_undefined_pd() -> __m512d {
-    _mm512_set1_pd(0.0)
+pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
+    transmute(i64x8::splat(a))
 }

-/// Returns vector of type `__m512` with undefined elements.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-// This intrinsic has no corresponding instruction.
-pub unsafe fn _mm512_undefined_ps() -> __m512 {
-    _mm512_set1_ps(0.0)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
 }

-/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
-/// floating-point elements) from memory into result.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
-    ptr::read_unaligned(mem_addr as *const __m512d)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
 }

-/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
-/// floating-point elements) from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
-    ptr::write_unaligned(mem_addr as *mut __m512d, a);
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
 }

-/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
-/// floating-point elements) from memory into result.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
-    ptr::read_unaligned(mem_addr as *const __m512)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
 }

-/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
-/// floating-point elements) from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
-    ptr::write_unaligned(mem_addr as *mut __m512, a);
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
 }

-/// Sets packed 64-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_pd(
-    e0: f64,
-    e1: f64,
-    e2: f64,
-    e3: f64,
-    e4: f64,
-    e5: f64,
-    e6: f64,
-    e7: f64,
-) -> __m512d {
-    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
 }

-/// Sets packed 64-bit integers in `dst` with the supplied values.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_pd(
-    e0: f64,
-    e1: f64,
-    e2: f64,
-    e3: f64,
-    e4: f64,
-    e5: f64,
-    e6: f64,
-    e7: f64,
-) -> __m512d {
-    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
 }

-/// Equal
-pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
-/// Less-than
-pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
-/// Less-than-or-equal
-pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
-/// False
-pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
-/// Not-equal
-pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
-/// Not less-than
-pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
-/// Not less-than-or-equal
-pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
-/// True
-pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
+}

-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
-    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
-    #[link_name = "llvm.x86.avx512.gather.dps.512"]
-    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
-    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
-    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
-    #[link_name = "llvm.x86.avx512.gather.qps.512"]
-    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
-    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
-    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
-    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
-    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
-    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
-    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
-    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
+}

-    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
-    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
-    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
-    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
-    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
-    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
-    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
-    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
-    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
+}

-    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
-    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
-    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
-    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
-    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
-    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
-    fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
-    fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
-    fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
-    #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
-    fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
 }

-#[cfg(test)]
-mod tests {
-    use std;
-    use stdarch_test::simd_test;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
+}

-    use crate::core_arch::x86::*;
-    use crate::hint::black_box;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpps(
+                a.as_f32x16(),
+                b.as_f32x16(),
+                $imm5,
+                neg_one,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}

-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_abs_epi32() {
-        #[rustfmt::skip]
-        let a = _mm512_setr_epi32(
-            0, 1, -1, i32::MAX,
-            i32::MIN, 100, -100, -32,
-            0, 1, -1, i32::MAX,
-            i32::MIN, 100, -100, -32,
-        );
-        let r = _mm512_abs_epi32(a);
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpps(
+                a.as_f32x16(),
+                b.as_f32x16(),
+                $imm5,
+                m as i16,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_mask_cmp_round_ps_mask(
+    m: __mmask16,
+    a: __m512,
+    b: __m512,
+    op: i32,
+    sae: i32,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+}
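The fixed comparisons above are thin wrappers over `_mm512_cmp_ps_mask` with a `_CMP_*` predicate; any of the predicate constants can be passed directly, as long as the value is known at compile time (that is what `rustc_args_required_const` enforces). For example, this reproduces `_mm512_cmplt_ps_mask` (a sketch assuming AVX512F):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lt_via_predicate(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_LT_OS: ordered, signaling less-than -- the predicate behind cmplt.
    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}
```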
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmppd(
+                a.as_f64x8(),
+                b.as_f64x8(),
+                $imm5,
+                neg_one,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmppd(
+                a.as_f64x8(),
+                b.as_f64x8(),
+                $imm5,
+                m as i8,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_mask_cmp_round_pd_mask(
+    m: __mmask8,
+    a: __m512d,
+    b: __m512d,
+    op: i32,
+    sae: i32,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
+}
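The `_round_` variants add an explicit `sae` parameter: `_MM_FROUND_CUR_DIRECTION` gives the same behavior as the non-round forms, while `_MM_FROUND_NO_EXC` suppresses floating-point exceptions during the compare. A sketch assuming AVX512F (`eq_no_exceptions` is illustrative):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn eq_no_exceptions(a: __m512d, b: __m512d) -> __mmask8 {
    // Same predicate as _mm512_cmpeq_pd_mask, but with exceptions suppressed.
    _mm512_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC)
}
```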
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpss(a, b, $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_round_ss_mask(
+    m: __mmask8,
+    a: __m128,
+    b: __m128,
+    op: i32,
+    sae: i32,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpss(a, b, $imm5, m as i8, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpsd(a, b, $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_round_sd_mask(
+    m: __mmask8,
+    a: __m128d,
+    b: __m128d,
+    op: i32,
+    sae: i32,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpsd(a, b, $imm5, m as i8, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
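Unlike the packed forms, the `ss`/`sd` comparisons look only at element 0 and return the verdict in bit 0 of the mask. A sketch assuming AVX512F (`lower_lt` is illustrative):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lower_lt(a: __m128d, b: __m128d) -> bool {
    // Bit 0 of the mask holds the result for the lower element.
    _mm_cmp_sd_mask(a, b, _CMP_LT_OS) & 1 != 0
}
```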
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmplt_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpgt_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmple_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpge_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpeq_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpneq_epu32_mask(a, b) & m
+}
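+
+// Usage sketch for the unsigned 32-bit compares above (illustrative only;
+// `example_cmp_epu32_usage` is a hypothetical helper, not part of the API).
+// The masked variants simply AND the compare result with the zeromask.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epu32_usage() {
+    let a = _mm512_set1_epi32(-1); // every lane is u32::MAX when read as unsigned
+    let b = _mm512_setzero_si512(); // every lane is 0
+    assert_eq!(_mm512_cmplt_epu32_mask(b, a), 0xFFFF); // 0 < u32::MAX in all 16 lanes
+    // Only lanes whose mask bit is set can survive into the result.
+    assert_eq!(_mm512_mask_cmplt_epu32_mask(0b1111, b, a), 0b1111);
+}
+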
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu32_mask(
+    m: __mmask16,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmplt_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
+}
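+
+// Sketch of the operator-driven form (illustrative only; hypothetical
+// helper). The `_MM_CMPINT_*` constants, defined later in this file, select
+// the predicate encoded into the `vpcmpud` immediate.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epu32_op() {
+    let a = _mm512_set1_epi32(1);
+    let b = _mm512_set1_epi32(2);
+    // 1 <= 2 holds in every lane, so the full 16-bit mask comes back.
+    assert_eq!(_mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LE), 0xFFFF);
+    // The zeromask clears the high half of the result.
+    assert_eq!(_mm512_mask_cmp_epu32_mask(0x00FF, a, b, _MM_CMPINT_LE), 0x00FF);
+}
+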
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpgt_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmple_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpge_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpeq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpneq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi32_mask(
+    m: __mmask16,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) +} + +/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmple_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpge_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpneq_epu64_mask(a, b) & m
+}
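+
+// Sketch (illustrative only; hypothetical helper): the masked form keeps the
+// operand order of the unmasked compare, i.e. it still reports `a >= b`.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmpge_epu64_usage() {
+    let a = _mm512_set1_epi32(2); // each unsigned 64-bit lane of `a` exceeds `b`'s
+    let b = _mm512_set1_epi32(1);
+    assert_eq!(_mm512_cmpge_epu64_mask(a, b), 0xFF); // true in all 8 lanes
+    assert_eq!(_mm512_mask_cmpge_epu64_mask(0b0000_1010, a, b), 0b0000_1010);
+}
+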
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu64_mask(
+    m: __mmask8,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmplt_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpgt_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmple_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpge_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+}
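+
+// Sketch (illustrative only; hypothetical helper) of the `_MM_CMPINT_ENUM`
+// form on signed 64-bit lanes; `_MM_CMPINT_NE` is the complement of
+// `_MM_CMPINT_EQ`.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epi64_op() {
+    let a = _mm512_set1_epi32(7); // both operands hold identical lane patterns
+    let b = _mm512_set1_epi32(7);
+    assert_eq!(_mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ), 0xFF);
+    assert_eq!(_mm512_cmp_epi64_mask(a, b, _MM_CMPINT_NE), 0x00);
+}
+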
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpneq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi64_mask(
+    m: __mmask8,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Returns vector of type `__m512d` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_pd() -> __m512d {
+    _mm512_set1_pd(0.0)
+}
+
+/// Returns vector of type `__m512` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_ps() -> __m512 {
+    _mm512_set1_ps(0.0)
+}
+
+/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+    ptr::read_unaligned(mem_addr as *const __m512d)
+}
+
+/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+    ptr::write_unaligned(mem_addr as *mut __m512d, a);
+}
+
+/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+    ptr::read_unaligned(mem_addr as *const __m512)
+}
+
+/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+    ptr::write_unaligned(mem_addr as *mut __m512, a);
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_setr_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_setr_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
+}
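+
+// Round-trip sketch for the unaligned load/store pairs above (illustrative
+// only; hypothetical helper). No particular alignment is required of either
+// buffer.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_loadu_storeu_pd() {
+    let src = [1.0f64, 2., 3., 4., 5., 6., 7., 8.];
+    let mut dst = [0.0f64; 8];
+    let v = _mm512_loadu_pd(src.as_ptr());
+    _mm512_storeu_pd(dst.as_mut_ptr(), v);
+    assert_eq!(src, dst);
+}
+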
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+}
+
+/// Equal
+pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
+/// Less-than
+pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
+/// Less-than-or-equal
+pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
+/// False
+pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
+/// Not-equal
+pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
+/// Not less-than
+pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
+/// Not less-than-or-equal
+pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
+/// True
+pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
+    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.gather.dps.512"]
+    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
+    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
+    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.gather.qps.512"]
+    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
+    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
+    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
+    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
+    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
+    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
+    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+
+    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
+    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
+    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
+    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
+    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
+    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
+    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
+    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
+    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+
+    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
+    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
+    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
+    fn vcmpps(a: f32x16, b: f32x16, op: i32, m:
i16, sae: i32) -> i16; + #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"] + fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8; + #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"] + fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.q.512"] + fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"] + fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; + #[link_name = "llvm.x86.avx512.mask.cmp.d.512"] + fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; + + #[link_name = "llvm.x86.avx512.mask.prol.d.512"] + fn vprold(a: i32x16, shift: i8) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.pror.d.512"] + fn vprord(a: i32x16, shift: i8) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prol.q.512"] + fn vprolq(a: i64x8, shift: i8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.pror.q.512"] + fn vprorq(a: i64x8, shift: i8) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] + fn vprolvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] + fn vprorvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] + fn vprolvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] + fn vprorvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.psllv.d.512"] + fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrlv.d.512"] + fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psllv.q.512"] + fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrlv.q.512"] + fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.pslli.d.512"] + fn vpsllid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrli.d.512"] + fn vpsrlid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.pslli.q.512"] + fn vpslliq(a: i64x8, imm8: u32) -> i64x8; + #[link_name = "llvm.x86.avx512.psrli.q.512"] + fn vpsrliq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psll.d.512"] + fn vpslld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psrl.d.512"] + fn vpsrld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psll.q.512"] + fn vpsllq(a: i64x8, count: i64x2) -> i64x8; + #[link_name = "llvm.x86.avx512.psrl.q.512"] + fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psra.d.512"] + fn vpsrad(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psra.q.512"] + fn vpsraq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrai.d.512"] + fn vpsraid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrai.q.512"] + fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrav.d.512"] + fn vpsravd(a: i32x16, count: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrav.q.512"] + fn vpsravq(a: i64x8, count: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.kand.w"] + fn kandw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kor.w"] + fn korw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kxor.w"] + fn kxorw(ma: u16, mb: u16) -> u16; +} + +#[cfg(test)] +mod tests { + use std; + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::hint::black_box; + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 
100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_abs_epi32(a); + let e = _mm512_setr_epi32( + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_mask_abs_epi32(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi32(a, 0b11111111, a); let e = _mm512_setr_epi32( 0, 1, @@ -2503,6 +3754,31 @@ mod tests { 100, 100, 32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_maskz_abs_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi32(0b11111111, a); + let e = _mm512_setr_epi32( 0, 1, 1, @@ -2511,945 +3787,1506 @@ mod tests { 100, 100, 32, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_ps() { + let mut arr = [0f32; 256]; + for i in 0..256 { + arr[i] = i as f32; + } + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4); + #[rustfmt::skip] + assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112., + 120., 128., 136., 144., 152., 160., 168., 176.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_ps() { + let mut arr = [0f32; 256]; + for i in 0..256 { + arr[i] = i as f32; + } + let src = _mm512_set1_ps(2.); + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4); + #[rustfmt::skip] + assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112., + 2., 128., 2., 144., 2., 160., 2., 176.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_epi32() { + let mut arr = [0i32; 256]; + for i in 0..256 { + arr[i] = i as i32; + } + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4); + #[rustfmt::skip] + assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_epi32() { + let mut arr = [0i32; 256]; + for i in 0..256 { + arr[i] = i as i32; + } + let src = _mm512_set1_epi32(2); + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4); + 
#[rustfmt::skip] + assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, + 2, 144, 2, 176, 2, 208, 2, 240)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_ps() { + let mut arr = [0f32; 256]; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., ); - assert_eq_m512i(r, e); + // A multiplier of 4 is word-addressing + _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4); + let mut expected = [0f32; 256]; + for i in 0..16 { + expected[i * 16] = (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_ps() { + let mut arr = [0f32; 256]; + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + // A multiplier of 4 is word-addressing + _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); + let mut expected = [0f32; 256]; + for i in 0..8 { + expected[i * 32 + 16] = 2. * (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_epi32() { + let mut arr = [0i32; 256]; + #[rustfmt::skip] + + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + // A multiplier of 4 is word-addressing + _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4); + let mut expected = [0i32; 256]; + for i in 0..16 { + expected[i * 16] = (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_epi32() { + let mut arr = [0i32; 256]; + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + // A multiplier of 4 is word-addressing + _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); + let mut expected = [0i32; 256]; + for i in 0..8 { + expected[i * 32 + 16] = 2 * (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmplt_ps_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_ps_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnlt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 
100., -100.); + let b = _mm512_set1_ps(-1.); + assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnlt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnle_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmpnle_ps_mask(b, a); + assert_eq!(m, 0b00001101_00001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnle_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmpnle_ps_mask(mask, b, a); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let m = _mm512_cmpeq_ps_mask(b, a); + assert_eq!(m, 0b11001101_11001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_ps_mask(mask, b, a); + assert_eq!(r, 0b01001000_01001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let m = _mm512_cmpneq_ps_mask(b, a); + assert_eq!(m, 0b00110010_00110010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_ps_mask() { + #[rustfmt::skip] + let a = 
_mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_ps_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_round_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_round_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); + assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_abs_epi32() { + unsafe fn test_mm512_cmpord_ps_mask() { #[rustfmt::skip] - let a = _mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - ); - let r = _mm512_mask_abs_epi32(a, 0, a); - assert_eq_m512i(r, a); - let r = _mm512_mask_abs_epi32(a, 0b11111111, a); - let e = _mm512_setr_epi32( - 0, - 1, - 1, - i32::MAX, - i32::MAX.wrapping_add(1), - 100, - 100, - 32, - 0, - 1, - -1, - i32::MAX, - i32::MIN, - 100, - -100, - -32, + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let m = _mm512_cmpord_ps_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let mask = 0b11000011_11000011; + let m = _mm512_mask_cmpord_ps_mask(mask, a, b); + assert_eq!(m, 0b00000001_00000001); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_cmpunord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let m = _mm512_cmpunord_ps_mask(a, b); + + assert_eq!(m, 0b11111010_11111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpunord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let mask = 0b00001111_00001111; + let m = _mm512_mask_cmpunord_ps_mask(mask, a, b); + assert_eq!(m, 0b000001010_00001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS); + assert_eq!(m, 0); + let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_round_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_round_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 0); + let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS); + assert_eq!(m, 0); + let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_round_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_round_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 0); + let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epu32_mask(a, b); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmplt_epu32_mask(mask, a, b); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmpgt_epu32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmple_epu32_mask(a, b), + !_mm512_cmpgt_epu32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!( + _mm512_mask_cmple_epu32_mask(mask, a, b), + 0b01111010_01111010 ); - assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_maskz_abs_epi32() { + unsafe fn test_mm512_cmpge_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmpge_epu32_mask(a, b), + !_mm512_cmplt_epu32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpeq_epu32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epu32_mask() { #[rustfmt::skip] - let a =
_mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - ); - let r = _mm512_maskz_abs_epi32(0, a); - assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_abs_epi32(0b11111111, a); - let e = _mm512_setr_epi32( - 0, - 1, - 1, - i32::MAX, - i32::MAX.wrapping_add(1), - 100, - 100, - 32, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ); - assert_eq_m512i(r, e); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32gather_ps() { - let mut arr = [0f32; 256]; - for i in 0..256 { - arr[i] = i as f32; - } - // A multiplier of 4 is word-addressing + unsafe fn test_mm512_cmpneq_epu32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176); - let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); #[rustfmt::skip] - assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112., - 120., 128., 136., 144., 152., 160., 168., 176.)); + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpneq_epu32_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32gather_ps() { - let mut arr = [0f32; 256]; - for i in 0..256 { - arr[i] = i as f32; - } - let src = _mm512_set1_ps(2.); - let mask = 0b10101010_10101010; + unsafe fn test_mm512_mask_cmpneq_epu32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176); - // A multiplier of 4 is word-addressing - let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); #[rustfmt::skip] - assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112., - 2., 128., 2., 144., 2., 160., 2., 176.)); + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32gather_epi32() { - let mut arr = [0i32; 256]; - for i in 0..256 { - arr[i] = i as i32; - } - // A multiplier of 4 is word-addressing + unsafe fn test_mm512_cmp_epu32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176); - let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_mask_cmp_epu32_mask() { #[rustfmt::skip] - assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176)); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT); + assert_eq!(r, 0b01001010_01001010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32gather_epi32() { - let mut arr = [0i32; 256]; - for i in 0..256 { - arr[i] = i as i32; - } - let src = _mm512_set1_epi32(2); - let mask = 0b10101010_10101010; + unsafe fn test_mm512_cmplt_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - // A multiplier of 4 is word-addressing - let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epi32_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi32_mask() { #[rustfmt::skip] - assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, - 2, 144, 2, 176, 2, 208, 2, 240)); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32scatter_ps() { - let mut arr = [0f32; 256]; + unsafe fn test_mm512_cmpgt_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_ps( - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., - ); - // A multiplier of 4 is word-addressing - _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4); - let mut expected = [0f32; 256]; - for i in 0..16 { - expected[i * 16] = (i + 1) as f32; - } - assert_eq!(&arr[..], &expected[..],); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmpgt_epi32_mask(b, a); + assert_eq!(m, 0b00000101_00000101); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32scatter_ps() { - let mut arr = [0f32; 256]; - let mask = 0b10101010_10101010; + unsafe fn test_mm512_mask_cmpgt_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_ps( - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., - ); - // A multiplier of 4 is word-addressing - _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); - let mut expected = [0f32; 256]; - for i in 0..8 { - expected[i * 32 + 16] = 2. 
* (i + 1) as f32; - } - assert_eq!(&arr[..], &expected[..],); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a); + assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32scatter_epi32() { - let mut arr = [0i32; 256]; + unsafe fn test_mm512_cmple_epi32_mask() { #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmple_epi32_mask(a, b), + !_mm512_cmpgt_epi32_mask(a, b) + ) + } - - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - // A multiplier of 4 is word-addressing - _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4); - let mut expected = [0i32; 256]; - for i in 0..16 { - expected[i * 16] = (i + 1) as i32; - } - assert_eq!(&arr[..], &expected[..],); + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32scatter_epi32() { - let mut arr = [0i32; 256]; - let mask = 0b10101010_10101010; + unsafe fn test_mm512_cmpge_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - // A multiplier of 4 is word-addressing - _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); - let mut expected = [0i32; 256]; - for i in 0..8 { - expected[i * 32 + 16] = 2 * (i + 1) as i32; - } - assert_eq!(&arr[..], &expected[..],); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmpge_epi32_mask(a, b), + !_mm512_cmplt_epi32_mask(a, b) + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_ps_mask() { + unsafe fn test_mm512_mask_cmpge_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmplt_ps_mask(a, b); - assert_eq!(m, 0b00000101_00000101); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!( + _mm512_mask_cmpge_epi32_mask(mask, a, b), + 0b01111010_01111010 + ); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmplt_ps_mask() { + unsafe fn test_mm512_cmpeq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42,
i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpeq_epi32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmplt_ps_mask(mask, a, b); - assert_eq!(r, 0b00000100_00000100); + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpnlt_ps_mask() { + unsafe fn test_mm512_cmpneq_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b)); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpneq_epi32_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpnlt_ps_mask() { + unsafe fn test_mm512_mask_cmpneq_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100, + 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010); + let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpnle_ps_mask() { + unsafe fn test_mm512_cmp_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmpnle_ps_mask(b, a); - assert_eq!(m, 0b00001101_00001101); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); + assert_eq!(m, 0b00000101_00000101); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpnle_ps_mask() { + unsafe fn test_mm512_mask_cmp_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); let mask = 
0b01100110_01100110; - let r = _mm512_mask_cmpnle_ps_mask(mask, b, a); + let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT); assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmple_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101); + unsafe fn test_mm512_set_epi32() { + let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmple_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000); + unsafe fn test_mm512_setr_epi32() { + let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpeq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let m = _mm512_cmpeq_ps_mask(b, a); - assert_eq!(m, 0b11001101_11001101); + unsafe fn test_mm512_set1_epi32() { + let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, _mm512_set1_epi32(2)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpeq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpeq_ps_mask(mask, b, a); - assert_eq!(r, 0b01001000_01001000); + unsafe fn test_mm512_setzero_si512() { + assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512()); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpneq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let m = _mm512_cmpneq_ps_mask(b, a); - assert_eq!(m, 0b00110010_00110010); + unsafe fn test_mm512_set_ps() { + let r = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512( + r, + _mm512_set_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpneq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b 
= _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpneq_ps_mask(mask, b, a); - assert_eq!(r, 0b00110010_00110010) + unsafe fn test_mm512_setr_ps() { + let r = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512( + r, + _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_ps_mask() { + unsafe fn test_mm512_set1_ps() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); - assert_eq!(m, 0b00000101_00000101); + let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., 2., 2., 2.); + assert_eq_m512(expected, _mm512_set1_ps(2.)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_setzero_ps() { + assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_round_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_loadu_pd() { + let a = &[4., 3., 2., 5., 8., 9., 64., 50.]; + let p = a.as_ptr(); + let r = _mm512_loadu_pd(black_box(p)); + let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m512d(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_round_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_storeu_pd() { + let a = _mm512_set1_pd(9.); + let mut r = _mm512_undefined_pd(); + _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m512d(r, a); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let m = _mm512_cmpord_ps_mask(a, b); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_loadu_ps() { + let a = &[ + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ]; + let p = a.as_ptr(); + let r = _mm512_loadu_ps(black_box(p)); + let e = _mm512_setr_ps( + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ); 
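+ // note: _mm512_loadu_ps is the unaligned-load form, so reading straight from a slice pointer is fine; lanes come back in memory order, matching _mm512_setr_ps.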
+ assert_eq_m512(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let mask = 0b11000011_11000011; - let m = _mm512_mask_cmpord_ps_mask(mask, a, b); - assert_eq!(m, 0b00000001_00000001); + unsafe fn test_mm512_storeu_ps() { + let a = _mm512_set1_ps(9.); + let mut r = _mm512_undefined_ps(); + _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a); + assert_eq_m512(r, a); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpunord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let m = _mm512_cmpunord_ps_mask(a, b); + unsafe fn test_mm512_setr_pd() { + let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + } - assert_eq!(m, 0b11111010_11111010); + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_pd() { + let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpunord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let mask = 0b00001111_00001111; - let m = _mm512_mask_cmpunord_ps_mask(mask, a, b); - assert_eq!(m, 0b000001010_00001010); + unsafe fn test_mm512_rol_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_rol_epi32(a, 1); + let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_mask_rol_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_rol_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS); - assert_eq!(m, 0); - let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_maskz_rol_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let r = _mm512_maskz_rol_epi32(0, a, 1); + 
assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_round_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn test_mm512_ror_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_ror_epi32(a, 1); + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_round_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 0); - let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn test_mm512_mask_ror_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_ror_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_maskz_ror_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let r = _mm512_maskz_ror_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS); - assert_eq!(m, 0); - let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_slli_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_slli_epi32(a, 1); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_round_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn test_mm512_mask_slli_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_slli_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_round_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 0); - let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn 
test_mm512_maskz_slli_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let r = _mm512_maskz_slli_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmplt_epu32_mask(a, b); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_srli_epi32(a, 1); + let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmplt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmplt_epu32_mask(mask, a, b); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_mask_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_srli_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpgt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmpgt_epu32_mask(b, a); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_maskz_srli_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let r = _mm512_maskz_srli_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0<<31); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpgt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_rolv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_rolv_epi32(a, b); + + let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmple_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmple_epu32_mask(a, b), - 
!_mm512_cmpgt_epu32_mask(a, b) - ) + unsafe fn test_mm512_mask_rolv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1); + + let r = _mm512_mask_rolv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b); + + let e = _mm512_set_epi32(1<<0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmple_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - assert_eq!( - _mm512_mask_cmple_epu32_mask(mask, a, b), - 0b01111010_01111010 - ); + unsafe fn test_mm512_maskz_rolv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_rolv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpge_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmpge_epu32_mask(a, b), - !_mm512_cmplt_epu32_mask(a, b) - ) + unsafe fn test_mm512_rorv_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_rorv_epi32(a, b); + + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpge_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000); + unsafe fn test_mm512_mask_rorv_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_rorv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b); + + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpeq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpeq_epu32_mask(b, a); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_maskz_rorv_epi32() { + let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = 
_mm512_maskz_rorv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpeq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_sllv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_sllv_epi32(a, count); + + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpneq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpneq_epu32_mask(b, a); - assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a)); + unsafe fn test_mm512_mask_sllv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_sllv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count); + + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpneq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a); - assert_eq!(r, 0b00110010_00110010); + unsafe fn test_mm512_maskz_sllv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_sllv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_srlv_epi32(a, count); + + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_mask_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_srlv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count); + + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmplt_epi32_mask(a, b); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_maskz_srlv_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_srlv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_sll_epi32(a, count); + let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmplt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmplt_epi32_mask(mask, a, b); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_mask_sll_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sll_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpgt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmpgt_epi32_mask(b, a); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_maskz_sll_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let count = _mm_set_epi32(2, 0, 0, 2); 
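+ // the shift amount is taken from the low 64 bits of `count` (2 here): 1<<31 shifted left by 2 overflows to 0, and maskz zeroes the deselected upper lanes, so the whole result is zero.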
+ let r = _mm512_maskz_sll_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpgt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_srl_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_srl_epi32(a, count); + let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmple_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmple_epi32_mask(a, b), - !_mm512_cmpgt_epi32_mask(a, b) - ) + unsafe fn test_mm512_mask_srl_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_srl_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmple_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000); + unsafe fn test_mm512_maskz_srl_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_srl_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<29); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpge_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmpge_epi32_mask(a, b), - !_mm512_cmplt_epi32_mask(a, b) - ) + unsafe fn test_mm512_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm_set_epi32(1, 0, 0, 2); + let r = _mm512_sra_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpge_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - 
assert_eq!( - _mm512_mask_cmpge_epi32_mask(mask, a, b), - 0b01111010_01111010 - ); + unsafe fn test_mm512_mask_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sra_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpeq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpeq_epi32_mask(b, a); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_maskz_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_sra_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpeq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm512_srav_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpneq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpneq_epi32_mask(b, a); - assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a)); + unsafe fn test_mm512_mask_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let r = _mm512_mask_srav_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpneq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100, - 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a); - assert_eq!(r, 
0b00110010_00110010) + unsafe fn test_mm512_maskz_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2); + let r = _mm512_maskz_srav_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); + let r = _mm512_srai_epi32(a, 2); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_mask_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_mask_srai_epi32(a, 0, a, 2); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_maskz_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_maskz_srai_epi32(0, a, 2); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_and_epi32(a, b); + let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set_epi32() { - let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - assert_eq_m512i( - r, - _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), - ) + unsafe fn test_mm512_mask_and_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_mask_and_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setr_epi32() { - let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - 
assert_eq_m512i( - r, - _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), - ) + unsafe fn test_mm512_maskz_and_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_maskz_and_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_si512() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_and_epi32(a, b); + let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set1_epi32() { - let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - assert_eq_m512i(r, _mm512_set1_epi32(2)); + unsafe fn test_mm512_or_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_or_epi32(a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setzero_si512() { - assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512()); + unsafe fn test_mm512_mask_or_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_mask_or_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set_ps() { - let r = _mm512_setr_ps( - 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., - ); - assert_eq_m512( - r, - _mm512_set_ps( - 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., - ), - ) + unsafe fn test_mm512_maskz_or_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_maskz_or_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_or_si512() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_or_epi32(a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setr_ps() { - let r = _mm512_set_ps( - 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., - ); - assert_eq_m512( - r, - 
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setr_ps() {
- let r = _mm512_set_ps(
- 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
- );
- assert_eq_m512(
- r,
- _mm512_setr_ps(
- 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
- ),
- )
+ unsafe fn test_mm512_xor_epi32() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_xor_epi32(a, b);
+ let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set1_ps() {
- #[rustfmt::skip]
- let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
- 2., 2., 2., 2., 2., 2., 2., 2.);
- assert_eq_m512(expected, _mm512_set1_ps(2.));
+ unsafe fn test_mm512_mask_xor_epi32() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_mask_xor_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
+ let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setzero_ps() {
- assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
+ unsafe fn test_mm512_maskz_xor_epi32() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_maskz_xor_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_xor_si512(a, b);
+ let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_loadu_pd() {
- let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
- let p = a.as_ptr();
- let r = _mm512_loadu_pd(black_box(p));
- let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm512_kand() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b11001100_00110011;
+ let r = _mm512_kand(a, b);
+ let e: u16 = 0b11001100_00110011;
+ assert_eq!(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_storeu_pd() {
- let a = _mm512_set1_pd(9.);
- let mut r = _mm512_undefined_pd();
- _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
- assert_eq_m512d(r, a);
+ unsafe fn test_kand_mask16() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b11001100_00110011;
+ let r = _kand_mask16(a, b);
+ let e: u16 = 0b11001100_00110011;
+ assert_eq!(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_loadu_ps() {
- let a = &[
- 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
- ];
- let p = a.as_ptr();
- let r = _mm512_loadu_ps(black_box(p));
- let e = _mm512_setr_ps(
- 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
- );
- assert_eq_m512(r, e);
+ unsafe fn test_mm512_kor() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b00101110_00001011;
+ let r = _mm512_kor(a, b);
+ let e: u16
= 0b11101110_00111011; + assert_eq!(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_storeu_ps() { - let a = _mm512_set1_ps(9.); - let mut r = _mm512_undefined_ps(); - _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a); - assert_eq_m512(r, a); + unsafe fn test_kor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kor_mask16(a, b); + let e: u16 = 0b11101110_00111011; + assert_eq!(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setr_pd() { - let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); - assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + unsafe fn test_mm512_kxor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kxor(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set_pd() { - let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); - assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + unsafe fn test_kxor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kxor_mask16(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); } + } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index ce2970ee51..da728413e7 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -41,6 +41,8 @@ pub unsafe fn _mm512_setr_epi64( transmute(r) } + + #[cfg(test)] mod tests { use std; @@ -854,4 +856,554 @@ mod tests { } assert_eq!(&arr[..], &expected[..],); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_rol_epi64(a, 1); + let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rol_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_rol_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let r = _mm512_maskz_rol_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 1<<0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_ror_epi64(a, 1); + let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_ror_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi64() { + let a = 
_mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let r = _mm512_maskz_ror_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 1<<63 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_slli_epi64(a, 1); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_slli_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_slli_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_slli_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_slli_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let r = _mm512_maskz_slli_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_slli_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_srli_epi64(a, 1); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_srli_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srli_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let r = _mm512_maskz_srli_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srli_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_rolv_epi64(a, b); + let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_rolv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<62 ); + let b = 
_mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 2 ); + let r = _mm512_maskz_rolv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 1<<0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_rorv_epi64(a, b); + let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_rorv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 2 ); + let r = _mm512_maskz_rorv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 1<<62 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 2, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_sllv_epi64(a, count); + let e = _mm512_set_epi64( 1<<32, 0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_sllv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 1<<32, 1<<33, 0, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 1 ); + let r = _mm512_maskz_sllv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_srlv_epi64(a, count); + let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_srlv_epi64(a, 0, a, count); + assert_eq_m512i(r, 
a); + + let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srlv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_maskz_srlv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_sll_epi64(a, count); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + + let count = _mm_set_epi64x(1, 0); + let r = _mm512_sll_epi64(a, count); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_sll_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_sll_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sll_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_srl_epi64(a, count); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_srl_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_srl_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srl_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_sra_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm512_mask_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_mask_sra_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_maskz_sra_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sra_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_srav_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_mask_srav_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_maskz_srav_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srav_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_srai_epi64(a, 2); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_mask_srai_epi64(a, 0, a, 2); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srai_epi64(a, 0b11111111, a, 2); + let e = _mm512_set_epi64( 0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_maskz_srai_epi64(0, a, 2); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srai_epi64(0b00001111, a, 2); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi64() { + let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let r = _mm512_and_epi64(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_and_epi64() { + let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let r = _mm512_mask_and_epi64(a, 0, a, b); + let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + 
assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_and_epi64(a, 0b01111111, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_and_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_maskz_and_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_and_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_and_si512() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_and_si512(a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_or_epi64(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_or_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_mask_or_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_or_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_or_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_maskz_or_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_or_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_si512() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_or_si512(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_xor_epi64(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_xor_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_mask_xor_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
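// The _si512 tests below may look redundant next to the _epi32/_epi64 ones:
// for AND/OR/XOR the lane split is irrelevant, only the 512 raw bits matter.
// A scalar sketch of that equivalence (plain Rust, not the stdarch API):
fn xor_bits_model(a: [u64; 8], b: [u64; 8]) -> [u64; 8] {
    let mut dst = [0u64; 8];
    for i in 0..8 {
        // Identical bits come out whether the register is viewed as 8 x i64
        // (_mm512_xor_epi64) or as an opaque __m512i (_mm512_xor_si512).
        dst[i] = a[i] ^ b[i];
    }
    dst
}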
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_xor_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_maskz_xor_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_xor_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_xor_si512(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
 }

From a77dfebdecbb7e3194bddfe9fed1531cbb516e03 Mon Sep 17 00:00:00 2001
From: jironglin
Date: Wed, 19 Aug 2020 23:45:29 +0000
Subject: [PATCH 2/9] add rustc_args_required_const

---
 crates/core_arch/src/x86/avx512f.rs | 145 ++++++++++++++++++++++++-
 crates/core_arch/src/x86_64/avx512f.rs | 66 +++++++++++
 2 files changed, 206 insertions(+), 5 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index dfcc7e7bc1..72e4fd4017 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -49,6 +49,44 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
 transmute(simd_select_bitmask(k, abs, zero))
 }
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi64&expand=48)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
+ let a = a.as_i64x8();
+ // all-0 is a properly initialized i64x8
+ let zero: i64x8 = mem::zeroed();
+ let sub = simd_sub(zero, a);
+ let cmp: i64x8 = simd_gt(a, zero);
+ transmute(simd_select(cmp, a, sub))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi64&expand=49)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+ let abs = _mm512_abs_epi64(a).as_i64x8();
+ transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi64&expand=50)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
+ let abs = _mm512_abs_epi64(a).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, abs, zero))
+}
+
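// A scalar model of the select-based absolute value above (a sketch, not
// the stdarch API): each lane becomes x > 0 ? x : 0 - x, so 0 stays 0 and
// i64::MIN wraps to itself, matching the vpabsq instruction.
fn abs_epi64_model(a: [i64; 8]) -> [i64; 8] {
    a.map(|x| if x > 0 { x } else { x.wrapping_neg() })
}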
/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
@@ -893,6 +931,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprord))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i {
 transmute(vprord(a.as_i32x16(), imm8 as i8))
 }
@@ -903,6 +942,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprord))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
@@ -914,6 +954,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprord))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -926,6 +967,7 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprolq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i {
 transmute(vprolq(a.as_i64x8(), imm8 as i8))
 }
@@ -936,6 +978,7 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprolq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
@@ -947,6 +990,7 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprolq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
@@ -959,6 +1003,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprorq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i {
 transmute(vprorq(a.as_i64x8(), imm8 as i8))
 }
@@ -969,6 +1014,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprorq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
@@ -980,6 +1026,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprorq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
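// Per-lane model of the rotate-by-immediate family annotated above (plain
// Rust, not the stdarch API): vprold/vprorq rotate within each lane, so the
// immediate is effectively taken modulo the lane width.
fn ror_epi32_model(a: [u32; 16], imm8: u8) -> [u32; 16] {
    a.map(|x| x.rotate_right(u32::from(imm8) % 32))
}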
@@ -992,6 +1039,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsllid))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsllid(a.as_i32x16(), imm8))
 }
@@ -1002,6 +1050,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsllid))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -1013,6 +1062,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsllid))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -1025,6 +1075,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrlid))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsrlid(a.as_i32x16(), imm8))
 }
@@ -1035,6 +1086,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrlid))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -1046,6 +1098,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrlid))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -1058,6 +1111,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpslliq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpslliq(a.as_i64x8(), imm8))
 }
@@ -1068,6 +1122,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpslliq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -1079,6 +1134,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpslliq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
@@ -1091,6 +1147,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrliq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsrliq(a.as_i64x8(), imm8))
 }
@@ -1101,6 +1158,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrliq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -1112,6 +1170,7 @@ pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrliq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
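// Per-lane model of the shift-by-immediate family above (a sketch, not the
// stdarch API): unlike Rust's scalar << and >>, the vector shifts are well
// defined for large counts; a count of lane width or more yields 0.
fn slli_epi32_model(a: [u32; 16], imm8: u32) -> [u32; 16] {
    a.map(|x| if imm8 < 32 { x << imm8 } else { 0 })
}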
@@ -1322,6 +1399,7 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraid))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsraid(a.as_i32x16(), imm8))
 }
@@ -1332,6 +1410,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraid))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -1343,6 +1422,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraid))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -1355,6 +1435,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraiq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsraiq(a.as_i64x8(), imm8))
 }
@@ -1365,6 +1446,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraiq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -1376,6 +1458,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraiq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
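// Per-lane model of the arithmetic right shifts above (a sketch, not the
// stdarch API): vpsraid/vpsraiq replicate the sign bit, and a count of lane
// width or more behaves like a shift by width - 1 (all sign bits).
fn srai_epi32_model(a: [i32; 16], imm8: u32) -> [i32; 16] {
    a.map(|x| x >> imm8.min(31))
}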
@@ -1996,15 +2135,11 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable
= "avx512f")] #[cfg_attr(test, assert_instr(kxorw))] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { kxorw(a, b) } - - - - - /// Sets packed 32-bit integers in `dst` with the supplied values. /// /// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index da728413e7..74077c8023 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -51,6 +51,72 @@ mod tests { use crate::core_arch::x86::*; use crate::core_arch::x86_64::*; + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi64() { + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_abs_epi64(a); + let e = _mm512_setr_epi64( + 0, + 1, + 1, + i64::MAX, + i64::MAX.wrapping_add(1), + 100, + 100, + 32 + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi64() { + #[rustfmt::skip] + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_mask_abs_epi64(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi64(a, 0b11111111, a); + let e = _mm512_setr_epi64( + 0, + 1, + 1, + i64::MAX, + i64::MAX.wrapping_add(1), + 100, + 100, + 32 + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi64() { + #[rustfmt::skip] + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_maskz_abs_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi64(0b01111111, a); + let e = _mm512_setr_epi64( + 0, + 1, + 1, + i64::MAX, + i64::MAX.wrapping_add(1), + 100, + 100, + 0 + ); + assert_eq_m512i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_setzero_pd() { assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.)); From 8c74b076120e1acc87a101645cc4a74ad1b3b6fc Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 20 Aug 2020 13:10:34 +0000 Subject: [PATCH 3/9] run cargo fmt to fix the CI errors --- crates/core_arch/src/aarch64/neon/mod.rs | 12 +- crates/core_arch/src/arm/neon/mod.rs | 10 +- crates/core_arch/src/simd.rs | 678 ++++++++++++-- crates/core_arch/src/x86/avx512f.rs | 1081 ++++++++++++++++++---- crates/core_arch/src/x86_64/avx512f.rs | 753 ++++++++++++--- 5 files changed, 2110 insertions(+), 424 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 0c73e5935d..adc653e31c 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -88,7 +88,7 @@ extern "C" { fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - + #[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"] fn vaddv_s16_(a: int16x4_t) -> i16; #[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"] @@ -1826,9 +1826,13 @@ mod tests { #[simd_test(enable = "neon")] unsafe fn test_vpaddq_s8() { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = i8x16::new(0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15); + let b = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15, + ); let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b))); - let e = 
i8x16::new(3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29); + let e = i8x16::new( + 3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29, + ); assert_eq!(r, e); } #[simd_test(enable = "neon")] @@ -2829,7 +2833,7 @@ mod tests { let e = i64x2::new(i64::MIN, i64::MAX); assert_eq!(r, e); } - + #[simd_test(enable = "neon")] unsafe fn test_vaddv_s16() { let a = i16x4::new(1, 2, 3, -4); diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs index c006ea70d4..43fa753cc3 100644 --- a/crates/core_arch/src/arm/neon/mod.rs +++ b/crates/core_arch/src/arm/neon/mod.rs @@ -175,7 +175,7 @@ extern "C" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")] fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -299,7 +299,7 @@ pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vpadd_s8_(a,b) + vpadd_s8_(a, b) } /// Add pairwise. #[inline] @@ -308,7 +308,7 @@ pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - transmute(vpadd_s16_(transmute(a),transmute(b))) + transmute(vpadd_s16_(transmute(a), transmute(b))) } /// Add pairwise. #[inline] @@ -317,7 +317,7 @@ pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - transmute(vpadd_s32_(transmute(a),transmute(b))) + transmute(vpadd_s32_(transmute(a), transmute(b))) } /// Add pairwise. #[inline] @@ -326,7 +326,7 @@ pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - transmute(vpadd_s8_(transmute(a),transmute(b))) + transmute(vpadd_s8_(transmute(a), transmute(b))) } /// Unsigned saturating extract narrow. 
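// Not part of the patch: a scalar model of the pairwise add exercised by
// test_vpaddq_s8 above, assuming vpaddq_s8 sums adjacent pairs of a into the
// low half of the result and adjacent pairs of b into the high half.
fn pairwise_add_i8x16(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
    let mut r = [0i8; 16];
    for i in 0..8 {
        r[i] = a[2 * i].wrapping_add(a[2 * i + 1]); // pairs of a -> low half
        r[i + 8] = b[2 * i].wrapping_add(b[2 * i + 1]); // pairs of b -> high half
    }
    r
}
// Feeding the test vectors above through this model reproduces the expected
// lanes (3, 7, 11, ..., -29).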
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 202df0143c..4b71d6c2bf 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -90,16 +90,44 @@ simd_ty!(i16x2[i16]: i16, i16 | x0, x1); // 64-bit wide types: -simd_ty!(u8x8[u8]: - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + u8x8[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3); simd_ty!(u32x2[u32]: u32, u32 | x0, x1); simd_ty!(u64x1[u64]: u64 | x1); -simd_ty!(i8x8[i8]: - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i8x8[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3); simd_ty!(i32x2[i32]: i32, i32 | x0, x1); simd_ty!(i64x1[i64]: i64 | x1); @@ -108,116 +136,576 @@ simd_ty!(f32x2[f32]: f32, f32 | x0, x1); // 128-bit wide types: -simd_ty!(u8x16[u8]: - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + u8x16[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u16x8[u16]: u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(u16x8[u16]: - u16, u16, u16, u16, u16, u16, u16, u16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3); simd_ty!(u64x2[u64]: u64, u64 | x0, x1); -simd_ty!(i8x16[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + i8x16[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i16x8[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(i16x8[i16]: - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_ty!(i64x2[i64]: i64, i64 | x0, x1); simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3); simd_ty!(f64x2[f64]: f64, f64 | x0, x1); -simd_m_ty!(m8x16[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_m_ty!( + m8x16[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_m_ty!( + m16x8[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_m_ty!(m16x8[i16]: - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1); // 256-bit wide types: -simd_ty!(u8x32[u8]: - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, 
x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 -); -simd_ty!(u16x16[u16]: - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 -); -simd_ty!(u32x8[u32]: - u32, u32, u32, u32, u32, u32, u32, u32 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + u8x32[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + u16x16[u16]: u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u32x8[u32]: u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3); -simd_ty!(i8x32[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 -); -simd_ty!(i16x16[i16]: - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 -); -simd_ty!(i32x8[i32]: - i32, i32, i32, i32, i32, i32, i32, i32 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i8x32[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + i16x16[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i32x8[i32]: i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3); -simd_ty!(f32x8[f32]: - f32, f32, f32, f32, f32, f32, f32, f32 | - x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + f32x8[f32]: f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); // 512-bit wide types: -simd_ty!(i32x16[i32]: - i32, i32, i32, i32, i32, i32, i32, i32, - i32, i32, i32, i32, i32, i32, i32, i32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); - -simd_ty!(u32x16[u32]: - u32, u32, u32, u32, u32, u32, u32, u32, - u32, u32, u32, u32, u32, u32, u32, u32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); - -simd_ty!(f32x16[f32]: - f32, f32, f32, f32, f32, f32, f32, f32, - f32, f32, f32, f32, f32, f32, f32, f32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); - -simd_ty!(i64x8[i64]: - i64, i64, i64, i64, i64, i64, i64, i64 - | x0, x1, x2, 
x3, x4, x5, x6, x7); - -simd_ty!(u64x8[u64]: - u64, u64, u64, u64, u64, u64, u64, u64 - | x0, x1, x2, x3, x4, x5, x6, x7); - -simd_ty!(f64x8[f64]: - f64, f64, f64, f64, f64, f64, f64, f64 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i32x16[i32]: i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + u32x16[u32]: u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + f32x16[f32]: f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + i64x8[i64]: i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +simd_ty!( + u64x8[u64]: u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +simd_ty!( + f64x8[f64]: f64, + f64, + f64, + f64, + f64, + f64, + f64, + f64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 72e4fd4017..3ec8ffffa0 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -902,7 +902,7 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i { transmute(vprold(a.as_i32x16(), imm8 as i8)) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683) #[inline] @@ -913,7 +913,7 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684) #[inline] @@ -936,7 +936,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i { transmute(vprord(a.as_i32x16(), imm8 as i8)) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719) #[inline] @@ -948,7 +948,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720) #[inline] @@ -972,7 +972,7 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i { transmute(vprolq(a.as_i64x8(), imm8 as i8)) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692) #[inline] @@ -984,7 +984,7 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693) #[inline] @@ -1008,7 +1008,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i { transmute(vprorq(a.as_i64x8(), imm8 as i8)) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
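// Not part of the patch: a per-lane sketch of the rol_/ror_ family, assuming
// (as in Intel's pseudocode) that the rotate count is taken modulo the lane
// width, which u32::rotate_left/rotate_right already do.
fn rol_epi32_lane(x: u32, imm8: u8) -> u32 {
    x.rotate_left(imm8 as u32) // rotate_right(...) models the ror_ family
}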
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728) #[inline] @@ -1020,7 +1020,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729) #[inline] @@ -1033,7 +1033,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m51 transmute(simd_select_bitmask(k, ror, zero)) } -/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310) #[inline] @@ -1044,7 +1044,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { transmute(vpsllid(a.as_i32x16(), imm8)) } -/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308) #[inline] @@ -1056,7 +1056,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309) #[inline] @@ -1069,7 +1069,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. 
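// Not part of the patch: the slli/srli immediates behave like ordinary
// logical shifts except that, per Intel's pseudocode, a count at or past the
// lane width clears the lane, which checked_shr/checked_shl expresses directly.
fn srli_epi32_lane(x: u32, imm8: u32) -> u32 {
    x.checked_shr(imm8).unwrap_or(0) // checked_shl(...) models slli_epi32
}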
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522) #[inline] @@ -1080,7 +1080,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { transmute(vpsrlid(a.as_i32x16(), imm8)) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520) #[inline] @@ -1092,7 +1092,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521) #[inline] @@ -1116,7 +1116,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { transmute(vpslliq(a.as_i64x8(), imm8)) } -/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317) #[inline] @@ -1128,7 +1128,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318) #[inline] @@ -1141,7 +1141,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531) #[inline] @@ -1152,7 +1152,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { transmute(vpsrliq(a.as_i64x8(), imm8)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529) #[inline] @@ -1177,7 +1177,7 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280) #[inline] @@ -1188,14 +1188,19 @@ pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } -/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sll_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sll_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1213,7 +1218,7 @@ pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492) #[inline] @@ -1224,14 +1229,19 @@ pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } -/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
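// Not part of the patch: unlike the slli/srli immediates, sll/srl read one
// shift count from the low 64 bits of a __m128i and apply it to every lane,
// and a count of 32 or more clears the whole vector. A hypothetical scalar
// model of that behaviour:
fn srl_epi32(a: [u32; 16], count: u64) -> [u32; 16] {
    let mut dst = [0u32; 16];
    for i in 0..16 {
        dst[i] = if count < 32 { a[i] >> count } else { 0 };
    }
    dst
}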
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_srl_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_srl_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1249,7 +1259,7 @@ pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289) #[inline] @@ -1267,12 +1277,17 @@ pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sll_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sll_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288) #[inline] @@ -1303,7 +1318,12 @@ pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_srl_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_srl_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1321,7 +1341,7 @@ pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407) #[inline] @@ -1332,14 +1352,19 @@ pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } -/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sra_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sra_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1357,7 +1382,7 @@ pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416) #[inline] @@ -1368,19 +1393,24 @@ pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } -/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sra_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sra_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415) #[inline] @@ -1404,7 +1434,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { transmute(vpsraid(a.as_i32x16(), imm8)) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
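// Not part of the patch: the sra/srai family shifts in copies of the sign
// bit rather than zeros, so once the count reaches the lane width the lane
// saturates to 0 or -1. A minimal sketch for a single i32 lane:
fn srai_epi32_lane(x: i32, imm8: u32) -> i32 {
    if imm8 < 32 { x >> imm8 } else { x >> 31 } // x >> 31 spreads the sign bit
}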
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434) #[inline] @@ -1416,7 +1446,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) #[inline] @@ -1429,7 +1459,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445) #[inline] @@ -1440,7 +1470,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { transmute(vpsraiq(a.as_i64x8(), imm8)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443) #[inline] @@ -1452,7 +1482,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444) #[inline] @@ -1465,7 +1495,7 @@ pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465) #[inline] @@ -1476,14 +1506,19 @@ pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_srav_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { let shf = _mm512_srav_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1501,7 +1536,7 @@ pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474) #[inline] @@ -1512,14 +1547,19 @@ pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
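// Not part of the patch: srav extends the arithmetic shift to a per-lane
// count, so lane i of a is shifted by lane i of count. A scalar sketch of
// the 64-bit variant under the same sign-saturation rule:
fn srav_epi64(a: [i64; 8], count: [u64; 8]) -> [i64; 8] {
    let mut dst = [0i64; 8];
    for i in 0..8 {
        dst[i] = if count[i] < 64 { a[i] >> count[i] } else { a[i] >> 63 };
    }
    dst
}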
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_srav_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m512i, +) -> __m512i { let shf = _mm512_srav_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1548,14 +1588,19 @@ pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_rolv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { let rol = _mm512_rolv_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } @@ -1573,7 +1618,7 @@ pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ transmute(simd_select_bitmask(k, rol, zero)) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739) #[inline] @@ -1584,19 +1629,24 @@ pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
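// Not part of the patch: rolv/rorv are the variable-count counterparts of
// rol/ror, rotating lane i of a by lane i of b, with the count again assumed
// to be reduced modulo the lane width.
fn rorv_epi32(a: [u32; 16], b: [u32; 16]) -> [u32; 16] {
    let mut dst = [0u32; 16];
    for i in 0..16 {
        dst[i] = a[i].rotate_right(b[i]); // rotate_left(...) models rolv_
    }
    dst
}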
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_rorv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { let ror = _mm512_rorv_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738) #[inline] @@ -1609,7 +1659,7 @@ pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ transmute(simd_select_bitmask(k, ror, zero)) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712) #[inline] @@ -1632,7 +1682,7 @@ pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711) #[inline] @@ -1645,7 +1695,7 @@ pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ transmute(simd_select_bitmask(k, rol, zero)) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748) #[inline] @@ -1656,7 +1706,7 @@ pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746) #[inline] @@ -1668,7 +1718,7 @@ pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747) #[inline] @@ -1692,19 +1742,24 @@ pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_sllv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { let shf = _mm512_sllv_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
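// Not part of the patch: sllv/srlv follow the same per-lane pattern but
// shift in zeros, so any lane whose count is 32 (or 64 for the epi64 forms)
// and above is cleared:
fn sllv_epi32(a: [u32; 16], count: [u32; 16]) -> [u32; 16] {
    let mut dst = [0u32; 16];
    for i in 0..16 {
        dst[i] = a[i].checked_shl(count[i]).unwrap_or(0);
    }
    dst
}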
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
#[inline]
@@ -1717,7 +1772,7 @@ pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i)
    transmute(simd_select_bitmask(k, shf, zero))
}
-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
#[inline]
@@ -1728,19 +1783,24 @@ pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
}
-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
-pub unsafe fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+pub unsafe fn _mm512_mask_srlv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
#[inline]
@@ -1753,7 +1813,7 @@ pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i)
    transmute(simd_select_bitmask(k, shf, zero))
}
-/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
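// The writemask/zeromask plumbing in all of these intrinsics is the same:
// compute the full-width result, then select per lane on the mask bits. A
// four-lane model of the `simd_select_bitmask` pattern (a sketch; the real
// function is a compiler intrinsic, not this code):
fn select_bitmask(k: u8, computed: [u32; 4], fallback: [u32; 4]) -> [u32; 4] {
    let mut out = [0u32; 4];
    for i in 0..4 {
        // Bit i of `k` picks `computed[i]`; otherwise the fallback lane wins.
        out[i] = if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] };
    }
    out
}

fn main() {
    // A zeromask is just a writemask whose fallback vector is all zeros.
    assert_eq!(select_bitmask(0b0101, [9, 9, 9, 9], [0, 0, 0, 0]), [9, 0, 9, 0]);
}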
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
#[inline]
@@ -1764,19 +1824,24 @@ pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
}
-/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
-pub unsafe fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+pub unsafe fn _mm512_mask_sllv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
#[inline]
@@ -1800,19 +1865,24 @@ pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}
-/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mask_srlv_epi64&expand=5561)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
-pub unsafe fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+pub unsafe fn _mm512_mask_srlv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562) #[inline] @@ -1825,7 +1895,7 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272) #[inline] @@ -1836,7 +1906,7 @@ pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Performs element-by-element bitwise AND between packed 32-bit integer elements of v2 and v3, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Performs element-by-element bitwise AND between packed 32-bit integer elements of v2 and v3, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273) #[inline] @@ -1861,7 +1931,7 @@ pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ transmute(simd_select_bitmask(k, and, zero)) } -/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. +/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279) #[inline] @@ -1897,7 +1967,7 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m transmute(simd_select_bitmask(k, and, zero)) } -/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. +/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302) #[inline] @@ -1908,7 +1978,7 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042) #[inline] @@ -1991,7 +2061,7 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } -/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. 
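// A note for reading the tests that follow: `_mm512_set_epi32(e15, ..., e0)`
// lists lanes from highest to lowest, while mask bit i selects lane i, so a
// mask like 0b00000000_11111111 keeps the *last* eight `set_epi32` arguments.
// A four-lane model with hypothetical helpers, for illustration only:
fn set4(e3: u32, e2: u32, e1: u32, e0: u32) -> [u32; 4] {
    [e0, e1, e2, e3] // lane 0 is stored first, like `_mm512_set_epi32`
}

fn main() {
    let a = set4(8, 4, 2, 1);
    let k: u8 = 0b0011; // selects lanes 0 and 1 ...
    let kept: Vec<u32> = (0..4usize)
        .filter(|&i| (k >> i) & 1 == 1)
        .map(|i| a[i])
        .collect();
    assert_eq!(kept, vec![1, 2]); // ... which are the last two `set4` arguments
}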
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142) #[inline] @@ -4808,67 +4878,67 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rol_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_rol_epi32(a, 1); - let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rol_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_rol_epi32(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); - let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rol_epi32() { - let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let r = _mm512_maskz_rol_epi32(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_ror_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let r = _mm512_ror_epi32(a, 1); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_ror_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let r = _mm512_mask_ror_epi32(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_ror_epi32() { - let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); let r = _mm512_maskz_ror_epi32(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_slli_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_slli_epi32(a, 1); 
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); @@ -4876,7 +4946,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_slli_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_slli_epi32(a, 0, a, 1); assert_eq_m512i(r, a); @@ -4887,7 +4957,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_slli_epi32() { - let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let r = _mm512_maskz_slli_epi32(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -4900,7 +4970,7 @@ mod tests { unsafe fn test_mm512_srli_epi32() { let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let r = _mm512_srli_epi32(a, 1); - let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -4911,7 +4981,7 @@ mod tests { assert_eq_m512i(r, a); let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1); - let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -4922,38 +4992,38 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0<<31); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rolv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_rolv_epi32(a, b); - let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rolv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_rolv_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b); - let e = _mm512_set_epi32(1<<0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rolv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_maskz_rolv_epi32(0, a, b); @@ -4961,24 +5031,24 @@ mod tests { let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let e = 
_mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rorv_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_rorv_epi32(a, b); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rorv_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_rorv_epi32(a, 0, a, b); @@ -4986,13 +5056,13 @@ mod tests { let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rorv_epi32() { - let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_maskz_rorv_epi32(0, a, b); @@ -5000,13 +5070,13 @@ mod tests { let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sllv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_sllv_epi32(a, count); @@ -5017,7 +5087,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sllv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_sllv_epi32(a, 0, a, count); @@ -5031,7 +5101,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sllv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_maskz_sllv_epi32(0, a, count); @@ -5084,28 +5154,113 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sll_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_sll_epi32(a, count); - let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32( + 
0, + 1 << 2, + 1 << 3, + 1 << 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sll_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_mask_sll_epi32(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count); - let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32( + 0, + 1 << 2, + 1 << 3, + 1 << 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sll_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 31, + ); let count = _mm_set_epi32(2, 0, 0, 2); let r = _mm512_maskz_sll_epi32(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -5117,34 +5272,85 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srl_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_srl_epi32(a, count); - let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srl_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_mask_srl_epi32(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count); - let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srl_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 31, + ); let count = _mm_set_epi32(2, 0, 0, 2); let r = _mm512_maskz_srl_epi32(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<29); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29); assert_eq_m512i(r, e); } @@ -5177,7 +5383,7 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); assert_eq_m512i(r, e); } @@ -5210,10 
+5416,10 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); @@ -5243,130 +5449,674 @@ mod tests { let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_and_epi32(a, b); - let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_and_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_mask_and_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_and_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_maskz_and_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_si512() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_and_epi32(a, b); - let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_or_epi32(a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_or_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_mask_or_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_or_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_maskz_or_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_si512() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_or_epi32(a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_xor_epi32(a, b); - let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32( + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_xor_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_mask_xor_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_xor_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_maskz_xor_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_si512() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_xor_epi32(a, b); - let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32( + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 1 << 1 | 1 << 4, + ); assert_eq_m512i(r, e); } @@ -5423,5 +6173,4 @@ mod tests { let e: u16 = 0b11100010_00111000; assert_eq!(r, e); } - } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 74077c8023..aa6857ecce 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -41,8 +41,6 @@ pub unsafe fn _mm512_setr_epi64( transmute(r) } - - #[cfg(test)] mod tests { use std; @@ -53,23 +51,11 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_abs_epi64() { - let a = _mm512_setr_epi64( - 0, 1, -1, i64::MAX, - i64::MIN, 100, -100, -32 - ); + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); let r = _mm512_abs_epi64(a); - let e = _mm512_setr_epi64( - 0, - 1, - 1, - i64::MAX, - i64::MAX.wrapping_add(1), - 100, - 100, - 32 - ); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); assert_eq_m512i(r, e); - } + } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_abs_epi64() { @@ -81,20 +67,11 @@ mod tests { let r = _mm512_mask_abs_epi64(a, 0, a); assert_eq_m512i(r, a); let r = _mm512_mask_abs_epi64(a, 0b11111111, a); - let e = _mm512_setr_epi64( - 0, - 1, - 1, - i64::MAX, - i64::MAX.wrapping_add(1), - 100, - 100, - 32 - ); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); assert_eq_m512i(r, e); } - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_abs_epi64() { #[rustfmt::skip] let a = _mm512_setr_epi64( @@ -104,19 +81,10 @@ mod tests { let r = _mm512_maskz_abs_epi64(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_abs_epi64(0b01111111, a); - let e = _mm512_setr_epi64( - 0, - 1, - 1, - i64::MAX, - i64::MAX.wrapping_add(1), - 100, - 100, - 0 - ); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 0); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_setzero_pd() { assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.)); @@ -925,262 +893,640 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rol_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_rol_epi64(a, 1); - let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 1 << 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rol_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_rol_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 1 << 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rol_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 
<< 63, + ); let r = _mm512_maskz_rol_epi64(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 1<<0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_ror_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_ror_epi64(a, 1); - let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 1 << 63, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_ror_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_ror_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 1 << 63, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_ror_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); let r = _mm512_maskz_ror_epi64(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 1<<63 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_slli_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_slli_epi64(a, 1); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_slli_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_slli_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_slli_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_slli_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); let r = _mm512_maskz_slli_epi64(0, a, 1); 
assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_slli_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srli_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_srli_epi64(a, 1); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srli_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_srli_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_srli_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srli_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); let r = _mm512_maskz_srli_epi64(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srli_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rolv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_rolv_epi64(a, b); - let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rolv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_rolv_epi64(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rolv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<62 ); - let b = _mm512_set_epi64( 
0, 1, 2, 3, 4, 5, 6, 2 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 62, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); let r = _mm512_maskz_rolv_epi64(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 1<<0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rorv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_rorv_epi64(a, b); - let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rorv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_rorv_epi64(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rorv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 2 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); let r = _mm512_maskz_rorv_epi64(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 1<<62 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sllv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 2, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7); let r = _mm512_sllv_epi64(a, count); - let e = _mm512_set_epi64( 1<<32, 0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sllv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 
1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_sllv_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 1<<32, 1<<33, 0, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 33, + 0, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sllv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 1 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1); let r = _mm512_maskz_sllv_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srlv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_srlv_epi64(a, count); - let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srlv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_srlv_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srlv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_maskz_srlv_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sll_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_sll_epi64(a, 
count); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); let count = _mm_set_epi64x(1, 0); @@ -1190,58 +1536,130 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sll_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_mask_sll_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sll_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_maskz_sll_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sll_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srl_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_srl_epi64(a, count); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srl_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_mask_srl_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srl_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_maskz_srl_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srl_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); assert_eq_m512i(r, e); } @@ -1253,7 +1671,7 @@ mod tests { let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } - + 
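// The expected values in the arithmetic-shift tests around here follow from
// sign extension: VPSRAQ shifts in copies of the sign bit. Rust's `>>` on a
// signed integer is also arithmetic, so the scalar analogue is direct:
fn main() {
    assert_eq!(-8i64 >> 2, -2); // sign bit is replicated
    assert_eq!(15i64 >> 2, 3);
    assert_eq!(-16i64 >> 2, -4);
}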
#[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sra_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); @@ -1274,7 +1692,7 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sra_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -4); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } @@ -1286,7 +1704,7 @@ mod tests { let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srav_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); @@ -1307,7 +1725,7 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srav_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -8); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8); assert_eq_m512i(r, e); } @@ -1326,7 +1744,7 @@ mod tests { assert_eq_m512i(r, a); let r = _mm512_mask_srai_epi64(a, 0b11111111, a, 2); - let e = _mm512_set_epi64( 0, -1, 3, 0, 0, 0, 0, -4); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); assert_eq_m512i(r, e); } @@ -1343,118 +1761,145 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_and_epi64(a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_and_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_mask_and_epi64(a, 0, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); let r = _mm512_mask_and_epi64(a, 0b01111111, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_and_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_maskz_and_epi64(0, a, b); let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); let r = _mm512_maskz_and_epi64(0b00001111, a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_si512() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a 
= _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_and_epi64(a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_or_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 2 | 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_or_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_mask_or_epi64(a, 0, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); let r = _mm512_mask_or_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 2 | 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_or_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_maskz_or_epi64(0, a, b); let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); let r = _mm512_maskz_or_epi64(0b00001111, a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_si512() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_or_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 2 | 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = 
_mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_xor_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_xor_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_mask_xor_epi64(a, 0, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_xor_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_maskz_xor_epi64(0, a, b); let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -1466,10 +1911,10 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_si512() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_xor_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } } From 601b615319ac3d1daad281a929e1bd75ba289da4 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 20 Aug 2020 16:57:49 +0000 Subject: [PATCH 4/9] fix rustc_args_required_const and imm8 --- crates/core_arch/src/x86/avx512f.rs | 260 ++++++++++++---------------- 1 file changed, 111 insertions(+), 149 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 3ec8ffffa0..19813ea0cb 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -898,8 +898,10 @@ pub unsafe fn _mm512_mask_i64scatter_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprold))] -pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i { - transmute(vprold(a.as_i32x16(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprold(a.as_i32x16(), imm8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
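The hunk above switches `_mm512_rol_epi32` from a plain `imm8: u8` parameter to `imm8: i32` behind `#[rustc_args_required_const(1)]`, so the rotate count must now be a compile-time constant at every call site, while the `assert!` still range-checks it. A caller-side sketch of the effect (hypothetical usage; assumes a nightly toolchain where these unstable intrinsics are exported from `core::arch::x86_64`):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx512f")]
    unsafe fn rol_example() {
        use core::arch::x86_64::*;
        let a = _mm512_set1_epi32(1);   // every 32-bit lane holds 0b01
        let r = _mm512_rol_epi32(a, 1); // literal count: each lane becomes 0b10
        // let n = 1;
        // let _ = _mm512_rol_epi32(a, n); // rejected: argument 1 is required to be a constant
    }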
@@ -908,8 +910,10 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprold))] -pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } @@ -919,8 +923,10 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprold))] -pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, rol, zero)) } @@ -931,9 +937,10 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m5 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprord))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i { - transmute(vprord(a.as_i32x16(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprord(a.as_i32x16(), imm8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
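Since the writemask and zeromask variants above differ only in their fallback lanes, a minimal sketch may help: `mask_` variants fall back to `src` in lanes whose mask bit is clear, while `maskz_` variants zero those lanes. This uses the `rol` pair from these hunks, under the same hypothetical nightly setup as the previous example:

    unsafe fn mask_vs_maskz() {
        use core::arch::x86_64::*;
        let a = _mm512_set1_epi32(1);
        let src = _mm512_set1_epi32(-1);
        // writemask: lanes 8..15 have clear mask bits and keep src's -1
        let m = _mm512_mask_rol_epi32(src, 0b00000000_11111111, a, 4);
        // zeromask: the same lanes come back as 0 instead
        let z = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 4);
        // lanes 0..7 of both results hold 1 << 4 == 16
    }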
@@ -942,9 +949,10 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprord))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } @@ -954,9 +962,10 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprord))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, ror, zero)) } @@ -967,9 +976,10 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m5 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolq))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i { - transmute(vprolq(a.as_i64x8(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprolq(a.as_i64x8(), imm8)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
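Every masked wrapper in this file reduces to the same two steps: compute all lanes, then blend per mask bit via `simd_select_bitmask`. A plain-Rust scalar model of that blend for the eight 64-bit lanes (an illustrative helper, not part of the patch):

    fn select_bitmask8(k: u8, computed: [i64; 8], fallback: [i64; 8]) -> [i64; 8] {
        let mut out = [0i64; 8];
        for i in 0..8 {
            // mask bit i selects lane i from the computed result, else the fallback
            out[i] = if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] };
        }
        out
    }

With `fallback = src` this models the `mask_` variants; with `fallback = [0; 8]` it models the `maskz_` variants.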
@@ -978,9 +988,10 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolq))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprolq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } @@ -990,9 +1001,10 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolq))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprolq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, rol, zero)) } @@ -1003,9 +1015,10 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m51 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorq))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i { - transmute(vprorq(a.as_i64x8(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprorq(a.as_i64x8(), imm8)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
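One point worth keeping in mind for the `slli`/`srli` hunks that follow: both rotates and immediate shifts accept any `imm8` up to 255 (hence the asserts), but per Intel's pseudocode a rotate count is reduced modulo the lane width, while a shift count at or beyond the lane width clears the lane. A scalar model of one 32-bit lane (illustrative only):

    fn rol32_lane(x: u32, imm8: u32) -> u32 {
        x.rotate_left(imm8 % 32) // rotation is modular in the lane width
    }

    fn slli32_lane(x: u32, imm8: u32) -> u32 {
        // counts of 32..=255 are valid encodings but shift out every bit
        if imm8 > 31 { 0 } else { x << imm8 }
    }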
@@ -1014,9 +1027,10 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorq))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprorq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } @@ -1026,9 +1040,10 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorq))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprorq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, ror, zero)) } @@ -1039,8 +1054,9 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m51 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllid))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsllid(a.as_i32x16(), imm8)) } @@ -1050,9 +1066,10 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllid))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsllid(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1062,9 +1079,10 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllid))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsllid(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1075,8 +1093,9 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlid))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsrlid(a.as_i32x16(), imm8)) } @@ -1086,9 +1105,10 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlid))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi32(a, imm8).as_i32x16(); + 
assert!(imm8 <= 255); + let shf = vpsrlid(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1098,21 +1118,23 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlid))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsrlid(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslliq))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpslliq(a.as_i64x8(), imm8)) } @@ -1122,9 +1144,10 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslliq))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpslliq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1134,9 +1157,10 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslliq))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpslliq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1147,8 +1171,9 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrliq))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsrliq(a.as_i64x8(), imm8)) } @@ -1158,21 +1183,23 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrliq))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsrliq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrliq))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsrliq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1183,7 +1210,6 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } @@ -1194,7 +1220,6 @@ pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sll_epi32( src: __m512i, k: __mmask16, @@ -1205,13 +1230,12 @@ pub unsafe fn _mm512_mask_sll_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1224,7 +1248,6 @@ pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } @@ -1235,7 +1258,6 @@ pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srl_epi32( src: __m512i, k: __mmask16, @@ -1246,13 +1268,12 @@ pub unsafe fn _mm512_mask_srl_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1265,7 +1286,6 @@ pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) } @@ -1276,7 +1296,6 @@ pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sll_epi64( src: __m512i, k: __mmask8, @@ -1293,7 +1312,6 @@ pub unsafe fn _mm512_mask_sll_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1306,7 +1324,6 @@ pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) } @@ -1317,7 +1334,6 @@ pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srl_epi64( src: __m512i, k: __mmask8, @@ -1328,13 +1344,12 @@ pub unsafe fn _mm512_mask_srl_epi64( transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5288) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1347,7 +1362,6 @@ pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } @@ -1358,7 +1372,6 @@ pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sra_epi32( src: __m512i, k: __mmask16, @@ -1369,13 +1382,12 @@ pub unsafe fn _mm512_mask_sra_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1388,7 +1400,6 @@ pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } @@ -1399,7 +1410,6 @@ pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sra_epi64( src: __m512i, k: __mmask8, @@ -1416,7 +1426,6 @@ pub unsafe fn _mm512_mask_sra_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1429,8 +1438,9 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraid))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsraid(a.as_i32x16(), imm8)) } @@ -1440,9 +1450,10 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraid))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsraid(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1452,9 +1464,10 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraid))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsraid(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1465,8 +1479,9 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraiq))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsraiq(a.as_i64x8(), imm8)) } @@ -1476,9 +1491,10 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraiq))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsraiq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1488,9 +1505,10 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraiq))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsraiq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1501,7 +1520,6 @@ pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } @@ -1512,7 +1530,6 @@ pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srav_epi32( src: __m512i, k: __mmask16, @@ -1523,13 +1540,12 @@ pub unsafe fn _mm512_mask_srav_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using 
zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1542,7 +1558,6 @@ pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } @@ -1553,7 +1568,6 @@ pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srav_epi64( src: __m512i, k: __mmask8, @@ -1570,7 +1584,6 @@ pub unsafe fn _mm512_mask_srav_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1583,7 +1596,6 @@ pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } @@ -1594,7 +1606,6 @@ pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rolv_epi32( src: __m512i, k: __mmask16, @@ -1611,7 +1622,6 @@ pub unsafe fn _mm512_mask_rolv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1624,7 +1634,6 @@ pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } @@ -1635,7 +1644,6 @@ pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rorv_epi32( src: __m512i, k: __mmask16, @@ -1652,7 +1660,6 @@ pub unsafe fn _mm512_mask_rorv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] 
-#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1665,7 +1672,6 @@ pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) } @@ -1676,7 +1682,6 @@ pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, rol, src.as_i64x8())) @@ -1688,7 +1693,6 @@ pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1701,7 +1705,6 @@ pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } @@ -1712,7 +1715,6 @@ pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, ror, src.as_i64x8())) @@ -1724,7 +1726,6 @@ pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1737,7 +1738,6 @@ pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } @@ -1748,7 +1748,6 @@ pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sllv_epi32( src: __m512i, k: __mmask16, @@ -1765,7 +1764,6 @@ pub unsafe fn _mm512_mask_sllv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ 
-1778,7 +1776,6 @@ pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } @@ -1789,7 +1786,6 @@ pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srlv_epi32( src: __m512i, k: __mmask16, @@ -1806,7 +1802,6 @@ pub unsafe fn _mm512_mask_srlv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1819,7 +1814,6 @@ pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } @@ -1830,7 +1824,6 @@ pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sllv_epi64( src: __m512i, k: __mmask8, @@ -1847,7 +1840,6 @@ pub unsafe fn _mm512_mask_sllv_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1860,7 +1852,6 @@ pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } @@ -1871,7 +1862,6 @@ pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srlv_epi64( src: __m512i, k: __mmask8, @@ -1888,7 +1878,6 @@ pub unsafe fn _mm512_mask_srlv_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1901,7 +1890,6 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } @@ -1912,7 +1900,6 @@ pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] 
-#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, and, src.as_i32x16())) @@ -1924,7 +1911,6 @@ pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1937,7 +1923,6 @@ pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) } @@ -1948,7 +1933,6 @@ pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, and, src.as_i64x8())) @@ -1960,7 +1944,6 @@ pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1973,7 +1956,6 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } @@ -1984,7 +1966,6 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } @@ -1995,7 +1976,6 @@ pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, or, src.as_i32x16())) @@ -2007,7 +1987,6 @@ pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -2020,7 +1999,6 @@ pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vporq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) 
-> __m512i { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) } @@ -2031,7 +2009,6 @@ pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vporq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, or, src.as_i64x8())) @@ -2043,7 +2020,6 @@ pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vporq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -2056,7 +2032,6 @@ pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m5 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } @@ -2067,7 +2042,6 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } @@ -2078,7 +2052,6 @@ pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, xor, src.as_i32x16())) @@ -2090,7 +2063,6 @@ pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -2103,7 +2075,6 @@ pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxorq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) } @@ -2114,7 +2085,6 @@ pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxorq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, xor, src.as_i64x8())) @@ -2126,7 +2096,6 @@ pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxorq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -2139,7 +2108,6 @@ pub unsafe fn 
_mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } @@ -2150,7 +2118,6 @@ pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kandw))] -#[rustc_args_required_const(2)] pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { kandw(a, b) } @@ -2161,7 +2128,6 @@ pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kandw))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { kandw(a, b) } @@ -2172,7 +2138,6 @@ pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(korw))] -#[rustc_args_required_const(2)] pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { korw(a, b) } @@ -2183,7 +2148,6 @@ pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(korw))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { korw(a, b) } @@ -2194,7 +2158,6 @@ pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kxorw))] -#[rustc_args_required_const(2)] pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { kxorw(a, b) } @@ -2205,7 +2168,6 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kxorw))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { kxorw(a, b) } @@ -3832,13 +3794,13 @@ extern "C" { fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; #[link_name = "llvm.x86.avx512.mask.prol.d.512"] - fn vprold(a: i32x16, shift: i8) -> i32x16; + fn vprold(a: i32x16, imm8: i32) -> i32x16; #[link_name = "llvm.x86.avx512.mask.pror.d.512"] - fn vprord(a: i32x16, shift: i8) -> i32x16; + fn vprord(a: i32x16, imm8: i32) -> i32x16; #[link_name = "llvm.x86.avx512.mask.prol.q.512"] - fn vprolq(a: i64x8, shift: i8) -> i64x8; + fn vprolq(a: i64x8, imm8: i32) -> i64x8; #[link_name = "llvm.x86.avx512.mask.pror.q.512"] - fn vprorq(a: i64x8, shift: i8) -> i64x8; + fn vprorq(a: i64x8, imm8: i32) -> i64x8; #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] fn vprolvd(a: i32x16, b: i32x16) -> i32x16; From 16b931ca86df019a87a706a779a06adb20cfda1a Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 20 Aug 2020 17:29:08 +0000 Subject: [PATCH 5/9] fix assert_instr --- crates/core_arch/src/x86/avx512f.rs | 58 ++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 19813ea0cb..fd973d8e0d 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -897,7 +897,7 @@ pub unsafe fn _mm512_mask_i64scatter_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, 
assert_instr(vprold))] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] #[rustc_args_required_const(1)] pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -909,7 +909,7 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold))] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] #[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -922,7 +922,7 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold))] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] #[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -936,7 +936,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprord))] +#[cfg_attr(test, assert_instr(vprord, imm8 = 1))] #[rustc_args_required_const(1)] pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -948,7 +948,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprord))] +#[cfg_attr(test, assert_instr(vprord, imm8 = 1))] #[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -961,7 +961,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprord))] +#[cfg_attr(test, assert_instr(vprord, imm8 = 1))] #[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -975,7 +975,7 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq))] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] #[rustc_args_required_const(1)] pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -987,7 +987,7 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692) #[inline] #[target_feature(enable = "avx512f")] 
-#[cfg_attr(test, assert_instr(vprolq))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1000,7 +1000,7 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8:
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1014,7 +1014,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq))]
+#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1026,7 +1026,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq))]
+#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1039,7 +1039,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8:
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq))]
+#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1053,7 +1053,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid))]
+#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1065,7 +1065,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid))]
+#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1078,7 +1078,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid))]
+#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1092,7 +1092,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid))]
+#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1104,7 +1104,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid))]
+#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1117,7 +1117,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid))]
+#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1131,7 +1131,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq))]
+#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1143,7 +1143,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq))]
+#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1156,7 +1156,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq))]
+#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1170,7 +1170,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq))]
+#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1182,7 +1182,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq))]
+#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1195,7 +1195,7 @@ pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq))]
+#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1449,7 +1449,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid))]
+#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1463,7 +1463,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid))]
+#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1478,7 +1478,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq))]
+#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1490,7 +1490,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq))]
+#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1504,7 +1504,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq))]
+#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);

From ae9e8bf70bbd972442029e1cec5d4b03fee1b2bc Mon Sep 17 00:00:00 2001
From: jironglin
Date: Thu, 20 Aug 2020 17:37:01 +0000
Subject: [PATCH 6/9] fix assert_instr 2

---
 crates/core_arch/src/x86/avx512f.rs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index fd973d8e0d..89f5db10a6 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -1437,7 +1437,7 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) ->
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid))]
+#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1454,7 +1454,6 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraid(a.as_i32x16(), imm8);
-    //let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
     transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }
@@ -1468,7 +1467,6 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraid(a.as_i32x16(), imm8);
-    //let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
     let zero = _mm512_setzero_si512().as_i32x16();
     transmute(simd_select_bitmask(k, shf, zero))
 }
@@ -1495,7 +1493,6 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraiq(a.as_i64x8(), imm8);
-    //let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
     transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
@@ -1509,7 +1506,6 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraiq(a.as_i64x8(), imm8);
-    //let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
     let zero = _mm512_setzero_si512().as_i64x8();
     transmute(simd_select_bitmask(k, shf, zero))
 }

From a9193d66ccae50fe6835ebe47062d892126a748e Mon Sep 17 00:00:00 2001
From: jironglin
Date: Thu, 20 Aug 2020 18:04:20 +0000
Subject: [PATCH 7/9] fix assert_instr 3

---
 crates/core_arch/src/x86/avx512f.rs | 36 ++++++++++++++---------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 89f5db10a6..0641dbf615 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -1053,7 +1053,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1065,7 +1065,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1078,7 +1078,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1092,7 +1092,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1104,7 +1104,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1117,7 +1117,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1131,7 +1131,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1143,7 +1143,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1156,7 +1156,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1170,7 +1170,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1182,7 +1182,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1195,7 +1195,7 @@ pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1437,7 +1437,7 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) ->
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1449,7 +1449,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1462,7 +1462,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1476,7 +1476,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1488,7 +1488,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1501,7 +1501,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);

From 0cc213995e111fb35f253b49e07298f0361502de Mon Sep 17 00:00:00 2001
From: jironglin
Date: Thu, 20 Aug 2020 20:24:36 +0000
Subject: [PATCH 8/9] change llvm.x86.avx512.mask.pror.d to llvm.x86.avx512.pror.d

---
 crates/core_arch/src/x86/avx512f.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 0641dbf615..977a177595 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -3791,7 +3791,7 @@ extern "C" {
     #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
     fn vprold(a: i32x16, i8: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
+    #[link_name = "llvm.x86.avx512.pror.d.512"]
     fn vprord(a: i32x16, i8: i32) -> i32x16;
     #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
     fn vprolq(a: i64x8, i8: i32) -> i64x8;

From e700382dcafc82d5b6488ae37de46f6a9a61479a Mon Sep 17 00:00:00 2001
From: jironglin
Date: Mon, 24 Aug 2020 18:38:29 +0000
Subject: [PATCH 9/9] fix to pass CI

---
 crates/core_arch/src/x86/avx512f.rs | 62 ++++++++++++++---------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 977a177595..2160744a5c 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -936,7 +936,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprord, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprold, imm8 = 233))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -948,7 +948,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprord, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -961,7 +961,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprord, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1014,7 +1014,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1026,7 +1026,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1039,7 +1039,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8:
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1053,7 +1053,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1065,7 +1065,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1078,7 +1078,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1131,7 +1131,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1143,7 +1143,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1156,7 +1156,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1885,7 +1885,7 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpandd))]
+#[cfg_attr(test, assert_instr(vpandq))]
 pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
 }
@@ -1951,7 +1951,7 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpandd))]
+#[cfg_attr(test, assert_instr(vpandq))]
 pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
 }
@@ -1961,7 +1961,7 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpord))]
+#[cfg_attr(test, assert_instr(vporq))]
 pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2027,7 +2027,7 @@ pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpord))]
+#[cfg_attr(test, assert_instr(vporq))]
 pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2037,7 +2037,7 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpxord))]
+#[cfg_attr(test, assert_instr(vpxorq))]
 pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2103,7 +2103,7 @@ pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpxord))]
+#[cfg_attr(test, assert_instr(vpxorq))]
 pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2113,9 +2113,9 @@ pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kandw))]
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
 pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kandw(a, b)
+    transmute(kandw(a, b))
 }
 /// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
@@ -2123,9 +2123,9 @@ pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kandw))]
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
 pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kandw(a, b)
+    transmute(kandw(a, b))
 }
 /// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
@@ -2133,9 +2133,9 @@ pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(korw))]
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
 pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    korw(a, b)
+    transmute(korw(a, b))
 }
 /// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
@@ -2143,9 +2143,9 @@ pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(korw))]
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
 pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
-    korw(a, b)
+    transmute(korw(a, b))
 }
 /// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
@@ -2153,9 +2153,9 @@ pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kxorw))]
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
 pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kxorw(a, b)
+    transmute(kxorw(a, b))
 }
 /// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
@@ -2163,9 +2163,9 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kxorw))]
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
 pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kxorw(a, b)
+    transmute(kxorw(a, b))
 }
 /// Sets packed 32-bit integers in `dst` with the supplied values.
@@ -3791,7 +3791,7 @@ extern "C" {
     #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
     fn vprold(a: i32x16, i8: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.pror.d.512"]
+    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
     fn vprord(a: i32x16, i8: i32) -> i32x16;
     #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
     fn vprolq(a: i64x8, i8: i32) -> i64x8;
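
Why the asserts in the patches above end up shaped this way: LLVM canonicalizes a rotate-right by n into a rotate-left by lane_width - n, so the `ror` intrinsics legitimately disassemble as vprold/vprolq; and because a `__mmask16` is just a `u16`, the kand/kor/kxor helpers lower to plain scalar `and`/`or`/`xor` rather than kandw/korw/kxorw. A minimal scalar sketch of both identities (plain stable Rust, no AVX-512 required; the constants 233 and 15 mirror the immediates in the asserts above, and the sample values are made up for illustration):

fn main() {
    // Rotate amounts are taken modulo the lane width, exactly as vprold does:
    // ror by 233 == ror by 233 % 32 == ror by 9 == rol by 32 - 9 == rol by 23,
    // so a test built with `imm8 = 233` can legitimately observe a vprold.
    let x: u32 = 0x1234_5678;
    assert_eq!(x.rotate_right(233 % 32), x.rotate_left(32 - 233 % 32));

    // Same identity on a 64-bit lane: ror by 15 == rol by 64 - 15 == rol by 49,
    // which is why the ror_epi64 tests assert vprolq.
    let y: u64 = 0x0123_4567_89ab_cdef;
    assert_eq!(y.rotate_right(15), y.rotate_left(64 - 15));

    // __mmask16 is a plain u16, so the mask ops can compile to scalar ALU
    // instructions (and/or/xor) instead of kandw/korw/kxorw.
    let (a, b): (u16, u16) = (0b1010_1010_1010_1010, 0b1100_1100_1100_1100);
    assert_eq!(a & b, 0b1000_1000_1000_1000); // what _kand_mask16 computes
    assert_eq!(a | b, 0b1110_1110_1110_1110); // what _kor_mask16 computes
    assert_eq!(a ^ b, 0b0110_0110_0110_0110); // what _kxor_mask16 computes
}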