diff --git a/crates/core_arch/avx512f.md b/crates/core_arch/avx512f.md
new file mode 100644
index 0000000000..567fd0e7ce
--- /dev/null
+++ b/crates/core_arch/avx512f.md
@@ -0,0 +1,107 @@
+["AVX512F"]
+ * [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236)
+ * [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236)
+ * [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236)
+ * [x] [`_mm512_kand`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kand&expand=5236)
+ * [x] [`_mm512_kor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kor&expand=5236)
+ * [x] [`_mm512_kxor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxor&expand=5236)
+ * [x] [`_kand_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
+ * [x] [`_kor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
+ * [x] [`_kxor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
+ * [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236)
+ * [x] [`_mm512_mask_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi64&expand=5236)
+ * [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236)
+ * [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236)
+ * [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236)
+ * [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236)
+ * [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236)
+ * [x] [`_mm512_mask_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi64&expand=5236)
+ * [x] [`_mm512_mask_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=5236)
+ * [x] [`_mm512_mask_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=5236)
+ * [x] [`_mm512_mask_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi32&expand=5236)
+ * [x] [`_mm512_mask_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi32&expand=5236)
+ * [x] [`_mm512_mask_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi64&expand=5236)
+ * [x] [`_mm512_mask_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi32&expand=5236)
+ * [x] [`_mm512_mask_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi64&expand=5236)
+ * [x] [`_mm512_mask_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi32&expand=5236)
+ * [x] [`_mm512_mask_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi32&expand=5236)
+ * [x] [`_mm512_mask_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi64&expand=5236)
+ * [x] [`_mm512_mask_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5236)
+ * [x] [`_mm512_mask_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi64&expand=5236)
+ * [x] [`_mm512_mask_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi32&expand=5236)
+ * [x] [`_mm512_mask_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi64&expand=5236)
+ * [x] [`_mm512_mask_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi32&expand=5236)
+ * [x] [`_mm512_mask_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi64&expand=5236)
+ * [x] [`_mm512_mask_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi32&expand=5236)
+ * [x] [`_mm512_mask_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi64&expand=5236)
+ * [x] [`_mm512_mask_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi32&expand=5236)
+ * [x] [`_mm512_mask_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi64&expand=5236)
+ * [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236)
+ * [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236)
+ * [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236)
+ * [x] [`_mm512_maskz_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi64&expand=5236)
+ * [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236)
+ * [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=5236)
+ * [x] [`_mm512_maskz_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi64&expand=5236)
+ * [x] [`_mm512_maskz_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi32&expand=5236)
+ * [x] [`_mm512_maskz_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi32&expand=5236)
+ * [x] [`_mm512_maskz_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi64&expand=5236)
+ * [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236)
+ * [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236)
+ * [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236)
+ * [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236)
+ * [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236)
+ * [x] [`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236)
+ * [x] [`_mm512_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi64&expand=5236)
+ * [x] [`_mm512_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=5236)
+ * [x] [`_mm512_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=5236)
+ * [x] [`_mm512_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi32&expand=5236)
+ * [x] [`_mm512_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi64&expand=5236)
+ * [x] [`_mm512_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi32&expand=5236)
+ * [x] [`_mm512_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi64&expand=5236)
+ * [x] [`_mm512_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi32&expand=5236)
+ * [x] [`_mm512_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi64&expand=5236)
+ * [x] [`_mm512_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi32&expand=5236)
+ * [x] [`_mm512_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi64&expand=5236)
+ * [x] [`_mm512_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi32&expand=5236)
+ * [x] [`_mm512_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi64&expand=5236)
+ * [x] [`_mm512_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5236)
+ * [x] [`_mm512_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi64&expand=5236)
+ * [x] [`_mm512_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi32&expand=5236)
+ * [x] [`_mm512_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi64&expand=5236)
+ * [x] [`_mm512_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi32&expand=5236)
+ * [x] [`_mm512_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi64&expand=5236)
+ * [x] [`_mm512_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi32&expand=5236)
+ * [x] [`_mm512_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi64&expand=5236)
+ * [x] [`_mm512_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi32&expand=5236)
+ * [x] [`_mm512_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi64&expand=5236)
+ * [x] [`_mm512_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi32&expand=5236)
+ * [x] [`_mm512_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi64&expand=5236)
+ * [x] [`_mm512_xor_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_si512&expand=5236)
+
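The checklist above covers the bitwise, mask, rotate, and shift intrinsics implemented by this change. As a rough usage sketch (hypothetical example values; on the nightly toolchains of this era the AVX-512 intrinsics sit behind the `stdsimd` and `avx512_target_feature` gates, and the caller must check for AVX512F support, e.g. with `is_x86_feature_detected!("avx512f")`):

```rust
// Illustrative only: exercises _mm512_and_epi32 and _mm512_rol_epi32 from the list above.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn and_then_rotate() -> bool {
    use core::arch::x86_64::*;
    // Broadcast 0b1010 to all 16 lanes, AND with 0b0011, then rotate each lane left by 1 bit.
    let a = _mm512_set1_epi32(0b1010);
    let masked = _mm512_and_epi32(a, _mm512_set1_epi32(0b0011)); // each lane: 0b0010
    let rotated = _mm512_rol_epi32(masked, 1);                   // each lane: 0b0100
    let lanes: [i32; 16] = core::mem::transmute(rotated);
    lanes.iter().all(|&x| x == 0b0100)
}
```

The immediate-argument form of `_mm512_rol_epi32` shown here matches the signatures added in this change; later toolchains take the rotate count as a const generic instead.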
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index 0c73e5935d..adc653e31c 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -88,7 +88,7 @@ extern "C" {
fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
#[link_name = "llvm.aarch64.neon.addp.v16i8"]
fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-
+
#[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"]
fn vaddv_s16_(a: int16x4_t) -> i16;
#[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"]
@@ -1826,9 +1826,13 @@ mod tests {
#[simd_test(enable = "neon")]
unsafe fn test_vpaddq_s8() {
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- let b = i8x16::new(0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15);
+ let b = i8x16::new(
+ 0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15,
+ );
let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b)));
- let e = i8x16::new(3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29);
+ let e = i8x16::new(
+ 3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29,
+ );
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
@@ -2829,7 +2833,7 @@ mod tests {
let e = i64x2::new(i64::MIN, i64::MAX);
assert_eq!(r, e);
}
-
+
#[simd_test(enable = "neon")]
unsafe fn test_vaddv_s16() {
let a = i16x4::new(1, 2, 3, -4);
diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs
index c006ea70d4..43fa753cc3 100644
--- a/crates/core_arch/src/arm/neon/mod.rs
+++ b/crates/core_arch/src/arm/neon/mod.rs
@@ -175,7 +175,7 @@ extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")]
fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-
+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")]
fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -299,7 +299,7 @@ pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
- vpadd_s8_(a,b)
+ vpadd_s8_(a, b)
}
/// Add pairwise.
#[inline]
@@ -308,7 +308,7 @@ pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
- transmute(vpadd_s16_(transmute(a),transmute(b)))
+ transmute(vpadd_s16_(transmute(a), transmute(b)))
}
/// Add pairwise.
#[inline]
@@ -317,7 +317,7 @@ pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
- transmute(vpadd_s32_(transmute(a),transmute(b)))
+ transmute(vpadd_s32_(transmute(a), transmute(b)))
}
/// Add pairwise.
#[inline]
@@ -326,7 +326,7 @@ pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
- transmute(vpadd_s8_(transmute(a),transmute(b)))
+ transmute(vpadd_s8_(transmute(a), transmute(b)))
}
/// Unsigned saturating extract narrow.
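The `vpadd_*` wrappers touched above are formatting-only changes; they all lower to the same pairwise-add operation. As a scalar model of that semantics (pairs from `a` fill the low half of the result, pairs from `b` the high half), under the assumption that the lane ordering matches the tests earlier in this patch:

```rust
// Scalar model of the 64-bit pairwise add (e.g. vpadd_s8): adjacent pairs of `a`
// produce the low half of the result, adjacent pairs of `b` the high half.
fn pairwise_add_i8x8(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
        r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
    }
    r
}
```

For example, `pairwise_add_i8x8([1, 2, 3, 4, 5, 6, 7, 8], [0, -1, -2, -3, -4, -5, -6, -7])` gives `[3, 7, 11, 15, -1, -5, -9, -13]`, the 8-lane analogue of the `vpaddq_s8` test shown earlier.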
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 202df0143c..4b71d6c2bf 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -90,16 +90,44 @@ simd_ty!(i16x2[i16]: i16, i16 | x0, x1);
// 64-bit wide types:
-simd_ty!(u8x8[u8]:
- u8, u8, u8, u8, u8, u8, u8, u8
- | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(
+ u8x8[u8]: u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3);
simd_ty!(u32x2[u32]: u32, u32 | x0, x1);
simd_ty!(u64x1[u64]: u64 | x1);
-simd_ty!(i8x8[i8]:
- i8, i8, i8, i8, i8, i8, i8, i8
- | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(
+ i8x8[i8]: i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3);
simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
simd_ty!(i64x1[i64]: i64 | x1);
@@ -108,116 +136,576 @@ simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
// 128-bit wide types:
-simd_ty!(u8x16[u8]:
- u8, u8, u8, u8, u8, u8, u8, u8,
- u8, u8, u8, u8, u8, u8, u8, u8
- | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+simd_ty!(
+ u8x16[u8]: u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+simd_ty!(
+ u16x8[u16]: u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
);
-simd_ty!(u16x8[u16]:
- u16, u16, u16, u16, u16, u16, u16, u16
- | x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3);
simd_ty!(u64x2[u64]: u64, u64 | x0, x1);
-simd_ty!(i8x16[i8]:
- i8, i8, i8, i8, i8, i8, i8, i8,
- i8, i8, i8, i8, i8, i8, i8, i8
- | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+simd_ty!(
+ i8x16[i8]: i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+simd_ty!(
+ i16x8[i16]: i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
);
-simd_ty!(i16x8[i16]:
- i16, i16, i16, i16, i16, i16, i16, i16
- | x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
-simd_m_ty!(m8x16[i8]:
- i8, i8, i8, i8, i8, i8, i8, i8,
- i8, i8, i8, i8, i8, i8, i8, i8
- | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+simd_m_ty!(
+ m8x16[i8]: i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+simd_m_ty!(
+ m16x8[i16]: i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
);
-simd_m_ty!(m16x8[i16]:
- i16, i16, i16, i16, i16, i16, i16, i16
- | x0, x1, x2, x3, x4, x5, x6, x7);
simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1);
// 256-bit wide types:
-simd_ty!(u8x32[u8]:
- u8, u8, u8, u8, u8, u8, u8, u8,
- u8, u8, u8, u8, u8, u8, u8, u8,
- u8, u8, u8, u8, u8, u8, u8, u8,
- u8, u8, u8, u8, u8, u8, u8, u8
- | x0, x1, x2, x3, x4, x5, x6, x7,
- x8, x9, x10, x11, x12, x13, x14, x15,
- x16, x17, x18, x19, x20, x21, x22, x23,
- x24, x25, x26, x27, x28, x29, x30, x31
-);
-simd_ty!(u16x16[u16]:
- u16, u16, u16, u16, u16, u16, u16, u16,
- u16, u16, u16, u16, u16, u16, u16, u16
- | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
-);
-simd_ty!(u32x8[u32]:
- u32, u32, u32, u32, u32, u32, u32, u32
- | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(
+ u8x32[u8]: u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31
+);
+simd_ty!(
+ u16x16[u16]: u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+simd_ty!(
+ u32x8[u32]: u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3);
-simd_ty!(i8x32[i8]:
- i8, i8, i8, i8, i8, i8, i8, i8,
- i8, i8, i8, i8, i8, i8, i8, i8,
- i8, i8, i8, i8, i8, i8, i8, i8,
- i8, i8, i8, i8, i8, i8, i8, i8
- | x0, x1, x2, x3, x4, x5, x6, x7,
- x8, x9, x10, x11, x12, x13, x14, x15,
- x16, x17, x18, x19, x20, x21, x22, x23,
- x24, x25, x26, x27, x28, x29, x30, x31
-);
-simd_ty!(i16x16[i16]:
- i16, i16, i16, i16, i16, i16, i16, i16,
- i16, i16, i16, i16, i16, i16, i16, i16
- | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
-);
-simd_ty!(i32x8[i32]:
- i32, i32, i32, i32, i32, i32, i32, i32
- | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(
+ i8x32[i8]: i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8,
+ i8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31
+);
+simd_ty!(
+ i16x16[i16]: i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16,
+ i16 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+simd_ty!(
+ i32x8[i32]: i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
-simd_ty!(f32x8[f32]:
- f32, f32, f32, f32, f32, f32, f32, f32 |
- x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(
+ f32x8[f32]: f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
// 512-bit wide types:
-simd_ty!(i32x16[i32]:
- i32, i32, i32, i32, i32, i32, i32, i32,
- i32, i32, i32, i32, i32, i32, i32, i32
- | x0, x1, x2, x3, x4, x5, x6, x7,
- x8, x9, x10, x11, x12, x13, x14, x15);
-
-simd_ty!(u32x16[u32]:
- u32, u32, u32, u32, u32, u32, u32, u32,
- u32, u32, u32, u32, u32, u32, u32, u32
- | x0, x1, x2, x3, x4, x5, x6, x7,
- x8, x9, x10, x11, x12, x13, x14, x15);
-
-simd_ty!(f32x16[f32]:
- f32, f32, f32, f32, f32, f32, f32, f32,
- f32, f32, f32, f32, f32, f32, f32, f32
- | x0, x1, x2, x3, x4, x5, x6, x7,
- x8, x9, x10, x11, x12, x13, x14, x15);
-
-simd_ty!(i64x8[i64]:
- i64, i64, i64, i64, i64, i64, i64, i64
- | x0, x1, x2, x3, x4, x5, x6, x7);
-
-simd_ty!(u64x8[u64]:
- u64, u64, u64, u64, u64, u64, u64, u64
- | x0, x1, x2, x3, x4, x5, x6, x7);
-
-simd_ty!(f64x8[f64]:
- f64, f64, f64, f64, f64, f64, f64, f64
- | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(
+ i32x16[i32]: i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32,
+ i32 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+
+simd_ty!(
+ u32x16[u32]: u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32,
+ u32 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+
+simd_ty!(
+ f32x16[f32]: f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32,
+ f32 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
+
+simd_ty!(
+ i64x8[i64]: i64,
+ i64,
+ i64,
+ i64,
+ i64,
+ i64,
+ i64,
+ i64 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
+
+simd_ty!(
+ u64x8[u64]: u64,
+ u64,
+ u64,
+ u64,
+ u64,
+ u64,
+ u64,
+ u64 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
+
+simd_ty!(
+ f64x8[f64]: f64,
+ f64,
+ f64,
+ f64,
+ f64,
+ f64,
+ f64,
+ f64 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 4744b435dd..2160744a5c 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -49,6 +49,44 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(k, abs, zero))
}
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_epi64&expand=48)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
+ let a = a.as_i64x8();
+ // all-0 is a properly initialized i64x8
+ let zero: i64x8 = mem::zeroed();
+ let sub = simd_sub(zero, a);
+ let cmp: i64x8 = simd_gt(a, zero);
+ transmute(simd_select(cmp, a, sub))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_epi64&expand=49)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+ let abs = _mm512_abs_epi64(a).as_i64x8();
+ transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_abs_epi64&expand=50)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
+ let abs = _mm512_abs_epi64(a).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, abs, zero))
+}
+
/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
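The new `_mm512_abs_epi64` above implements absolute value as a compare-and-select rather than a dedicated builtin: each lane selects between `a` and `0 - a` based on `a > 0`. A scalar sketch of the same idea (using wrapping arithmetic so `i64::MIN` maps to itself, matching two's-complement behaviour):

```rust
// Scalar model of the select-based absolute value used by _mm512_abs_epi64:
// keep the lane when it is positive, otherwise use its (wrapping) negation.
fn abs_select(a: i64) -> i64 {
    let neg = 0i64.wrapping_sub(a);
    if a > 0 { a } else { neg }
}
```

The `_mask_` and `_maskz_` variants then route the result through `simd_select_bitmask`, copying from `src` or zeroing the lanes whose mask bit is clear.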
@@ -854,1562 +892,2838 @@ pub unsafe fn _mm512_mask_i64scatter_epi32(
constify_imm8_gather!(scale, call);
}
-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_ps(
- e0: f32,
- e1: f32,
- e2: f32,
- e3: f32,
- e4: f32,
- e5: f32,
- e6: f32,
- e7: f32,
- e8: f32,
- e9: f32,
- e10: f32,
- e11: f32,
- e12: f32,
- e13: f32,
- e14: f32,
- e15: f32,
-) -> __m512 {
- _mm512_setr_ps(
- e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
- )
+#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ transmute(vprold(a.as_i32x16(), imm8))
}
-/// Sets packed 32-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_ps(
- e0: f32,
- e1: f32,
- e2: f32,
- e3: f32,
- e4: f32,
- e5: f32,
- e6: f32,
- e7: f32,
- e8: f32,
- e9: f32,
- e10: f32,
- e11: f32,
- e12: f32,
- e13: f32,
- e14: f32,
- e15: f32,
-) -> __m512 {
- let r = f32x16::new(
- e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
- );
- transmute(r)
+#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let rol = vprold(a.as_i32x16(), imm8);
+ transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}
-/// Broadcast 64-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
- transmute(f64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let rol = vprold(a.as_i32x16(), imm8);
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, rol, zero))
}
-/// Broadcast 32-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
- transmute(f32x16::splat(a))
+#[cfg_attr(test, assert_instr(vprold, imm8 = 233))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ transmute(vprord(a.as_i32x16(), imm8))
}
-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_epi32(
- e15: i32,
- e14: i32,
- e13: i32,
- e12: i32,
- e11: i32,
- e10: i32,
- e9: i32,
- e8: i32,
- e7: i32,
- e6: i32,
- e5: i32,
- e4: i32,
- e3: i32,
- e2: i32,
- e1: i32,
- e0: i32,
-) -> __m512i {
- _mm512_setr_epi32(
- e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
- )
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let ror = vprord(a.as_i32x16(), imm8);
+ transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}
-/// Broadcast 32-bit integer `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
- transmute(i32x16::splat(a))
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let ror = vprord(a.as_i32x16(), imm8);
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, ror, zero))
}
-/// Broadcast 64-bit integer `a` to all elements of `dst`.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
- transmute(i64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ transmute(vprolq(a.as_i64x8(), imm8))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let rol = vprolq(a.as_i64x8(), imm8);
+ transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let rol = vprolq(a.as_i64x8(), imm8);
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, rol, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ transmute(vprorq(a.as_i64x8(), imm8))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let ror = vprorq(a.as_i64x8(), imm8);
+ transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+ assert!(imm8 >= 0 && imm8 <= 255);
+ let ror = vprorq(a.as_i64x8(), imm8);
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, ror, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ transmute(vpsllid(a.as_i32x16(), imm8))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsllid(a.as_i32x16(), imm8);
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsllid(a.as_i32x16(), imm8);
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ transmute(vpsrlid(a.as_i32x16(), imm8))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsrlid(a.as_i32x16(), imm8);
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsrlid(a.as_i32x16(), imm8);
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ transmute(vpslliq(a.as_i64x8(), imm8))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpslliq(a.as_i64x8(), imm8);
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+}
+
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr) => {
- vcmpps(
- a.as_f32x16(),
- b.as_f32x16(),
- $imm5,
- neg_one,
- _MM_FROUND_CUR_DIRECTION,
- )
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpslliq(a.as_i64x8(), imm8);
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ transmute(vpsrliq(a.as_i64x8(), imm8))
+}
+
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
- macro_rules! call {
- ($imm5:expr) => {
- vcmpps(
- a.as_f32x16(),
- b.as_f32x16(),
- $imm5,
- m as i16,
- _MM_FROUND_CUR_DIRECTION,
- )
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsrliq(a.as_i64x8(), imm8);
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsrliq(a.as_i64x8(), imm8);
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_mask_cmp_round_ps_mask(
- m: __mmask16,
- a: __m512,
- b: __m512,
- op: i32,
- sae: i32,
-) -> __mmask16 {
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
+ transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_mask_sll_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ count: __m128i,
+) -> __m512i {
+ let shf = _mm512_sll_epi32(a, count).as_i32x16();
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+ let shf = _mm512_sll_epi32(a, count).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
- _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
+ transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
- _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_mask_srl_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ count: __m128i,
+) -> __m512i {
+ let shf = _mm512_srl_epi32(a, count).as_i32x16();
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+ let shf = _mm512_srl_epi32(a, count).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
+ transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_mask_sll_epi64(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ count: __m128i,
+) -> __m512i {
+ let shf = _mm512_sll_epi64(a, count).as_i64x8();
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+ let shf = _mm512_sll_epi64(a, count).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
+ transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_mask_srl_epi64(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ count: __m128i,
+) -> __m512i {
+ let shf = _mm512_srl_epi64(a, count).as_i64x8();
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+ let shf = _mm512_srl_epi64(a, count).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
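+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest); the
+/// arithmetic shift replicates the sign bit, so negative lanes stay negative:
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-16);
+/// let count = _mm_set_epi64x(0, 2);
+/// // Every lane becomes -16 >> 2 = -4.
+/// let r = _mm512_sra_epi32(a, count);
+/// ```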
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
+ transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_mask_sra_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ count: __m128i,
+) -> __m512i {
+ let shf = _mm512_sra_epi32(a, count).as_i32x16();
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+ let shf = _mm512_sra_epi32(a, count).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
+ transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_mask_sra_epi64(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ count: __m128i,
+) -> __m512i {
+ let shf = _mm512_sra_epi64(a, count).as_i64x8();
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr) => {
- vcmppd(
- a.as_f64x8(),
- b.as_f64x8(),
- $imm5,
- neg_one,
- _MM_FROUND_CUR_DIRECTION,
- )
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+ let shf = _mm512_sra_epi64(a, count).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
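+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-64);
+/// // Arithmetic shift by an immediate: each lane becomes -64 >> 3 = -8.
+/// let r = _mm512_srai_epi32(a, 3);
+/// ```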
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ transmute(vpsraid(a.as_i32x16(), imm8))
+}
+
+/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
- macro_rules! call {
- ($imm5:expr) => {
- vcmppd(
- a.as_f64x8(),
- b.as_f64x8(),
- $imm5,
- m as i8,
- _MM_FROUND_CUR_DIRECTION,
- )
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsraid(a.as_i32x16(), imm8);
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsraid(a.as_i32x16(), imm8);
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
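+///
+/// Unlike SSE2/AVX2, AVX-512 also provides a 64-bit arithmetic right shift.
+/// A rough usage sketch (illustrative only, not compiled as a doctest):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi64(-32);
+/// // Each 64-bit lane becomes -32 >> 4 = -2.
+/// let r = _mm512_srai_epi64(a, 4);
+/// ```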
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_mask_cmp_round_pd_mask(
- m: __mmask8,
- a: __m512d,
- b: __m512d,
- op: i32,
- sae: i32,
-) -> __mmask8 {
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ transmute(vpsraiq(a.as_i64x8(), imm8))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsraiq(a.as_i64x8(), imm8);
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+ assert!(imm8 <= 255);
+ let shf = vpsraiq(a.as_i64x8(), imm8);
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465)
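+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest); each
+/// lane is shifted by the corresponding lane of `count`:
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-64);
+/// let count = _mm512_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
+/// // Lane 0 becomes -64 >> 1 = -32, lane 1 becomes -64 >> 2 = -16, and so on.
+/// let r = _mm512_srav_epi32(a, count);
+/// ```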
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpsravd))]
+pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
+ transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
}
-/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
- _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpsravd))]
+pub unsafe fn _mm512_mask_srav_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ count: __m512i,
+) -> __m512i {
+ let shf = _mm512_srav_epi32(a, count).as_i32x16();
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr) => {
- vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsravd))]
+pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+ let shf = _mm512_srav_epi32(a, count).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
- macro_rules! call {
- ($imm5:expr) => {
- vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsravq))]
+pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
+ transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmpss(a, b, $imm5, neg_one, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsravq))]
+pub unsafe fn _mm512_mask_srav_epi64(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ count: __m512i,
+) -> __m512i {
+ let shf = _mm512_srav_epi64(a, count).as_i64x8();
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_round_ss_mask(
- m: __mmask8,
- a: __m128,
- b: __m128,
- op: i32,
- sae: i32,
-) -> __mmask8 {
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmpss(a, b, $imm5, m as i8, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsravq))]
+pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+ let shf = _mm512_srav_epi64(a, count).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703)
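+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest); bits
+/// rotated out on the left re-enter on the right:
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(i32::MIN); // only bit 31 set in each lane
+/// let b = _mm512_set1_epi32(1);
+/// // Rotating left by one wraps bit 31 around to bit 0, giving 1.
+/// let r = _mm512_rolv_epi32(a, b);
+/// ```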
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr) => {
- vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vprolvd))]
+pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
+ transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
- macro_rules! call {
- ($imm5:expr) => {
- vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
- };
- }
- let r = constify_imm5!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vprolvd))]
+pub unsafe fn _mm512_mask_rolv_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ b: __m512i,
+) -> __m512i {
+ let rol = _mm512_rolv_epi32(a, b).as_i32x16();
+ transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmpsd(a, b, $imm5, neg_one, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vprolvd))]
+pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let rol = _mm512_rolv_epi32(a, b).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, rol, zero))
}
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739)
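+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(1);
+/// let b = _mm512_set1_epi32(1);
+/// // Rotating right by one wraps bit 0 around to bit 31.
+/// let r = _mm512_rorv_epi32(a, b);
+/// ```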
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_round_sd_mask(
- m: __mmask8,
- a: __m128d,
- b: __m128d,
- op: i32,
- sae: i32,
-) -> __mmask8 {
- macro_rules! call {
- ($imm5:expr, $imm4:expr) => {
- vcmpsd(a, b, $imm5, m as i8, $imm4)
- };
- }
- let r = constify_imm5_sae!(op, sae, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
+ transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_lt(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub unsafe fn _mm512_mask_rorv_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ b: __m512i,
+) -> __m512i {
+ let ror = _mm512_rorv_epi32(a, b).as_i32x16();
+ transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmplt_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let ror = _mm512_rorv_epi32(a, b).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, ror, zero))
}
-/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_gt(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
+ transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpgt_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let rol = _mm512_rolv_epi64(a, b).as_i64x8();
+ transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_le(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let rol = _mm512_rolv_epi64(a, b).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, rol, zero))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmple_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
+ transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_ge(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let ror = _mm512_rorv_epi64(a, b).as_i64x8();
+ transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpge_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let ror = _mm512_rorv_epi64(a, b).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, ror, zero))
}
-/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342)
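+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest); unlike
+/// `_mm512_sll_epi32`, each lane uses the shift amount from the corresponding
+/// lane of `count`:
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(1);
+/// let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+/// // Lane i becomes 1 << i.
+/// let r = _mm512_sllv_epi32(a, count);
+/// ```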
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_eq(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
+ transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpeq_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub unsafe fn _mm512_mask_sllv_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ count: __m512i,
+) -> __m512i {
+ let shf = _mm512_sllv_epi32(a, count).as_i32x16();
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_ne(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+ let shf = _mm512_sllv_epi32(a, count).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
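+///
+/// A rough usage sketch (illustrative only, not compiled as a doctest); the
+/// logical shift zero-fills, so a negative lane does not stay negative:
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-2);
+/// let count = _mm512_set1_epi32(1);
+/// // Each lane is 0xFFFF_FFFE shifted right with a zero filled in,
+/// // i.e. 0x7FFF_FFFF, rather than -1 as an arithmetic shift would give.
+/// let r = _mm512_srlv_epi32(a, count);
+/// ```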
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpneq_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
+ transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
- let neg_one = -1;
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub unsafe fn _mm512_mask_srlv_epi32(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ count: __m512i,
+) -> __m512i {
+ let shf = _mm512_srlv_epi32(a, count).as_i32x16();
+ transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epu32_mask(
- m: __mmask16,
- a: __m512i,
- b: __m512i,
- op: _MM_CMPINT_ENUM,
-) -> __mmask16 {
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+ let shf = _mm512_srlv_epi32(a, count).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_lt(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
+ transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmplt_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_mask_sllv_epi64(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ count: __m512i,
+) -> __m512i {
+ let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_gt(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+ let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpgt_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
+ transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mask_srlv_epi64&expand=5561)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_le(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_mask_srlv_epi64(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ count: __m512i,
+) -> __m512i {
+ let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+ transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmple_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+ let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, shf, zero))
}
-/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
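+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a = _mm512_set1_epi32(0b1100);
+///     let b = _mm512_set1_epi32(0b1010);
+///     let r = _mm512_and_epi32(a, b); // every 32-bit lane holds 0b1000
+/// }
+/// ```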
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_ge(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpge_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let and = _mm512_and_epi32(a, b).as_i32x16();
+ transmute(simd_select_bitmask(k, and, src.as_i32x16()))
}
-/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274)
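+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; lanes whose mask bit is clear are zeroed rather than copied (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a = _mm512_set1_epi32(0b1111);
+///     let b = _mm512_set1_epi32(0b0110);
+///     // lanes 0..=7 hold 0b0110, lanes 8..=15 are zeroed
+///     let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
+/// }
+/// ```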
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_eq(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let and = _mm512_and_epi32(a, b).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, and, zero))
}
-/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpeq_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
}
-/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
- simd_bitmask::(simd_ne(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let and = _mm512_and_epi64(a, b).as_i64x8();
+ transmute(simd_select_bitmask(k, and, src.as_i64x8()))
}
-/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=274)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpneq_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let and = _mm512_and_epi64(a, b).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, and, zero))
}
-/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
- let neg_one = -1;
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epi32_mask(
- m: __mmask16,
- a: __m512i,
- b: __m512i,
- op: _MM_CMPINT_ENUM,
-) -> __mmask16 {
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let or = _mm512_or_epi32(a, b).as_i32x16();
+ transmute(simd_select_bitmask(k, or, src.as_i32x16()))
}
-/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmplt_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let or = _mm512_or_epi32(a, b).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, or, zero))
}
-/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
}
-/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpgt_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let or = _mm512_or_epi64(a, b).as_i64x8();
+ transmute(simd_select_bitmask(k, or, src.as_i64x8()))
}
-/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let or = _mm512_or_epi64(a, b).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, or, zero))
}
-/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmple_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpge_epu64_mask(b, a) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let xor = _mm512_xor_epi32(a, b).as_i32x16();
+ transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
}
-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+ let xor = _mm512_xor_epi32(a, b).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, xor, zero))
}
-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpeq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
}
-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let xor = _mm512_xor_epi64(a, b).as_i64x8();
+ transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
}
-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpneq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+ let xor = _mm512_xor_epi64(a, b).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, xor, zero))
}
-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
+ transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
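+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; masks are plain 16-bit values, so the result can be checked directly (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a: __mmask16 = 0b1111_0000_1111_0000;
+///     let b: __mmask16 = 0b1010_1010_1010_1010;
+///     let k = _kand_mask16(a, b);
+///     assert_eq!(k, 0b1010_0000_1010_0000);
+/// }
+/// ```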
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epu64_mask(
- m: __mmask8,
- a: __m512i,
- b: __m512i,
- op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
+pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+ transmute(kandw(a, b))
}
-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
+pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
+ transmute(kandw(a, b))
}
-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmplt_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
+pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+ transmute(korw(a, b))
}
-/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
+pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
+ transmute(korw(a, b))
}
-/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpgt_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
+pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+ transmute(kxorw(a, b))
}
-/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
+pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
+ transmute(kxorw(a, b))
}
-/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
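+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; `_mm512_set_ps` lists elements from the highest lane down to lane 0, so reversing the argument order gives the same vector as `_mm512_setr_ps` (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let hi_to_lo = _mm512_set_ps(
+///         15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
+///     );
+///     let lo_to_hi = _mm512_setr_ps(
+///         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+///     );
+///     // both vectors hold 0.0 in lane 0 and 15.0 in lane 15
+/// }
+/// ```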
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmple_epi64_mask(a, b) & m
+pub unsafe fn _mm512_set_ps(
+ e0: f32,
+ e1: f32,
+ e2: f32,
+ e3: f32,
+ e4: f32,
+ e5: f32,
+ e6: f32,
+ e7: f32,
+ e8: f32,
+ e9: f32,
+ e10: f32,
+ e11: f32,
+ e12: f32,
+ e13: f32,
+ e14: f32,
+ e15: f32,
+) -> __m512 {
+ _mm512_setr_ps(
+ e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+ )
}
-/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_setr_ps(
+ e0: f32,
+ e1: f32,
+ e2: f32,
+ e3: f32,
+ e4: f32,
+ e5: f32,
+ e6: f32,
+ e7: f32,
+ e8: f32,
+ e9: f32,
+ e10: f32,
+ e11: f32,
+ e12: f32,
+ e13: f32,
+ e14: f32,
+ e15: f32,
+) -> __m512 {
+ let r = f32x16::new(
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+ );
+ transmute(r)
}
-/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+/// Broadcast 64-bit float `a` to all elements of `dst`.
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpge_epi64_mask(b, a) & m
+pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
+ transmute(f64x8::splat(a))
}
-/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+/// Broadcast 32-bit float `a` to all elements of `dst`.
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
+ transmute(f32x16::splat(a))
}
-/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// Sets packed 32-bit integers in `dst` with the supplied values.
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_epi32(
+ e15: i32,
+ e14: i32,
+ e13: i32,
+ e12: i32,
+ e11: i32,
+ e10: i32,
+ e9: i32,
+ e8: i32,
+ e7: i32,
+ e6: i32,
+ e5: i32,
+ e4: i32,
+ e3: i32,
+ e2: i32,
+ e1: i32,
+ e0: i32,
+) -> __m512i {
+ _mm512_setr_epi32(
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+ )
+}
+
+/// Broadcast 32-bit integer `a` to all elements of `dst`.
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
+ transmute(i32x16::splat(a))
+}
+
+/// Broadcast 64-bit integer `a` to all elements of `dst`.
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
+ transmute(i64x8::splat(a))
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
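+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; each set bit of the returned mask marks a lane of `a` that is less than the matching lane of `b` (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a = _mm512_set1_ps(1.0);
+///     let b = _mm512_set1_ps(2.0);
+///     let k = _mm512_cmplt_ps_mask(a, b);
+///     assert_eq!(k, 0xFFFF); // 1.0 < 2.0 holds in all 16 lanes
+/// }
+/// ```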
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpeq_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
}
-/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
- simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
}
-/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpneq_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
}
-/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
- let neg_one = -1;
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
}
-/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epi64_mask(
- m: __mmask8,
- a: __m512i,
- b: __m512i,
- op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
- macro_rules! call {
- ($imm3:expr) => {
- vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
- };
- }
- let r = constify_imm3!(op, call);
- transmute(r)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
}
-/// Returns vector of type `__m512d` with undefined elements.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-// This intrinsic has no corresponding instruction.
-pub unsafe fn _mm512_undefined_pd() -> __m512d {
- _mm512_set1_pd(0.0)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
}
-/// Returns vector of type `__m512` with undefined elements.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-// This intrinsic has no corresponding instruction.
-pub unsafe fn _mm512_undefined_ps() -> __m512 {
- _mm512_set1_ps(0.0)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
}
-/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
-/// floating-point elements) from memory into result.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
- ptr::read_unaligned(mem_addr as *const __m512d)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
}
-/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
-/// floating-point elements) from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
- ptr::write_unaligned(mem_addr as *mut __m512d, a);
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
}
-/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
-/// floating-point elements) from memory into result.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
- ptr::read_unaligned(mem_addr as *const __m512)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
}
-/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
-/// floating-point elements) from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
- ptr::write_unaligned(mem_addr as *mut __m512, a);
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
}
-/// Sets packed 64-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
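+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; `op` is one of the `_CMP_*` predicates (here `_CMP_LE_OS`, the same constant `_mm512_cmple_ps_mask` in this file passes) (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a = _mm512_set1_ps(2.0);
+///     let b = _mm512_set1_ps(2.0);
+///     let k = _mm512_cmp_ps_mask(a, b, _CMP_LE_OS);
+///     assert_eq!(k, 0xFFFF); // 2.0 <= 2.0 holds in every lane
+/// }
+/// ```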
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_pd(
- e0: f64,
- e1: f64,
- e2: f64,
- e3: f64,
- e4: f64,
- e5: f64,
- e6: f64,
- e7: f64,
-) -> __m512d {
- let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmpps(
+ a.as_f32x16(),
+ b.as_f32x16(),
+ $imm5,
+ neg_one,
+ _MM_FROUND_CUR_DIRECTION,
+ )
+ };
+ }
+ let r = constify_imm5!(op, call);
transmute(r)
}
-/// Sets packed 64-bit integers in `dst` with the supplied values.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_pd(
- e0: f64,
- e1: f64,
- e2: f64,
- e3: f64,
- e4: f64,
- e5: f64,
- e6: f64,
- e7: f64,
-) -> __m512d {
- _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
-}
-
-/// Equal
-pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
-/// Less-than
-pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
-/// Less-than-or-equal
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmpps(
+ a.as_f32x16(),
+ b.as_f32x16(),
+ $imm5,
+ m as i16,
+ _MM_FROUND_CUR_DIRECTION,
+ )
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
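+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; `sae` controls exception suppression, and `_MM_FROUND_CUR_DIRECTION` (the value `_mm512_cmp_ps_mask` uses internally) keeps the default behavior (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a = _mm512_set1_ps(1.0);
+///     let b = _mm512_set1_ps(2.0);
+///     let k = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OS, _MM_FROUND_CUR_DIRECTION);
+///     assert_eq!(k, 0xFFFF);
+/// }
+/// ```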
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_mask_cmp_round_ps_mask(
+ m: __mmask16,
+ a: __m512,
+ b: __m512,
+ op: i32,
+ sae: i32,
+) -> __mmask16 {
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
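+///
+/// A minimal usage sketch, assuming an `avx512f`-capable target; a lane is reported only when neither input lane is NaN (illustrative only):
+///
+/// ```ignore
+/// unsafe {
+///     let a = _mm512_set1_ps(1.0);
+///     let b = _mm512_set1_ps(f32::NAN);
+///     assert_eq!(_mm512_cmpord_ps_mask(a, a), 0xFFFF); // no NaNs anywhere
+///     assert_eq!(_mm512_cmpord_ps_mask(a, b), 0); // every lane of b is NaN
+/// }
+/// ```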
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector k using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+ _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector k using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+ _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmppd(
+ a.as_f64x8(),
+ b.as_f64x8(),
+ $imm5,
+ neg_one,
+ _MM_FROUND_CUR_DIRECTION,
+ )
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
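+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// driving _mm512_cmp_pd_mask with one of the _CMP_* predicate constants. The
+// helper name and the input values are hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_pd_mask() -> __mmask8 {
+    let a = _mm512_set1_pd(-1.0);
+    let b = _mm512_set1_pd(0.0);
+    // Bit i of the result is set when lane i of a compares less-than lane i of b
+    // (ordered, signaling), so every bit is set here: 0b1111_1111.
+    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+}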
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmppd(
+ a.as_f64x8(),
+ b.as_f64x8(),
+ $imm5,
+ m as i8,
+ _MM_FROUND_CUR_DIRECTION,
+ )
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op, and store the results in a mask vector.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_mask_cmp_round_pd_mask(
+ m: __mmask8,
+ a: __m512d,
+ b: __m512d,
+ op: i32,
+ sae: i32,
+) -> __mmask8 {
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
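+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// the _round_ variants take an extra sae argument; _MM_FROUND_CUR_DIRECTION keeps
+// the default exception behaviour, while _MM_FROUND_NO_EXC suppresses exceptions.
+// The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_mask_cmp_round_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    // Only lanes whose bit is set in m can end up set in the returned mask.
+    _mm512_mask_cmp_round_pd_mask(m, a, b, _CMP_NEQ_UQ, _MM_FROUND_CUR_DIRECTION)
+}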
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+ _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
+}
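+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// splitting lanes into "neither operand is NaN" and "at least one operand is NaN"
+// with the ord/unord predicates. The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_nan_masks(a: __m512d, b: __m512d) -> (__mmask8, __mmask8) {
+    let ordered = _mm512_cmpord_pd_mask(a, b);
+    let unordered = _mm512_cmpunord_pd_mask(a, b);
+    // The two masks are complementary: ordered | unordered == 0xff.
+    (ordered, unordered)
+}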
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
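+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// the scalar variants compare only the lowest element, so at most bit 0 of the
+// returned mask can be set. The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_ss_mask(a: __m128, b: __m128) -> bool {
+    // True when the low single-precision element of a is less than that of b.
+    (_mm_cmp_ss_mask(a, b, _CMP_LT_OS) & 1) != 0
+}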
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmpss(a, b, $imm5, neg_one, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_round_ss_mask(
+ m: __mmask8,
+ a: __m128,
+ b: __m128,
+ op: i32,
+ sae: i32,
+) -> __mmask8 {
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmpss(a, b, $imm5, m as i8, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
+ macro_rules! call {
+ ($imm5:expr) => {
+ vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
+ };
+ }
+ let r = constify_imm5!(op, call);
+ transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmpsd(a, b, $imm5, neg_one, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_round_sd_mask(
+ m: __mmask8,
+ a: __m128d,
+ b: __m128d,
+ op: i32,
+ sae: i32,
+) -> __mmask8 {
+ macro_rules! call {
+ ($imm5:expr, $imm4:expr) => {
+ vcmpsd(a, b, $imm5, m as i8, $imm4)
+ };
+ }
+ let r = constify_imm5_sae!(op, sae, call);
+ transmute(r)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmplt_epu32_mask(a, b) & m
+}
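+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// the masked integer comparisons are the unmasked result ANDed with m, so both
+// values returned below are equal. The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_masked_cmplt_epu32(m: __mmask16, a: __m512i, b: __m512i) -> (__mmask16, __mmask16) {
+    let direct = _mm512_mask_cmplt_epu32_mask(m, a, b);
+    let manual = _mm512_cmplt_epu32_mask(a, b) & m;
+    (direct, manual)
+}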
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpgt_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmple_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpge_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpeq_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpneq_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu32_mask(
+ m: __mmask16,
+ a: __m512i,
+ b: __m512i,
+ op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
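+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// the generic integer comparison takes one of the _MM_CMPINT_* constants defined
+// later in this file. The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Equivalent to _mm512_cmple_epu32_mask(a, b).
+    _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LE)
+}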
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmplt_epi32_mask(a, b) & m
+}
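+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// the _epi32 predicates are signed while the _epu32 predicates above are unsigned,
+// so the same bit pattern can compare differently. The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_signed_vs_unsigned_lt(a: __m512i, b: __m512i) -> (__mmask16, __mmask16) {
+    // For a lane holding a = -1 (0xffff_ffff) and b = 0, the signed bit is set
+    // (-1 < 0) while the unsigned bit is not (0xffff_ffff > 0).
+    (_mm512_cmplt_epi32_mask(a, b), _mm512_cmplt_epu32_mask(a, b))
+}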
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpgt_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmple_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpge_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpeq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+ simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+ _mm512_cmpneq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi32_mask(
+ m: __mmask16,
+ a: __m512i,
+ b: __m512i,
+ op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmplt_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpgt_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmple_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpge_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpeq_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpneq_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu64_mask(
+ m: __mmask8,
+ a: __m512i,
+ b: __m512i,
+ op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmplt_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpgt_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmple_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpge_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpeq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+ simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+ _mm512_cmpneq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+ let neg_one = -1;
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi64_mask(
+ m: __mmask8,
+ a: __m512i,
+ b: __m512i,
+ op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+ macro_rules! call {
+ ($imm3:expr) => {
+ vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+ };
+ }
+ let r = constify_imm3!(op, call);
+ transmute(r)
+}
+
+/// Returns vector of type `__m512d` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_pd() -> __m512d {
+ _mm512_set1_pd(0.0)
+}
+
+/// Returns vector of type `__m512` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_ps() -> __m512 {
+ _mm512_set1_ps(0.0)
+}
+
+/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+ ptr::read_unaligned(mem_addr as *const __m512d)
+}
+
+/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+ ptr::write_unaligned(mem_addr as *mut __m512d, a);
+}
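+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// the unaligned load/store pair works directly on plain f64 buffers with no
+// alignment requirement. The helper name is hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_loadu_storeu_pd(src: &[f64; 8], dst: &mut [f64; 8]) {
+    let v = _mm512_loadu_pd(src.as_ptr());
+    _mm512_storeu_pd(dst.as_mut_ptr(), v);
+}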
+
+/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+ ptr::read_unaligned(mem_addr as *const __m512)
+}
+
+/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+ ptr::write_unaligned(mem_addr as *mut __m512, a);
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
+///
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_setr_pd(
+ e0: f64,
+ e1: f64,
+ e2: f64,
+ e3: f64,
+ e4: f64,
+ e5: f64,
+ e6: f64,
+ e7: f64,
+) -> __m512d {
+ let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+ transmute(r)
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values.
+///
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_pd(
+ e0: f64,
+ e1: f64,
+ e2: f64,
+ e3: f64,
+ e4: f64,
+ e5: f64,
+ e6: f64,
+ e7: f64,
+) -> __m512d {
+ _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+}
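+
+// Illustrative usage sketch (not part of the intrinsic definitions in this patch):
+// _mm512_set_pd takes its arguments highest lane first, while _mm512_setr_pd takes
+// them lane 0 first, so both calls below build the same vector. The helper name is
+// hypothetical.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_set_vs_setr_pd() -> (__m512d, __m512d) {
+    let hi_first = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+    let lo_first = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+    (hi_first, lo_first)
+}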
+
+/// Equal
+pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
+/// Less-than
+pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
+/// Less-than-or-equal
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
/// False
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
@@ -2422,1034 +3736,2399 @@ pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
/// True
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
-#[allow(improper_ctypes)]
-extern "C" {
- #[link_name = "llvm.x86.avx512.gather.dpd.512"]
- fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
- #[link_name = "llvm.x86.avx512.gather.dps.512"]
- fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
- #[link_name = "llvm.x86.avx512.gather.qpd.512"]
- fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
- #[link_name = "llvm.x86.avx512.gather.qps.512"]
- fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
- #[link_name = "llvm.x86.avx512.gather.dpq.512"]
- fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
- #[link_name = "llvm.x86.avx512.gather.dpi.512"]
- fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
- #[link_name = "llvm.x86.avx512.gather.qpq.512"]
- fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
- #[link_name = "llvm.x86.avx512.gather.qpi.512"]
- fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512.gather.dpd.512"]
+ fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
+ #[link_name = "llvm.x86.avx512.gather.dps.512"]
+ fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
+ #[link_name = "llvm.x86.avx512.gather.qpd.512"]
+ fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
+ #[link_name = "llvm.x86.avx512.gather.qps.512"]
+ fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
+ #[link_name = "llvm.x86.avx512.gather.dpq.512"]
+ fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
+ #[link_name = "llvm.x86.avx512.gather.dpi.512"]
+ fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.gather.qpq.512"]
+ fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
+ #[link_name = "llvm.x86.avx512.gather.qpi.512"]
+ fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+
+ #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
+ fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.dps.512"]
+ fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
+ fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.qps.512"]
+ fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
+ fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
+ fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
+ fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
+ fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+
+ #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
+ fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
+ #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
+ fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
+ #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
+ fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
+ #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
+ fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
+ #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
+ fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
+ #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
+ fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
+ #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
+ fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+ #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
+ fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+
+ #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
+ fn vprold(a: i32x16, i8: i32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
+ fn vprord(a: i32x16, i8: i32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
+ fn vprolq(a: i64x8, i8: i32) -> i64x8;
+ #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
+ fn vprorq(a: i64x8, i8: i32) -> i64x8;
+
+ #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
+ fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
+ #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
+ fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
+ #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
+ fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
+ #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
+ fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
+
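+ // Per-lane variable logical shifts: shift counts come from the corresponding lane of `b`.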
+ #[link_name = "llvm.x86.avx512.psllv.d.512"]
+ fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psrlv.d.512"]
+ fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psllv.q.512"]
+ fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
+ #[link_name = "llvm.x86.avx512.psrlv.q.512"]
+ fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
+
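+ // Logical shifts of every lane by the same immediate count.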
+ #[link_name = "llvm.x86.avx512.pslli.d.512"]
+ fn vpsllid(a: i32x16, imm8: u32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psrli.d.512"]
+ fn vpsrlid(a: i32x16, imm8: u32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.pslli.q.512"]
+ fn vpslliq(a: i64x8, imm8: u32) -> i64x8;
+ #[link_name = "llvm.x86.avx512.psrli.q.512"]
+ fn vpsrliq(a: i64x8, imm8: u32) -> i64x8;
+
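+ // Logical shifts of every lane by the count held in the low 64 bits of `count`.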
+ #[link_name = "llvm.x86.avx512.psll.d.512"]
+ fn vpslld(a: i32x16, count: i32x4) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psrl.d.512"]
+ fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psll.q.512"]
+ fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
+ #[link_name = "llvm.x86.avx512.psrl.q.512"]
+ fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
+
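+ // Arithmetic right shifts: by the low 64 bits of `count` (`psra`), by an immediate
+ // (`psrai`), and per lane (`psrav`).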
+ #[link_name = "llvm.x86.avx512.psra.d.512"]
+ fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psra.q.512"]
+ fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
+
+ #[link_name = "llvm.x86.avx512.psrai.d.512"]
+ fn vpsraid(a: i32x16, imm8: u32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psrai.q.512"]
+ fn vpsraiq(a: i64x8, imm8: u32) -> i64x8;
+
+ #[link_name = "llvm.x86.avx512.psrav.d.512"]
+ fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
+ #[link_name = "llvm.x86.avx512.psrav.q.512"]
+ fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
+
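+ // Bitwise operations on 16-bit mask (k) registers.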
+ #[link_name = "llvm.x86.avx512.kand.w"]
+ fn kandw(ma: u16, mb: u16) -> u16;
+ #[link_name = "llvm.x86.avx512.kor.w"]
+ fn korw(ma: u16, mb: u16) -> u16;
+ #[link_name = "llvm.x86.avx512.kxor.w"]
+ fn kxorw(ma: u16, mb: u16) -> u16;
+}
+
+#[cfg(test)]
+mod tests {
+ use std;
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86::*;
+ use crate::hint::black_box;
+
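+ // In these tests mask bit `i` selects element `i`; `_mm512_set_*` places its first argument
+ // in the highest element while `_mm512_setr_*` places it in the lowest, e.g.
+ // `_mm512_maskz_abs_epi32(0b11111111, a)` keeps elements 0..=7 and zeroes elements 8..=15.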
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_abs_epi32() {
+ #[rustfmt::skip]
+ let a = _mm512_setr_epi32(
+ 0, 1, -1, i32::MAX,
+ i32::MIN, 100, -100, -32,
+ 0, 1, -1, i32::MAX,
+ i32::MIN, 100, -100, -32,
+ );
+ let r = _mm512_abs_epi32(a);
+ let e = _mm512_setr_epi32(
+ 0,
+ 1,
+ 1,
+ i32::MAX,
+ i32::MAX.wrapping_add(1),
+ 100,
+ 100,
+ 32,
+ 0,
+ 1,
+ 1,
+ i32::MAX,
+ i32::MAX.wrapping_add(1),
+ 100,
+ 100,
+ 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_abs_epi32() {
+ #[rustfmt::skip]
+ let a = _mm512_setr_epi32(
+ 0, 1, -1, i32::MAX,
+ i32::MIN, 100, -100, -32,
+ 0, 1, -1, i32::MAX,
+ i32::MIN, 100, -100, -32,
+ );
+ let r = _mm512_mask_abs_epi32(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
+ let e = _mm512_setr_epi32(
+ 0,
+ 1,
+ 1,
+ i32::MAX,
+ i32::MAX.wrapping_add(1),
+ 100,
+ 100,
+ 32,
+ 0,
+ 1,
+ -1,
+ i32::MAX,
+ i32::MIN,
+ 100,
+ -100,
+ -32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_abs_epi32() {
+ #[rustfmt::skip]
+ let a = _mm512_setr_epi32(
+ 0, 1, -1, i32::MAX,
+ i32::MIN, 100, -100, -32,
+ 0, 1, -1, i32::MAX,
+ i32::MIN, 100, -100, -32,
+ );
+ let r = _mm512_maskz_abs_epi32(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_abs_epi32(0b11111111, a);
+ let e = _mm512_setr_epi32(
+ 0,
+ 1,
+ 1,
+ i32::MAX,
+ i32::MAX.wrapping_add(1),
+ 100,
+ 100,
+ 32,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32gather_ps() {
+ let mut arr = [0f32; 256];
+ for i in 0..256 {
+ arr[i] = i as f32;
+ }
+ // A multiplier of 4 is word-addressing
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 120, 128, 136, 144, 152, 160, 168, 176);
+ let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
+ #[rustfmt::skip]
+ assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
+ 120., 128., 136., 144., 152., 160., 168., 176.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32gather_ps() {
+ let mut arr = [0f32; 256];
+ for i in 0..256 {
+ arr[i] = i as f32;
+ }
+ let src = _mm512_set1_ps(2.);
+ let mask = 0b10101010_10101010;
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 120, 128, 136, 144, 152, 160, 168, 176);
+ // A multiplier of 4 is word-addressing
+ let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
+ #[rustfmt::skip]
+ assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
+ 2., 128., 2., 144., 2., 160., 2., 176.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32gather_epi32() {
+ let mut arr = [0i32; 256];
+ for i in 0..256 {
+ arr[i] = i as i32;
+ }
+ // A multiplier of 4 is word-addressing
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 120, 128, 136, 144, 152, 160, 168, 176);
+ let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
+ #[rustfmt::skip]
+ assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 120, 128, 136, 144, 152, 160, 168, 176));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32gather_epi32() {
+ let mut arr = [0i32; 256];
+ for i in 0..256 {
+ arr[i] = i as i32;
+ }
+ let src = _mm512_set1_epi32(2);
+ let mask = 0b10101010_10101010;
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 128, 144, 160, 176, 192, 208, 224, 240);
+ // A multiplier of 4 is word-addressing
+ let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
+ #[rustfmt::skip]
+ assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112,
+ 2, 144, 2, 176, 2, 208, 2, 240));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32scatter_ps() {
+ let mut arr = [0f32; 256];
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 128, 144, 160, 176, 192, 208, 224, 240);
+ let src = _mm512_setr_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ // A multiplier of 4 is word-addressing
+ _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
+ let mut expected = [0f32; 256];
+ for i in 0..16 {
+ expected[i * 16] = (i + 1) as f32;
+ }
+ assert_eq!(&arr[..], &expected[..]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32scatter_ps() {
+ let mut arr = [0f32; 256];
+ let mask = 0b10101010_10101010;
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 128, 144, 160, 176, 192, 208, 224, 240);
+ let src = _mm512_setr_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ // A multiplier of 4 is word-addressing
+ _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
+ let mut expected = [0f32; 256];
+ for i in 0..8 {
+ expected[i * 32 + 16] = 2. * (i + 1) as f32;
+ }
+ assert_eq!(&arr[..], &expected[..]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32scatter_epi32() {
+ let mut arr = [0i32; 256];
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 128, 144, 160, 176, 192, 208, 224, 240);
+ let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ // A multiplier of 4 is word-addressing
+ _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
+ let mut expected = [0i32; 256];
+ for i in 0..16 {
+ expected[i * 16] = (i + 1) as i32;
+ }
+ assert_eq!(&arr[..], &expected[..]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32scatter_epi32() {
+ let mut arr = [0i32; 256];
+ let mask = 0b10101010_10101010;
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
+ 128, 144, 160, 176, 192, 208, 224, 240);
+ let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ // A multiplier of 4 is word-addressing
+ _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
+ let mut expected = [0i32; 256];
+ for i in 0..8 {
+ expected[i * 32 + 16] = 2 * (i + 1) as i32;
+ }
+ assert_eq!(&arr[..], &expected[..]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmplt_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let m = _mm512_cmplt_ps_mask(a, b);
+ assert_eq!(m, 0b00000101_00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmplt_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
+ assert_eq!(r, 0b00000100_00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpnlt_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let mask = 0b01111010_01111010;
+ assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpnle_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let m = _mm512_cmpnle_ps_mask(b, a);
+ assert_eq!(m, 0b00001101_00001101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpnle_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
+ assert_eq!(r, 0b00000100_00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmple_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmple_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
+ 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let mask = 0b01111010_01111010;
+ assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpeq_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
+ let m = _mm512_cmpeq_ps_mask(b, a);
+ assert_eq!(m, 0b11001101_11001101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpeq_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
+ assert_eq!(r, 0b01001000_01001000);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpneq_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
+ let m = _mm512_cmpneq_ps_mask(b, a);
+ assert_eq!(m, 0b00110010_00110010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpneq_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
+ 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
+ assert_eq!(r, 0b00110010_00110010)
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
+ assert_eq!(m, 0b00000101_00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
+ assert_eq!(r, 0b00000100_00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_round_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 0b00000101_00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_round_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
+ 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
+ let b = _mm512_set1_ps(-1.);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(r, 0b00000100_00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpord_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
+ f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
+ let m = _mm512_cmpord_ps_mask(a, b);
+ assert_eq!(m, 0b00000101_00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpord_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
+ f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
+ let mask = 0b11000011_11000011;
+ let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
+ assert_eq!(m, 0b00000001_00000001);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpunord_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
+ f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
+ let m = _mm512_cmpunord_ps_mask(a, b);
+ assert_eq!(m, 0b11111010_11111010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpunord_ps_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
+ f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
+ #[rustfmt::skip]
+ let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
+ let mask = 0b00001111_00001111;
+ let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
+ assert_eq!(m, 0b00001010_00001010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cmp_ss_mask() {
+ let a = _mm_setr_ps(2., 1., 1., 1.);
+ let b = _mm_setr_ps(1., 2., 2., 2.);
+ let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_cmp_ss_mask() {
+ let a = _mm_setr_ps(2., 1., 1., 1.);
+ let b = _mm_setr_ps(1., 2., 2., 2.);
+ let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
+ assert_eq!(m, 0);
+ let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cmp_round_ss_mask() {
+ let a = _mm_setr_ps(2., 1., 1., 1.);
+ let b = _mm_setr_ps(1., 2., 2., 2.);
+ let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_cmp_round_ss_mask() {
+ let a = _mm_setr_ps(2., 1., 1., 1.);
+ let b = _mm_setr_ps(1., 2., 2., 2.);
+ let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 0);
+ let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cmp_sd_mask() {
+ let a = _mm_setr_pd(2., 1.);
+ let b = _mm_setr_pd(1., 2.);
+ let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_cmp_sd_mask() {
+ let a = _mm_setr_pd(2., 1.);
+ let b = _mm_setr_pd(1., 2.);
+ let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
+ assert_eq!(m, 0);
+ let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cmp_round_sd_mask() {
+ let a = _mm_setr_pd(2., 1.);
+ let b = _mm_setr_pd(1., 2.);
+ let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_cmp_round_sd_mask() {
+ let a = _mm_setr_pd(2., 1.);
+ let b = _mm_setr_pd(1., 2.);
+ let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 0);
+ let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
+ assert_eq!(m, 1);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmplt_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let m = _mm512_cmplt_epu32_mask(a, b);
+ assert_eq!(m, 0b11001111_11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmplt_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
+ assert_eq!(r, 0b01001010_01001010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpgt_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let m = _mm512_cmpgt_epu32_mask(b, a);
+ assert_eq!(m, 0b11001111_11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
+ assert_eq!(r, 0b01001010_01001010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmple_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ assert_eq!(
+ _mm512_cmple_epu32_mask(a, b),
+ !_mm512_cmpgt_epu32_mask(a, b)
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmple_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ assert_eq!(
+ _mm512_mask_cmple_epu32_mask(mask, a, b),
+ 0b01111010_01111010
+ );
+ }
- #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
- fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.dps.512"]
- fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
- fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.qps.512"]
- fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
- fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
- fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
- fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
- #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
- fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpge_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ assert_eq!(
+ _mm512_cmpge_epu32_mask(a, b),
+ !_mm512_cmplt_epu32_mask(a, b)
+ )
+ }
- #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
- fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
- #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
- fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
- #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
- fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
- #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
- fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
- #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
- fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
- #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
- fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
- #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
- fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
- #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
- fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
-}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpge_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
+ }
-#[cfg(test)]
-mod tests {
- use std;
- use stdarch_test::simd_test;
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpeq_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ #[rustfmt::skip]
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let m = _mm512_cmpeq_epu32_mask(b, a);
+ assert_eq!(m, 0b11001111_11001111);
+ }
- use crate::core_arch::x86::*;
- use crate::hint::black_box;
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ #[rustfmt::skip]
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
+ assert_eq!(r, 0b01001010_01001010);
+ }
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_abs_epi32() {
+ unsafe fn test_mm512_cmpneq_epu32_mask() {
#[rustfmt::skip]
- let a = _mm512_setr_epi32(
- 0, 1, -1, i32::MAX,
- i32::MIN, 100, -100, -32,
- 0, 1, -1, i32::MAX,
- i32::MIN, 100, -100, -32,
- );
- let r = _mm512_abs_epi32(a);
- let e = _mm512_setr_epi32(
- 0,
- 1,
- 1,
- i32::MAX,
- i32::MAX.wrapping_add(1),
- 100,
- 100,
- 32,
- 0,
- 1,
- 1,
- i32::MAX,
- i32::MAX.wrapping_add(1),
- 100,
- 100,
- 32,
- );
- assert_eq_m512i(r, e);
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ #[rustfmt::skip]
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let m = _mm512_cmpneq_epu32_mask(b, a);
+ assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_abs_epi32() {
+ unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
#[rustfmt::skip]
- let a = _mm512_setr_epi32(
- 0, 1, -1, i32::MAX,
- i32::MIN, 100, -100, -32,
- 0, 1, -1, i32::MAX,
- i32::MIN, 100, -100, -32,
- );
- let r = _mm512_mask_abs_epi32(a, 0, a);
- assert_eq_m512i(r, a);
- let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
- let e = _mm512_setr_epi32(
- 0,
- 1,
- 1,
- i32::MAX,
- i32::MAX.wrapping_add(1),
- 100,
- 100,
- 32,
- 0,
- 1,
- -1,
- i32::MAX,
- i32::MIN,
- 100,
- -100,
- -32,
- );
- assert_eq_m512i(r, e);
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
+ #[rustfmt::skip]
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
+ assert_eq!(r, 0b00110010_00110010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
+ assert_eq!(m, 0b11001111_11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_epu32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
+ assert_eq!(r, 0b01001010_01001010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmplt_epi32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let m = _mm512_cmplt_epi32_mask(a, b);
+ assert_eq!(m, 0b00000101_00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmplt_epi32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
+ assert_eq!(r, 0b00000100_00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpgt_epi32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let m = _mm512_cmpgt_epi32_mask(b, a);
+ assert_eq!(m, 0b00000101_00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
+ assert_eq!(r, 0b00000100_00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmple_epi32_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ assert_eq!(
+ _mm512_cmple_epi32_mask(a, b),
+ !_mm512_cmpgt_epi32_mask(a, b)
+ )
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_maskz_abs_epi32() {
+ unsafe fn test_mm512_mask_cmple_epi32_mask() {
#[rustfmt::skip]
- let a = _mm512_setr_epi32(
- 0, 1, -1, i32::MAX,
- i32::MIN, 100, -100, -32,
- 0, 1, -1, i32::MAX,
- i32::MIN, 100, -100, -32,
- );
- let r = _mm512_maskz_abs_epi32(0, a);
- assert_eq_m512i(r, _mm512_setzero_si512());
- let r = _mm512_maskz_abs_epi32(0b11111111, a);
- let e = _mm512_setr_epi32(
- 0,
- 1,
- 1,
- i32::MAX,
- i32::MAX.wrapping_add(1),
- 100,
- 100,
- 32,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- );
- assert_eq_m512i(r, e);
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_i32gather_ps() {
- let mut arr = [0f32; 256];
- for i in 0..256 {
- arr[i] = i as f32;
- }
- // A multiplier of 4 is word-addressing
+ unsafe fn test_mm512_cmpge_epi32_mask() {
#[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 120, 128, 136, 144, 152, 160, 168, 176);
- let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ assert_eq!(
+ _mm512_cmpge_epi32_mask(a, b),
+ !_mm512_cmplt_epi32_mask(a, b)
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpge_epi32_mask() {
#[rustfmt::skip]
- assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
- 120., 128., 136., 144., 152., 160., 168., 176.));
+ let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01111010_01111010;
+ assert_eq!(
+ _mm512_mask_cmpge_epi32_mask(mask, a, b),
+ 0b01111010_01111010
+ );
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_i32gather_ps() {
- let mut arr = [0f32; 256];
- for i in 0..256 {
- arr[i] = i as f32;
- }
- let src = _mm512_set1_ps(2.);
- let mask = 0b10101010_10101010;
+ unsafe fn test_mm512_cmpeq_epi32_mask() {
#[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 120, 128, 136, 144, 152, 160, 168, 176);
- // A multiplier of 4 is word-addressing
- let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
- assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
- 2., 128., 2., 144., 2., 160., 2., 176.));
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let m = _mm512_cmpeq_epi32_mask(b, a);
+ assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_i32gather_epi32() {
- let mut arr = [0i32; 256];
- for i in 0..256 {
- arr[i] = i as i32;
- }
- // A multiplier of 4 is word-addressing
+ unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
#[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 120, 128, 136, 144, 152, 160, 168, 176);
- let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
- assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 120, 128, 136, 144, 152, 160, 168, 176));
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
+ assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_i32gather_epi32() {
- let mut arr = [0i32; 256];
- for i in 0..256 {
- arr[i] = i as i32;
- }
- let src = _mm512_set1_epi32(2);
- let mask = 0b10101010_10101010;
+ unsafe fn test_mm512_cmpneq_epi32_mask() {
#[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 128, 144, 160, 176, 192, 208, 224, 240);
- // A multiplier of 4 is word-addressing
- let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
- assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112,
- 2, 144, 2, 176, 2, 208, 2, 240));
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let m = _mm512_cmpneq_epi32_mask(b, a);
+ assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_i32scatter_ps() {
- let mut arr = [0f32; 256];
+ unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
#[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 128, 144, 160, 176, 192, 208, 224, 240);
- let src = _mm512_setr_ps(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- // A multiplier of 4 is word-addressing
- _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
- let mut expected = [0f32; 256];
- for i in 0..16 {
- expected[i * 16] = (i + 1) as f32;
- }
- assert_eq!(&arr[..], &expected[..],);
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
+ #[rustfmt::skip]
+ let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+ let mask = 0b01111010_01111010;
+ let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
+ assert_eq!(r, 0b00110010_00110010)
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_i32scatter_ps() {
- let mut arr = [0f32; 256];
- let mask = 0b10101010_10101010;
+ unsafe fn test_mm512_cmp_epi32_mask() {
#[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 128, 144, 160, 176, 192, 208, 224, 240);
- let src = _mm512_setr_ps(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- // A multiplier of 4 is word-addressing
- _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
- let mut expected = [0f32; 256];
- for i in 0..8 {
- expected[i * 32 + 16] = 2. * (i + 1) as f32;
- }
- assert_eq!(&arr[..], &expected[..],);
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
+ assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_i32scatter_epi32() {
- let mut arr = [0i32; 256];
+ unsafe fn test_mm512_mask_cmp_epi32_mask() {
#[rustfmt::skip]
+ let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+ 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+ let b = _mm512_set1_epi32(-1);
+ let mask = 0b01100110_01100110;
+ let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
+ assert_eq!(r, 0b00000100_00000100);
+ }
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 128, 144, 160, 176, 192, 208, 224, 240);
- let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- // A multiplier of 4 is word-addressing
- _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
- let mut expected = [0i32; 256];
- for i in 0..16 {
- expected[i * 16] = (i + 1) as i32;
- }
- assert_eq!(&arr[..], &expected[..],);
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set_epi32() {
+ let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(
+ r,
+ _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
+ )
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_i32scatter_epi32() {
- let mut arr = [0i32; 256];
- let mask = 0b10101010_10101010;
- #[rustfmt::skip]
- let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
- 128, 144, 160, 176, 192, 208, 224, 240);
- let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- // A multiplier of 4 is word-addressing
- _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
- let mut expected = [0i32; 256];
- for i in 0..8 {
- expected[i * 32 + 16] = 2 * (i + 1) as i32;
- }
- assert_eq!(&arr[..], &expected[..],);
+ unsafe fn test_mm512_setr_epi32() {
+ let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(
+ r,
+ _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set1_epi32() {
+ let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, _mm512_set1_epi32(2));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_setzero_si512() {
+ assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set_ps() {
+ let r = _mm512_setr_ps(
+ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+ );
+ assert_eq_m512(
+ r,
+ _mm512_set_ps(
+ 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
+ ),
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_setr_ps() {
+ let r = _mm512_set_ps(
+ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+ );
+ assert_eq_m512(
+ r,
+ _mm512_setr_ps(
+ 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
+ ),
+ )
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmplt_ps_mask() {
+ unsafe fn test_mm512_set1_ps() {
#[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let m = _mm512_cmplt_ps_mask(a, b);
- assert_eq!(m, 0b00000101_00000101);
+ let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
+ 2., 2., 2., 2., 2., 2., 2., 2.);
+ assert_eq_m512(expected, _mm512_set1_ps(2.));
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmplt_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_setzero_ps() {
+ assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpnlt_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
+ unsafe fn test_mm512_loadu_pd() {
+ let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
+ let p = a.as_ptr();
+ let r = _mm512_loadu_pd(black_box(p));
+ let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
+ assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let mask = 0b01111010_01111010;
- assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
+ unsafe fn test_mm512_storeu_pd() {
+ let a = _mm512_set1_pd(9.);
+ let mut r = _mm512_undefined_pd();
+ _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
+ assert_eq_m512d(r, a);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpnle_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let m = _mm512_cmpnle_ps_mask(b, a);
- assert_eq!(m, 0b00001101_00001101);
+ unsafe fn test_mm512_loadu_ps() {
+ let a = &[
+ 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
+ ];
+ let p = a.as_ptr();
+ let r = _mm512_loadu_ps(black_box(p));
+ let e = _mm512_setr_ps(
+ 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
+ );
+ assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpnle_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_storeu_ps() {
+ let a = _mm512_set1_ps(9.);
+ let mut r = _mm512_undefined_ps();
+ _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
+ assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmple_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
+ unsafe fn test_mm512_setr_pd() {
+ let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmple_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
- 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let mask = 0b01111010_01111010;
- assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
+ unsafe fn test_mm512_set_pd() {
+ let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpeq_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
- let m = _mm512_cmpeq_ps_mask(b, a);
- assert_eq!(m, 0b11001101_11001101);
+ unsafe fn test_mm512_rol_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let r = _mm512_rol_epi32(a, 1);
+ let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpeq_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
- assert_eq!(r, 0b01001000_01001000);
+ unsafe fn test_mm512_mask_rol_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let r = _mm512_mask_rol_epi32(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1);
+ let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpneq_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
- let m = _mm512_cmpneq_ps_mask(b, a);
- assert_eq!(m, 0b00110010_00110010);
+ unsafe fn test_mm512_maskz_rol_epi32() {
+ let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
+ let r = _mm512_maskz_rol_epi32(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpneq_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
- 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
- assert_eq!(r, 0b00110010_00110010)
+ unsafe fn test_mm512_ror_epi32() {
+ let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let r = _mm512_ror_epi32(a, 1);
+ let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmp_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
- assert_eq!(m, 0b00000101_00000101);
+ unsafe fn test_mm512_mask_ror_epi32() {
+ let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let r = _mm512_mask_ror_epi32(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1);
+ let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmp_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_maskz_ror_epi32() {
+ let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
+ let r = _mm512_maskz_ror_epi32(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmp_round_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 0b00000101_00000101);
+ unsafe fn test_mm512_slli_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let r = _mm512_slli_epi32(a, 1);
+ let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmp_round_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
- 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
- let b = _mm512_set1_ps(-1.);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_mask_slli_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let r = _mm512_mask_slli_epi32(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1);
+ let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpord_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
- f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
- f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
- let m = _mm512_cmpord_ps_mask(a, b);
- assert_eq!(m, 0b00000101_00000101);
+ unsafe fn test_mm512_maskz_slli_epi32() {
+ let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
+ let r = _mm512_maskz_slli_epi32(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpord_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
- f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
- f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
- let mask = 0b11000011_11000011;
- let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
- assert_eq!(m, 0b00000001_00000001);
+ unsafe fn test_mm512_srli_epi32() {
+ let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let r = _mm512_srli_epi32(a, 1);
+ let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpunord_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
- f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
- f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
- let m = _mm512_cmpunord_ps_mask(a, b);
+ unsafe fn test_mm512_mask_srli_epi32() {
+ let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let r = _mm512_mask_srli_epi32(a, 0, a, 1);
+ assert_eq_m512i(r, a);
- assert_eq!(m, 0b11111010_11111010);
+ let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1);
+ let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpunord_ps_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
- f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
- #[rustfmt::skip]
- let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
- f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
- let mask = 0b00001111_00001111;
- let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
- assert_eq!(m, 0b000001010_00001010);
+ unsafe fn test_mm512_maskz_srli_epi32() {
+ let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
+ let r = _mm512_maskz_srli_epi32(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_cmp_ss_mask() {
- let a = _mm_setr_ps(2., 1., 1., 1.);
- let b = _mm_setr_ps(1., 2., 2., 2.);
- let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
- assert_eq!(m, 1);
+ unsafe fn test_mm512_rolv_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_rolv_epi32(a, b);
+
+ let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_mask_cmp_ss_mask() {
- let a = _mm_setr_ps(2., 1., 1., 1.);
- let b = _mm_setr_ps(1., 2., 2., 2.);
- let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
- assert_eq!(m, 0);
- let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
- assert_eq!(m, 1);
+ unsafe fn test_mm512_mask_rolv_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_mask_rolv_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
+
+ let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_cmp_round_ss_mask() {
- let a = _mm_setr_ps(2., 1., 1., 1.);
- let b = _mm_setr_ps(1., 2., 2., 2.);
- let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 1);
- }
+ unsafe fn test_mm512_maskz_rolv_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
+ let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_maskz_rolv_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm_mask_cmp_round_ss_mask() {
- let a = _mm_setr_ps(2., 1., 1., 1.);
- let b = _mm_setr_ps(1., 2., 2., 2.);
- let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 0);
- let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 1);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_cmp_sd_mask() {
- let a = _mm_setr_pd(2., 1.);
- let b = _mm_setr_pd(1., 2.);
- let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
- assert_eq!(m, 1);
+ unsafe fn test_mm512_rorv_epi32() {
+ let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_rorv_epi32(a, b);
+
+ let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_mask_cmp_sd_mask() {
- let a = _mm_setr_pd(2., 1.);
- let b = _mm_setr_pd(1., 2.);
- let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
- assert_eq!(m, 0);
- let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
- assert_eq!(m, 1);
+ unsafe fn test_mm512_mask_rorv_epi32() {
+ let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_mask_rorv_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
+
+ let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_cmp_round_sd_mask() {
- let a = _mm_setr_pd(2., 1.);
- let b = _mm_setr_pd(1., 2.);
- let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 1);
+ unsafe fn test_mm512_maskz_rorv_epi32() {
+ let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
+ let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_maskz_rorv_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
+
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm_mask_cmp_round_sd_mask() {
- let a = _mm_setr_pd(2., 1.);
- let b = _mm_setr_pd(1., 2.);
- let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 0);
- let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
- assert_eq!(m, 1);
+ unsafe fn test_mm512_sllv_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_sllv_epi32(a, count);
+
+ let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmplt_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let m = _mm512_cmplt_epu32_mask(a, b);
- assert_eq!(m, 0b11001111_11001111);
+ unsafe fn test_mm512_mask_sllv_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_mask_sllv_epi32(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
+
+ let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmplt_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
- assert_eq!(r, 0b01001010_01001010);
+ unsafe fn test_mm512_maskz_sllv_epi32() {
+ let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
+ let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_maskz_sllv_epi32(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
+
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpgt_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let m = _mm512_cmpgt_epu32_mask(b, a);
- assert_eq!(m, 0b11001111_11001111);
+ unsafe fn test_mm512_srlv_epi32() {
+ let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_srlv_epi32(a, count);
+
+ let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
- assert_eq!(r, 0b01001010_01001010);
+ unsafe fn test_mm512_mask_srlv_epi32() {
+ let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_mask_srlv_epi32(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
+
+ let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmple_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- assert_eq!(
- _mm512_cmple_epu32_mask(a, b),
- !_mm512_cmpgt_epu32_mask(a, b)
- )
+ unsafe fn test_mm512_maskz_srlv_epi32() {
+ let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
+ let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+
+ let r = _mm512_maskz_srlv_epi32(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
+
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmple_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- assert_eq!(
- _mm512_mask_cmple_epu32_mask(mask, a, b),
- 0b01111010_01111010
+ unsafe fn test_mm512_sll_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 31,
+ 1 << 0,
+ 1 << 1,
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ );
+ let count = _mm_set_epi32(0, 0, 0, 2);
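+ // the shift amount is read from the low 64 bits of count, so every lane is shifted left by 2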
+ let r = _mm512_sll_epi32(a, count);
+ let e = _mm512_set_epi32(
+ 0,
+ 1 << 2,
+ 1 << 3,
+ 1 << 4,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpge_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- assert_eq!(
- _mm512_cmpge_epu32_mask(a, b),
- !_mm512_cmplt_epu32_mask(a, b)
- )
+ unsafe fn test_mm512_mask_sll_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 31,
+ 1 << 0,
+ 1 << 1,
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ );
+ let count = _mm_set_epi32(0, 0, 0, 2);
+ let r = _mm512_mask_sll_epi32(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
+ let e = _mm512_set_epi32(
+ 0,
+ 1 << 2,
+ 1 << 3,
+ 1 << 4,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpge_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000);
+ unsafe fn test_mm512_maskz_sll_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 31,
+ 1 << 0,
+ 1 << 1,
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 31,
+ );
+ let count = _mm_set_epi32(2, 0, 0, 2);
+ let r = _mm512_maskz_sll_epi32(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpeq_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let m = _mm512_cmpeq_epu32_mask(b, a);
- assert_eq!(m, 0b11001111_11001111);
+ unsafe fn test_mm512_srl_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 31,
+ 1 << 0,
+ 1 << 1,
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ );
+ let count = _mm_set_epi32(0, 0, 0, 2);
+ let r = _mm512_srl_epi32(a, count);
+ let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
- assert_eq!(r, 0b01001010_01001010);
- }
+ unsafe fn test_mm512_mask_srl_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 31,
+ 1 << 0,
+ 1 << 1,
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ );
+ let count = _mm_set_epi32(0, 0, 0, 2);
+ let r = _mm512_mask_srl_epi32(a, 0, a, count);
+ assert_eq_m512i(r, a);
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpneq_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let m = _mm512_cmpneq_epu32_mask(b, a);
- assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
+ let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
+ let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
- assert_eq!(r, 0b00110010_00110010);
+ unsafe fn test_mm512_maskz_srl_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 31,
+ 1 << 0,
+ 1 << 1,
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 31,
+ );
+ let count = _mm_set_epi32(2, 0, 0, 2);
+ let r = _mm512_maskz_srl_epi32(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmp_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
- assert_eq!(m, 0b11001111_11001111);
+ unsafe fn test_mm512_sra_epi32() {
+ let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
+ let count = _mm_set_epi32(1, 0, 0, 2);
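+ // the shift amount comes from the low 64 bits of count (2 here); arithmetically shifting -15 right by 2 rounds toward negative infinity, giving -4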
+ let r = _mm512_sra_epi32(a, count);
+ let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmp_epu32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
- assert_eq!(r, 0b01001010_01001010);
+ unsafe fn test_mm512_mask_sra_epi32() {
+ let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
+ let count = _mm_set_epi32(0, 0, 0, 2);
+ let r = _mm512_mask_sra_epi32(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
+ let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmplt_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let m = _mm512_cmplt_epi32_mask(a, b);
- assert_eq!(m, 0b00000101_00000101);
+ unsafe fn test_mm512_maskz_sra_epi32() {
+ let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
+ let count = _mm_set_epi32(2, 0, 0, 2);
+ let r = _mm512_maskz_sra_epi32(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmplt_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_srav_epi32() {
+ let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
+ let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r = _mm512_srav_epi32(a, count);
+ let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpgt_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let m = _mm512_cmpgt_epi32_mask(b, a);
- assert_eq!(m, 0b00000101_00000101);
+ unsafe fn test_mm512_mask_srav_epi32() {
+ let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
+ let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
+ let r = _mm512_mask_srav_epi32(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
+ let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_maskz_srav_epi32() {
+ let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
+ let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
+ let r = _mm512_maskz_srav_epi32(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmple_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- assert_eq!(
- _mm512_cmple_epi32_mask(a, b),
- !_mm512_cmpgt_epi32_mask(a, b)
- )
+ unsafe fn test_mm512_srai_epi32() {
+ let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
+ let r = _mm512_srai_epi32(a, 2);
+ let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmple_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000);
+ unsafe fn test_mm512_mask_srai_epi32() {
+ let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
+ let r = _mm512_mask_srai_epi32(a, 0, a, 2);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2);
+ let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpge_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- assert_eq!(
- _mm512_cmpge_epi32_mask(a, b),
- !_mm512_cmplt_epi32_mask(a, b)
- )
+ unsafe fn test_mm512_maskz_srai_epi32() {
+ let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
+ let r = _mm512_maskz_srai_epi32(0, a, 2);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpge_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01111010_01111010;
- assert_eq!(
- _mm512_mask_cmpge_epi32_mask(mask, a, b),
- 0b01111010_01111010
+ unsafe fn test_mm512_and_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
);
+ let r = _mm512_and_epi32(a, b);
+ let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpeq_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let m = _mm512_cmpeq_epi32_mask(b, a);
- assert_eq!(m, 0b11001111_11001111);
+ unsafe fn test_mm512_mask_and_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_mask_and_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
+ let e = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
- assert_eq!(r, 0b01001010_01001010);
- }
+ unsafe fn test_mm512_maskz_and_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_maskz_and_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmpneq_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let m = _mm512_cmpneq_epi32_mask(b, a);
- assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
+ let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
- #[rustfmt::skip]
- let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
- 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
- let mask = 0b01111010_01111010;
- let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
- assert_eq!(r, 0b00110010_00110010)
+ unsafe fn test_mm512_and_si512() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_and_si512(a, b);
+ let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cmp_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
- assert_eq!(m, 0b00000101_00000101);
+ unsafe fn test_mm512_or_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_or_epi32(a, b);
+ let e = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cmp_epi32_mask() {
- #[rustfmt::skip]
- let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
- 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
- let b = _mm512_set1_epi32(-1);
- let mask = 0b01100110_01100110;
- let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
- assert_eq!(r, 0b00000100_00000100);
+ unsafe fn test_mm512_mask_or_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_mask_or_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
+ let e = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set_epi32() {
- let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
- assert_eq_m512i(
- r,
- _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
- )
+ unsafe fn test_mm512_maskz_or_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_maskz_or_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
+ let e = _mm512_set_epi32(
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setr_epi32() {
- let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
- assert_eq_m512i(
- r,
- _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
- )
+ unsafe fn test_mm512_or_si512() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_or_si512(a, b);
+ let e = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set1_epi32() {
- let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
- assert_eq_m512i(r, _mm512_set1_epi32(2));
+ unsafe fn test_mm512_xor_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_xor_epi32(a, b);
+ let e = _mm512_set_epi32(
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setzero_si512() {
- assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
- }
+ unsafe fn test_mm512_mask_xor_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_mask_xor_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set_ps() {
- let r = _mm512_setr_ps(
- 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+ let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
+ let e = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 4,
);
- assert_eq_m512(
- r,
- _mm512_set_ps(
- 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
- ),
- )
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setr_ps() {
- let r = _mm512_set_ps(
- 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+ unsafe fn test_mm512_maskz_xor_epi32() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
);
- assert_eq_m512(
- r,
- _mm512_setr_ps(
- 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
- ),
- )
- }
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_maskz_xor_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set1_ps() {
- #[rustfmt::skip]
- let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
- 2., 2., 2., 2., 2., 2., 2., 2.);
- assert_eq_m512(expected, _mm512_set1_ps(2.));
+ let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setzero_ps() {
- assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi32(
+ 1 << 1 | 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 3,
+ );
+ let b = _mm512_set_epi32(
+ 1 << 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 3 | 1 << 4,
+ );
+ let r = _mm512_xor_si512(a, b);
+ let e = _mm512_set_epi32(
+ 1 << 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_loadu_pd() {
- let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
- let p = a.as_ptr();
- let r = _mm512_loadu_pd(black_box(p));
- let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm512_kand() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b11001100_00110011;
+ let r = _mm512_kand(a, b);
+ let e: u16 = 0b11001100_00110011;
+ assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_storeu_pd() {
- let a = _mm512_set1_pd(9.);
- let mut r = _mm512_undefined_pd();
- _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
- assert_eq_m512d(r, a);
+ unsafe fn test_kand_mask16() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b11001100_00110011;
+ let r = _kand_mask16(a, b);
+ let e: u16 = 0b11001100_00110011;
+ assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_loadu_ps() {
- let a = &[
- 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
- ];
- let p = a.as_ptr();
- let r = _mm512_loadu_ps(black_box(p));
- let e = _mm512_setr_ps(
- 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
- );
- assert_eq_m512(r, e);
+ unsafe fn test_mm512_kor() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b00101110_00001011;
+ let r = _mm512_kor(a, b);
+ let e: u16 = 0b11101110_00111011;
+ assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_storeu_ps() {
- let a = _mm512_set1_ps(9.);
- let mut r = _mm512_undefined_ps();
- _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
- assert_eq_m512(r, a);
+ unsafe fn test_kor_mask16() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b00101110_00001011;
+ let r = _kor_mask16(a, b);
+ let e: u16 = 0b11101110_00111011;
+ assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setr_pd() {
- let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
- assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
+ unsafe fn test_mm512_kxor() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b00101110_00001011;
+ let r = _mm512_kxor(a, b);
+ let e: u16 = 0b11100010_00111000;
+ assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set_pd() {
- let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
- assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
+ unsafe fn test_kxor_mask16() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b00101110_00001011;
+ let r = _kxor_mask16(a, b);
+ let e: u16 = 0b11100010_00111000;
+ assert_eq!(r, e);
}
}
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index ce2970ee51..aa6857ecce 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -49,6 +49,42 @@ mod tests {
use crate::core_arch::x86::*;
use crate::core_arch::x86_64::*;
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_abs_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let r = _mm512_abs_epi64(a);
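+ // abs(i64::MIN) does not fit in an i64 and wraps back to i64::MIN, written below as i64::MAX.wrapping_add(1)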
+ let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_abs_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_setr_epi64(
+ 0, 1, -1, i64::MAX,
+ i64::MIN, 100, -100, -32
+ );
+ let r = _mm512_mask_abs_epi64(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_abs_epi64(a, 0b11111111, a);
+ let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_abs_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_setr_epi64(
+ 0, 1, -1, i64::MAX,
+ i64::MIN, 100, -100, -32
+ );
+ let r = _mm512_maskz_abs_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_abs_epi64(0b01111111, a);
+ let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 0);
+ assert_eq_m512i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_pd() {
assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
@@ -854,4 +890,1031 @@ mod tests {
}
assert_eq!(&arr[..], &expected[..],);
}
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rol_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_rol_epi64(a, 1);
+ let e = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rol_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_mask_rol_epi64(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1);
+ let e = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rol_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 63,
+ );
+ let r = _mm512_maskz_rol_epi64(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_rol_epi64(0b00001111, a, 1);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_ror_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_ror_epi64(a, 1);
+ let e = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_ror_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_mask_ror_epi64(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1);
+ let e = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_ror_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 0,
+ );
+ let r = _mm512_maskz_ror_epi64(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_ror_epi64(0b00001111, a, 1);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_slli_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_slli_epi64(a, 1);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_slli_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_mask_slli_epi64(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_slli_epi64(a, 0b11111111, a, 1);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_slli_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 63,
+ );
+ let r = _mm512_maskz_slli_epi64(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_slli_epi64(0b00001111, a, 1);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srli_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_srli_epi64(a, 1);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srli_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let r = _mm512_mask_srli_epi64(a, 0, a, 1);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srli_epi64(a, 0b11111111, a, 1);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srli_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 0,
+ );
+ let r = _mm512_maskz_srli_epi64(0, a, 1);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srli_epi64(0b00001111, a, 1);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rolv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_rolv_epi64(a, b);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 0,
+ 1 << 34,
+ 1 << 35,
+ 1 << 36,
+ 1 << 37,
+ 1 << 38,
+ 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rolv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_rolv_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 0,
+ 1 << 34,
+ 1 << 35,
+ 1 << 36,
+ 1 << 37,
+ 1 << 38,
+ 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rolv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 62,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2);
+ let r = _mm512_maskz_rolv_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_rolv_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rorv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_rorv_epi64(a, b);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 63,
+ 1 << 30,
+ 1 << 29,
+ 1 << 28,
+ 1 << 27,
+ 1 << 26,
+ 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rorv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_rorv_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 63,
+ 1 << 30,
+ 1 << 29,
+ 1 << 28,
+ 1 << 27,
+ 1 << 26,
+ 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rorv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 0,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2);
+ let r = _mm512_maskz_rorv_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_rorv_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sllv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_sllv_epi64(a, count);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 0,
+ 1 << 34,
+ 1 << 35,
+ 1 << 36,
+ 1 << 37,
+ 1 << 38,
+ 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sllv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_sllv_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 33,
+ 0,
+ 1 << 35,
+ 1 << 36,
+ 1 << 37,
+ 1 << 38,
+ 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sllv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 63,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1);
+ let r = _mm512_maskz_sllv_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_sllv_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srlv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_srlv_epi64(a, count);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 0,
+ 1 << 30,
+ 1 << 29,
+ 1 << 28,
+ 1 << 27,
+ 1 << 26,
+ 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srlv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_srlv_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(
+ 1 << 32,
+ 0,
+ 1 << 30,
+ 1 << 29,
+ 1 << 28,
+ 1 << 27,
+ 1 << 26,
+ 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srlv_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 0,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_srlv_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srlv_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sll_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_sll_epi64(a, count);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+
+ let count = _mm_set_epi64x(1, 0);
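+ // only the low 64-bit lane of count is used as the shift amount, so this shifts by zero and leaves a unchanged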
+ let r = _mm512_sll_epi64(a, count);
+ assert_eq_m512i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sll_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 63,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_mask_sll_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sll_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 63,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_maskz_sll_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_sll_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srl_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_srl_epi64(a, count);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srl_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 0,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_mask_srl_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(
+ 0,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srl_epi64() {
+ let a = _mm512_set_epi64(
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 32,
+ 1 << 0,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_maskz_srl_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srl_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sra_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm_set_epi64x(0, 2);
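+ // the shift amount is the low 64-bit lane of count; the arithmetic shift keeps the sign, e.g. -16 >> 2 == -4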
+ let r = _mm512_sra_epi64(a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sra_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm_set_epi64x(0, 2);
+ let r = _mm512_mask_sra_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sra_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm_set_epi64x(0, 2);
+ let r = _mm512_maskz_sra_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_sra_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srav_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
+ let r = _mm512_srav_epi64(a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srav_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
+ let r = _mm512_mask_srav_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srav_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
+ let r = _mm512_maskz_srav_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srav_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srai_epi64() {
+ let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
+ let r = _mm512_srai_epi64(a, 2);
+ let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srai_epi64() {
+ let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
+ let r = _mm512_mask_srai_epi64(a, 0, a, 2);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_srai_epi64(a, 0b11111111, a, 2);
+ let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srai_epi64() {
+ let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
+ let r = _mm512_maskz_srai_epi64(0, a, 2);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_srai_epi64(0b00001111, a, 2);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_and_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_and_epi64(a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_and_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_mask_and_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_and_epi64(a, 0b01111111, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_and_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_maskz_and_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
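+ // mask bit 0 is the lowest lane, i.e. the last argument of _mm512_set_epi64; 0b00001111 keeps lanes 0..=3 and zeroes the rest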
+ let r = _mm512_maskz_and_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_and_si512() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_and_si512(a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_or_epi64(a, b);
+ let e = _mm512_set_epi64(
+ 1 << 0 | 1 << 13 | 1 << 15,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 2 | 1 << 3,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_or_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_mask_or_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_or_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(
+ 1 << 0 | 1 << 13 | 1 << 15,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 2 | 1 << 3,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_or_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_maskz_or_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_or_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_si512() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_or_si512(a, b);
+ let e = _mm512_set_epi64(
+ 1 << 0 | 1 << 13 | 1 << 15,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1 << 1 | 1 << 2 | 1 << 3,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_xor_epi64(a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_xor_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_mask_xor_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_xor_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_maskz_xor_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_xor_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_xor_si512(a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
}