
Add 64 bit integer AVX512f comparisons and the intrinsics needed to test them #856

Merged: 12 commits, May 28, 2020
4 changes: 4 additions & 0 deletions crates/core_arch/src/simd.rs
@@ -201,3 +201,7 @@ simd_ty!(i32x16[i32]:
simd_ty!(i64x8[i64]:
i64, i64, i64, i64, i64, i64, i64, i64
| x0, x1, x2, x3, x4, x5, x6, x7);

simd_ty!(u64x8[u64]:
u64, u64, u64, u64, u64, u64, u64, u64
| x0, x1, x2, x3, x4, x5, x6, x7);
126 changes: 126 additions & 0 deletions crates/core_arch/src/x86/avx512f.rs
@@ -94,6 +94,132 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
transmute(i64x8::splat(a))
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epu64_mask(a, b) & m
}
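// Illustrative sketch, not part of this PR's diff: bit i of the returned __mmask8
// corresponds to lane i, and with `_mm512_setr_epi64` (added in this PR's x86_64 module)
// the first argument is lane 0, i.e. the least significant mask bit. Assumes the code
// only runs on a CPU with AVX-512F.
#[target_feature(enable = "avx512f")]
unsafe fn cmplt_epu64_mask_sketch() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_set1_epi64(4);
    // lanes 0..=3 are below 4, so the low four mask bits are set
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0b0000_1111);
    // the zeromask variant clears result bits whose mask bit is unset
    assert_eq!(_mm512_mask_cmplt_epu64_mask(0b0000_0101, a, b), 0b0000_0101);
}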

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epu64_mask(a, b) & m
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epu64_mask(a, b) & m
}

/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epi64_mask(a, b) & m
}
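// Illustrative sketch, not part of this PR's diff: the epi64 and epu64 comparisons differ
// only in how the lane bits are interpreted. A lane holding -1 is u64::MAX when viewed as
// unsigned, so the signed and unsigned less-than masks can be complete opposites for the
// same inputs. Assumes a CPU with AVX-512F.
#[target_feature(enable = "avx512f")]
unsafe fn signed_vs_unsigned_cmplt_sketch() {
    let a = _mm512_set1_epi64(-1); // every lane is -1, i.e. u64::MAX when unsigned
    let b = _mm512_set1_epi64(0);
    assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0b1111_1111); // -1 < 0 in every lane
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0b0000_0000); // u64::MAX < 0 in no lane
}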

/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epi64_mask(a, b) & m
}

/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for equality, and store the results in mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epi64_mask(a, b) & m
}
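// Illustrative sketch, not part of this PR's diff: because __mmask8 is an ordinary u8,
// the masks returned by these intrinsics compose with plain bitwise operators before
// being fed back into the masked variants. `_mm512_setr_epi64` comes from this PR's
// x86_64 additions. Assumes a CPU with AVX-512F.
#[target_feature(enable = "avx512f")]
unsafe fn mask_combining_sketch() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let lo = _mm512_cmplt_epi64_mask(a, _mm512_set1_epi64(6)); // lanes 0..=5 -> 0b0011_1111
    let hi = _mm512_cmpgt_epi64_mask(a, _mm512_set1_epi64(1)); // lanes 2..=7 -> 0b1111_1100
    let both: __mmask8 = lo & hi; // lanes 2..=5
    assert_eq!(both, 0b0011_1100);
}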

#[cfg(test)]
mod tests {
use std;
14 changes: 14 additions & 0 deletions crates/core_arch/src/x86/mod.rs
@@ -346,6 +346,10 @@ types! {
#[allow(non_camel_case_types)]
pub type __mmask16 = i16;

/// The `__mmask8` type used in AVX-512 intrinsics, an 8-bit integer
#[allow(non_camel_case_types)]
pub type __mmask8 = u8;

#[cfg(test)]
mod test;
#[cfg(test)]
@@ -509,6 +513,16 @@ pub(crate) trait m512iExt: Sized {
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
unsafe { transmute(self.as_m512i()) }
}

#[inline]
fn as_u64x8(self) -> crate::core_arch::simd::u64x8 {
unsafe { transmute(self.as_m512i()) }
}

#[inline]
fn as_i64x8(self) -> crate::core_arch::simd::i64x8 {
unsafe { transmute(self.as_m512i()) }
}
}

impl m512iExt for __m512i {
165 changes: 165 additions & 0 deletions crates/core_arch/src/x86_64/avx512f.rs
@@ -0,0 +1,165 @@
use crate::{
core_arch::{simd::*, x86::*},
mem::transmute,
};

/// Sets packed 64-bit integers in `dst` with the supplied values.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi64(
e0: i64,
e1: i64,
e2: i64,
e3: i64,
e4: i64,
e5: i64,
e6: i64,
e7: i64,
) -> __m512i {
_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Sets packed 64-bit integers in `dst` with the supplied values in
/// reverse order.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi64(
e0: i64,
e1: i64,
e2: i64,
e3: i64,
e4: i64,
e5: i64,
e6: i64,
e7: i64,
) -> __m512i {
let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}
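// Illustrative sketch, not part of this PR's diff: _mm512_set_epi64 follows Intel's
// convention where the first argument is the highest-indexed element, while
// _mm512_setr_epi64 takes elements in lane order; transmuting to an array (valid here
// because both are 64 bytes) exposes that layout on little-endian x86.
#[target_feature(enable = "avx512f")]
unsafe fn set_epi64_ordering_sketch() {
    let v = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    let lanes: [i64; 8] = transmute(v);
    // lane 0 holds the last argument, lane 7 the first
    assert_eq!(lanes, [0, 1, 2, 3, 4, 5, 6, 7]);
}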

#[cfg(test)]
mod tests {
use std;
use stdarch_test::simd_test;

use crate::core_arch::x86::*;
use crate::core_arch::x86_64::*;

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmplt_epu64_mask(a, b);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01111010;
let r = _mm512_mask_cmplt_epu64_mask(mask, a, b);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmpgt_epu64_mask(b, a);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01111010;
let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let m = _mm512_cmpeq_epu64_mask(b, a);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let mask = 0b01111010;
let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmplt_epi64_mask(a, b);
assert_eq!(m, 0b00000101);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01100110;
let r = _mm512_mask_cmplt_epi64_mask(mask, a, b);
assert_eq!(r, 0b00000100);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmpgt_epi64_mask(b, a);
assert_eq!(m, 0b00000101);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01100110;
let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a);
assert_eq!(r, 0b00000100);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let m = _mm512_cmpeq_epi64_mask(b, a);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let mask = 0b01111010;
let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi64() {
let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_epi64() {
let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
}
}
3 changes: 3 additions & 0 deletions crates/core_arch/src/x86_64/mod.rs
@@ -33,6 +33,9 @@ pub use self::bmi2::*;
mod avx2;
pub use self::avx2::*;

mod avx512f;
pub use self::avx512f::*;

mod bswap;
pub use self::bswap::*;

1 change: 1 addition & 0 deletions crates/stdarch-verify/src/lib.rs
@@ -145,6 +145,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"__m512" => quote! { &M512 },
"__m512d" => quote! { &M512D },
"__m512i" => quote! { &M512I },
"__mmask8" => quote! { &MMASK8 },
"__mmask16" => quote! { &MMASK16 },
"__m64" => quote! { &M64 },
"bool" => quote! { &BOOL },
3 changes: 3 additions & 0 deletions crates/stdarch-verify/tests/x86-intel.rs
@@ -53,6 +53,7 @@ static M256D: Type = Type::M256D;
static M512: Type = Type::M512;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK8: Type = Type::MMASK8;
static MMASK16: Type = Type::MMASK16;

static TUPLE: Type = Type::Tuple;
@@ -76,6 +77,7 @@ enum Type {
M512,
M512D,
M512I,
MMASK8,
MMASK16,
Tuple,
CpuidResult,
@@ -653,6 +655,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
(&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}

(&Type::MMASK8, "__mmask8") => {}
(&Type::MMASK16, "__mmask16") => {}

// This is a macro (?) in C which seems to mutate its arguments, but