diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs
new file mode 100644
index 0000000000..32de85eb55
--- /dev/null
+++ b/coresimd/ppsv/api/float_math.rs
@@ -0,0 +1,189 @@
+//! Float math
+
+/// Implements the floating-point math methods (`abs`, `sqrt`, `sqrte`,
+/// `rsqrte`, `fma`, `sin`, `cos`) as inherent methods on the vector type `$id`.
+macro_rules! impl_float_math {
+    ($id:ident) => {
+        impl $id {
+            /// Absolute value of each lane.
+            #[inline]
+            pub fn abs(self) -> Self {
+                use coresimd::ppsv::codegen::abs::FloatAbs;
+                FloatAbs::abs(self)
+            }
+
+            /// Square root of each lane.
+            #[inline]
+            pub fn sqrt(self) -> Self {
+                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
+                FloatSqrt::sqrt(self)
+            }
+
+            /// Square-root estimate (currently lowered through `simd_fsqrt`).
+            #[inline]
+            pub fn sqrte(self) -> Self {
+                use coresimd::simd_llvm::simd_fsqrt;
+                unsafe { simd_fsqrt(self) }
+            }
+
+            /// Reciprocal square-root estimate: `1. / simd_fsqrt(self)`.
+            #[inline]
+            pub fn rsqrte(self) -> Self {
+                unsafe {
+                    use coresimd::simd_llvm::simd_fsqrt;
+                    $id::splat(1.) / simd_fsqrt(self)
+                }
+            }
+
+            /// Fused multiply add: `self * y + z`
+            #[inline]
+            pub fn fma(self, y: Self, z: Self) -> Self {
+                use coresimd::ppsv::codegen::fma::FloatFma;
+                FloatFma::fma(self, y, z)
+            }
+
+            /// Sine of each lane.
+            #[inline]
+            pub fn sin(self) -> Self {
+                use coresimd::ppsv::codegen::sin::FloatSin;
+                FloatSin::sin(self)
+            }
+
+            /// Cosine of each lane.
+            #[inline]
+            pub fn cos(self) -> Self {
+                use coresimd::ppsv::codegen::cos::FloatCos;
+                FloatCos::cos(self)
+            }
+        }
+    };
+}
+
+/// Generates unit tests for the methods defined by `impl_float_math!` on the
+/// vector type `$id`, whose lane type is `$elem_ty`.
+macro_rules! test_float_math {
+    ($id:ident, $elem_ty:ident) => {
+
+        // sqrt(2) literal chosen by the size in bytes of `$elem_ty` (4 or 8).
+        fn sqrt2() -> $elem_ty {
+            match ::mem::size_of::<$elem_ty>() {
+                4 => 1.4142135 as $elem_ty,
+                8 => 1.4142135623730951 as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
+        // PI constant chosen by the size in bytes of `$elem_ty` (4 or 8).
+        fn pi() -> $elem_ty {
+            match ::mem::size_of::<$elem_ty>() {
+                4 => ::std::f32::consts::PI as $elem_ty,
+                8 => ::std::f64::consts::PI as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
+        #[test]
+        fn abs() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.abs());
+
+            let mo = $id::splat(-1 as $elem_ty);
+            assert_eq!(o, mo.abs());
+        }
+
+        #[test]
+        fn sqrt() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrt());
+            assert_eq!(o, o.sqrt());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            assert_eq!(e, t.sqrt());
+        }
+
+        #[test]
+        fn sqrte() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrte());
+            assert_eq!(o, o.sqrte());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            let error = (e - t.sqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty);
+
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn rsqrte() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.rsqrte());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = 1. / sqrt2();
+            let error = (e - t.rsqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty);
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn fma() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let t = $id::splat(2 as $elem_ty);
+            let t3 = $id::splat(3 as $elem_ty);
+            let f = $id::splat(4 as $elem_ty);
+
+            assert_eq!(z, z.fma(z, z));
+            assert_eq!(o, o.fma(o, z));
+            assert_eq!(o, o.fma(z, o));
+            assert_eq!(o, z.fma(o, o));
+
+            assert_eq!(t, o.fma(o, o));
+            assert_eq!(t, o.fma(t, z));
+            assert_eq!(t, t.fma(o, z));
+
+            assert_eq!(f, t.fma(t, z));
+            assert_eq!(f, t.fma(o, t));
+            assert_eq!(t3, t.fma(o, o));
+        }
+
+        #[test]
+        fn sin() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let p = $id::splat(pi() as $elem_ty);
+            let ph = $id::splat(pi() as $elem_ty / 2.);
+            let o_r = $id::splat((pi() as $elem_ty / 2.).sin());
+            let z_r = $id::splat((pi() as $elem_ty).sin());
+
+            assert_eq!(z, z.sin());
+            assert_eq!(o_r, ph.sin());
+            assert_eq!(z_r, p.sin());
+        }
+
+        #[test]
+        fn cos() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let p = $id::splat(pi() as $elem_ty);
+            let ph = $id::splat(pi() as $elem_ty / 2.);
+            let z_r = $id::splat((pi() as $elem_ty / 2.).cos());
+            let o_r = $id::splat((pi() as $elem_ty).cos());
+
+            assert_eq!(o, z.cos());
+            assert_eq!(z_r, ph.cos());
+            assert_eq!(o_r, p.cos());
+        }
+    };
+}
diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs
index 1915f6aeb0..1ce658e2d9 100644
--- a/coresimd/ppsv/api/mod.rs
+++ b/coresimd/ppsv/api/mod.rs
@@ -84,6 +84,8 @@ mod default;
 #[macro_use]
 mod eq;
 #[macro_use]
+mod float_math;
+#[macro_use]
 mod fmt;
 #[macro_use]
 mod from;
@@ -128,7 +130,8 @@ pub trait Lanes {}
 
 /// Defines a portable packed SIMD floating-point vector type.
 macro_rules! simd_f_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
             [impl_neg_op, $id, $elem_ty],
             [impl_partial_eq, $id],
             [impl_default, $id, $elem_ty],
-            [impl_float_minmax_ops, $id]
+            [impl_float_minmax_ops, $id],
+            [impl_float_math, $id]
         );
 
         $test_macro!(
@@ -160,6 +164,7 @@ macro_rules! simd_f_ty {
                 test_default!($id, $elem_ty);
                 test_mask_select!($mask_ty, $id, $elem_ty);
                 test_float_minmax_ops!($id, $elem_ty);
+                test_float_math!($id, $elem_ty);
             }
         );
     }
@@ -167,7 +172,8 @@ macro_rules! simd_f_ty {
 
 /// Defines a portable packed SIMD signed-integer vector type.
 macro_rules! simd_i_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -221,7 +227,8 @@ macro_rules! simd_i_ty {
 
 /// Defines a portable packed SIMD unsigned-integer vector type.
 macro_rules! simd_u_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
diff --git a/coresimd/ppsv/codegen/abs.rs b/coresimd/ppsv/codegen/abs.rs
new file mode 100644
index 0000000000..edca549c24
--- /dev/null
+++ b/coresimd/ppsv/codegen/abs.rs
@@ -0,0 +1,44 @@
+//! Vector absolute value
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fabs.v2f32"]
+    fn abs_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.fabs.v4f32"]
+    fn abs_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.fabs.v8f32"]
+    fn abs_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.fabs.v16f32"]
+    fn abs_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.fabs.v2f64"]
+    fn abs_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.fabs.v4f64"]
+    fn abs_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.fabs.v8f64"]
+    fn abs_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Vector absolute value; each impl forwards to the matching `llvm.fabs.*` binding.
+pub(crate) trait FloatAbs {
+    fn abs(self) -> Self;
+}
+
+macro_rules! impl_fabs {
+    ($id:ident: $fn:ident) => {
+        impl FloatAbs for $id {
+            fn abs(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fabs!(f32x2: abs_v2f32);
+impl_fabs!(f32x4: abs_v4f32);
+impl_fabs!(f32x8: abs_v8f32);
+impl_fabs!(f32x16: abs_v16f32);
+impl_fabs!(f64x2: abs_v2f64);
+impl_fabs!(f64x4: abs_v4f64);
+impl_fabs!(f64x8: abs_v8f64);
diff --git a/coresimd/ppsv/codegen/cos.rs b/coresimd/ppsv/codegen/cos.rs
new file mode 100644
index 0000000000..fdc61ea464
--- /dev/null
+++ b/coresimd/ppsv/codegen/cos.rs
@@ -0,0 +1,44 @@
+//! Exact vector cos
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.cos.v2f32"]
+    fn cos_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.cos.v4f32"]
+    fn cos_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.cos.v8f32"]
+    fn cos_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.cos.v16f32"]
+    fn cos_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.cos.v2f64"]
+    fn cos_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.cos.v4f64"]
+    fn cos_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.cos.v8f64"]
+    fn cos_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Vector cosine; each impl forwards to the matching `llvm.cos.*` binding.
+pub(crate) trait FloatCos {
+    fn cos(self) -> Self;
+}
+
+macro_rules! impl_fcos {
+    ($id:ident: $fn:ident) => {
+        impl FloatCos for $id {
+            fn cos(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fcos!(f32x2: cos_v2f32);
+impl_fcos!(f32x4: cos_v4f32);
+impl_fcos!(f32x8: cos_v8f32);
+impl_fcos!(f32x16: cos_v16f32);
+impl_fcos!(f64x2: cos_v2f64);
+impl_fcos!(f64x4: cos_v4f64);
+impl_fcos!(f64x8: cos_v8f64);
diff --git a/coresimd/ppsv/codegen/fma.rs b/coresimd/ppsv/codegen/fma.rs
new file mode 100644
index 0000000000..9d63ac6bee
--- /dev/null
+++ b/coresimd/ppsv/codegen/fma.rs
@@ -0,0 +1,44 @@
+//! Vector fused multiply add
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fma.v2f32"]
+    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fma.v4f32"]
+    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fma.v8f32"]
+    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fma.v16f32"]
+    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    #[link_name = "llvm.fma.v2f64"]
+    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fma.v4f64"]
+    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fma.v8f64"]
+    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+/// Vector fused multiply add; each impl forwards to the matching `llvm.fma.*` binding.
+pub(crate) trait FloatFma {
+    fn fma(self, y: Self, z: Self) -> Self;
+}
+
+macro_rules! impl_fma {
+    ($id:ident: $fn:ident) => {
+        impl FloatFma for $id {
+            fn fma(self, y: Self, z: Self) -> Self {
+                unsafe { $fn(self, y, z) }
+            }
+        }
+    }
+}
+
+impl_fma!(f32x2: fma_v2f32);
+impl_fma!(f32x4: fma_v4f32);
+impl_fma!(f32x8: fma_v8f32);
+impl_fma!(f32x16: fma_v16f32);
+impl_fma!(f64x2: fma_v2f64);
+impl_fma!(f64x4: fma_v4f64);
+impl_fma!(f64x8: fma_v8f64);
diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs
index 448587b795..a1e8c24f6b 100644
--- a/coresimd/ppsv/codegen/mod.rs
+++ b/coresimd/ppsv/codegen/mod.rs
@@ -4,3 +4,9 @@
 pub mod wrapping;
 
 pub mod masks_reductions;
+
+pub mod sqrt;
+pub mod abs;
+pub mod fma;
+pub mod sin;
+pub mod cos;
diff --git a/coresimd/ppsv/codegen/sin.rs b/coresimd/ppsv/codegen/sin.rs
new file mode 100644
index 0000000000..cf7f3dea20
--- /dev/null
+++ b/coresimd/ppsv/codegen/sin.rs
@@ -0,0 +1,44 @@
+//! Exact vector sin
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sin.v2f32"]
+    fn sin_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sin.v4f32"]
+    fn sin_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sin.v8f32"]
+    fn sin_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sin.v16f32"]
+    fn sin_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sin.v2f64"]
+    fn sin_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sin.v4f64"]
+    fn sin_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sin.v8f64"]
+    fn sin_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Vector sine; each impl forwards to the matching `llvm.sin.*` binding.
+pub(crate) trait FloatSin {
+    fn sin(self) -> Self;
+}
+
+macro_rules! impl_fsin {
+    ($id:ident: $fn:ident) => {
+        impl FloatSin for $id {
+            fn sin(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fsin!(f32x2: sin_v2f32);
+impl_fsin!(f32x4: sin_v4f32);
+impl_fsin!(f32x8: sin_v8f32);
+impl_fsin!(f32x16: sin_v16f32);
+impl_fsin!(f64x2: sin_v2f64);
+impl_fsin!(f64x4: sin_v4f64);
+impl_fsin!(f64x8: sin_v8f64);
diff --git a/coresimd/ppsv/codegen/sqrt.rs b/coresimd/ppsv/codegen/sqrt.rs
new file mode 100644
index 0000000000..8e86650555
--- /dev/null
+++ b/coresimd/ppsv/codegen/sqrt.rs
@@ -0,0 +1,44 @@
+//! Exact vector square-root
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sqrt.v2f32"]
+    fn sqrt_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn sqrt_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sqrt.v8f32"]
+    fn sqrt_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn sqrt_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sqrt.v2f64"]
+    fn sqrt_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sqrt.v4f64"]
+    fn sqrt_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn sqrt_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Vector square root; each impl forwards to the matching `llvm.sqrt.*` binding.
+pub(crate) trait FloatSqrt {
+    fn sqrt(self) -> Self;
+}
+
+macro_rules! impl_fsqrt {
+    ($id:ident: $fn:ident) => {
+        impl FloatSqrt for $id {
+            fn sqrt(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fsqrt!(f32x2: sqrt_v2f32);
+impl_fsqrt!(f32x4: sqrt_v4f32);
+impl_fsqrt!(f32x8: sqrt_v8f32);
+impl_fsqrt!(f32x16: sqrt_v16f32);
+impl_fsqrt!(f64x2: sqrt_v2f64);
+impl_fsqrt!(f64x4: sqrt_v4f64);
+impl_fsqrt!(f64x8: sqrt_v8f64);
diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs
index fdcf4f9cb0..c83c2d4b35 100644
--- a/coresimd/simd_llvm.rs
+++ b/coresimd/simd_llvm.rs
@@ -49,6 +49,9 @@ extern "platform-intrinsic" {
     pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
 
     pub fn simd_fmin<T>(a: T, b: T) -> T;
-// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
-// pub fn simd_fmax(a: T, b: T) -> T;
+    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
+    // pub fn simd_fmax<T>(a: T, b: T) -> T;
+
+    pub fn simd_fsqrt<T>(a: T) -> T;
+    pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
 }