From 909e2782f4ed78de2efbf19c3d0f1aaa513a3b68 Mon Sep 17 00:00:00 2001
From: gnzlbg
Date: Mon, 4 Jun 2018 14:17:24 +0200
Subject: [PATCH 1/2] add vertical float math: abs, sqrt, sqrte, rsqrte, fma

---
 coresimd/ppsv/api/float_math.rs | 132 ++++++++++++++++++++++++++++++++
 coresimd/ppsv/api/mod.rs        |  15 +++-
 coresimd/ppsv/codegen/abs.rs    |  43 +++++++++++
 coresimd/ppsv/codegen/fma.rs    |  43 +++++++++++
 coresimd/ppsv/codegen/mod.rs    |   4 +
 coresimd/ppsv/codegen/sqrt.rs   |  43 +++++++++++
 coresimd/simd_llvm.rs           |   7 +-
 7 files changed, 281 insertions(+), 6 deletions(-)
 create mode 100644 coresimd/ppsv/api/float_math.rs
 create mode 100644 coresimd/ppsv/codegen/abs.rs
 create mode 100644 coresimd/ppsv/codegen/fma.rs
 create mode 100644 coresimd/ppsv/codegen/sqrt.rs

diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs
new file mode 100644
index 0000000000..e0178b703c
--- /dev/null
+++ b/coresimd/ppsv/api/float_math.rs
@@ -0,0 +1,132 @@
+//! Float math
+
+macro_rules! impl_float_math {
+    ($id:ident) => {
+        impl $id {
+            /// Absolute-value
+            #[inline]
+            pub fn abs(self) -> Self {
+                use coresimd::ppsv::codegen::abs::FloatAbs;
+                FloatAbs::abs(self)
+            }
+
+            /// Square-root
+            #[inline]
+            pub fn sqrt(self) -> Self {
+                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
+                FloatSqrt::sqrt(self)
+            }
+
+            /// Square-root estimate
+            #[inline]
+            pub fn sqrte(self) -> Self {
+                use coresimd::simd_llvm::simd_fsqrt;
+                unsafe { simd_fsqrt(self) }
+            }
+
+            /// Reciprocal square-root estimate
+            #[inline]
+            pub fn rsqrte(self) -> Self {
+                unsafe {
+                    use coresimd::simd_llvm::simd_fsqrt;
+                    $id::splat(1.) / simd_fsqrt(self)
+                }
+            }
+
+            /// Fused multiply add: `self * y + z`
+            #[inline]
+            pub fn fma(self, y: Self, z: Self) -> Self {
+                use coresimd::ppsv::codegen::fma::FloatFma;
+                FloatFma::fma(self, y, z)
+            }
+        }
+    };
+}
+
+macro_rules! test_float_math {
+    ($id:ident, $elem_ty:ident) => {
+
+        fn sqrt2() -> $elem_ty {
+            match ::mem::size_of::<$elem_ty>() {
+                4 => 1.4142135 as $elem_ty,
+                8 => 1.4142135623730951 as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
+        #[test]
+        fn abs() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.abs());
+
+            let mo = $id::splat(-1 as $elem_ty);
+            assert_eq!(o, mo.abs());
+        }
+
+        #[test]
+        fn sqrt() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrt());
+            assert_eq!(o, o.sqrt());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            assert_eq!(e, t.sqrt());
+        }
+
+        #[test]
+        fn sqrte() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrte());
+            assert_eq!(o, o.sqrte());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            let error = (e - t.sqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty);
+
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn rsqrte() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.rsqrte());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = 1. / sqrt2();
+            let error = (e - t.rsqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty);
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn fma() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let t = $id::splat(2 as $elem_ty);
+            let t3 = $id::splat(3 as $elem_ty);
+            let f = $id::splat(4 as $elem_ty);
+
+            assert_eq!(z, z.fma(z, z));
+            assert_eq!(o, o.fma(o, z));
+            assert_eq!(o, o.fma(z, o));
+            assert_eq!(o, z.fma(o, o));
+
+            assert_eq!(t, o.fma(o, o));
+            assert_eq!(t, o.fma(t, z));
+            assert_eq!(t, t.fma(o, z));
+
+            assert_eq!(f, t.fma(t, z));
+            assert_eq!(f, t.fma(o, t));
+            assert_eq!(t3, t.fma(t, o));
+        }
+    };
+}
diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs
index 1915f6aeb0..1ce658e2d9 100644
--- a/coresimd/ppsv/api/mod.rs
+++ b/coresimd/ppsv/api/mod.rs
@@ -84,6 +84,8 @@ mod default;
 #[macro_use]
 mod eq;
 #[macro_use]
+mod float_math;
+#[macro_use]
 mod fmt;
 #[macro_use]
 mod from;
@@ -128,7 +130,8 @@ pub trait Lanes {}
 
 /// Defines a portable packed SIMD floating-point vector type.
 macro_rules! simd_f_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
             [impl_neg_op, $id, $elem_ty],
             [impl_partial_eq, $id],
             [impl_default, $id, $elem_ty],
-            [impl_float_minmax_ops, $id]
+            [impl_float_minmax_ops, $id],
+            [impl_float_math, $id]
         );
 
         $test_macro!(
@@ -160,6 +164,7 @@ macro_rules! simd_f_ty {
                 test_default!($id, $elem_ty);
                 test_mask_select!($mask_ty, $id, $elem_ty);
                 test_float_minmax_ops!($id, $elem_ty);
+                test_float_math!($id, $elem_ty);
             }
         );
     }
@@ -167,7 +172,8 @@ macro_rules! simd_f_ty {
 
 /// Defines a portable packed SIMD signed-integer vector type.
 macro_rules! simd_i_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -221,7 +227,8 @@ macro_rules! simd_i_ty {
 
 /// Defines a portable packed SIMD unsigned-integer vector type.
 macro_rules! simd_u_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
diff --git a/coresimd/ppsv/codegen/abs.rs b/coresimd/ppsv/codegen/abs.rs
new file mode 100644
index 0000000000..edca549c24
--- /dev/null
+++ b/coresimd/ppsv/codegen/abs.rs
@@ -0,0 +1,43 @@
+//! Vector absolute value
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fabs.v2f32"]
+    fn abs_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.fabs.v4f32"]
+    fn abs_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.fabs.v8f32"]
+    fn abs_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.fabs.v16f32"]
+    fn abs_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.fabs.v2f64"]
+    fn abs_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.fabs.v4f64"]
+    fn abs_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.fabs.v8f64"]
+    fn abs_v8f64(x: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatAbs {
+    fn abs(self) -> Self;
+}
+
+macro_rules! impl_fabs {
+    ($id:ident: $fn:ident) => {
+        impl FloatAbs for $id {
+            fn abs(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fabs!(f32x2: abs_v2f32);
+impl_fabs!(f32x4: abs_v4f32);
+impl_fabs!(f32x8: abs_v8f32);
+impl_fabs!(f32x16: abs_v16f32);
+impl_fabs!(f64x2: abs_v2f64);
+impl_fabs!(f64x4: abs_v4f64);
+impl_fabs!(f64x8: abs_v8f64);
diff --git a/coresimd/ppsv/codegen/fma.rs b/coresimd/ppsv/codegen/fma.rs
new file mode 100644
index 0000000000..9d63ac6bee
--- /dev/null
+++ b/coresimd/ppsv/codegen/fma.rs
@@ -0,0 +1,43 @@
+//! Vector fused multiply add
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fma.v2f32"]
+    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fma.v4f32"]
+    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fma.v8f32"]
+    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fma.v16f32"]
+    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    #[link_name = "llvm.fma.v2f64"]
+    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fma.v4f64"]
+    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fma.v8f64"]
+    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatFma {
+    fn fma(self, y: Self, z: Self) -> Self;
+}
+
+macro_rules! impl_fma {
+    ($id:ident: $fn:ident) => {
+        impl FloatFma for $id {
+            fn fma(self, y: Self, z: Self) -> Self {
+                unsafe { $fn(self, y, z) }
+            }
+        }
+    }
+}
+
+impl_fma!(f32x2: fma_v2f32);
+impl_fma!(f32x4: fma_v4f32);
+impl_fma!(f32x8: fma_v8f32);
+impl_fma!(f32x16: fma_v16f32);
+impl_fma!(f64x2: fma_v2f64);
+impl_fma!(f64x4: fma_v4f64);
+impl_fma!(f64x8: fma_v8f64);
diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs
index 448587b795..2791e0670e 100644
--- a/coresimd/ppsv/codegen/mod.rs
+++ b/coresimd/ppsv/codegen/mod.rs
@@ -4,3 +4,7 @@
 pub mod wrapping;
 
 pub mod masks_reductions;
+
+pub mod sqrt;
+pub mod abs;
+pub mod fma;
diff --git a/coresimd/ppsv/codegen/sqrt.rs b/coresimd/ppsv/codegen/sqrt.rs
new file mode 100644
index 0000000000..8e86650555
--- /dev/null
+++ b/coresimd/ppsv/codegen/sqrt.rs
@@ -0,0 +1,43 @@
+//! Exact vector square-root
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sqrt.v2f32"]
+    fn sqrt_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn sqrt_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sqrt.v8f32"]
+    fn sqrt_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn sqrt_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sqrt.v2f64"]
+    fn sqrt_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sqrt.v4f64"]
+    fn sqrt_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn sqrt_v8f64(x: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatSqrt {
+    fn sqrt(self) -> Self;
+}
+
+macro_rules! impl_fsqrt {
+    ($id:ident: $fn:ident) => {
+        impl FloatSqrt for $id {
+            fn sqrt(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fsqrt!(f32x2: sqrt_v2f32);
+impl_fsqrt!(f32x4: sqrt_v4f32);
+impl_fsqrt!(f32x8: sqrt_v8f32);
+impl_fsqrt!(f32x16: sqrt_v16f32);
+impl_fsqrt!(f64x2: sqrt_v2f64);
+impl_fsqrt!(f64x4: sqrt_v4f64);
+impl_fsqrt!(f64x8: sqrt_v8f64);
diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs
index fdcf4f9cb0..c83c2d4b35 100644
--- a/coresimd/simd_llvm.rs
+++ b/coresimd/simd_llvm.rs
@@ -49,6 +49,9 @@ extern "platform-intrinsic" {
     pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
 
     pub fn simd_fmin<T>(a: T, b: T) -> T;
-// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
-// pub fn simd_fmax<T>(a: T, b: T) -> T;
+    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
+    // pub fn simd_fmax<T>(a: T, b: T) -> T;
+
+    pub fn simd_fsqrt<T>(a: T) -> T;
+    pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
 }

From 75009a8f2465c97dc6b2958f94872fedd2ac385c Mon Sep 17 00:00:00 2001
From: gnzlbg
Date: Mon, 4 Jun 2018 17:07:09 +0200
Subject: [PATCH 2/2] add float math: sin, cos

---
 coresimd/ppsv/api/float_math.rs | 53 ++++++++++++++++++++++++++++++++-
 coresimd/ppsv/codegen/cos.rs    | 43 ++++++++++++++++++++++++++
 coresimd/ppsv/codegen/mod.rs    |  2 ++
 coresimd/ppsv/codegen/sin.rs    | 43 ++++++++++++++++++++++++++
 4 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 coresimd/ppsv/codegen/cos.rs
 create mode 100644 coresimd/ppsv/codegen/sin.rs

diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs
index e0178b703c..32de85eb55 100644
--- a/coresimd/ppsv/api/float_math.rs
+++ b/coresimd/ppsv/api/float_math.rs
@@ -39,6 +39,20 @@ macro_rules! impl_float_math {
                 use coresimd::ppsv::codegen::fma::FloatFma;
                 FloatFma::fma(self, y, z)
             }
+
+            /// Sine
+            #[inline]
+            pub fn sin(self) -> Self {
+                use coresimd::ppsv::codegen::sin::FloatSin;
+                FloatSin::sin(self)
+            }
+
+            /// Cosine
+            #[inline]
+            pub fn cos(self) -> Self {
+                use coresimd::ppsv::codegen::cos::FloatCos;
+                FloatCos::cos(self)
+            }
         }
     };
 }
@@ -54,6 +68,14 @@ macro_rules! test_float_math {
             }
         }
 
+        fn pi() -> $elem_ty {
+            match ::mem::size_of::<$elem_ty>() {
+                4 => ::std::f32::consts::PI as $elem_ty,
+                8 => ::std::f64::consts::PI as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
         #[test]
         fn abs() {
             use coresimd::simd::*;
@@ -126,7 +148,36 @@ macro_rules! test_float_math {
 
             assert_eq!(f, t.fma(t, z));
             assert_eq!(f, t.fma(o, t));
-            assert_eq!(t3, t.fma(t, o));
+            assert_eq!(t3, t.fma(o, o));
+        }
+
+        #[test]
+        fn sin() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let p = $id::splat(pi() as $elem_ty);
+            let ph = $id::splat(pi() as $elem_ty / 2.);
+            let o_r = $id::splat((pi() as $elem_ty / 2.).sin());
+            let z_r = $id::splat((pi() as $elem_ty).sin());
+
+            assert_eq!(z, z.sin());
+            assert_eq!(o_r, ph.sin());
+            assert_eq!(z_r, p.sin());
+        }
+
+        #[test]
+        fn cos() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let p = $id::splat(pi() as $elem_ty);
+            let ph = $id::splat(pi() as $elem_ty / 2.);
+            let z_r = $id::splat((pi() as $elem_ty / 2.).cos());
+            let o_r = $id::splat((pi() as $elem_ty).cos());
+
+            assert_eq!(o, z.cos());
+            assert_eq!(z_r, ph.cos());
+            assert_eq!(o_r, p.cos());
         }
     };
 }
diff --git a/coresimd/ppsv/codegen/cos.rs b/coresimd/ppsv/codegen/cos.rs
new file mode 100644
index 0000000000..fdc61ea464
--- /dev/null
+++ b/coresimd/ppsv/codegen/cos.rs
@@ -0,0 +1,43 @@
+//! Exact vector cos
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.cos.v2f32"]
+    fn cos_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.cos.v4f32"]
+    fn cos_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.cos.v8f32"]
+    fn cos_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.cos.v16f32"]
+    fn cos_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.cos.v2f64"]
+    fn cos_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.cos.v4f64"]
+    fn cos_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.cos.v8f64"]
+    fn cos_v8f64(x: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatCos {
+    fn cos(self) -> Self;
+}
+
+macro_rules! impl_fcos {
+    ($id:ident: $fn:ident) => {
+        impl FloatCos for $id {
+            fn cos(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fcos!(f32x2: cos_v2f32);
+impl_fcos!(f32x4: cos_v4f32);
+impl_fcos!(f32x8: cos_v8f32);
+impl_fcos!(f32x16: cos_v16f32);
+impl_fcos!(f64x2: cos_v2f64);
+impl_fcos!(f64x4: cos_v4f64);
+impl_fcos!(f64x8: cos_v8f64);
diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs
index 2791e0670e..a1e8c24f6b 100644
--- a/coresimd/ppsv/codegen/mod.rs
+++ b/coresimd/ppsv/codegen/mod.rs
@@ -8,3 +8,5 @@ pub mod masks_reductions;
 pub mod sqrt;
 pub mod abs;
 pub mod fma;
+pub mod sin;
+pub mod cos;
diff --git a/coresimd/ppsv/codegen/sin.rs b/coresimd/ppsv/codegen/sin.rs
new file mode 100644
index 0000000000..cf7f3dea20
--- /dev/null
+++ b/coresimd/ppsv/codegen/sin.rs
@@ -0,0 +1,43 @@
+//! Exact vector sin
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sin.v2f32"]
+    fn sin_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sin.v4f32"]
+    fn sin_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sin.v8f32"]
+    fn sin_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sin.v16f32"]
+    fn sin_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sin.v2f64"]
+    fn sin_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sin.v4f64"]
+    fn sin_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sin.v8f64"]
+    fn sin_v8f64(x: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatSin {
+    fn sin(self) -> Self;
+}
+
+macro_rules! impl_fsin {
+    ($id:ident: $fn:ident) => {
+        impl FloatSin for $id {
+            fn sin(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fsin!(f32x2: sin_v2f32);
+impl_fsin!(f32x4: sin_v4f32);
+impl_fsin!(f32x8: sin_v8f32);
+impl_fsin!(f32x16: sin_v16f32);
+impl_fsin!(f64x2: sin_v2f64);
+impl_fsin!(f64x4: sin_v4f64);
+impl_fsin!(f64x8: sin_v8f64);