From 909e2782f4ed78de2efbf19c3d0f1aaa513a3b68 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Mon, 4 Jun 2018 14:17:24 +0200
Subject: [PATCH 1/2] add vertical float math: abs, sqrt, sqrte, rsqrte, fma

---
 coresimd/ppsv/api/float_math.rs | 132 ++++++++++++++++++++++++++++++++
 coresimd/ppsv/api/mod.rs        |  15 +++-
 coresimd/ppsv/codegen/abs.rs    |  43 +++++++++++
 coresimd/ppsv/codegen/fma.rs    |  43 +++++++++++
 coresimd/ppsv/codegen/mod.rs    |   4 +
 coresimd/ppsv/codegen/sqrt.rs   |  43 +++++++++++
 coresimd/simd_llvm.rs           |   7 +-
 7 files changed, 281 insertions(+), 6 deletions(-)
 create mode 100644 coresimd/ppsv/api/float_math.rs
 create mode 100644 coresimd/ppsv/codegen/abs.rs
 create mode 100644 coresimd/ppsv/codegen/fma.rs
 create mode 100644 coresimd/ppsv/codegen/sqrt.rs

diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs
new file mode 100644
index 0000000000..e0178b703c
--- /dev/null
+++ b/coresimd/ppsv/api/float_math.rs
@@ -0,0 +1,132 @@
+//! Float math
+
+macro_rules! impl_float_math { // adds the float-math inherent methods to the vector type `$id`
+    ($id:ident) => {
+        impl $id {
+            /// Absolute value (forwards to the crate-internal `FloatAbs` codegen trait).
+            #[inline]
+            pub fn abs(self) -> Self {
+                use coresimd::ppsv::codegen::abs::FloatAbs;
+                FloatAbs::abs(self)
+            }
+
+            /// Square root (forwards to the crate-internal `FloatSqrt` codegen trait).
+            #[inline]
+            pub fn sqrt(self) -> Self {
+                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
+                FloatSqrt::sqrt(self)
+            }
+
+            /// Square-root estimate (currently the plain `simd_fsqrt` platform intrinsic).
+            #[inline]
+            pub fn sqrte(self) -> Self {
+                use coresimd::simd_llvm::simd_fsqrt;
+                unsafe { simd_fsqrt(self) }
+            }
+
+            /// Reciprocal square-root estimate, computed as `1. / simd_fsqrt(self)`.
+            #[inline]
+            pub fn rsqrte(self) -> Self {
+                unsafe {
+                    use coresimd::simd_llvm::simd_fsqrt;
+                    $id::splat(1.) / simd_fsqrt(self)
+                }
+            }
+
+            /// Fused multiply add: `self * y + z` (forwards to the `FloatFma` codegen trait).
+            #[inline]
+            pub fn fma(self, y: Self, z: Self) -> Self {
+                use coresimd::ppsv::codegen::fma::FloatFma;
+                FloatFma::fma(self, y, z)
+            }
+        }
+    };
+}
+
+macro_rules! test_float_math { // unit tests for the methods generated by `impl_float_math`
+    ($id:ident, $elem_ty:ident) => {
+        // sqrt(2) at the precision of `$elem_ty`, picked by the element size in bytes.
+        fn sqrt2() -> $elem_ty {
+            match ::mem::size_of::<$elem_ty>() {
+                4 => 1.4142135 as $elem_ty,
+                8 => 1.4142135623730951 as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
+        #[test]
+        fn abs() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.abs());
+
+            let mo = $id::splat(-1 as $elem_ty);
+            assert_eq!(o, mo.abs());
+        }
+
+        #[test]
+        fn sqrt() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrt());
+            assert_eq!(o, o.sqrt());
+            // sqrt(2) is compared exactly against the per-type constant from `sqrt2()`.
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            assert_eq!(e, t.sqrt());
+        }
+
+        #[test]
+        fn sqrte() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrte());
+            assert_eq!(o, o.sqrte());
+            // For 2 the estimate only has to land within `tol` of `sqrt2()` in every lane.
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            let error = (e - t.sqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty); // accepted absolute error per lane
+
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn rsqrte() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.rsqrte());
+            // For 2 the estimate only has to land within `tol` of `1. / sqrt2()` in every lane.
+            let t = $id::splat(2 as $elem_ty);
+            let e = 1. / sqrt2();
+            let error = (e - t.rsqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty); // accepted absolute error per lane
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn fma() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let t = $id::splat(2 as $elem_ty);
+            let t3 = $id::splat(3 as $elem_ty);
+            let f = $id::splat(4 as $elem_ty);
+
+            assert_eq!(z, z.fma(z, z));
+            assert_eq!(o, o.fma(o, z));
+            assert_eq!(o, o.fma(z, o));
+            assert_eq!(o, z.fma(o, o));
+
+            assert_eq!(t, o.fma(o, o));
+            assert_eq!(t, o.fma(t, z));
+            assert_eq!(t, t.fma(o, z));
+
+            assert_eq!(f, t.fma(t, z));
+            assert_eq!(f, t.fma(o, t));
+            assert_eq!(t3, t.fma(t, o));
+        }
+    };
+}
diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs
index 1915f6aeb0..1ce658e2d9 100644
--- a/coresimd/ppsv/api/mod.rs
+++ b/coresimd/ppsv/api/mod.rs
@@ -84,6 +84,8 @@ mod default;
 #[macro_use]
 mod eq;
 #[macro_use]
+mod float_math;
+#[macro_use]
 mod fmt;
 #[macro_use]
 mod from;
@@ -128,7 +130,8 @@ pub trait Lanes<A> {}
 
 /// Defines a portable packed SIMD floating-point vector type.
 macro_rules! simd_f_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
             [impl_neg_op, $id, $elem_ty],
             [impl_partial_eq, $id],
             [impl_default, $id, $elem_ty],
-            [impl_float_minmax_ops, $id]
+            [impl_float_minmax_ops, $id],
+            [impl_float_math, $id]
         );
 
         $test_macro!(
@@ -160,6 +164,7 @@ macro_rules! simd_f_ty {
                 test_default!($id, $elem_ty);
                 test_mask_select!($mask_ty, $id, $elem_ty);
                 test_float_minmax_ops!($id, $elem_ty);
+                test_float_math!($id, $elem_ty);
             }
         );
     }
@@ -167,7 +172,8 @@ macro_rules! simd_f_ty {
 
 /// Defines a portable packed SIMD signed-integer vector type.
 macro_rules! simd_i_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -221,7 +227,8 @@ macro_rules! simd_i_ty {
 
 /// Defines a portable packed SIMD unsigned-integer vector type.
 macro_rules! simd_u_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
diff --git a/coresimd/ppsv/codegen/abs.rs b/coresimd/ppsv/codegen/abs.rs
new file mode 100644
index 0000000000..edca549c24
--- /dev/null
+++ b/coresimd/ppsv/codegen/abs.rs
@@ -0,0 +1,43 @@
+//! Vector absolute value, bound to the `llvm.fabs.*` intrinsics
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)] // these bind LLVM intrinsics by name, not real C functions
+extern "C" {
+    #[link_name = "llvm.fabs.v2f32"]
+    fn abs_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.fabs.v4f32"]
+    fn abs_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.fabs.v8f32"]
+    fn abs_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.fabs.v16f32"]
+    fn abs_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.fabs.v2f64"]
+    fn abs_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.fabs.v4f64"]
+    fn abs_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.fabs.v8f64"]
+    fn abs_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Crate-internal hook behind the public `abs` method; one impl per vector type.
+pub(crate) trait FloatAbs {
+    fn abs(self) -> Self;
+}
+
+macro_rules! impl_fabs { // implements `FloatAbs` for `$id` by calling the binding `$fn`
+    ($id:ident: $fn:ident) => {
+        impl FloatAbs for $id {
+            #[inline] // keep this forwarding layer as inlinable as the public `#[inline]` method
+            fn abs(self) -> Self { unsafe { $fn(self) } }
+        }
+    }
+}
+
+impl_fabs!(f32x2: abs_v2f32);
+impl_fabs!(f32x4: abs_v4f32);
+impl_fabs!(f32x8: abs_v8f32);
+impl_fabs!(f32x16: abs_v16f32);
+impl_fabs!(f64x2: abs_v2f64);
+impl_fabs!(f64x4: abs_v4f64);
+impl_fabs!(f64x8: abs_v8f64);
diff --git a/coresimd/ppsv/codegen/fma.rs b/coresimd/ppsv/codegen/fma.rs
new file mode 100644
index 0000000000..9d63ac6bee
--- /dev/null
+++ b/coresimd/ppsv/codegen/fma.rs
@@ -0,0 +1,43 @@
+//! Vector fused multiply add, bound to the `llvm.fma.*` intrinsics
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)] // these bind LLVM intrinsics by name, not real C functions
+extern "C" {
+    #[link_name = "llvm.fma.v2f32"]
+    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fma.v4f32"]
+    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fma.v8f32"]
+    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fma.v16f32"]
+    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    #[link_name = "llvm.fma.v2f64"]
+    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fma.v4f64"]
+    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fma.v8f64"]
+    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+/// Crate-internal hook behind the public `fma` method; one impl per vector type.
+pub(crate) trait FloatFma {
+    fn fma(self, y: Self, z: Self) -> Self;
+}
+
+macro_rules! impl_fma { // implements `FloatFma` for `$id` by calling the binding `$fn`
+    ($id:ident: $fn:ident) => {
+        impl FloatFma for $id {
+            #[inline] // keep this forwarding layer as inlinable as the public `#[inline]` method
+            fn fma(self, y: Self, z: Self) -> Self { unsafe { $fn(self, y, z) } }
+        }
+    }
+}
+
+impl_fma!(f32x2: fma_v2f32);
+impl_fma!(f32x4: fma_v4f32);
+impl_fma!(f32x8: fma_v8f32);
+impl_fma!(f32x16: fma_v16f32);
+impl_fma!(f64x2: fma_v2f64);
+impl_fma!(f64x4: fma_v4f64);
+impl_fma!(f64x8: fma_v8f64);
diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs
index 448587b795..2791e0670e 100644
--- a/coresimd/ppsv/codegen/mod.rs
+++ b/coresimd/ppsv/codegen/mod.rs
@@ -4,3 +4,7 @@
 pub mod wrapping;
 
 pub mod masks_reductions;
+
+pub mod sqrt;
+pub mod abs;
+pub mod fma;
diff --git a/coresimd/ppsv/codegen/sqrt.rs b/coresimd/ppsv/codegen/sqrt.rs
new file mode 100644
index 0000000000..8e86650555
--- /dev/null
+++ b/coresimd/ppsv/codegen/sqrt.rs
@@ -0,0 +1,43 @@
+//! Exact vector square-root, bound to the `llvm.sqrt.*` intrinsics
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)] // these bind LLVM intrinsics by name, not real C functions
+extern "C" {
+    #[link_name = "llvm.sqrt.v2f32"]
+    fn sqrt_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn sqrt_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sqrt.v8f32"]
+    fn sqrt_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn sqrt_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sqrt.v2f64"]
+    fn sqrt_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sqrt.v4f64"]
+    fn sqrt_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn sqrt_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Crate-internal hook behind the public `sqrt` method; one impl per vector type.
+pub(crate) trait FloatSqrt {
+    fn sqrt(self) -> Self;
+}
+
+macro_rules! impl_fsqrt { // implements `FloatSqrt` for `$id` by calling the binding `$fn`
+    ($id:ident: $fn:ident) => {
+        impl FloatSqrt for $id {
+            #[inline] // keep this forwarding layer as inlinable as the public `#[inline]` method
+            fn sqrt(self) -> Self { unsafe { $fn(self) } }
+        }
+    }
+}
+
+impl_fsqrt!(f32x2: sqrt_v2f32);
+impl_fsqrt!(f32x4: sqrt_v4f32);
+impl_fsqrt!(f32x8: sqrt_v8f32);
+impl_fsqrt!(f32x16: sqrt_v16f32);
+impl_fsqrt!(f64x2: sqrt_v2f64);
+impl_fsqrt!(f64x4: sqrt_v4f64);
+impl_fsqrt!(f64x8: sqrt_v8f64);
diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs
index fdcf4f9cb0..c83c2d4b35 100644
--- a/coresimd/simd_llvm.rs
+++ b/coresimd/simd_llvm.rs
@@ -49,6 +49,9 @@ extern "platform-intrinsic" {
     pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
 
     pub fn simd_fmin<T>(a: T, b: T) -> T;
-// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
-// pub fn simd_fmax<T>(a: T, b: T) -> T;
+    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
+    // pub fn simd_fmax<T>(a: T, b: T) -> T;
+
+    pub fn simd_fsqrt<T>(a: T) -> T;
+    pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
 }

From 75009a8f2465c97dc6b2958f94872fedd2ac385c Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Mon, 4 Jun 2018 17:07:09 +0200
Subject: [PATCH 2/2] add float math: sin, cos

---
 coresimd/ppsv/api/float_math.rs | 53 ++++++++++++++++++++++++++++++++-
 coresimd/ppsv/codegen/cos.rs    | 43 ++++++++++++++++++++++++++
 coresimd/ppsv/codegen/mod.rs    |  2 ++
 coresimd/ppsv/codegen/sin.rs    | 43 ++++++++++++++++++++++++++
 4 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 coresimd/ppsv/codegen/cos.rs
 create mode 100644 coresimd/ppsv/codegen/sin.rs

diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs
index e0178b703c..32de85eb55 100644
--- a/coresimd/ppsv/api/float_math.rs
+++ b/coresimd/ppsv/api/float_math.rs
@@ -39,6 +39,20 @@ macro_rules! impl_float_math {
                 use coresimd::ppsv::codegen::fma::FloatFma;
                 FloatFma::fma(self, y, z)
             }
+
+            /// Sine (forwards to the crate-internal `FloatSin` codegen trait).
+            #[inline]
+            pub fn sin(self) -> Self {
+                use coresimd::ppsv::codegen::sin::FloatSin;
+                FloatSin::sin(self)
+            }
+
+            /// Cosine (forwards to the crate-internal `FloatCos` codegen trait).
+            #[inline]
+            pub fn cos(self) -> Self {
+                use coresimd::ppsv::codegen::cos::FloatCos;
+                FloatCos::cos(self)
+            }
         }
     };
 }
@@ -54,6 +68,14 @@ macro_rules! test_float_math {
             }
         }
 
+        fn pi() -> $elem_ty { // PI at the precision of `$elem_ty`, picked by element size in bytes
+            match ::mem::size_of::<$elem_ty>() {
+                4 => ::std::f32::consts::PI as $elem_ty,
+                8 => ::std::f64::consts::PI as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
         #[test]
         fn abs() {
             use coresimd::simd::*;
@@ -126,7 +148,36 @@ macro_rules! test_float_math {
 
             assert_eq!(f, t.fma(t, z));
             assert_eq!(f, t.fma(o, t));
-            assert_eq!(t3, t.fma(t, o));
+            assert_eq!(t3, t.fma(o, o));
+        }
+
+        #[test]
+        fn sin() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let p = $id::splat(pi() as $elem_ty);
+            let ph = $id::splat(pi() as $elem_ty / 2.);
+            let o_r = $id::splat((pi() as $elem_ty / 2.).sin());
+            let z_r = $id::splat((pi() as $elem_ty).sin());
+            // Expected values come from the scalar `sin` of the same inputs; comparison is exact.
+            assert_eq!(z, z.sin());
+            assert_eq!(o_r, ph.sin());
+            assert_eq!(z_r, p.sin());
+        }
+
+        #[test]
+        fn cos() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let p = $id::splat(pi() as $elem_ty);
+            let ph = $id::splat(pi() as $elem_ty / 2.);
+            let z_r = $id::splat((pi() as $elem_ty / 2.).cos());
+            let o_r = $id::splat((pi() as $elem_ty).cos());
+            // Expected values come from the scalar `cos` of the same inputs; comparison is exact.
+            assert_eq!(o, z.cos());
+            assert_eq!(z_r, ph.cos());
+            assert_eq!(o_r, p.cos());
         }
     };
 }
diff --git a/coresimd/ppsv/codegen/cos.rs b/coresimd/ppsv/codegen/cos.rs
new file mode 100644
index 0000000000..fdc61ea464
--- /dev/null
+++ b/coresimd/ppsv/codegen/cos.rs
@@ -0,0 +1,43 @@
+//! Vector cos, bound to the `llvm.cos.*` intrinsics
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)] // these bind LLVM intrinsics by name, not real C functions
+extern "C" {
+    #[link_name = "llvm.cos.v2f32"]
+    fn cos_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.cos.v4f32"]
+    fn cos_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.cos.v8f32"]
+    fn cos_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.cos.v16f32"]
+    fn cos_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.cos.v2f64"]
+    fn cos_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.cos.v4f64"]
+    fn cos_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.cos.v8f64"]
+    fn cos_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Crate-internal hook behind the public `cos` method; one impl per vector type.
+pub(crate) trait FloatCos {
+    fn cos(self) -> Self;
+}
+
+macro_rules! impl_fcos { // implements `FloatCos` for `$id` by calling the binding `$fn`
+    ($id:ident: $fn:ident) => {
+        impl FloatCos for $id {
+            #[inline] // keep this forwarding layer as inlinable as the public `#[inline]` method
+            fn cos(self) -> Self { unsafe { $fn(self) } }
+        }
+    }
+}
+
+impl_fcos!(f32x2: cos_v2f32);
+impl_fcos!(f32x4: cos_v4f32);
+impl_fcos!(f32x8: cos_v8f32);
+impl_fcos!(f32x16: cos_v16f32);
+impl_fcos!(f64x2: cos_v2f64);
+impl_fcos!(f64x4: cos_v4f64);
+impl_fcos!(f64x8: cos_v8f64);
diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs
index 2791e0670e..a1e8c24f6b 100644
--- a/coresimd/ppsv/codegen/mod.rs
+++ b/coresimd/ppsv/codegen/mod.rs
@@ -8,3 +8,5 @@ pub mod masks_reductions;
 pub mod sqrt;
 pub mod abs;
 pub mod fma;
+pub mod sin;
+pub mod cos;
diff --git a/coresimd/ppsv/codegen/sin.rs b/coresimd/ppsv/codegen/sin.rs
new file mode 100644
index 0000000000..cf7f3dea20
--- /dev/null
+++ b/coresimd/ppsv/codegen/sin.rs
@@ -0,0 +1,43 @@
+//! Vector sin, bound to the `llvm.sin.*` intrinsics
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)] // these bind LLVM intrinsics by name, not real C functions
+extern "C" {
+    #[link_name = "llvm.sin.v2f32"]
+    fn sin_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sin.v4f32"]
+    fn sin_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sin.v8f32"]
+    fn sin_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sin.v16f32"]
+    fn sin_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sin.v2f64"]
+    fn sin_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sin.v4f64"]
+    fn sin_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sin.v8f64"]
+    fn sin_v8f64(x: f64x8) -> f64x8;
+}
+
+/// Crate-internal hook behind the public `sin` method; one impl per vector type.
+pub(crate) trait FloatSin {
+    fn sin(self) -> Self;
+}
+
+macro_rules! impl_fsin { // implements `FloatSin` for `$id` by calling the binding `$fn`
+    ($id:ident: $fn:ident) => {
+        impl FloatSin for $id {
+            #[inline] // keep this forwarding layer as inlinable as the public `#[inline]` method
+            fn sin(self) -> Self { unsafe { $fn(self) } }
+        }
+    }
+}
+
+impl_fsin!(f32x2: sin_v2f32);
+impl_fsin!(f32x4: sin_v4f32);
+impl_fsin!(f32x8: sin_v8f32);
+impl_fsin!(f32x16: sin_v16f32);
+impl_fsin!(f64x2: sin_v2f64);
+impl_fsin!(f64x4: sin_v4f64);
+impl_fsin!(f64x8: sin_v8f64);