From 322b8f7c5fb0074c1cfc959b8bba4e991d604438 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Mon, 27 May 2019 17:00:03 +0700
Subject: [PATCH 1/7] Add newlib fdlibm

Ported from https://github.com/eblot/newlib/blob/master/newlib/libm/common/fdlibm.h
---
 src/math/mod.rs | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/src/math/mod.rs b/src/math/mod.rs
index 48b400a92..d1a872759 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -344,3 +344,97 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 {
 fn combine_words(hi: u32, lo: u32) -> f64 {
     f64::from_bits((hi as u64) << 32 | lo as u64)
 }
+
+mod fdlibm {
+    /* @(#)fdlibm.h 5.1 93/09/24 */
+    /*
+     * ====================================================
+     * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+     *
+     * Developed at SunPro, a Sun Microsystems, Inc. business.
+     * Permission to use, copy, modify, and distribute this
+     * software is freely granted, provided that this notice
+     * is preserved.
+     * ====================================================
+     */
+
+    #![allow(dead_code)]
+
+    /* Most routines need to check whether a float is finite, infinite, or not a
+    number, and many need to know whether the result of an operation will
+    overflow.  These conditions depend on whether the largest exponent is
+    used for NaNs & infinities, or whether it's used for finite numbers. */
+
+    /// True if a positive float with bitmask X is finite.
+    #[inline]
+    #[allow(non_snake_case)]
+    pub fn FLT_UWORD_IS_FINITE(x: u32) -> bool {
+        x < INFINITE
+    }
+
+    /// True if a positive float with bitmask X is not a number.
+    #[inline]
+    #[allow(non_snake_case)]
+    pub fn FLT_UWORD_IS_NAN(x: u32) -> bool {
+        x > INFINITE
+    }
+
+    /// True if a positive float with bitmask X is +infinity.
+    #[inline]
+    #[allow(non_snake_case)]
+    pub fn FLT_UWORD_IS_INFINITE(x: u32) -> bool {
+        x == INFINITE
+    }
+
+    const INFINITE: u32 = 0x7f80_0000;
+
+    /// The bitmask of FLT_MAX.
+    pub const FLT_UWORD_MAX: u32 = 0x7f7f_ffff;
+    /// The bitmask of FLT_MAX/2.
+    pub const FLT_UWORD_HALF_MAX: u32 = FLT_UWORD_MAX - (1 << 23);
+    /// The bitmask of the largest finite exponent (129 if the largest
+    /// exponent is used for finite numbers, 128 otherwise).
+    pub const FLT_UWORD_EXP_MAX: u32 = 0x4300_0000;
+    /// The bitmask of log(FLT_MAX), rounded down.  This value is the largest
+    /// input that can be passed to exp() without producing overflow.
+    pub const FLT_UWORD_LOG_MAX: u32 = 0x42b1_7217;
+    /// The bitmask of log(2*FLT_MAX), rounded down.  This value is the
+    /// largest input than can be passed to cosh() without producing
+    /// overflow.
+    pub const FLT_UWORD_LOG_2MAX: u32 = 0x42b2_d4fc;
+    /// The largest biased exponent that can be used for finite numbers
+    /// (255 if the largest exponent is used for finite numbers, 254
+    /// otherwise)
+    pub const FLT_LARGEST_EXP: u32 = FLT_UWORD_MAX >> 23;
+    pub const HUGE: f32 = 3.402_823_466_385_288_60e+38;
+
+    /* Many routines check for zero and subnormal numbers.  Such things depend
+    on whether the target supports denormals or not */
+
+    /// True if a positive float with bitmask X is +0.  Without denormals,
+    /// any float with a zero exponent is a +0 representation.  With
+    /// denormals, the only +0 representation is a 0 bitmask.
+    #[inline]
+    #[allow(non_snake_case)]
+    pub fn FLT_UWORD_IS_ZERO(x: u32) -> bool {
+        x == 0
+    }
+
+    /// True if a non-zero positive float with bitmask X is subnormal.
+    /// (Routines should check for zeros first.)
+    #[inline]
+    #[allow(non_snake_case)]
+    pub fn FLT_UWORD_IS_SUBNORMAL(x: u32) -> bool {
+        x < 0x0080_0000
+    }
+
+    /// The bitmask of the smallest float above +0.  Call this number REAL_FLT_MIN...
+    pub const FLT_UWORD_MIN: u32 = 0x0000_0001;
+    /// The bitmask of the float representation of REAL_FLT_MIN's exponent.
+    pub const FLT_UWORD_EXP_MIN: u32 = 0x4316_0000;
+    /// The bitmask of |log(REAL_FLT_MIN)|, rounding down.
+    pub const FLT_UWORD_LOG_MIN: u32 = 0x42cf_f1b5;
+    /// REAL_FLT_MIN's exponent - EXP_BIAS (1 if denormals are not supported,
+    /// -22 if they are).
+    pub const FLT_SMALLEST_EXP: i32 = -22;
+}

From ce5dd4701e1690e56450f7b84e81ab31c6b33f68 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Sun, 7 Jul 2019 15:26:08 +0700
Subject: [PATCH 2/7] WIP make functions pub(crate)

---
 src/math/mod.rs | 52 +++++++++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 28 deletions(-)

diff --git a/src/math/mod.rs b/src/math/mod.rs
index d1a872759..be28649e7 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -346,6 +346,9 @@ fn combine_words(hi: u32, lo: u32) -> f64 {
 }
 
 mod fdlibm {
+    #![allow(dead_code)] // FIXME: remove it after other dependent codes merged
+    #![allow(non_snake_case)]
+
     /* @(#)fdlibm.h 5.1 93/09/24 */
     /*
      * ====================================================
@@ -358,55 +361,50 @@ mod fdlibm {
      * ====================================================
      */
 
-    #![allow(dead_code)]
-
     /* Most routines need to check whether a float is finite, infinite, or not a
     number, and many need to know whether the result of an operation will
     overflow.  These conditions depend on whether the largest exponent is
     used for NaNs & infinities, or whether it's used for finite numbers. */
 
     /// True if a positive float with bitmask X is finite.
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn FLT_UWORD_IS_FINITE(x: u32) -> bool {
+    #[inline(always)]
+    pub(crate) fn FLT_UWORD_IS_FINITE(x: u32) -> bool {
         x < INFINITE
     }
 
     /// True if a positive float with bitmask X is not a number.
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn FLT_UWORD_IS_NAN(x: u32) -> bool {
+    #[inline(always)]
+    pub(crate) fn FLT_UWORD_IS_NAN(x: u32) -> bool {
         x > INFINITE
     }
 
     /// True if a positive float with bitmask X is +infinity.
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn FLT_UWORD_IS_INFINITE(x: u32) -> bool {
+    #[inline(always)]
+    pub(crate) fn FLT_UWORD_IS_INFINITE(x: u32) -> bool {
         x == INFINITE
     }
 
     const INFINITE: u32 = 0x7f80_0000;
 
     /// The bitmask of FLT_MAX.
-    pub const FLT_UWORD_MAX: u32 = 0x7f7f_ffff;
+    pub(crate) const FLT_UWORD_MAX: u32 = 0x7f7f_ffff;
     /// The bitmask of FLT_MAX/2.
-    pub const FLT_UWORD_HALF_MAX: u32 = FLT_UWORD_MAX - (1 << 23);
+    pub(crate) const FLT_UWORD_HALF_MAX: u32 = FLT_UWORD_MAX - (1 << 23);
     /// The bitmask of the largest finite exponent (129 if the largest
     /// exponent is used for finite numbers, 128 otherwise).
-    pub const FLT_UWORD_EXP_MAX: u32 = 0x4300_0000;
+    pub(crate) const FLT_UWORD_EXP_MAX: u32 = 0x4300_0000;
     /// The bitmask of log(FLT_MAX), rounded down.  This value is the largest
     /// input that can be passed to exp() without producing overflow.
-    pub const FLT_UWORD_LOG_MAX: u32 = 0x42b1_7217;
+    pub(crate) const FLT_UWORD_LOG_MAX: u32 = 0x42b1_7217;
     /// The bitmask of log(2*FLT_MAX), rounded down.  This value is the
     /// largest input than can be passed to cosh() without producing
     /// overflow.
-    pub const FLT_UWORD_LOG_2MAX: u32 = 0x42b2_d4fc;
+    pub(crate) const FLT_UWORD_LOG_2MAX: u32 = 0x42b2_d4fc;
     /// The largest biased exponent that can be used for finite numbers
     /// (255 if the largest exponent is used for finite numbers, 254
     /// otherwise)
-    pub const FLT_LARGEST_EXP: u32 = FLT_UWORD_MAX >> 23;
-    pub const HUGE: f32 = 3.402_823_466_385_288_60e+38;
+    pub(crate) const FLT_LARGEST_EXP: u32 = FLT_UWORD_MAX >> 23;
+    pub(crate) const HUGE: f32 = 3.402_823_466_385_288_60e+38;
 
     /* Many routines check for zero and subnormal numbers.  Such things depend
     on whether the target supports denormals or not */
@@ -414,27 +412,25 @@ mod fdlibm {
     /// True if a positive float with bitmask X is +0.  Without denormals,
     /// any float with a zero exponent is a +0 representation.  With
     /// denormals, the only +0 representation is a 0 bitmask.
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn FLT_UWORD_IS_ZERO(x: u32) -> bool {
+    #[inline(always)]
+    pub(crate) fn FLT_UWORD_IS_ZERO(x: u32) -> bool {
         x == 0
     }
 
     /// True if a non-zero positive float with bitmask X is subnormal.
     /// (Routines should check for zeros first.)
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn FLT_UWORD_IS_SUBNORMAL(x: u32) -> bool {
+    #[inline(always)]
+    pub(crate) fn FLT_UWORD_IS_SUBNORMAL(x: u32) -> bool {
         x < 0x0080_0000
     }
 
     /// The bitmask of the smallest float above +0.  Call this number REAL_FLT_MIN...
-    pub const FLT_UWORD_MIN: u32 = 0x0000_0001;
+    pub(crate) const FLT_UWORD_MIN: u32 = 0x0000_0001;
     /// The bitmask of the float representation of REAL_FLT_MIN's exponent.
-    pub const FLT_UWORD_EXP_MIN: u32 = 0x4316_0000;
+    pub(crate) const FLT_UWORD_EXP_MIN: u32 = 0x4316_0000;
     /// The bitmask of |log(REAL_FLT_MIN)|, rounding down.
-    pub const FLT_UWORD_LOG_MIN: u32 = 0x42cf_f1b5;
+    pub(crate) const FLT_UWORD_LOG_MIN: u32 = 0x42cf_f1b5;
     /// REAL_FLT_MIN's exponent - EXP_BIAS (1 if denormals are not supported,
     /// -22 if they are).
-    pub const FLT_SMALLEST_EXP: i32 = -22;
+    pub(crate) const FLT_SMALLEST_EXP: i32 = -22;
 }

From 80a730c6de17a5dd2f87f1601d8b84cb00fd63e5 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Sun, 7 Jul 2019 16:05:38 +0700
Subject: [PATCH 3/7] WIP do not always inline, let LLVM decide

---
 src/math/mod.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/math/mod.rs b/src/math/mod.rs
index be28649e7..3c7d05111 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -367,19 +367,19 @@ mod fdlibm {
     used for NaNs & infinities, or whether it's used for finite numbers. */
 
     /// True if a positive float with bitmask X is finite.
-    #[inline(always)]
+    #[inline]
     pub(crate) fn FLT_UWORD_IS_FINITE(x: u32) -> bool {
         x < INFINITE
     }
 
     /// True if a positive float with bitmask X is not a number.
-    #[inline(always)]
+    #[inline]
     pub(crate) fn FLT_UWORD_IS_NAN(x: u32) -> bool {
         x > INFINITE
     }
 
     /// True if a positive float with bitmask X is +infinity.
-    #[inline(always)]
+    #[inline]
     pub(crate) fn FLT_UWORD_IS_INFINITE(x: u32) -> bool {
         x == INFINITE
     }
@@ -412,14 +412,14 @@ mod fdlibm {
     /// True if a positive float with bitmask X is +0.  Without denormals,
     /// any float with a zero exponent is a +0 representation.  With
     /// denormals, the only +0 representation is a 0 bitmask.
-    #[inline(always)]
+    #[inline]
     pub(crate) fn FLT_UWORD_IS_ZERO(x: u32) -> bool {
         x == 0
     }
 
     /// True if a non-zero positive float with bitmask X is subnormal.
     /// (Routines should check for zeros first.)
-    #[inline(always)]
+    #[inline]
     pub(crate) fn FLT_UWORD_IS_SUBNORMAL(x: u32) -> bool {
         x < 0x0080_0000
     }

From 0b3ea35e30e783efe61c54810ac34795498e5de7 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Tue, 28 May 2019 09:44:51 +0700
Subject: [PATCH 4/7] Move src/math/cbrtf.rs -> src/musl/cbrtf.rs

---
 src/musl.rs                 | 3 +++
 src/{math => musl}/cbrtf.rs | 0
 2 files changed, 3 insertions(+)
 create mode 100644 src/musl.rs
 rename src/{math => musl}/cbrtf.rs (100%)

diff --git a/src/musl.rs b/src/musl.rs
new file mode 100644
index 000000000..203c379b9
--- /dev/null
+++ b/src/musl.rs
@@ -0,0 +1,3 @@
+mod cbrtf;
+
+pub use self::cbrtf::cbrtf;
diff --git a/src/math/cbrtf.rs b/src/musl/cbrtf.rs
similarity index 100%
rename from src/math/cbrtf.rs
rename to src/musl/cbrtf.rs

From b718f26d8098f51c1bfac389c17255a297a08b49 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Sun, 7 Jul 2019 09:13:14 +0700
Subject: [PATCH 5/7] chore: Remove musl module

---
 src/musl.rs       |  3 --
 src/musl/cbrtf.rs | 76 -----------------------------------------------
 2 files changed, 79 deletions(-)
 delete mode 100644 src/musl.rs
 delete mode 100644 src/musl/cbrtf.rs

diff --git a/src/musl.rs b/src/musl.rs
deleted file mode 100644
index 203c379b9..000000000
--- a/src/musl.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-mod cbrtf;
-
-pub use self::cbrtf::cbrtf;
diff --git a/src/musl/cbrtf.rs b/src/musl/cbrtf.rs
deleted file mode 100644
index 6e589c099..000000000
--- a/src/musl/cbrtf.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * Debugged and optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* cbrtf(x)
- * Return cube root of x
- */
-
-use core::f32;
-
-const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
-const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
-
-/// Cube root (f32)
-///
-/// Computes the cube root of the argument.
-#[inline]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn cbrtf(x: f32) -> f32 {
-    let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
-
-    let mut r: f64;
-    let mut t: f64;
-    let mut ui: u32 = x.to_bits();
-    let mut hx: u32 = ui & 0x7fffffff;
-
-    if hx >= 0x7f800000 {
-        /* cbrt(NaN,INF) is itself */
-        return x + x;
-    }
-
-    /* rough cbrt to 5 bits */
-    if hx < 0x00800000 {
-        /* zero or subnormal? */
-        if hx == 0 {
-            return x; /* cbrt(+-0) is itself */
-        }
-        ui = (x * x1p24).to_bits();
-        hx = ui & 0x7fffffff;
-        hx = hx / 3 + B2;
-    } else {
-        hx = hx / 3 + B1;
-    }
-    ui &= 0x80000000;
-    ui |= hx;
-
-    /*
-     * First step Newton iteration (solving t*t-x/t == 0) to 16 bits.  In
-     * double precision so that its terms can be arranged for efficiency
-     * without causing overflow or underflow.
-     */
-    t = f32::from_bits(ui) as f64;
-    r = t * t * t;
-    t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r);
-
-    /*
-     * Second step Newton iteration to 47 bits.  In double precision for
-     * efficiency and accuracy.
-     */
-    r = t * t * t;
-    t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r);
-
-    /* rounding to 24 bits is perfect in round-to-nearest mode */
-    t as f32
-}

From d153151d85edeb64cbd8015984c4d347f986a2e4 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Tue, 28 May 2019 10:08:41 +0700
Subject: [PATCH 6/7] Add newlib cbrtf

---
 src/math/cbrtf.rs | 62 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 src/math/cbrtf.rs

diff --git a/src/math/cbrtf.rs b/src/math/cbrtf.rs
new file mode 100644
index 000000000..bcfec2904
--- /dev/null
+++ b/src/math/cbrtf.rs
@@ -0,0 +1,62 @@
+/* sf_cbrt.c -- float version of s_cbrt.c.
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ */
+
+use super::fdlibm::{FLT_UWORD_IS_FINITE, FLT_UWORD_IS_SUBNORMAL, FLT_UWORD_IS_ZERO};
+
+const B1: u32 = 709_958_130; /* B1 = (84+2/3-0.03306235651)*2**23 */
+const B2: u32 = 642_849_266; /* B2 = (76+2/3-0.03306235651)*2**23 */
+
+const C: f32 = 5.428_571_701_0e-01; /* 19/35        = 0x3f0a_f8b0 */
+const D: f32 = -7.053_061_127_7e-01; /* -864/1225   = 0xbf34_8ef1 */
+const E: f32 = 1.414_285_659_8e+00; /* 99/70        = 0x3fb5_0750 */
+const F: f32 = 1.607_142_806_1e+00; /* 45/28        = 0x3fcd_b6db */
+const G: f32 = 3.571_428_656_6e-01; /* 5/14         = 0x3eb6_db6e */
+
+/// Cube root (f32)
+///
+/// Computes the cube root of the argument.
+#[inline]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub extern "C" fn cbrtf(x: f32) -> f32 {
+    let hx: u32 = x.to_bits() & 0x7fff_ffff; /* |x| */
+    let sign: u32 = x.to_bits() & 0x8000_0000; /* sign = sign(x) */
+
+    if !FLT_UWORD_IS_FINITE(hx) {
+        return x + x; /* cbrt(NaN,INF) is itself */
+    }
+    if FLT_UWORD_IS_ZERO(hx) {
+        return x; /* cbrt(0) is itself */
+    }
+
+    let x = f32::from_bits(hx); /* x <- |x| */
+    /* rough cbrt to 5 bits */
+    let mut t: f32 = if FLT_UWORD_IS_SUBNORMAL(hx) {
+        /* subnormal number */
+        let t: f32 = f32::from_bits(0x4b80_0000);
+        let high: u32 = (x * t).to_bits(); /* x * (2 ** 24)*/
+        f32::from_bits((high / 3).wrapping_add(B2))
+    } else {
+        f32::from_bits((hx / 3).wrapping_add(B1))
+    };
+
+    /* new cbrt to 23 bits */
+    let r: f32 = t * t / x;
+    let s: f32 = C + r * t;
+    t *= G + F / (s + E + D / s);
+
+    /* restore the sign bit */
+    f32::from_bits(t.to_bits() | sign)
+}

From e68ca244ef6cc19728cde7103de50f24d0814168 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Sun, 7 Jul 2019 15:21:00 +0700
Subject: [PATCH 7/7] WIP make const

---
 src/math/cbrtf.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/cbrtf.rs b/src/math/cbrtf.rs
index bcfec2904..a8a426e54 100644
--- a/src/math/cbrtf.rs
+++ b/src/math/cbrtf.rs
@@ -45,8 +45,8 @@ pub extern "C" fn cbrtf(x: f32) -> f32 {
     /* rough cbrt to 5 bits */
     let mut t: f32 = if FLT_UWORD_IS_SUBNORMAL(hx) {
         /* subnormal number */
-        let t: f32 = f32::from_bits(0x4b80_0000);
-        let high: u32 = (x * t).to_bits(); /* x * (2 ** 24)*/
+        const X1P24: f32 = 16777216f32; // 2 ** 24
+        let high: u32 = (x * X1P24).to_bits();
         f32::from_bits((high / 3).wrapping_add(B2))
     } else {
         f32::from_bits((hx / 3).wrapping_add(B1))