|
12 | 12 | #include "src/__support/CPP/array.h"
|
13 | 13 | #include "src/__support/FPUtil/FPBits.h"
|
14 | 14 | #include "src/__support/FPUtil/PolyEval.h"
|
| 15 | +#include "src/__support/FPUtil/cast.h" |
15 | 16 | #include "src/__support/FPUtil/multiply_add.h"
|
16 | 17 | #include "src/__support/FPUtil/nearest_integer.h"
|
17 | 18 | #include "src/__support/macros/attributes.h"
|
@@ -174,6 +175,119 @@ LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
|
174 | 175 | return {exp2_hi_mid, exp10_lo};
|
175 | 176 | }
|
176 | 177 |
|
| 178 | +// Generated by Sollya with the following commands: |
| 179 | +// > display = hexadecimal; |
| 180 | +// > round(log2(exp(1)), SG, RN); |
| 181 | +static constexpr float LOG2F_E = 0x1.715476p+0f; |
| 182 | + |
| 183 | +// Generated by Sollya with the following commands: |
| 184 | +// > display = hexadecimal; |
| 185 | +// > round(log(2), SG, RN); |
| 186 | +static constexpr float LOGF_2 = 0x1.62e43p-1f; |
| 187 | + |
| 188 | +// Generated by Sollya with the following commands: |
| 189 | +// > display = hexadecimal; |
| 190 | +// > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN)); |
| 191 | +static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = { |
| 192 | + 0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U, |
| 193 | + 0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU, |
| 194 | + 0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU, |
| 195 | + 0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU, |
| 196 | + 0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU, |
| 197 | + 0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU, |
| 198 | + 0x3ff5'257dU, 0x3ffa'83b3U, |
| 199 | +}; |
| 200 | + |
| 201 | +// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x) |
| 202 | +// and exp(-x) simultaneously. |
| 203 | +// To compute e^x, we perform the following range reduction: |
| 204 | +// find hi, mid, lo such that: |
| 205 | +// x = (hi + mid) * log(2) + lo, in which |
| 206 | +// hi is an integer, |
| 207 | +// 0 <= mid * 2^5 < 32 is an integer |
| 208 | +// -2^(-5) <= lo * log2(e) <= 2^-5. |
| 209 | +// In particular, |
| 210 | +// hi + mid = round(x * log2(e) * 2^5) * 2^(-5). |
| 211 | +// Then, |
| 212 | +// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. |
| 213 | +// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid |
| 214 | +// by adding hi to the exponent field of 2^mid. |
| 215 | +// e^lo is computed using a degree-3 minimax polynomial generated by Sollya: |
| 216 | +// e^lo ~ P(lo) |
| 217 | +// = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 |
| 218 | +// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) |
| 219 | +// = P_even + lo * P_odd |
| 220 | +// To compute e^(-x), notice that: |
| 221 | +// e^(-x) = 2^(-(hi + mid)) * e^(-lo) |
| 222 | +// ~ 2^(-(hi + mid)) * P(-lo) |
| 223 | +// = 2^(-(hi + mid)) * (P_even - lo * P_odd) |
| 224 | +// So: |
| 225 | +// sinh(x) = (e^x - e^(-x)) / 2 |
| 226 | +// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - |
| 227 | +// 2^(-(hi + mid)) * (P_even - lo * P_odd)) |
| 228 | +// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + |
| 229 | +// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) |
| 230 | +// And similarly: |
| 231 | +// cosh(x) = (e^x + e^(-x)) / 2 |
| 232 | +// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + |
| 233 | +// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) |
| 234 | +// The main point of these formulas is that the expensive part of calculating |
| 235 | +// the polynomials approximating lower parts of e^x and e^(-x) is shared and |
| 236 | +// only done once. |
| 237 | +template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) { |
| 238 | + float xf = x; |
| 239 | + float kf = fputil::nearest_integer(xf * (LOG2F_E * 0x1.0p+5f)); |
| 240 | + int x_hi_mid_p = static_cast<int>(kf); |
| 241 | + int x_hi_mid_m = -x_hi_mid_p; |
| 242 | + |
| 243 | + unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> 5; |
| 244 | + unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> 5; |
| 245 | + unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & 0x1f; |
| 246 | + unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & 0x1f; |
| 247 | + |
| 248 | + uint32_t exp2_hi_mid_bits_p = |
| 249 | + EXP2_MID_5_BITS[x_mid_p] + |
| 250 | + static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN); |
| 251 | + uint32_t exp2_hi_mid_bits_m = |
| 252 | + EXP2_MID_5_BITS[x_mid_m] + |
| 253 | + static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN); |
| 254 | + // exp2_hi_mid_p = 2^(hi + mid) |
| 255 | + float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val(); |
| 256 | + // exp2_hi_mid_m = 2^(-(hi + mid)) |
| 257 | + float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val(); |
| 258 | + |
| 259 | + // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid)) |
| 260 | + float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m; |
| 261 | + // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid)) |
| 262 | + float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m; |
| 263 | + |
| 264 | + // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x |
| 265 | + float lo = fputil::multiply_add(kf, LOGF_2 * -0x1.0p-5f, xf); |
| 266 | + float lo_sq = lo * lo; |
| 267 | + |
| 268 | + // Degree-3 minimax polynomial generated by Sollya with the following |
| 269 | + // commands: |
| 270 | + // > display = hexadecimal; |
| 271 | + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]); |
| 272 | + // > 1 + x * P; |
| 273 | + constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f, |
| 274 | + 0x1.555778p-3f}; |
| 275 | + float half_p_odd = |
| 276 | + fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f); |
| 277 | + float half_p_even = |
| 278 | + fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f); |
| 279 | + |
| 280 | + // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) + |
| 281 | + // (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid)))) |
| 282 | + if constexpr (IsSinh) |
| 283 | + return fputil::cast<float16>(fputil::multiply_add( |
| 284 | + lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff)); |
| 285 | + // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) + |
| 286 | + // (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid)))) |
| 287 | + return fputil::cast<float16>(fputil::multiply_add( |
| 288 | + lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum)); |
| 289 | +} |
| 290 | + |
177 | 291 | } // namespace LIBC_NAMESPACE_DECL
|
178 | 292 |
|
179 | 293 | #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
|
0 commit comments