llvm · overmighty · Oct 17, 2024 · Aug 24, 2024 · Oct 15, 2024 · Oct 17, 2024
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
@@ -521,6 +521,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.canonicalizef16
     libc.src.math.ceilf16
     libc.src.math.copysignf16
+    libc.src.math.coshf16
     libc.src.math.exp10f16
     libc.src.math.exp10m1f16
     libc.src.math.exp2f16
@@ -585,6 +586,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.scalbnf16
     libc.src.math.setpayloadf16
     libc.src.math.setpayloadsigf16
+    libc.src.math.sinhf16
     libc.src.math.totalorderf16
     libc.src.math.totalordermagf16
     libc.src.math.truncf16

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -610,6 +610,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.canonicalizef16
     libc.src.math.ceilf16
     libc.src.math.copysignf16
+    libc.src.math.coshf16
     libc.src.math.exp10f16
     libc.src.math.exp10m1f16
     libc.src.math.exp2f16
@@ -678,6 +679,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.scalbnf16
     libc.src.math.setpayloadf16
     libc.src.math.setpayloadsigf16
+    libc.src.math.sinhf16
     libc.src.math.sinpif16
     libc.src.math.totalorderf16
     libc.src.math.totalordermagf16

diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -278,7 +278,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | cos       | |check|          | |check|         |                        |                      |                        | 7.12.4.5               | F.10.1.5                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| cosh      | |check|          |                 |                        |                      |                        | 7.12.5.4               | F.10.2.4                   |
+| cosh      | |check|          |                 |                        | |check|              |                        | 7.12.5.4               | F.10.2.4                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | cospi     | |check|          |                 |                        |                      |                        | 7.12.4.12              | F.10.1.12                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
@@ -340,7 +340,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | sincos    | |check|          | |check|         |                        |                      |                        |                        |                            |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| sinh      | |check|          |                 |                        |                      |                        | 7.12.5.5               | F.10.2.5                   |
+| sinh      | |check|          |                 |                        | |check|              |                        | 7.12.5.5               | F.10.2.5                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | sinpi     | |check|          |                 |                        |  |check|             |                        | 7.12.4.13              | F.10.1.13                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -792,7 +792,11 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"pow", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
 
           FunctionSpec<"coshf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"coshf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
+
           FunctionSpec<"sinhf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"sinhf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
+
           FunctionSpec<"tanhf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
           FunctionSpec<"acosf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -89,8 +89,11 @@ add_math_entrypoint_object(copysignf128)
 
 add_math_entrypoint_object(cos)
 add_math_entrypoint_object(cosf)
+
 add_math_entrypoint_object(cosh)
 add_math_entrypoint_object(coshf)
+add_math_entrypoint_object(coshf16)
+
 add_math_entrypoint_object(cospif)
 
 add_math_entrypoint_object(daddl)
@@ -481,6 +484,7 @@ add_math_entrypoint_object(sinpif16)
 
 add_math_entrypoint_object(sinh)
 add_math_entrypoint_object(sinhf)
+add_math_entrypoint_object(sinhf16)
 
 add_math_entrypoint_object(sqrt)
 add_math_entrypoint_object(sqrtf)

diff --git a/libc/src/math/coshf16.h b/libc/src/math/coshf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for coshf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_COSHF16_H
+#define LLVM_LIBC_SRC_MATH_COSHF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 coshf16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_COSHF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -4218,6 +4218,25 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  coshf16
+  SRCS
+    coshf16.cpp
+  HDRS
+    ../coshf16.h
+  DEPENDS
+    .expxf16
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   sinhf
   SRCS
@@ -4233,6 +4252,25 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  sinhf16
+  SRCS
+    sinhf16.cpp
+  HDRS
+    ../sinhf16.h
+  DEPENDS
+    .expxf16
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   tanhf
   SRCS
@@ -5297,6 +5335,7 @@ add_header_library(
     expxf16.h
   DEPENDS
     libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.cast
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.multiply_add
     libc.src.__support.FPUtil.nearest_integer

diff --git a/libc/src/math/generic/coshf16.cpp b/libc/src/math/generic/coshf16.cpp
@@ -0,0 +1,103 @@
+//===-- Half-precision cosh(x) function -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/coshf16.h"
+#include "expxf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+static constexpr fputil::ExceptValues<float16, 9> COSHF16_EXCEPTS_POS = {{
+    // x = 0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ)
+    {0x29a8U, 0x3c00U, 1U, 0U, 1U},
+    // x = 0x1.8c4p+0, coshf16(x) = 0x1.3a8p+1 (RZ)
+    {0x3e31U, 0x40eaU, 1U, 0U, 0U},
+    // x = 0x1.994p+0, coshf16(x) = 0x1.498p+1 (RZ)
+    {0x3e65U, 0x4126U, 1U, 0U, 0U},
+    // x = 0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ)
+    {0x3ed8U, 0x41b6U, 1U, 0U, 1U},
+    // x = 0x1.aap+1, coshf16(x) = 0x1.be8p+3 (RZ)
+    {0x42a8U, 0x4afaU, 1U, 0U, 1U},
+    // x = 0x1.cc4p+1, coshf16(x) = 0x1.23cp+4 (RZ)
+    {0x4331U, 0x4c8fU, 1U, 0U, 0U},
+    // x = 0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ)
+    {0x44a2U, 0x526dU, 1U, 0U, 0U},
+    // x = 0x1.958p+2, coshf16(x) = 0x1.1a4p+8 (RZ)
+    {0x4656U, 0x5c69U, 1U, 0U, 0U},
+    // x = 0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ)
+    {0x497cU, 0x7715U, 1U, 0U, 1U},
+}};
+
+static constexpr fputil::ExceptValues<float16, 4> COSHF16_EXCEPTS_NEG = {{
+    // x = -0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ)
+    {0xa9a8U, 0x3c00U, 1U, 0U, 1U},
+    // x = -0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ)
+    {0xbed8U, 0x41b6U, 1U, 0U, 1U},
+    // x = -0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ)
+    {0xc4a2U, 0x526dU, 1U, 0U, 0U},
+    // x = -0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ)
+    {0xc97cU, 0x7715U, 1U, 0U, 1U},
+}};
+
+LLVM_LIBC_FUNCTION(float16, coshf16, (float16 x)) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| >= acosh(2^16), or x is NaN.
+  if (LIBC_UNLIKELY(x_abs >= 0x49e5U)) {
+    // cosh(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // When |x| >= acosh(2^16).
+    if (x_abs >= 0x49e5U) {
+      // cosh(+/-inf) = +inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+  }
+
+  if (x_bits.is_pos()) {
+    if (auto r = COSHF16_EXCEPTS_POS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+      return r.value();
+  } else {
+    if (auto r = COSHF16_EXCEPTS_NEG.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+      return r.value();
+  }
+
+  return eval_sinh_or_cosh</*IsSinh=*/false>(x);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h
@@ -12,6 +12,7 @@
 #include "src/__support/CPP/array.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
 #include "src/__support/FPUtil/multiply_add.h"
 #include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/macros/attributes.h"
@@ -174,6 +175,119 @@ LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
   return {exp2_hi_mid, exp10_lo};
 }
 
+// Generated by Sollya with the following commands:
+//   > display = hexadecimal;
+//   > round(log2(exp(1)), SG, RN);
+static constexpr float LOG2F_E = 0x1.715476p+0f;
+
+// Generated by Sollya with the following commands:
+//   > display = hexadecimal;
+//   > round(log(2), SG, RN);
+static constexpr float LOGF_2 = 0x1.62e43p-1f;
+
+// Generated by Sollya with the following commands:
+//   > display = hexadecimal;
+//   > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
+static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = {
+    0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U,
+    0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU,
+    0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU,
+    0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU,
+    0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU,
+    0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU,
+    0x3ff5'257dU, 0x3ffa'83b3U,
+};
+
+// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
+// and exp(-x) simultaneously.
+// To compute e^x, we perform the following range reduction:
+// find hi, mid, lo such that:
+//   x = (hi + mid) * log(2) + lo, in which
+//     hi is an integer,
+//     0 <= mid * 2^5 < 32 is an integer
+//     -2^(-5) <= lo * log2(e) <= 2^-5.
+// In particular,
+//   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
+// Then,
+//   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
+// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
+// by adding hi to the exponent field of 2^mid.
+// e^lo is computed using a degree-3 minimax polynomial generated by Sollya:
+//   e^lo ~ P(lo)
+//        = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
+//        = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
+//        = P_even + lo * P_odd
+// To compute e^(-x), notice that:
+//   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
+//          ~ 2^(-(hi + mid)) * P(-lo)
+//          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
+// So:
+//   sinh(x) = (e^x - e^(-x)) / 2
+//           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
+//                    2^(-(hi + mid)) * (P_even - lo * P_odd))
+//           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
+//                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
+// And similarly:
+//   cosh(x) = (e^x + e^(-x)) / 2
+//           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
+//                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
+// The main point of these formulas is that the expensive part of calculating
+// the polynomials approximating lower parts of e^x and e^(-x) is shared and
+// only done once.
+template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
+  float xf = x;
+  float kf = fputil::nearest_integer(xf * (LOG2F_E * 0x1.0p+5f));
+  int x_hi_mid_p = static_cast<int>(kf);
+  int x_hi_mid_m = -x_hi_mid_p;
+
+  unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> 5;
+  unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> 5;
+  unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & 0x1f;
+  unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & 0x1f;
+
+  uint32_t exp2_hi_mid_bits_p =
+      EXP2_MID_5_BITS[x_mid_p] +
+      static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN);
+  uint32_t exp2_hi_mid_bits_m =
+      EXP2_MID_5_BITS[x_mid_m] +
+      static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN);
+  // exp2_hi_mid_p = 2^(hi + mid)
+  float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val();
+  // exp2_hi_mid_m = 2^(-(hi + mid))
+  float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val();
+
+  // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid))
+  float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m;
+  // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid))
+  float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m;
+
+  // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x
+  float lo = fputil::multiply_add(kf, LOGF_2 * -0x1.0p-5f, xf);
+  float lo_sq = lo * lo;
+
+  // Degree-3 minimax polynomial generated by Sollya with the following
+  // commands:
+  //   > display = hexadecimal;
+  //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
+  //   > 1 + x * P;
+  constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f,
+                                           0x1.555778p-3f};
+  float half_p_odd =
+      fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f);
+  float half_p_even =
+      fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f);
+
+  // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +
+  //                (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid))))
+  if constexpr (IsSinh)
+    return fputil::cast<float16>(fputil::multiply_add(
+        lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff));
+  // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +
+  //                (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid))))
+  return fputil::cast<float16>(fputil::multiply_add(
+      lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum));
+}
+
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H