Skip to content

[libc][math][c23] Add rsqrtf16() function #137545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.rintf16
libc.src.math.roundevenf16
libc.src.math.roundf16
libc.src.math.rsqrtf16
libc.src.math.scalblnf16
libc.src.math.scalbnf16
libc.src.math.setpayloadf16
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/headers/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| rootn | | | | | | 7.12.7.8 | F.10.4.8 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| rsqrt | | | | | | 7.12.7.9 | F.10.4.9 |
| rsqrt | | | | |check| | | 7.12.7.9 | F.10.4.9 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| sin | |check| | |check| | | |check| | | 7.12.4.6 | F.10.1.6 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand Down
7 changes: 7 additions & 0 deletions libc/include/math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,13 @@ functions:
return_type: long double
arguments:
- type: long double
- name: rsqrtf16
standards:
- stdc
return_type: _Float16
arguments:
- type: _Float16
guard: LIBC_TYPES_HAS_FLOAT16
- name: scalbln
standards:
- stdc
Expand Down
2 changes: 2 additions & 0 deletions libc/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,8 @@ add_math_entrypoint_object(roundevenl)
add_math_entrypoint_object(roundevenf16)
add_math_entrypoint_object(roundevenf128)

add_math_entrypoint_object(rsqrtf16)

add_math_entrypoint_object(scalbln)
add_math_entrypoint_object(scalblnf)
add_math_entrypoint_object(scalblnl)
Expand Down
19 changes: 19 additions & 0 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,25 @@ add_entrypoint_object(
libc.src.__support.FPUtil.nearest_integer_operations
)

add_entrypoint_object(
rsqrtf16
SRCS
rsqrtf16.cpp
HDRS
../rsqrtf16.h
DEPENDS
libc.hdr.errno_macros
libc.hdr.fenv_macros
libc.src.__support.FPUtil.cast
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.fma
libc.src.__support.FPUtil.manipulation_functions
libc.src.__support.FPUtil.polyeval
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.types
)

add_entrypoint_object(
lround
SRCS
Expand Down
120 changes: 120 additions & 0 deletions libc/src/math/generic/rsqrtf16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//===-- Half-precision rsqrt function -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/rsqrtf16.h"
#include "hdr/errno_macros.h"
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FMA.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/macros/optimization.h"

namespace LIBC_NAMESPACE_DECL {

// Computes the half-precision reciprocal square root, 1/sqrt(x).
//
// Special cases, handled before the numerical path:
//   - NaN      -> quiet NaN (FE_INVALID is raised for a signaling NaN).
//   - +0 / -0  -> +inf / -inf, raising FE_DIVBYZERO and setting ERANGE.
//   - x < 0    -> quiet NaN, raising FE_INVALID and setting EDOM
//                 (this includes -inf).
//   - +inf     -> +0.
// Every other input is evaluated in single precision and converted to float16
// once, on return.
LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
  using FPBits = fputil::FPBits<float16>;
  FPBits xbits(x);

  const uint16_t x_u = xbits.uintval();
  const uint16_t x_abs = x_u & 0x7fff;
  const uint16_t x_sign = x_u >> 15;

  // x is NaN
  if (LIBC_UNLIKELY(xbits.is_nan())) {
    if (xbits.is_signaling_nan()) {
      fputil::raise_except_if_required(FE_INVALID);
      return FPBits::quiet_nan().get_val();
    }
    return x;
  }

  // |x| = 0: pole error. IEEE 754 (rSqrt) and C23 F.10.4.9 specify
  // rsqrt(+-0) = +-inf, so the returned infinity carries the sign of the zero.
  if (LIBC_UNLIKELY(x_abs == 0x0)) {
    fputil::raise_except_if_required(FE_DIVBYZERO);
    fputil::set_errno_if_required(ERANGE);
    return FPBits::inf(xbits.sign()).get_val();
  }

  // -inf <= x < 0 (negative zero was already handled above)
  if (LIBC_UNLIKELY(x_sign == 1)) {
    fputil::raise_except_if_required(FE_INVALID);
    fputil::set_errno_if_required(EDOM);
    return FPBits::quiet_nan().get_val();
  }

  // x = +inf => rsqrt(x) = 0
  if (LIBC_UNLIKELY(xbits.is_inf())) {
    return fputil::cast<float16>(0.0f);
  }

  // x is a positive finite number, estimate the result.
  // Range reduction:
  //   x = m * 2^e, with integer exponent e and mantissa m
  //   rsqrt(x) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 2^(-e/2)
  const float xf = x;
  int exponent = 0;
  const float mantissa = fputil::frexp(xf, exponent);

  // With e = 2k or e = 2k + 1, (e >> 1) is k, so exp_floored is -k. An odd
  // exponent leaves an extra factor of 2^(-1/2) that is applied separately.
  // NOTE(review): for negative e this relies on >> being an arithmetic shift.
  const int exp_floored = -(exponent >> 1);
  const bool exponent_is_odd = (exponent & 1) != 0;

  float result;

  if (mantissa == 0.5f) {
    // x is a power of two (possibly a subnormal that normalizes this way), so
    // the result is known in closed form: 1/sqrt(0.5) = sqrt(2).
    //   e = 2k + 1: sqrt(2) * 2^(-k) * 2^(-1/2) = 2^(-k)
    //   e = 2k    : sqrt(2) * 2^(-k)
    constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
    result = fputil::ldexp(exponent_is_odd ? 1.0f : SQRT_2_F, exp_floored);
  } else {
    // Degree-5 polynomial (six coefficients) generated using Sollya
    // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
    const float initial_guess = fputil::polyeval(
        mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f, -0x1.899954p3f,
        0x1.9edcp2f, -0x1.6bd93cp0f);

    // One Newton-Raphson step for y ~ 1/sqrt(mantissa):
    //   y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
    // The correction factor is formed with a single fma.
    const auto newton_step = [mantissa](float y) {
      const float factor = fputil::fma<float>(-0.5f * mantissa, y * y, 1.5f);
      return y * factor;
    };

    // Two refinement steps are applied to the polynomial estimate.
    const float refined = newton_step(newton_step(initial_guess));

    result = fputil::ldexp(refined, exp_floored);
    if (exponent_is_odd) {
      // Remaining 2^(-1/2) factor for odd exponents. A plain multiply is
      // enough here: the former fma(result, c, 0.0f) added nothing to it.
      constexpr float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
      result *= ONE_OVER_SQRT2;
    }
  }

  return fputil::cast<float16>(result);
}
} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/math/rsqrtf16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for rsqrtf16 ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_MATH_RSQRTF16_H
#define LLVM_LIBC_SRC_MATH_RSQRTF16_H

#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/types.h"

namespace LIBC_NAMESPACE_DECL {

// Half-precision reciprocal square root: returns an approximation of
// 1/sqrt(x) as a float16. Handling of NaN, zero, negative and infinite inputs
// is defined by the implementation in generic/rsqrtf16.cpp.
float16 rsqrtf16(float16 x);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_MATH_RSQRTF16_H
11 changes: 11 additions & 0 deletions libc/test/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1560,6 +1560,17 @@ add_fp_unittest(
libc.src.math.sqrtl
)

add_fp_unittest(
rsqrtf16_test
NEED_MPFR
SUITE
libc-math-unittests
SRCS
rsqrtf16_test.cpp
DEPENDS
libc.src.math.rsqrtf16
)

add_fp_unittest(
sqrtf16_test
NEED_MPFR
Expand Down
42 changes: 42 additions & 0 deletions libc/test/src/math/rsqrtf16_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//===-- Exhaustive test for rsqrtf16 --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/rsqrtf16.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"

using LlvmLibcRsqrtf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;

namespace mpfr = LIBC_NAMESPACE::testing::mpfr;

// Range: [0, Inf]
static constexpr uint16_t POS_START = 0x0000U;
static constexpr uint16_t POS_STOP = 0x7c00U;

// Range: [-Inf, 0]
static constexpr uint16_t NEG_START = 0x8000U;
static constexpr uint16_t NEG_STOP = 0xfc00U;

// Exhaustively checks rsqrtf16 against the MPFR reference for every float16
// bit pattern from POS_START (0x0000, +0) through POS_STOP (0x7c00, +inf),
// with a tolerance argument of 0.5.
TEST_F(LlvmLibcRsqrtf16Test, PositiveRange) {
for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
// Reinterpret the raw 16-bit pattern as a float16 value.
float16 x = FPBits(v).get_val();

// NOTE(review): the call under test is deliberately left inside the macro
// argument; presumably the macro re-evaluates it under each rounding mode, so
// hoisting it into a local would change what is tested - confirm before
// refactoring.
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
LIBC_NAMESPACE::rsqrtf16(x), 0.5);
}
}

// Exhaustively checks rsqrtf16 against the MPFR reference for the negative
// float16 bit patterns from just above NEG_START (0x8000, -0) through
// NEG_STOP (0xfc00, -inf).
TEST_F(LlvmLibcRsqrtf16Test, NegativeRange) {
  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
    // -0 is excluded from the MPFR comparison: mpfr_rec_sqrt is documented to
    // return +Inf for -0, whereas IEEE 754 rSqrt / C23 F.10.4.9 specify -inf.
    // Comparing against MPFR at this one input would pin the non-conforming
    // sign, so signed-zero behaviour has to be checked without the oracle.
    if (v == NEG_START)
      continue;

    float16 x = FPBits(v).get_val();

    // The call stays inside the macro argument so the macro controls how and
    // when it is evaluated.
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
                                   LIBC_NAMESPACE::rsqrtf16(x), 0.5);
  }
}
11 changes: 11 additions & 0 deletions libc/test/src/math/smoke/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2958,6 +2958,17 @@ add_fp_unittest(
libc.src.math.sqrtl
)

add_fp_unittest(
rsqrtf16_test
SUITE
libc-math-smoke-tests
SRCS
rsqrtf16_test.cpp
DEPENDS
libc.src.errno.errno
libc.src.math.rsqrtf16
)

add_fp_unittest(
sqrtf16_test
SUITE
Expand Down
37 changes: 37 additions & 0 deletions libc/test/src/math/smoke/rsqrtf16_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===-- Unittests for rsqrtf16 --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/errno/libc_errno.h"
#include "src/math/rsqrtf16.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"

using LlvmLibcRsqrtf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
// Smoke test for the special-case inputs of rsqrtf16: NaNs, zero, one,
// infinities and a negative finite value. Each call is followed by a check of
// the errno value it is expected to leave behind.
TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
// Start from a clean errno so the first check is meaningful.
LIBC_NAMESPACE::libc_errno = 0;
// Quiet NaN propagates and does not set errno.
EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(aNaN));
EXPECT_MATH_ERRNO(0);

// Signaling NaN yields a quiet NaN and raises FE_INVALID; errno is untouched.
EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN), FE_INVALID);
EXPECT_MATH_ERRNO(0);

// +0 is a pole: the result is +inf and errno is ERANGE.
// NOTE(review): -0 is not exercised here.
EXPECT_FP_EQ(inf, LIBC_NAMESPACE::rsqrtf16(0.0f));
EXPECT_MATH_ERRNO(ERANGE);

// Ordinary exact case: rsqrt(1) == 1.
// NOTE(review): the errno == 0 expectations after an error case presumably
// rely on EXPECT_MATH_ERRNO clearing errno after it checks - confirm.
EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::rsqrtf16(1.0f));
EXPECT_MATH_ERRNO(0);

// +inf maps to zero without an error.
EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::rsqrtf16(inf));
EXPECT_MATH_ERRNO(0);

// Negative inputs, including -inf, are domain errors: NaN result, errno EDOM.
EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(neg_inf));
EXPECT_MATH_ERRNO(EDOM);

EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(-2.0f));
EXPECT_MATH_ERRNO(EDOM);
}
6 changes: 6 additions & 0 deletions libc/utils/MPFRWrapper/MPCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,12 @@ MPFRNumber MPFRNumber::rint(mpfr_rnd_t rnd) const {
return result;
}

// Returns the reciprocal square root of this number, computed by
// mpfr_rec_sqrt with the object's mpfr_rounding mode.
// NOTE(review): per the MPFR manual, mpfr_rec_sqrt yields +Inf for both +0
// and -0, which differs from IEEE 754 rSqrt at -0.
MPFRNumber MPFRNumber::rsqrt() const {
  // Start from a copy of *this; its value is overwritten by the MPFR call.
  MPFRNumber reciprocal_root(*this);
  mpfr_rec_sqrt(reciprocal_root.value, value, mpfr_rounding);
  return reciprocal_root;
}

MPFRNumber MPFRNumber::mod_2pi() const {
MPFRNumber result(0.0, 1280);
MPFRNumber _2pi(0.0, 1280);
Expand Down
1 change: 1 addition & 0 deletions libc/utils/MPFRWrapper/MPCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ class MPFRNumber {
bool round_to_long(long &result) const;
bool round_to_long(mpfr_rnd_t rnd, long &result) const;
MPFRNumber rint(mpfr_rnd_t rnd) const;
MPFRNumber rsqrt() const;
MPFRNumber mod_2pi() const;
MPFRNumber mod_pi_over_2() const;
MPFRNumber mod_pi_over_4() const;
Expand Down
2 changes: 2 additions & 0 deletions libc/utils/MPFRWrapper/MPFRUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
return mpfrInput.round();
case Operation::RoundEven:
return mpfrInput.roundeven();
case Operation::Rsqrt:
return mpfrInput.rsqrt();
case Operation::Sin:
return mpfrInput.sin();
case Operation::Sinpi:
Expand Down
1 change: 1 addition & 0 deletions libc/utils/MPFRWrapper/MPFRUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ enum class Operation : int {
ModPIOver4,
Round,
RoundEven,
Rsqrt,
Sin,
Sinpi,
Sinh,
Expand Down
Loading