[clang] constexpr built-in fma function. #113020

c8ef · 2024-10-19T03:46:04Z

According to P0533R9, the C++ standard library functions fma are now constexpr:

  constexpr floating-point-type fma(floating-point-type x, floating-point-type y,
                                    floating-point-type z);
  constexpr float               fmaf(float x, float y, float z);
  constexpr long double         fmal(long double x, long double y, long double z);

To implement this feature in libc++, we must make the built-in fma function constexpr. This patch adds the implementation of a constexpr fma function for the current constant evaluator and the new bytecode interpreter.

llvmbot · 2024-10-19T04:18:05Z

@llvm/pr-subscribers-clang

Author: None (c8ef)

Changes

According to P0533R9, the C++ standard library functions fma are now constexpr:

  constexpr floating-point-type fma(floating-point-type x, floating-point-type y,
                                    floating-point-type z);
  constexpr float               fmaf(float x, float y, float z);
  constexpr long double         fmal(long double x, long double y, long double z);

To implement this feature in libc++, we must make the built-in fma function constexpr. This patch adds the implementation of a constexpr fma function for the current constant evaluator and the new bytecode interpreter.

Full diff: https://github.com/llvm/llvm-project/pull/113020.diff

6 Files Affected:

(modified) clang/docs/ReleaseNotes.rst (+1)
(modified) clang/include/clang/Basic/Builtins.td (+1)
(modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+37)
(modified) clang/lib/AST/ExprConstant.cpp (+16)
(modified) clang/test/AST/ByteCode/builtin-functions.cpp (+9)
(modified) clang/test/Sema/constant-builtins-2.c (+7)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..605d55a9e51f37 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -273,6 +273,7 @@ Non-comprehensive list of changes in this release
 - Plugins can now define custom attributes that apply to statements
   as well as declarations.
 - ``__builtin_abs`` function can now be used in constant expressions.
+- ``__builtin_fma`` function can now be used in constant expressions.
 
 New Compiler Flags
 ------------------
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 90475a361bb8f8..55f470a9f715b9 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3723,6 +3723,7 @@ def Fma : FPMathTemplate, LibBuiltin<"math.h"> {
   let Attributes = [NoThrow, ConstIgnoringErrnoAndExceptions];
   let Prototype = "T(T, T, T)";
   let AddBuiltinPrefixedAlias = 1;
+  let OnlyBuiltinPrefixedAliasIsConstexpr = 1;
 }
 
 def Fmax : FPMathTemplate, LibBuiltin<"math.h"> {
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d4a8e6c2035ee5..145f4627dd73da 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -142,6 +142,19 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result,
 #undef RET_CASE
 }
 
+/// Get rounding mode to use in evaluation of the specified expression.
+///
+/// If rounding mode is unknown at compile time, still try to evaluate the
+/// expression. If the result is exact, it does not depend on rounding mode.
+/// So return "tonearest" mode instead of "dynamic".
+static llvm::RoundingMode getActiveRoundingMode(InterpState &S, const Expr *E) {
+  llvm::RoundingMode RM =
+      E->getFPFeaturesInEffect(S.getLangOpts()).getRoundingMode();
+  if (RM == llvm::RoundingMode::Dynamic)
+    RM = llvm::RoundingMode::NearestTiesToEven;
+  return RM;
+}
+
 static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                   const InterpFrame *Frame,
                                                   const CallExpr *Call) {
@@ -549,6 +562,22 @@ static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_fma(InterpState &S, CodePtr OpPC,
+                                const InterpFrame *Frame, const Function *Func,
+                                const CallExpr *Call) {
+  const Floating &X = getParam<Floating>(Frame, 0);
+  const Floating &Y = getParam<Floating>(Frame, 1);
+  const Floating &Z = getParam<Floating>(Frame, 2);
+  Floating Result;
+
+  llvm::RoundingMode RM = getActiveRoundingMode(S, Call);
+  Floating::mul(X, Y, RM, &Result);
+  Floating::add(Result, Z, RM, &Result);
+
+  S.Stk.push<Floating>(Result);
+  return true;
+}
+
 // The C standard says "fabs raises no floating-point exceptions,
 // even if x is a signaling NaN. The returned value is independent of
 // the current rounding direction mode."  Therefore constant folding can
@@ -1814,6 +1843,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return false;
     break;
 
+  case Builtin::BI__builtin_fma:
+  case Builtin::BI__builtin_fmaf:
+  case Builtin::BI__builtin_fmal:
+  case Builtin::BI__builtin_fmaf128:
+    if (!interp__builtin_fma(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
   case Builtin::BI__builtin_fabs:
   case Builtin::BI__builtin_fabsf:
   case Builtin::BI__builtin_fabsl:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8e36cad2d2c6e7..685ce8a63f6c9e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15314,6 +15314,22 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) {
       Result.changeSign();
     return true;
 
+  case Builtin::BI__builtin_fma:
+  case Builtin::BI__builtin_fmaf:
+  case Builtin::BI__builtin_fmal:
+  case Builtin::BI__builtin_fmaf128: {
+    APFloat Y(0.), Z(0.);
+    if (!EvaluateFloat(E->getArg(0), Result, Info) ||
+        !EvaluateFloat(E->getArg(1), Y, Info) ||
+        !EvaluateFloat(E->getArg(2), Z, Info))
+      return false;
+
+    llvm::RoundingMode RM = getActiveRoundingMode(Info, E);
+    Result.multiply(Y, RM);
+    Result.add(Z, RM);
+    return true;
+  }
+
   case Builtin::BI__arithmetic_fence:
     return EvaluateFloat(E->getArg(0), Result, Info);
 
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index b5d334178f8213..0dba62e252b3d7 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -265,6 +265,15 @@ namespace fpclassify {
   char classify_subnorm [__builtin_fpclassify(-1, -1, -1, +1, -1, 1.0e-38f)];
 }
 
+namespace fma {
+  static_assert(__builtin_fma(1.0, 1.0, 1.0) == 2.0);
+  static_assert(__builtin_fma(1.0, -1.0, 1.0) == 0.0);
+  static_assert(__builtin_fmaf(1.0f, 1.0f, 1.0f) == 2.0f);
+  static_assert(__builtin_fmaf(1.0f, -1.0f, 1.0f) == 0.0f);
+  static_assert(__builtin_fmal(1.0L, 1.0L, 1.0L) == 2.0L);
+  static_assert(__builtin_fmal(1.0L, -1.0L, 1.0L) == 0.0L);
+} // namespace fma
+
 namespace abs {
   static_assert(__builtin_abs(14) == 14, "");
   static_assert(__builtin_labs(14L) == 14L, "");
diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c
index e465a3c5f0ad86..2a2dbc2caee1d1 100644
--- a/clang/test/Sema/constant-builtins-2.c
+++ b/clang/test/Sema/constant-builtins-2.c
@@ -54,6 +54,13 @@ long double  g18 = __builtin_copysignl(1.0L, -1.0L);
 __float128   g18_2 = __builtin_copysignf128(1.0q, -1.0q);
 #endif
 
+double g19 = __builtin_fma(1.0, 1.0, 1.0);
+float g20 = __builtin_fmaf(1.0f, 1.0f, 1.0f);
+long double g21 = __builtin_fmal(1.0L, 1.0L, 1.0L);
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128 g21_2 = __builtin_fma(1.0q, 1.0q, 1.0q);
+#endif
+
 char classify_nan     [__builtin_fpclassify(+1, -1, -1, -1, -1, __builtin_nan(""))];
 char classify_snan    [__builtin_fpclassify(+1, -1, -1, -1, -1, __builtin_nans(""))];
 char classify_inf     [__builtin_fpclassify(-1, +1, -1, -1, -1, __builtin_inf())];

c8ef · 2024-10-19T04:18:38Z

I'm not sure why there are noises in the Windows CI, as they seem unrelated. The Linux CI is working as expected.

clang/lib/AST/ExprConstant.cpp

c8ef · 2024-10-21T09:53:10Z

Dear reviewers, would you please take another look? @tbaederr @philnik777

philnik777 · 2024-10-21T10:16:44Z

This is definitely a requirement to implement this, but we also have to make the non-__builtin_-prefixed versions constexpr, since libc++ doesn't control their definitions. I'm pretty sure there was a PR that does this for some math function, but I can't find it right now.

philnik777 · 2024-10-21T10:22:01Z

Ah, it's #88978

c8ef · 2024-10-21T11:26:02Z

Actually, my reason for this patch is not necessarily related to libc++, but more about #51787. This issue requires the implementation of constexpr vector builtins. After some attempts, I found that some scalar versions of them cannot be used in a constexpr context. Therefore, I decided to implement those first.

Regarding the patch itself, is it good to go or does it need further revision? @philnik777

c8ef · 2024-10-22T11:37:54Z

Friendly ping~ @cor3ntin @hubert-reinterpretcast @tbaederr @philnik777

philnik777 · 2024-10-22T12:28:46Z

Sorry, I don't feel qualified to approve this.

tbaederr · 2024-10-23T23:33:26Z

Ping @cor3ntin since he mentioned on Discord that he's available for reviews this week :)

cor3ntin · 2024-10-24T05:11:11Z

I am waiting for Hubert as I don't trust my knowledge of floating points to offer meaningful feedback here, sorry

c8ef · 2024-10-26T00:00:26Z

Hi @cor3ntin @tbaederr, since @hubert-reinterpretcast is unavailable for review, could you please help me find someone else who is capable and available to review this? Thank you!

AaronBallman · 2024-10-29T16:22:05Z

Given the massive amount of effort it takes to implement constant expression floating-point math and the various edge cases it involves, I think the goal is for Clang to share as much code with llvm-libc as possible because they have to implement all the same logic anyway. I'm imagining a situation where we have some sort of library layer for the actual implementation of the functionality, llvm-libc would then link against that library layer, exposing the C interfaces for it, and Clang would link against that library layer and use it to implement a tablegen driven series of __builtin_whatever functions that would be implemented in Clang's constant expression evaluator(s) and libc++ would then be able to make use of directly.

We'd have to be careful for that library layer to be designed so that we get the target environment's floating-point semantics instead of the host environment's and it would need to be aware of the floating-point environment changes that come through things like pragmas.

CC @michaelrj-google for opinions, but I think we may want to have the broader design in mind before we start adding piecemeal support for constexpr float functionality even if we don't think we need it for fma support right now.

michaelrj-google · 2024-10-29T18:03:53Z

I've created an issue for planning out the LLVM-libc + clang code sharing (#114109). That being said I think FMA might be fine to implement within clang, due to its simplicity. Additionally, LLVM-libc may call __builtin_fma: https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/FMA.h#L32

CC: @lntue

lntue · 2024-10-29T18:33:59Z

clang/lib/AST/ByteCode/InterpBuiltin.cpp

+  Floating::mul(X, Y, RM, &Result);
+  Floating::add(Result, Z, RM, &Result);


Sorry I'm not so familiar with Floating type in here, but are they perform the operations in arbitrary precision? If these are performed in the same precision as the inputs then this will not work correctly with double rounding errors, overflow, and underflow. If there are higher intermediate precision to be used then it can be simplified a bit.

A simple example of double rounding errors for floating point type T you can try is:

x = 1 + std::numeric_limits<T>::epsilon(); y = 1 + std::numeric_limits<T>::epsilon(); z = std::numeric_limits<T>::epsilon() * 0.5 ;

then for the default rounding mode:

T(T(x * y) + z) = 1 + 2 * std::numeric_limits<T>::epsilon(); FMA(x, y, z) = 1 + 3 * std::numeric_limits<T>::epsilon();

I'm not entirely sure about this either, thanks for pointing it out! Regarding the type, Floating is simply a wrapper for APFloat.

lntue · 2024-10-29T18:39:16Z

I've created an issue for planning out the LLVM-libc + clang code sharing (#114109). That being said I think FMA might be fine to implement within clang, due to its simplicity. Additionally, LLVM-libc may call __builtin_fma: https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/FMA.h#L32

CC: @lntue

On a side note, a completely correct FMA without hardware support is not straightforward, especially when the wider floating point type for intermediate computations is not available. LLVM libc uses __builtin_fma only when there is hardware support in order to get both fma instructions generated and compiler optimization support, since the latter one does not work so well with inline assembly.

efriedma-quic · 2024-10-29T19:01:44Z

I suspect fma is not one of the functions we want to try to share, at least not short-term. It's one of the core IEEE operations, LLVM has APFloat::fusedMultiplyAdd since forever, and there's probably some weirdness with dependencies if we try to share it because other operations use it.

andykaylor · 2024-10-29T19:26:25Z

clang/lib/AST/ByteCode/InterpBuiltin.cpp

+  const Floating &Z = getParam<Floating>(Frame, 2);
+  Floating Result;
+
+  llvm::RoundingMode RM = getActiveRoundingMode(S, Call);


I have questions about the rounding mode. Is getActiveRoundingMode() trying to account for the FE_ROUND pragma setting? If that pragma isn't used and we aren't compiling in strict mode, this should give us "NearestTiesToEven", right?

But if the rounding mode is dynamic, then I think we need to know if the call is in a constexpr. Consider:

float f1() { fesetround(FE_UPWARD); constrexpr float x = __builtin_fma(1.0f, 0.0f, 0.1f); // constexpr evaluates at the default rounding mode? float y = __builtin_fma(1.0f, 0.1f, 0.1f); // Non-constexpr should use the dynamic rounding mode? return x - y; }

I'm not 100% certain about the language rules here, but at least in C my understanding is that initialization of non-static, non-constexp expressions should be done "as if" evaluated at runtime.

llvm-project/clang/lib/AST/ExprConstant.cpp

Lines 2776 to 2782 in 61a456b

static llvm::RoundingMode getActiveRoundingMode(EvalInfo &Info, const Expr *E) {

llvm::RoundingMode RM =

E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).getRoundingMode();

if (RM == llvm::RoundingMode::Dynamic)

RM = llvm::RoundingMode::NearestTiesToEven;

return RM;

}

I actually obtained this from the old constant evaluator, but I'm not sure if it has the same rounding issue.

AaronBallman · 2024-10-30T12:59:57Z

I suspect fma is not one of the functions we want to try to share, at least not short-term. It's one of the core IEEE operations, LLVM has APFloat::fusedMultiplyAdd since forever, and there's probably some weirdness with dependencies if we try to share it because other operations use it.

That's fair, I don't insist on sharing here, but it is something we should be planning for with the wider set of changes IMO.

hubert-reinterpretcast · 2024-11-01T00:41:32Z

A problem with the current patch is that it does not evaluate, even in constant expression contexts, cases that require rounding:

extern constexpr float onepluszeroeps = __builtin_fmaf(__FLT_EPSILON__, .0f, 1.f);
extern constexpr float oneplushalfeps = __builtin_fmaf(__FLT_EPSILON__, .5f, 1.f);

<stdin>:2:24: error: constexpr variable 'oneplushalfeps' must be initialized by a constant expression
    2 | extern constexpr float oneplushalfeps = __builtin_fmaf(__FLT_EPSILON__, .5f, 1.f);
      |                        ^                ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 error generated.

hubert-reinterpretcast

Needs tests for rounding cases and observance of rounding modes.

constexpr fma

93c625a

c8ef changed the title ~~Draft~~ [clang] constexpr built-in fma function. Oct 19, 2024

c8ef marked this pull request as ready for review October 19, 2024 04:17

llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Oct 19, 2024

c8ef requested review from cor3ntin, AaronBallman, tbaederr and Sirraide October 19, 2024 04:17

cor3ntin requested a review from hubert-reinterpretcast October 19, 2024 06:41

tbaederr reviewed Oct 19, 2024

View reviewed changes

clang/lib/AST/ExprConstant.cpp Outdated Show resolved Hide resolved

c8ef added 2 commits October 19, 2024 08:34

address review comments

89a87ca

fix

37093d7

c8ef requested a review from tbaederr October 19, 2024 09:15

Merge branch 'main' into fma

2d2c580

tbaederr requested a review from philnik777 October 20, 2024 04:06

Merge branch 'main' into fma

cd91e46

c8ef requested a review from shafik October 23, 2024 15:51

This was referenced Oct 24, 2024

[ConstantFold] Fold log1p and log1pf when the input parameter is a constant value. #112113

Merged

[Clang] Support elementwise/reduction builtins in constexpr contexts. #51787

Open

RKSimon self-requested a review October 24, 2024 14:10

RKSimon requested review from andykaylor and jcranmer-intel October 28, 2024 11:54

lntue reviewed Oct 29, 2024

View reviewed changes

andykaylor reviewed Oct 29, 2024

View reviewed changes

hubert-reinterpretcast reviewed Nov 1, 2024

View reviewed changes

c8ef closed this Nov 9, 2024

c8ef deleted the fma branch November 9, 2024 01:21

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[clang] constexpr built-in fma function. #113020

[clang] constexpr built-in fma function. #113020

c8ef commented Oct 19, 2024 •

edited

Loading

llvmbot commented Oct 19, 2024

c8ef commented Oct 19, 2024

c8ef commented Oct 21, 2024

philnik777 commented Oct 21, 2024

philnik777 commented Oct 21, 2024

c8ef commented Oct 21, 2024

c8ef commented Oct 22, 2024

philnik777 commented Oct 22, 2024

tbaederr commented Oct 23, 2024

cor3ntin commented Oct 24, 2024

c8ef commented Oct 26, 2024

AaronBallman commented Oct 29, 2024

michaelrj-google commented Oct 29, 2024

lntue Oct 29, 2024

c8ef Oct 30, 2024

lntue commented Oct 29, 2024

efriedma-quic commented Oct 29, 2024

andykaylor Oct 29, 2024

c8ef Oct 30, 2024

AaronBallman commented Oct 30, 2024

hubert-reinterpretcast commented Nov 1, 2024

hubert-reinterpretcast left a comment

		Floating::mul(X, Y, RM, &Result);
		Floating::add(Result, Z, RM, &Result);

	static llvm::RoundingMode getActiveRoundingMode(EvalInfo &Info, const Expr *E) {
	llvm::RoundingMode RM =
	E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).getRoundingMode();
	if (RM == llvm::RoundingMode::Dynamic)
	RM = llvm::RoundingMode::NearestTiesToEven;
	return RM;
	}

[clang] constexpr built-in fma function. #113020

[clang] constexpr built-in fma function. #113020

Conversation

c8ef commented Oct 19, 2024 • edited Loading

llvmbot commented Oct 19, 2024

c8ef commented Oct 19, 2024

c8ef commented Oct 21, 2024

philnik777 commented Oct 21, 2024

philnik777 commented Oct 21, 2024

c8ef commented Oct 21, 2024

c8ef commented Oct 22, 2024

philnik777 commented Oct 22, 2024

tbaederr commented Oct 23, 2024

cor3ntin commented Oct 24, 2024

c8ef commented Oct 26, 2024

AaronBallman commented Oct 29, 2024

michaelrj-google commented Oct 29, 2024

lntue Oct 29, 2024

Choose a reason for hiding this comment

c8ef Oct 30, 2024

Choose a reason for hiding this comment

lntue commented Oct 29, 2024

efriedma-quic commented Oct 29, 2024

andykaylor Oct 29, 2024

Choose a reason for hiding this comment

c8ef Oct 30, 2024

Choose a reason for hiding this comment

AaronBallman commented Oct 30, 2024

hubert-reinterpretcast commented Nov 1, 2024

hubert-reinterpretcast left a comment

Choose a reason for hiding this comment

c8ef commented Oct 19, 2024 •

edited

Loading