-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[clang] constexpr built-in fma function. #113020
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang Author: None (c8ef)

Changes

According to P0533R9, the C++ standard library functions `fma` are now `constexpr`:

constexpr floating-point-type fma(floating-point-type x, floating-point-type y,
floating-point-type z);
constexpr float fmaf(float x, float y, float z);
constexpr long double fmal(long double x, long double y, long double z);

To implement this feature in libc++, we must make the built-in `fma` function `constexpr`.

Full diff: https://github.com/llvm/llvm-project/pull/113020.diff

6 Files Affected:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..605d55a9e51f37 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -273,6 +273,7 @@ Non-comprehensive list of changes in this release
- Plugins can now define custom attributes that apply to statements
as well as declarations.
- ``__builtin_abs`` function can now be used in constant expressions.
+- ``__builtin_fma`` function can now be used in constant expressions.
New Compiler Flags
------------------
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 90475a361bb8f8..55f470a9f715b9 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3723,6 +3723,7 @@ def Fma : FPMathTemplate, LibBuiltin<"math.h"> {
let Attributes = [NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T, T)";
let AddBuiltinPrefixedAlias = 1;
+ let OnlyBuiltinPrefixedAliasIsConstexpr = 1;
}
def Fmax : FPMathTemplate, LibBuiltin<"math.h"> {
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d4a8e6c2035ee5..145f4627dd73da 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -142,6 +142,19 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result,
#undef RET_CASE
}
+/// Get rounding mode to use in evaluation of the specified expression.
+///
+/// If rounding mode is unknown at compile time, still try to evaluate the
+/// expression. If the result is exact, it does not depend on rounding mode.
+/// So return "tonearest" mode instead of "dynamic".
+static llvm::RoundingMode getActiveRoundingMode(InterpState &S, const Expr *E) {
+ llvm::RoundingMode RM =
+ E->getFPFeaturesInEffect(S.getLangOpts()).getRoundingMode();
+ if (RM == llvm::RoundingMode::Dynamic)
+ RM = llvm::RoundingMode::NearestTiesToEven;
+ return RM;
+}
+
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call) {
@@ -549,6 +562,22 @@ static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_fma(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame, const Function *Func,
+ const CallExpr *Call) {
+ const Floating &X = getParam<Floating>(Frame, 0);
+ const Floating &Y = getParam<Floating>(Frame, 1);
+ const Floating &Z = getParam<Floating>(Frame, 2);
+ Floating Result;
+
+ llvm::RoundingMode RM = getActiveRoundingMode(S, Call);
+ Floating::mul(X, Y, RM, &Result);
+ Floating::add(Result, Z, RM, &Result);
+
+ S.Stk.push<Floating>(Result);
+ return true;
+}
+
// The C standard says "fabs raises no floating-point exceptions,
// even if x is a signaling NaN. The returned value is independent of
// the current rounding direction mode." Therefore constant folding can
@@ -1814,6 +1843,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
return false;
break;
+ case Builtin::BI__builtin_fma:
+ case Builtin::BI__builtin_fmaf:
+ case Builtin::BI__builtin_fmal:
+ case Builtin::BI__builtin_fmaf128:
+ if (!interp__builtin_fma(S, OpPC, Frame, F, Call))
+ return false;
+ break;
+
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8e36cad2d2c6e7..685ce8a63f6c9e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15314,6 +15314,22 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) {
Result.changeSign();
return true;
+ case Builtin::BI__builtin_fma:
+ case Builtin::BI__builtin_fmaf:
+ case Builtin::BI__builtin_fmal:
+ case Builtin::BI__builtin_fmaf128: {
+ APFloat Y(0.), Z(0.);
+ if (!EvaluateFloat(E->getArg(0), Result, Info) ||
+ !EvaluateFloat(E->getArg(1), Y, Info) ||
+ !EvaluateFloat(E->getArg(2), Z, Info))
+ return false;
+
+ llvm::RoundingMode RM = getActiveRoundingMode(Info, E);
+ Result.multiply(Y, RM);
+ Result.add(Z, RM);
+ return true;
+ }
+
case Builtin::BI__arithmetic_fence:
return EvaluateFloat(E->getArg(0), Result, Info);
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index b5d334178f8213..0dba62e252b3d7 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -265,6 +265,15 @@ namespace fpclassify {
char classify_subnorm [__builtin_fpclassify(-1, -1, -1, +1, -1, 1.0e-38f)];
}
+namespace fma {
+ static_assert(__builtin_fma(1.0, 1.0, 1.0) == 2.0);
+ static_assert(__builtin_fma(1.0, -1.0, 1.0) == 0.0);
+ static_assert(__builtin_fmaf(1.0f, 1.0f, 1.0f) == 2.0f);
+ static_assert(__builtin_fmaf(1.0f, -1.0f, 1.0f) == 0.0f);
+ static_assert(__builtin_fmal(1.0L, 1.0L, 1.0L) == 2.0L);
+ static_assert(__builtin_fmal(1.0L, -1.0L, 1.0L) == 0.0L);
+} // namespace fma
+
namespace abs {
static_assert(__builtin_abs(14) == 14, "");
static_assert(__builtin_labs(14L) == 14L, "");
diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c
index e465a3c5f0ad86..2a2dbc2caee1d1 100644
--- a/clang/test/Sema/constant-builtins-2.c
+++ b/clang/test/Sema/constant-builtins-2.c
@@ -54,6 +54,13 @@ long double g18 = __builtin_copysignl(1.0L, -1.0L);
__float128 g18_2 = __builtin_copysignf128(1.0q, -1.0q);
#endif
+double g19 = __builtin_fma(1.0, 1.0, 1.0);
+float g20 = __builtin_fmaf(1.0f, 1.0f, 1.0f);
+long double g21 = __builtin_fmal(1.0L, 1.0L, 1.0L);
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128 g21_2 = __builtin_fma(1.0q, 1.0q, 1.0q);
+#endif
+
char classify_nan [__builtin_fpclassify(+1, -1, -1, -1, -1, __builtin_nan(""))];
char classify_snan [__builtin_fpclassify(+1, -1, -1, -1, -1, __builtin_nans(""))];
char classify_inf [__builtin_fpclassify(-1, +1, -1, -1, -1, __builtin_inf())];
|
I'm not sure why there is noise in the Windows CI, as it seems unrelated. The Linux CI is working as expected. |
Dear reviewers, would you please take another look? @tbaederr @philnik777 |
This is definitely a requirement to implement this, but we also have to make the non-`__builtin`-prefixed versions `constexpr` as well. |
Ah, it's #88978 |
Actually, my reason for this patch is not necessarily related to libc++, but more about #51787. This issue requires the implementation of constexpr vector builtins. After some attempts, I found that some scalar versions of them cannot be used in a constexpr context. Therefore, I decided to implement those first. Regarding the patch itself, is it good to go or does it need further revision? @philnik777 |
Friendly ping~ @cor3ntin @hubert-reinterpretcast @tbaederr @philnik777 |
Sorry, I don't feel qualified to approve this. |
Ping @cor3ntin since he mentioned on Discord that he's available for reviews this week :) |
I am waiting for Hubert as I don't trust my knowledge of floating points to offer meaningful feedback here, sorry |
Hi @cor3ntin @tbaederr, since @hubert-reinterpretcast is unavailable for review, could you please help me find someone else who is capable and available to review this? Thank you! |
Given the massive amount of effort it takes to implement constant expression floating-point math and the various edge cases it involves, I think the goal is for Clang to share as much code with llvm-libc as possible because they have to implement all the same logic anyway. I'm imagining a situation where we have some sort of library layer for the actual implementation of the functionality; llvm-libc would then link against that library layer, exposing the C interfaces for it, and Clang would link against that library layer and use it to implement a tablegen-driven series of builtins. We'd have to be careful for that library layer to be designed so that we get the target environment's floating-point semantics instead of the host environment's, and it would need to be aware of the floating-point environment changes that come through things like pragmas. CC @michaelrj-google for opinions, but I think we may want to have the broader design in mind before we start adding piecemeal support for constexpr float functionality, even if we don't think we need it for `fma`. |
I've created an issue for planning out the LLVM-libc + clang code sharing (#114109). That being said I think FMA might be fine to implement within clang, due to its simplicity. Additionally, LLVM-libc may call CC: @lntue |
Floating::mul(X, Y, RM, &Result); | ||
Floating::add(Result, Z, RM, &Result); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I'm not so familiar with the `Floating` type here, but do they perform the operations in arbitrary precision? If these are performed in the same precision as the inputs, then this will not work correctly with double rounding errors, overflow, and underflow. If there is higher intermediate precision to be used, then it can be simplified a bit.
A simple example of double rounding errors for floating point type T
you can try is:
x = 1 + std::numeric_limits<T>::epsilon();
y = 1 + std::numeric_limits<T>::epsilon();
z = std::numeric_limits<T>::epsilon() * 0.5 ;
then for the default rounding mode:
T(T(x * y) + z) = 1 + 2 * std::numeric_limits<T>::epsilon();
FMA(x, y, z) = 1 + 3 * std::numeric_limits<T>::epsilon();
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not entirely sure about this either, thanks for pointing it out! Regarding the type, `Floating` is simply a wrapper for `APFloat`.
On a side note, a completely correct FMA without hardware support is not straightforward, especially when the wider floating point type for intermediate computations is not available. LLVM libc uses |
I suspect fma is not one of the functions we want to try to share, at least not short-term. It's one of the core IEEE operations, LLVM has APFloat::fusedMultiplyAdd since forever, and there's probably some weirdness with dependencies if we try to share it because other operations use it. |
const Floating &Z = getParam<Floating>(Frame, 2); | ||
Floating Result; | ||
|
||
llvm::RoundingMode RM = getActiveRoundingMode(S, Call); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have questions about the rounding mode. Is getActiveRoundingMode() trying to account for the FE_ROUND pragma setting? If that pragma isn't used and we aren't compiling in strict mode, this should give us "NearestTiesToEven", right?
But if the rounding mode is dynamic, then I think we need to know if the call is in a constexpr. Consider:
float f1() {
fesetround(FE_UPWARD);
constexpr float x = __builtin_fma(1.0f, 0.0f, 0.1f); // constexpr evaluates at the default rounding mode?
float y = __builtin_fma(1.0f, 0.1f, 0.1f); // Non-constexpr should use the dynamic rounding mode?
return x - y;
}
I'm not 100% certain about the language rules here, but at least in C my understanding is that initialization of non-static, non-constexpr expressions should be done "as if" evaluated at runtime.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llvm-project/clang/lib/AST/ExprConstant.cpp
Lines 2776 to 2782 in 61a456b
static llvm::RoundingMode getActiveRoundingMode(EvalInfo &Info, const Expr *E) { | |
llvm::RoundingMode RM = | |
E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).getRoundingMode(); | |
if (RM == llvm::RoundingMode::Dynamic) | |
RM = llvm::RoundingMode::NearestTiesToEven; | |
return RM; | |
} |
I actually obtained this from the old constant evaluator, but I'm not sure if it has the same rounding issue.
That's fair, I don't insist on sharing here, but it is something we should be planning for with the wider set of changes IMO. |
A problem with the current patch is that it does not evaluate, even in constant expression contexts, cases that require rounding: extern constexpr float onepluszeroeps = __builtin_fmaf(__FLT_EPSILON__, .0f, 1.f);
extern constexpr float oneplushalfeps = __builtin_fmaf(__FLT_EPSILON__, .5f, 1.f);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs tests for rounding cases and observance of rounding modes.
According to P0533R9, the C++ standard library functions `fma` are now `constexpr`:
To implement this feature in libc++, we must make the built-in `fma` function `constexpr`. This patch adds the implementation of a `constexpr` `fma` function for the current constant evaluator and the new bytecode interpreter.