WIP/POC: Constant Fold Logf128 calls

MDevereau · MDevereau · commit 3c2444dc4eba · 2024-03-08T15:35:30.000Z
This is a proof of concept/work in progress patch.

This patch enables ConstantFolding of log FP128 calls.

This is achieved by querying with CMake if the host
system has the logf128 symbol available. If so, replace
the runtime call with the compile time constant returned
from logf128.

This approach could be considered controversial as cross-compiled
llvm executables using shared objects may not have the logf128 symbol
available at runtime.

The implementation of logf128 may also yield different results
on different targets, such as x86 using fp80 precision instead
of the full fp128 range on other targets.

This approach relies on unit tests, as more commonplace Clang/C
tests and opt/llc/IR tests are not applicable since they
are ignorant to the result of the compile time CMake check.
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
@@ -299,6 +299,7 @@ class IEEEFloat final : public APFloatBase {
   IEEEFloat(const fltSemantics &, integerPart);
   IEEEFloat(const fltSemantics &, uninitializedTag);
   IEEEFloat(const fltSemantics &, const APInt &);
+  explicit IEEEFloat(long double ld);
   explicit IEEEFloat(double d);
   explicit IEEEFloat(float f);
   IEEEFloat(const IEEEFloat &);
@@ -354,6 +355,7 @@ class IEEEFloat final : public APFloatBase {
   Expected<opStatus> convertFromString(StringRef, roundingMode);
   APInt bitcastToAPInt() const;
   double convertToDouble() const;
+  long double convertToQuad() const;
   float convertToFloat() const;
 
   /// @}
@@ -942,6 +944,7 @@ class APFloat : public APFloatBase {
   APFloat(const fltSemantics &Semantics, uninitializedTag)
       : U(Semantics, uninitialized) {}
   APFloat(const fltSemantics &Semantics, const APInt &I) : U(Semantics, I) {}
+  explicit APFloat(long double ld) : U(IEEEFloat(ld), IEEEquad()) {}
   explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble()) {}
   explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle()) {}
   APFloat(const APFloat &RHS) = default;
@@ -1218,6 +1221,13 @@ class APFloat : public APFloatBase {
   /// shorter semantics, like IEEEsingle and others.
   double convertToDouble() const;
 
+  /// Converts this APFloat to host float value.
+  ///
+  /// \pre The APFloat must be built using semantics, that can be represented by
+  /// the host float type without loss of precision. It can be IEEEquad and
+  /// shorter semantics, like IEEEdouble and others.
+  long double convertToQuad() const;
+
   /// Converts this APFloat to host float value.
   ///
   /// \pre The APFloat must be built using semantics, that can be represented by
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
@@ -1663,6 +1663,11 @@ class [[nodiscard]] APInt {
   /// any bit width. Exactly 64 bits will be translated.
   double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); }
 
+  long double bitsToQuad() const {
+    __uint128_t ul = ((__uint128_t)U.pVal[1] << 64) + U.pVal[0];
+    return llvm::bit_cast<long double>(ul);
+  }
+
   /// Converts APInt bits to a float
   ///
   /// The conversion does not do a translation from integer to float, it just
@@ -1688,6 +1693,16 @@ class [[nodiscard]] APInt {
     return APInt(sizeof(float) * CHAR_BIT, llvm::bit_cast<uint32_t>(V));
   }
 
+  static APInt longDoubleToBits(long double V) {
+    assert(sizeof(long double) == 16 && "Expected 16 byte long double");
+
+    const uint64_t Words[2] = {
+        static_cast<uint64_t>(V),
+        static_cast<uint64_t>(llvm::bit_cast<__uint128_t>(V) >> 64),
+    };
+    return APInt(sizeof(long double) * CHAR_BIT, 2, Words);
+  }
+
   /// @}
   /// \name Mathematics Operations
   /// @{
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
@@ -289,6 +289,8 @@ class ConstantFP final : public ConstantData {
   /// host double and as the target format.
   static Constant *get(Type *Ty, double V);
 
+  static Constant *get128(Type *Ty, long double V);
+
   /// If Ty is a vector type, return a Constant with a splat of the given
   /// value. Otherwise return a ConstantFP for the given value.
   static Constant *get(Type *Ty, const APFloat &V);
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
@@ -161,3 +161,9 @@ add_llvm_component_library(LLVMAnalysis
   Support
   TargetParser
   )
+
+include(CheckCXXSymbolExists)
+check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
+if(HAS_LOGF128)
+ target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
+endif()
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1678,9 +1678,9 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
            Name == "floor" || Name == "floorf" ||
            Name == "fmod" || Name == "fmodf";
   case 'l':
-    return Name == "log" || Name == "logf" ||
-           Name == "log2" || Name == "log2f" ||
-           Name == "log10" || Name == "log10f";
+    return Name == "log" || Name == "logf" || Name == "log2" ||
+           Name == "log2f" || Name == "log10" || Name == "log10f" ||
+           Name == "logl";
   case 'n':
     return Name == "nearbyint" || Name == "nearbyintf";
   case 'p':
@@ -1763,6 +1763,15 @@ inline bool llvm_fenv_testexcept() {
   return false;
 }
 
+Constant *ConstantFoldLogf128(const APFloat &V, Type *Ty) {
+#ifdef HAS_LOGF128
+  long double l = logf128(V.convertToQuad());
+  return ConstantFP::get128(Ty, l);
+#else
+  return nullptr;
+#endif
+}
+
 Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
                          Type *Ty) {
   llvm_fenv_clearexcept();
@@ -2094,7 +2103,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
     if (IntrinsicID == Intrinsic::canonicalize)
       return constantFoldCanonicalize(Ty, Call, U);
 
-    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() &&
+        !Ty->isFP128Ty())
       return nullptr;
 
     // Use internal versions of these intrinsics.
@@ -2209,6 +2219,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
     switch (IntrinsicID) {
       default: break;
       case Intrinsic::log:
+        if (Ty->isFP128Ty())
+          return ConstantFoldLogf128(APF, Ty);
         return ConstantFoldFP(log, APF, Ty);
       case Intrinsic::log2:
         // TODO: What about hosts that lack a C99 library?
@@ -2338,6 +2350,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
         return ConstantFoldFP(log, APF, Ty);
       break;
+    case LibFunc_logl:
+      if (!APF.isNegative() && !APF.isZero() && TLI->has(Func) &&
+          Ty->isFP128Ty())
+        return ConstantFoldLogf128(APF, Ty);
+      break;
     case LibFunc_log2:
     case LibFunc_log2f:
     case LibFunc_log2_finite:
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
@@ -976,6 +976,22 @@ Constant *ConstantFP::get(Type *Ty, double V) {
   return C;
 }
 
+Constant *ConstantFP::get128(Type *Ty, long double V) {
+  LLVMContext &Context = Ty->getContext();
+
+  APFloat FV(V);
+  bool ignored;
+  FV.convert(Ty->getScalarType()->getFltSemantics(),
+             APFloat::rmNearestTiesToEven, &ignored);
+  Constant *C = get(Context, FV);
+
+  // For vectors, broadcast the value.
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+    return ConstantVector::getSplat(VTy->getElementCount(), C);
+
+  return C;
+}
+
 Constant *ConstantFP::get(Type *Ty, const APFloat &V) {
   ConstantFP *C = get(Ty->getContext(), V);
   assert(C->getType() == Ty->getScalarType() &&
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
@@ -3670,6 +3670,13 @@ double IEEEFloat::convertToDouble() const {
   return api.bitsToDouble();
 }
 
+long double IEEEFloat::convertToQuad() const {
+  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
+         "Float semantics are not IEEEquads");
+  APInt api = bitcastToAPInt();
+  return api.bitsToQuad();
+}
+
 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
 /// does not support these bit patterns:
 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
@@ -3958,6 +3965,10 @@ IEEEFloat::IEEEFloat(double d) {
   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
 }
 
+IEEEFloat::IEEEFloat(long double ld) {
+  initFromAPInt(&semIEEEquad, APInt::longDoubleToBits(ld));
+}
+
 namespace {
   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
     Buffer.append(Str.begin(), Str.end());
@@ -5265,6 +5276,19 @@ double APFloat::convertToDouble() const {
   return Temp.getIEEE().convertToDouble();
 }
 
+long double APFloat::convertToQuad() const {
+  if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
+    return getIEEE().convertToQuad();
+  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
+         "Float semantics is not representable by IEEEquad");
+  APFloat Temp = *this;
+  bool LosesInfo;
+  opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
+  assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
+  (void)St;
+  return Temp.getIEEE().convertToQuad();
+}
+
 float APFloat::convertToFloat() const {
   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
     return getIEEE().convertToFloat();
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
@@ -51,6 +51,7 @@ set(ANALYSIS_TEST_SOURCES
   ValueLatticeTest.cpp
   ValueTrackingTest.cpp
   VectorUtilsTest.cpp
+  ConstantLogf128.cpp
   )
 
 set(MLGO_TESTS TFUtilsTest.cpp)
@@ -80,5 +81,11 @@ if(NOT WIN32)
   export_executable_symbols_for_plugins(AnalysisTests)
 endif()
 
+include(CheckCXXSymbolExists)
+check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
+if(HAS_LOGF128)
+  target_compile_definitions(AnalysisTests PRIVATE HAS_LOGF128)
+endif()
+
 add_subdirectory(InlineAdvisorPlugin)
 add_subdirectory(InlineOrderPlugin)
diff --git a/llvm/unittests/Analysis/ConstantLogf128.cpp b/llvm/unittests/Analysis/ConstantLogf128.cpp
@@ -0,0 +1,74 @@
+//===- unittests/CodeGen/BufferSourceTest.cpp - MemoryBuffer source tests -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class ConstantFoldLogf128Fixture
+    : public ::testing ::TestWithParam<std::string> {
+protected:
+  std::string FuncName;
+};
+
+TEST_P(ConstantFoldLogf128Fixture, ConstantFoldLogf128) {
+  LLVMContext Context;
+  IRBuilder<> Builder(Context);
+  Module MainModule("Logf128TestModule", Context);
+  MainModule.setTargetTriple("aarch64-unknown-linux");
+
+  Type *FP128Ty = Type::getFP128Ty(Context);
+  FunctionType *FP128Prototype = FunctionType::get(FP128Ty, false);
+  Function *Logf128TestFunction = Function::Create(
+      FP128Prototype, Function::ExternalLinkage, "logf128test", MainModule);
+  BasicBlock *EntryBlock =
+      BasicBlock::Create(Context, "entry", Logf128TestFunction);
+  Builder.SetInsertPoint(EntryBlock);
+
+  FunctionType *FP128FP128Prototype =
+      FunctionType::get(FP128Ty, {FP128Ty}, false);
+  Constant *Constant2L = ConstantFP::get128(FP128Ty, 2.0L);
+
+  std::string FunctionName = GetParam();
+  Function *Logl = Function::Create(
+      FP128FP128Prototype, Function::ExternalLinkage, FunctionName, MainModule);
+  CallInst *LoglCall = Builder.CreateCall(Logl, Constant2L);
+
+  TargetLibraryInfoImpl TLII(Triple(MainModule.getTargetTriple()));
+  TargetLibraryInfo TLI(TLII, Logf128TestFunction);
+  Constant *FoldResult = ConstantFoldCall(LoglCall, Logl, Constant2L, &TLI);
+
+#ifndef HAS_LOGF128
+  ASSERT_TRUE(FoldResult == nullptr);
+#else
+  auto ConstantLog = dyn_cast<ConstantFP>(FoldResult);
+  ASSERT_TRUE(ConstantLog);
+
+  APFloat APF = ConstantLog->getValueAPF();
+  char LongDoubleHexString[0xFF];
+  unsigned Size =
+      APF.convertToHexString(LongDoubleHexString, 32, true,
+                             APFloatBase::roundingMode::NearestTiesToAway);
+  EXPECT_GT(Size, 0U);
+
+  ASSERT_STREQ(LongDoubleHexString,
+               std::string("0X1.62E42FEFA39E0000000000000000000P-1").c_str());
+#endif
+}
+
+INSTANTIATE_TEST_SUITE_P(ConstantFoldLogf128, ConstantFoldLogf128Fixture,
+                         ::testing::Values("logl", "llvm.log.f128"));
+
+} // end anonymous namespace