Fix mechanism propagating mangled names for TLI function mappings (#66656)

JolantaJensen · web-flow · commit 01797dad8686 · 2023-10-02T18:58:39.000+01:00
Currently the mappings from TLI are used to generate the list of
available "scalar to vector" mappings attached to scalar calls as
the "vector-function-abi-variant" LLVM IR attribute. Function names from
TLI are wrapped in a mangled name following the pattern:
_ZGV<isa><mask><vlen><parameters>_<scalar_name>[(<vector_redirection>)]
The problem is that the mangled name uses _LLVM_ as the ISA name, which
prevents the compiler from computing the vectorization factor for scalable
vectors, as it cannot make any decision based on the _LLVM_ ISA. If we
use "s" as the ISA name, the compiler can make decisions based on the VFABI
specification, where SVE-specific rules are described.

This patch is only a refactoring stage where there is no change to the
compiler's behaviour.
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -24,14 +24,46 @@ class Function;
 class Module;
 class Triple;
 
-/// Describes a possible vectorization of a function.
-/// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized
-/// by a factor 'VectorizationFactor'.
-struct VecDesc {
+/// Provides info so a possible vectorization of a function can be
+/// computed. Function 'VectorFnName' is equivalent to 'ScalarFnName'
+/// vectorized by a factor 'VectorizationFactor'.
+/// The VABIPrefix string holds information about isa, mask, vlen,
+/// and vparams so a scalar-to-vector mapping of the form:
+///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
+/// can be constructed where:
+///
+/// <isa> = "_LLVM_"
+/// <mask> = "M" if masked, "N" if no mask.
+/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
+///          field of the `VecDesc` struct. If the number of lanes is scalable
+///          then 'x' is printed instead.
+/// <vparams> = "v", as many as are the numArgs.
+/// <scalarname> = the name of the scalar function.
+/// <vectorname> = the name of the vector function.
+class VecDesc {
   StringRef ScalarFnName;
   StringRef VectorFnName;
   ElementCount VectorizationFactor;
   bool Masked;
+  StringRef VABIPrefix;
+
+public:
+  VecDesc() = delete;
+  VecDesc(StringRef ScalarFnName, StringRef VectorFnName,
+          ElementCount VectorizationFactor, bool Masked, StringRef VABIPrefix)
+      : ScalarFnName(ScalarFnName), VectorFnName(VectorFnName),
+        VectorizationFactor(VectorizationFactor), Masked(Masked),
+        VABIPrefix(VABIPrefix) {}
+
+  StringRef getScalarFnName() const { return ScalarFnName; }
+  StringRef getVectorFnName() const { return VectorFnName; }
+  ElementCount getVectorizationFactor() const { return VectorizationFactor; }
+  bool isMasked() const { return Masked; }
+  StringRef getVABIPrefix() const { return VABIPrefix; }
+
+  /// Returns a vector function ABI variant string on the form:
+  ///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
+  std::string getVectorFunctionABIVariantString() const;
 };
 
   enum LibFunc : unsigned {
@@ -176,6 +208,12 @@ class TargetLibraryInfoImpl {
   StringRef getVectorizedFunction(StringRef F, const ElementCount &VF,
                                   bool Masked) const;
 
+  /// Return a pointer to a VecDesc object holding all info for scalar to vector
+  /// mappings in TLI for the equivalent of F, vectorized with factor VF.
+  /// If no such mapping exists, return nullpointer.
+  const VecDesc *getVectorMappingInfo(StringRef F, const ElementCount &VF,
+                                      bool Masked) const;
+
   /// Set to true iff i32 parameters to library functions should have signext
   /// or zeroext attributes if they correspond to C-level int or unsigned int,
   /// respectively.
@@ -354,6 +392,10 @@ class TargetLibraryInfo {
                                   bool Masked = false) const {
     return Impl->getVectorizedFunction(F, VF, Masked);
   }
+  const VecDesc *getVectorMappingInfo(StringRef F, const ElementCount &VF,
+                                      bool Masked) const {
+    return Impl->getVectorMappingInfo(F, VF, Masked);
+  }
 
   /// Tests if the function is both available and a candidate for optimized code
   /// generation.
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -182,27 +182,6 @@ static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";
 std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
                                           const Module &M);
 
-/// This routine mangles the given VectorName according to the LangRef
-/// specification for vector-function-abi-variant attribute and is specific to
-/// the TLI mappings. It is the responsibility of the caller to make sure that
-/// this is only used if all parameters in the vector function are vector type.
-/// This returned string holds scalar-to-vector mapping:
-///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
-///
-/// where:
-///
-/// <isa> = "_LLVM_"
-/// <mask> = "M" if masked, "N" if no mask.
-/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
-///          field of the `VecDesc` struct. If the number of lanes is scalable
-///          then 'x' is printed instead.
-/// <vparams> = "v", as many as are the numArgs.
-/// <scalarname> = the name of the scalar function.
-/// <vectorname> = the name of the vector function.
-std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
-                                unsigned numArgs, ElementCount VF,
-                                bool Masked = false);
-
 /// Retrieve the `VFParamKind` from a string token.
 VFParamKind getVFParamKindFromString(const StringRef Token);
 
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
@@ -44,6 +45,13 @@ StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
 #include "llvm/Analysis/TargetLibraryInfo.def"
 };
 
+std::string VecDesc::getVectorFunctionABIVariantString() const {
+  SmallString<256> Buffer;
+  llvm::raw_svector_ostream Out(Buffer);
+  Out << VABIPrefix << "_" << ScalarFnName << "(" << VectorFnName << ")";
+  return std::string(Out.str());
+}
+
 // Recognized types of library function arguments and return types.
 enum FuncArgTypeID : char {
   Void = 0, // Must be zero.
@@ -1138,15 +1146,15 @@ void TargetLibraryInfoImpl::disableAllFunctions() {
 }
 
 static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
-  return LHS.ScalarFnName < RHS.ScalarFnName;
+  return LHS.getScalarFnName() < RHS.getScalarFnName();
 }
 
 static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
-  return LHS.VectorFnName < RHS.VectorFnName;
+  return LHS.getVectorFnName() < RHS.getVectorFnName();
 }
 
 static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
-  return LHS.ScalarFnName < S;
+  return LHS.getScalarFnName() < S;
 }
 
 void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
@@ -1203,17 +1211,20 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
   case SLEEFGNUABI: {
     const VecDesc VecFuncs_VF2[] = {
 #define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX)                         \
+  {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX},
 #include "llvm/Analysis/VecFuncs.def"
     };
     const VecDesc VecFuncs_VF4[] = {
 #define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX)                         \
+  {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX},
 #include "llvm/Analysis/VecFuncs.def"
     };
     const VecDesc VecFuncs_VFScalable[] = {
 #define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX)                   \
+  {SCAL, VEC, VF, MASK, VABI_PREFIX},
 #include "llvm/Analysis/VecFuncs.def"
     };
 
@@ -1232,7 +1243,8 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
   case ArmPL: {
     const VecDesc VecFuncs[] = {
 #define TLI_DEFINE_ARMPL_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX)                   \
+  {SCAL, VEC, VF, MASK, VABI_PREFIX},
 #include "llvm/Analysis/VecFuncs.def"
     };
 
@@ -1258,23 +1270,32 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
 
   std::vector<VecDesc>::const_iterator I =
       llvm::lower_bound(VectorDescs, funcName, compareWithScalarFnName);
-  return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+  return I != VectorDescs.end() && StringRef(I->getScalarFnName()) == funcName;
 }
 
 StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
                                                        const ElementCount &VF,
                                                        bool Masked) const {
+  const VecDesc *VD = getVectorMappingInfo(F, VF, Masked);
+  if (VD)
+    return VD->getVectorFnName();
+  return StringRef();
+}
+
+const VecDesc *
+TargetLibraryInfoImpl::getVectorMappingInfo(StringRef F, const ElementCount &VF,
+                                            bool Masked) const {
   F = sanitizeFunctionName(F);
   if (F.empty())
-    return F;
+    return nullptr;
   std::vector<VecDesc>::const_iterator I =
       llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
-  while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
-    if ((I->VectorizationFactor == VF) && (I->Masked == Masked))
-      return I->VectorFnName;
+  while (I != VectorDescs.end() && StringRef(I->getScalarFnName()) == F) {
+    if ((I->getVectorizationFactor() == VF) && (I->isMasked() == Masked))
+      return &(*I);
     ++I;
   }
-  return StringRef();
+  return nullptr;
 }
 
 TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
@@ -1346,11 +1367,11 @@ void TargetLibraryInfoImpl::getWidestVF(StringRef ScalarF,
 
   std::vector<VecDesc>::const_iterator I =
       llvm::lower_bound(VectorDescs, ScalarF, compareWithScalarFnName);
-  while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == ScalarF) {
+  while (I != VectorDescs.end() && StringRef(I->getScalarFnName()) == ScalarF) {
     ElementCount *VF =
-        I->VectorizationFactor.isScalable() ? &ScalableVF : &FixedVF;
-    if (ElementCount::isKnownGT(I->VectorizationFactor, *VF))
-      *VF = I->VectorizationFactor;
+        I->getVectorizationFactor().isScalable() ? &ScalableVF : &FixedVF;
+    if (ElementCount::isKnownGT(I->getVectorizationFactor(), *VF))
+      *VF = I->getVectorizationFactor();
     ++I;
   }
 }
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1453,22 +1453,6 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
 }
 }
 
-std::string VFABI::mangleTLIVectorName(StringRef VectorName,
-                                       StringRef ScalarName, unsigned numArgs,
-                                       ElementCount VF, bool Masked) {
-  SmallString<256> Buffer;
-  llvm::raw_svector_ostream Out(Buffer);
-  Out << "_ZGV" << VFABI::_LLVM_ << (Masked ? "M" : "N");
-  if (VF.isScalable())
-    Out << 'x';
-  else
-    Out << VF.getFixedValue();
-  for (unsigned I = 0; I < numArgs; ++I)
-    Out << "v";
-  Out << "_" << ScalarName << "(" << VectorName << ")";
-  return std::string(Out.str());
-}
-
 void VFABI::getVectorVariantNames(
     const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
   const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -155,8 +155,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
   // Try to find the mapping for the scalar version of this intrinsic
   // and the exact vector width of the call operands in the
   // TargetLibraryInfo.
-  const std::string TLIName =
-      std::string(TLI.getVectorizedFunction(ScalarName, VF));
+  StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
 
   LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
                     << ScalarName << "` and vector width " << VF << ".\n");
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -91,18 +91,16 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
                                                    Mappings.end());
 
   auto AddVariantDecl = [&](const ElementCount &VF, bool Predicate) {
-    const std::string TLIName =
-        std::string(TLI.getVectorizedFunction(ScalarName, VF, Predicate));
-    if (!TLIName.empty()) {
-      std::string MangledName = VFABI::mangleTLIVectorName(
-          TLIName, ScalarName, CI.arg_size(), VF, Predicate);
+    const VecDesc *VD = TLI.getVectorMappingInfo(ScalarName, VF, Predicate);
+    if (VD) {
+      std::string MangledName = VD->getVectorFunctionABIVariantString();
       if (!OriginalSetOfMappings.count(MangledName)) {
         Mappings.push_back(MangledName);
         ++NumCallInjected;
       }
-      Function *VariantF = M->getFunction(TLIName);
+      Function *VariantF = M->getFunction(VD->getVectorFnName());
       if (!VariantF)
-        addVariantDeclaration(CI, VF, Predicate, TLIName);
+        addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName());
     }
   };
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll
@@ -195,7 +195,7 @@ for.end:
   ret void
 }
 
-declare float @atan2f(float) nounwind readnone
+declare float @atan2f(float, float) nounwind readnone
 define void @atan2f_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atan2f_v4f32(
 ; CHECK: call <4 x float> @_simd_atan2_f4(
@@ -208,7 +208,7 @@ for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
   %lv = load float, ptr %gep.y, align 4
-  %call = tail call float @atan2f(float %lv)
+  %call = tail call float @atan2f(float %lv, float %lv)
   %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
   store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
@@ -219,7 +219,7 @@ for.end:
   ret void
 }
 
-declare double @atan2(double) nounwind readnone
+declare double @atan2(double, double) nounwind readnone
 define void @atan2_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atan2_v2f64(
 ; CHECK: call <2 x double> @_simd_atan2_d2(
@@ -232,7 +232,7 @@ for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
   %lv = load double, ptr %gep.y, align 4
-  %call = tail call double @atan2(double %lv)
+  %call = tail call double @atan2(double %lv, double %lv)
   %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
   store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
@@ -387,7 +387,7 @@ for.end:
   ret void
 }
 
-declare float @powf(float) nounwind readnone
+declare float @powf(float, float) nounwind readnone
 define void @powf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @powf_v4f32(
 ; CHECK: call <4 x float> @_simd_pow_f4(
@@ -400,7 +400,7 @@ for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
   %lv = load float, ptr %gep.y, align 4
-  %call = tail call float @powf(float %lv)
+  %call = tail call float @powf(float %lv, float %lv)
   %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
   store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
@@ -411,7 +411,7 @@ for.end:
   ret void
 }
 
-declare double @pow(double) nounwind readnone
+declare double @pow(double, double) nounwind readnone
 define void @pow_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @pow_v2f64(
 ; CHECK: call <2 x double> @_simd_pow_d2(
@@ -424,7 +424,7 @@ for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
   %lv = load double, ptr %gep.y, align 4
-  %call = tail call double @pow(double %lv)
+  %call = tail call double @pow(double %lv, double %lv)
   %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
   store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
@@ -65,8 +65,8 @@ declare float @acosf(float) #0
 declare double @atan(double) #0
 declare float @atanf(float) #0
 
-declare double @atan2(double) #0
-declare float @atan2f(float) #0
+declare double @atan2(double, double) #0
+declare float @atan2f(float, float) #0
 
 declare double @sinh(double) #0
 declare float @sinhf(float) #0
@@ -1210,7 +1210,7 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
-  %call = tail call double @atan2(double %conv)
+  %call = tail call double @atan2(double %conv, double %conv)
   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
   store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1233,7 +1233,7 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
-  %call = tail call float @atan2f(float %conv)
+  %call = tail call float @atan2f(float %conv, float %conv)
   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
   store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
diff --git a/llvm/unittests/Analysis/VectorFunctionABITest.cpp b/llvm/unittests/Analysis/VectorFunctionABITest.cpp