From da847da3ce4a68b5702126c1965e6f6b5b1f463f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 18 Aug 2025 12:30:01 +0100 Subject: [PATCH 1/2] [TTI] Remove Args argument from getOperandsScalarizationOverhead (NFC). Remove the ArrayRef<const Value *> Args operand from getOperandsScalarizationOverhead and require that the callers de-duplicate arguments and filter constant operands. Removing the Value * based Args argument enables callers where no Value * operands are available to use the function in a follow-up: computing the scalarization cost directly for a VPlan recipe. It also allows more accurate cost-estimates in the future: for example, when vectorizing a loop, we could also skip operands that are live-ins, as those also do not require scalarization. --- .../llvm/Analysis/TargetTransformInfo.h | 8 ++-- .../llvm/Analysis/TargetTransformInfoImpl.h | 3 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 46 +++++++++++-------- llvm/lib/Analysis/TargetTransformInfo.cpp | 5 +- .../Transforms/Vectorize/LoopVectorize.cpp | 15 ++++-- 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 1e03209e888bf..c4ba8e9857dc4 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -961,12 +961,10 @@ class TargetTransformInfo { TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, ArrayRef VL = {}) const; - /// Estimate the overhead of scalarizing an instructions unique - /// non-constant operands. The (potentially vector) types to use for each of - /// argument are passes via Tys. + /// Estimate the overhead of scalarizing operands with the given types. The + /// (potentially vector) types to use for each argument are passed via Tys. 
LLVM_ABI InstructionCost getOperandsScalarizationOverhead( - ArrayRef Args, ArrayRef Tys, - TTI::TargetCostKind CostKind) const; + ArrayRef Tys, TTI::TargetCostKind CostKind) const; /// If target has efficient vector element load/store instructions, it can /// return true here so that insertion/extraction costs are not added to diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 252acf381a8e1..43813d2f3acb5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -459,8 +459,7 @@ class TargetTransformInfoImplBase { } virtual InstructionCost - getOperandsScalarizationOverhead(ArrayRef Args, - ArrayRef Tys, + getOperandsScalarizationOverhead(ArrayRef Tys, TTI::TargetCostKind CostKind) const { return 0; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 27320b510b950..6322cf493639a 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" @@ -347,6 +348,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } + /// Filter out constant and duplicated entries in \p Ops and return a vector + /// containing the corresponding types. 
+ static SmallVector + filterConstantAndDuplicatedOperands(ArrayRef Ops, + ArrayRef Tys) { + SmallPtrSet UniqueOperands; + SmallVector FilteredTys; + for (const auto &[Op, Ty] : zip_equal(Ops, Tys)) { + if (isa(Op) || !UniqueOperands.insert(Op).second) + continue; + FilteredTys.push_back(Ty); + } + return FilteredTys; + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -935,29 +951,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CostKind); } - /// Estimate the overhead of scalarizing an instructions unique - /// non-constant operands. The (potentially vector) types to use for each of + /// Estimate the overhead of scalarizing an instructions + /// operands. The (potentially vector) types to use for each of /// argument are passes via Tys. InstructionCost getOperandsScalarizationOverhead( - ArrayRef Args, ArrayRef Tys, - TTI::TargetCostKind CostKind) const override { - assert(Args.size() == Tys.size() && "Expected matching Args and Tys"); - + ArrayRef Tys, TTI::TargetCostKind CostKind) const override { InstructionCost Cost = 0; - SmallPtrSet UniqueOperands; - for (int I = 0, E = Args.size(); I != E; I++) { + for (Type *Ty : Tys) { // Disregard things like metadata arguments. 
- const Value *A = Args[I]; - Type *Ty = Tys[I]; if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() && !Ty->isPtrOrPtrVectorTy()) continue; - if (!isa(A) && UniqueOperands.insert(A).second) { - if (auto *VecTy = dyn_cast(Ty)) - Cost += getScalarizationOverhead(VecTy, /*Insert*/ false, - /*Extract*/ true, CostKind); - } + if (auto *VecTy = dyn_cast(Ty)) + Cost += getScalarizationOverhead(VecTy, /*Insert*/ false, + /*Extract*/ true, CostKind); } return Cost; @@ -974,7 +982,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { InstructionCost Cost = getScalarizationOverhead( RetTy, /*Insert*/ true, /*Extract*/ false, CostKind); if (!Args.empty()) - Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind); + Cost += getOperandsScalarizationOverhead( + filterConstantAndDuplicatedOperands(Args, Tys), CostKind); else // When no information on arguments is provided, we add the cost // associated with one argument as a heuristic. @@ -2170,8 +2179,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /*Insert=*/true, /*Extract=*/false, CostKind); } } - ScalarizationCost += - getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind); + ScalarizationCost += getOperandsScalarizationOverhead( + filterConstantAndDuplicatedOperands(Args, ICA.getArgTypes()), + CostKind); } IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 323ab8b1ddad1..4ac8f03e6dbf5 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -637,9 +637,8 @@ InstructionCost TargetTransformInfo::getScalarizationOverhead( } InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead( - ArrayRef Args, ArrayRef Tys, - TTI::TargetCostKind CostKind) const { - return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind); + ArrayRef Tys, TTI::TargetCostKind CostKind) const { + 
return TTIImpl->getOperandsScalarizationOverhead(Tys, CostKind); } bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e7f6523cd86d..7fc87a0b49f70 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1697,8 +1697,16 @@ class LoopVectorizationCostModel { /// Returns a range containing only operands needing to be extracted. SmallVector filterExtractingOperands(Instruction::op_range Ops, ElementCount VF) const { - return SmallVector(make_filter_range( - Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); })); + + SmallPtrSet UniqueOperands; + SmallVector Res; + for (Value *Op : Ops) { + if (isa(Op) || !UniqueOperands.insert(Op).second || + !needsExtract(Op, VF)) + continue; + Res.push_back(Op); + } + return Res; } public: @@ -5610,8 +5618,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, SmallVector Tys; for (auto *V : filterExtractingOperands(Ops, VF)) Tys.push_back(maybeVectorizeType(V->getType(), VF)); - return Cost + TTI.getOperandsScalarizationOverhead( - filterExtractingOperands(Ops, VF), Tys, CostKind); + return Cost + TTI.getOperandsScalarizationOverhead(Tys, CostKind); } void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { From bbb3e6e7fc0f0f80a623ed49d5b5ac1220e2b8c3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 20 Aug 2025 20:36:28 +0100 Subject: [PATCH 2/2] !fixup update comments, thanks --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 6322cf493639a..0a10b51f97c63 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -349,7 +349,7 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase { } /// Filter out constant and duplicated entries in \p Ops and return a vector - /// containing the corresponding types. + /// containing the types from \p Tys corresponding to the remaining operands. static SmallVector filterConstantAndDuplicatedOperands(ArrayRef Ops, ArrayRef Tys) { @@ -951,7 +951,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CostKind); } - /// Estimate the overhead of scalarizing an instructions + /// Estimate the overhead of scalarizing an instruction's /// operands. The (potentially vector) types to use for each of /// argument are passes via Tys. InstructionCost getOperandsScalarizationOverhead(