-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[TTI] Support scalable offsets in getScalingFactorCost #88113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TTI] Support scalable offsets in getScalingFactorCost #88113
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Graham Hunter (huntergr-arm) Changes: Part of the work to support vscale-relative immediates in LSR. No tests added yet, but I'd like feedback on the approach. Full diff: https://github.com/llvm/llvm-project/pull/88113.diff 10 Files Affected:
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index fa9392b86c15b9..4c6b8e312786cc 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -835,8 +835,8 @@ class TargetTransformInfo {
/// TODO: Handle pre/postinc as well.
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale,
- unsigned AddrSpace = 0) const;
+ int64_t Scale, unsigned AddrSpace = 0,
+ int64_t ScalableOffset = 0) const;
/// Return true if the loop strength reduce pass should make
/// Instruction* based TTI queries to isLegalAddressingMode(). This is
@@ -1894,7 +1894,8 @@ class TargetTransformInfo::Concept {
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) = 0;
+ unsigned AddrSpace,
+ int64_t ScalableOffset) = 0;
virtual bool LSRWithInstrQueries() = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
@@ -2406,10 +2407,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
}
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale,
- unsigned AddrSpace) override {
+ int64_t Scale, unsigned AddrSpace,
+ int64_t ScalableOffset) override {
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
- AddrSpace);
+ AddrSpace, ScalableOffset);
}
bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 63c2ef8912b29c..72c7b805abbb67 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -327,11 +327,11 @@ class TargetTransformInfoImplBase {
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale,
- unsigned AddrSpace) const {
+ int64_t Scale, unsigned AddrSpace,
+ int64_t ScalableOffset) const {
// Guess that all legal addressing mode are free.
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
- AddrSpace))
+ AddrSpace, /*I=*/nullptr, ScalableOffset))
return 0;
return -1;
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 42d8f74fd427fb..7f42e239d85d96 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -406,12 +406,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) {
+ int64_t Scale, unsigned AddrSpace,
+ int64_t ScalableOffset) {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
+ AM.ScalableOffset = ScalableOffset;
if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
return 0;
return -1;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5f933b4587843c..d00ab62bad9fad 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -532,9 +532,9 @@ bool TargetTransformInfo::prefersVectorizedAddressing() const {
InstructionCost TargetTransformInfo::getScalingFactorCost(
Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) const {
+ int64_t Scale, unsigned AddrSpace, int64_t ScalableOffset) const {
InstructionCost Cost = TTIImpl->getScalingFactorCost(
- Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace);
+ Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, ScalableOffset);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ee7137b92445bb..2b75f0ea2d4d6f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4118,10 +4118,9 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
return NumInsns >= SVETailFoldInsnThreshold;
}
-InstructionCost
-AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) const {
+InstructionCost AArch64TTIImpl::getScalingFactorCost(
+ Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale, unsigned AddrSpace, int64_t ScalableOffset) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
@@ -4134,6 +4133,7 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
+ AM.ScalableOffset = ScalableOffset;
if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index de39dea2be43e1..0f7315446c70d4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -407,7 +407,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
/// If the AM is not supported, it returns a negative value.
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) const;
+ int64_t Scale, unsigned AddrSpace,
+ int64_t ScalableOffset) const;
/// @}
bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3be894ad3bef2c..73e47fbea23057 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2572,12 +2572,14 @@ bool ARMTTIImpl::preferPredicatedReductionSelect(
InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) const {
+ unsigned AddrSpace,
+ int64_t ScalableOffset) const {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
+ AM.ScalableOffset = ScalableOffset;
if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace)) {
if (ST->hasFPAO())
return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index bb4b321b530091..10e4b2977a563a 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -303,7 +303,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
/// If the AM is not supported, the return value must be negative.
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) const;
+ int64_t Scale, unsigned AddrSpace,
+ int64_t ScalableOffset) const;
bool maybeLoweredToCall(Instruction &I);
bool isLoweredToCall(const Function *F);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 5d1810b5bc2c6f..0cfa1da2ce7d78 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6670,7 +6670,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) const {
+ unsigned AddrSpace,
+ int64_t ScalableOffset) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
// will take 2 allocations in the out of order engine instead of 1
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 985b00438ce878..060b2b98b341da 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -253,7 +253,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
/// If the AM is not supported, it returns a negative value.
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) const;
+ int64_t Scale, unsigned AddrSpace,
+ int64_t ScalableOffset) const;
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2);
|
While I appreciate it comes with a significant refactoring cost my initial reaction is these interfaces should take |
58204d6
to
2a61c4d
Compare
Rebased, switched to StackOffset. There's a potential problem with different approaches here -- we're using StackOffset here now, but two scalars in isLegalAddressingMode, and we have two different functions for isLegalAddImmediate. I think we should be more consistent, though I don't have a preference on which one to go with. Another possibility is to significantly change the interface of this function, to only take the Scale argument. This interface is only used in LSR, and basically acts as a wrapper around isLegalAddressingMode -- it just returns an InstructionCost based on the Scale if the addressing mode is legal, and asserts otherwise (the base dispatcher asserts if the Cost is negative, and the specialized versions return -1 if the mode was not legal). So you already need to know that the addressing mode is valid, and you just want to know any additional cost of a scaled register. |
@@ -834,7 +834,7 @@ class TargetTransformInfo { | |||
/// If the AM is not supported, it returns a negative value. | |||
/// TODO: Handle pre/postinc as well. | |||
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | |||
int64_t BaseOffset, bool HasBaseReg, | |||
StackOffset BaseOffset, bool HasBaseReg, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems we now have three approaches for passing a 'scalable' and a 'fixed-length' offset to interfaces for this:
- One using
StackOffset
(this patch forgetScalingFactorCost
) - One passing in two separate variables (
isLegalAddressingMode
) - Another one with two separate interfaces (
isLegalAddImmediate
vsisLegalAddScalableImmediate
)
It would be good to settle on a single approach. @paulwalker-arm is there a particular reason you're pushing for (1) for this interface?
If we go for (1), I think we should rename StackOffset
to MemOffset
to make it more generic, because in this instance the offset does not necessarily apply only to stack addresses.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made my request because the function was changed to take both a fixed-length and a scalar offset, which is exactly why StackOffset
exists (albeit, as you say, with a name that's now out of date). I think all offset related common code interfaces should be unified to accept the fact they can be fixed or scaled and thus use whichever of the TypeSize.h family of types that best fits the specific need. I'll note we are missing a generic type like TypeSize
but signed, to make this truly possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW, my original patches did have a new type like that -- I originally called it AddressOffset, then TargetImmediate. I got some feedback that separating the terms out might be better, so we ended up with different int64_ts.
If we end up with an address formula with both fixed and scalable immediate terms, we'll have to prioritize one over the other for a given target due to the way LSR currently forms the Uses it will evaluate. AFAIK nobody implements instructions with mixed fixed and scalable immediates, so one or the other immediate would need to become part of the base for that formula.
Hi. An AddrMode should be thought of as a formula - a sum of factors that make up the addressing mode.
IMO the BaseOffset and ScalableOffset are separate quantities that should not be conflated. This isn't like TypeSize where it needs to be one value that is fixed or scalable, it is two values that can be treated separately. (Which has the added benefit that all the backend that don't care about scalable vectors can keep not caring about them, as ScalableOffset will always be 0 and they can treat BaseOffs as a simple integer). #88124 has quite a lot going on though, it is not obvious to me what this would mean for LSR to treat BaseOffset and ScalableOffset separately throughout? |
I agree, but my formula is "BaseGV + StackOffset + BaseReg + Scale*ScaleReg", which results in common code that matches the other instances where an offset has fixed and scalable parts. StackOffset is not like TypeSize in that it represents both fixed and scalable offsets independently, and thus any backend can simply ignore the scalable side the same as they'd ignore ScalableOffset. I'm not hugely familiar with this code so I don't know if we truly need such independence, but if we don't then I'd still prefer the use of a common type rather than say, passing an int and bool separately. I just see no reason to treat scalable vectors as a second class citizen. |
AM.HasBaseReg = HasBaseReg; | ||
AM.Scale = Scale; | ||
assert(!BaseOffset.getScalable() && "Scalable offsets unsupported"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure this assert is necessary. I'd just assign AM.ScalableOffset
as normal. It's really up to the users of AM
to assert ScalableOffset
is zero. This is essentially what would need to happen if AM
used the correct types to start with. In general though I'd expect most targets to simply ignore the ScalableOffset
field.
AM.HasBaseReg = HasBaseReg; | ||
AM.Scale = Scale; | ||
assert(!BaseOffset.getScalable() && "Scalable offsets unsupported"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As above, this is the responsibility of the users of AM
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the current definition of TargetLoweringBase::AddrMode
I'm happy with this change. As Sander points out, there's now multiple ways to represent the same thing, which is messy and so I'd love to see some follow on work to make TargetLoweringBase::AddrMode
use the correct types, be that StackOffset
if the current requirement of BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale
is what we want, or something else if the actual goal is for BaseOffs
to be fixed OR scalable.
@@ -32,6 +32,8 @@ class Function; | |||
/// Base class for use as a mix-in that aids implementing | |||
/// a TargetTransformInfo-compatible class. | |||
class TargetTransformInfoImplBase { | |||
friend class TargetTransformInfo; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just wondered why this is needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a leftover from a previous attempt at making an overloaded method to call the existing fixed-only method from the base implementation if there wasn't an override of the newer method. It didn't work out, so I've removed it. Thanks for spotting it.
Part of the work to support vscale-relative immediates in LSR.
No tests added yet, but I'd like feedback on the approach.