[AArch64][GISel] Support SVE with 128-bit min-size for G_LOAD and G_STORE #92130
llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h:

```diff
@@ -652,17 +652,17 @@ bool GIMatchTableExecutor::executeMatchTable(
       MachineMemOperand *MMO =
           *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);

-      unsigned Size = MRI.getType(MO.getReg()).getSizeInBits();
+      const auto Size = MRI.getType(MO.getReg()).getSizeInBits();
       if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT &&
-          MMO->getSizeInBits().getValue() != Size) {
+          MMO->getSizeInBits() != Size) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       } else if (MatcherOpcode == GIM_CheckMemorySizeLessThanLLT &&
-                 MMO->getSizeInBits().getValue() >= Size) {
+                 MMO->getSizeInBits().getValue() >= Size.getKnownMinValue()) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       } else if (MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT &&
-                 MMO->getSizeInBits().getValue() <= Size)
+                 MMO->getSizeInBits().getValue() <= Size.getKnownMinValue())
         if (handleReject() == RejectAndGiveUp)
           return false;
```
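For context on the change above: TypeSize equality compares both the known-minimum value and the scalable flag, so the `!=` check can compare the two sizes directly, while `>=`/`<=` have no total order across fixed and scalable sizes and therefore fall back to known-minimum values. A minimal sketch, assuming the TypeSize API from llvm/Support/TypeSize.h:

```cpp
#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

void typeSizeComparisonSketch() {
  TypeSize Fixed = TypeSize::getFixed(128);       // exactly 128 bits
  TypeSize Scalable = TypeSize::getScalable(128); // vscale x 128 bits

  // Equality compares the minimum value and the scalable flag together,
  // so a fixed 128 bits and a scalable 128 bits are not equal.
  assert(Fixed != Scalable);

  // There is no total order between fixed and scalable sizes, which is
  // why the ordered matcher checks compare known-minimum values instead.
  assert(Fixed.getKnownMinValue() == Scalable.getKnownMinValue());
}
```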
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp:

```diff
@@ -1413,7 +1413,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {

 bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
   const StoreInst &SI = cast<StoreInst>(U);
-  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
+  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()).isZero())
     return true;

   ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
```

Reviewer comment: This seems like an unrelated change. Isn't it?

Author reply: It's needed; otherwise we implicitly cast a scalable TypeSize into a ScalarTy, which fails:

```cpp
TypeSize::operator TypeSize::ScalarTy() const {
  if (isScalable()) {
    reportInvalidSizeRequest(
        "Cannot implicitly convert a scalable size to a fixed-width size in "
        "`TypeSize::operator ScalarTy()`");
    return getKnownMinValue();
  }
  return getFixedValue();
}
```
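To make the failure mode concrete, a small sketch (the `DL` and `ScalableTy` names are placeholders, not code from the patch): comparing the returned TypeSize against a plain integer goes through the implicit `operator ScalarTy()` above and trips `reportInvalidSizeRequest` for scalable types, while `isZero()` queries the TypeSize directly.

```cpp
// Sketch: DL is a DataLayout*, ScalableTy a scalable type such as
// <vscale x 4 x i32>. Both are assumed, not taken from the patch.
TypeSize StoreSize = DL->getTypeStoreSize(ScalableTy);

bool Bad = (StoreSize == 0);    // implicit TypeSize -> ScalarTy conversion;
                                // reports an invalid size request when scalable
bool Good = StoreSize.isZero(); // well-defined for scalable sizes
```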
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:

```diff
@@ -26375,12 +26375,20 @@ bool AArch64TargetLowering::shouldLocalize(
   return TargetLoweringBase::shouldLocalize(MI, TTI);
 }

+static bool isScalableTySupported(const unsigned Op) {
+  return Op == Instruction::Load || Op == Instruction::Store;
+}
+
 bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
-  if (Inst.getType()->isScalableTy())
-    return true;
+  const auto ScalableTySupported = isScalableTySupported(Inst.getOpcode());
+
+  // Fallback for scalable vectors
+  if (Inst.getType()->isScalableTy() && !ScalableTySupported) {
+    return true;
+  }

   for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
-    if (Inst.getOperand(i)->getType()->isScalableTy())
+    if (Inst.getOperand(i)->getType()->isScalableTy() && !ScalableTySupported)
       return true;

   if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
```
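In effect, the new policy is: scalable-vector loads and stores stay on GlobalISel, and any other instruction touching a scalable type still falls back to SelectionDAG. A minimal sketch reusing the helper from this hunk (illustrative only; the helper is file-local in the patch):

```cpp
#include "llvm/IR/Instruction.h"
#include <cassert>
using namespace llvm;

void fallbackPolicySketch() {
  assert(isScalableTySupported(Instruction::Load));  // scalable load: no fallback
  assert(isScalableTySupported(Instruction::Store)); // scalable store: no fallback
  assert(!isScalableTySupported(Instruction::Add));  // scalable add: falls back
}
```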
llvm/lib/Target/AArch64/AArch64RegisterBanks.td:

```diff
@@ -13,7 +13,7 @@
 def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>;

 /// Floating Point/Vector Registers: B, H, S, D, Q.
-def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
+def FPRRegBank : RegisterBank<"FPR", [QQQQ, ZPR]>;

 /// Conditional register: NZCV.
 def CCRegBank : RegisterBank<"CC", [CCR]>;
```

Reviewer comment: The comment needs updating to include ZPR too.
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp:

```diff
@@ -901,6 +901,27 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
   return GenericOpc;
 }

+/// Select the AArch64 opcode for the G_LOAD or G_STORE operation for scalable
+/// vectors.
+/// \p ElementSize size of the element of the scalable vector
+static unsigned selectLoadStoreSVEOp(const unsigned GenericOpc,
```
Author comment, quoting the existing manual selector for reference:

```cpp
/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    case 128:
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
    }
    break;
  }
  return GenericOpc;
}
```
We already do this selection manually in selectLoadStoreUIOp, and I'm following the same idea here. Making them select through TableGen would require more work and can be a separate patch.
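The body of selectLoadStoreSVEOp is truncated in the diff above. Purely as an illustration of "the same idea", and not the patch's actual code, such a selector might map element sizes to the predicated SVE LD1/ST1 immediate-offset opcodes:

```cpp
// Hypothetical sketch; the real hunk body is not visible in this view.
static unsigned selectLoadStoreSVEOp(const unsigned GenericOpc,
                                     const unsigned ElementSize) {
  const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
  switch (ElementSize) {
  case 8:
    return IsStore ? AArch64::ST1B_IMM : AArch64::LD1B_IMM;
  case 16:
    return IsStore ? AArch64::ST1H_IMM : AArch64::LD1H_IMM;
  case 32:
    return IsStore ? AArch64::ST1W_IMM : AArch64::LD1W_IMM;
  case 64:
    return IsStore ? AArch64::ST1D_IMM : AArch64::LD1D_IMM;
  }
  return GenericOpc; // unsupported element size
}
```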
Reviewer comment: I did some investigation and found that the patch actually uses the existing TableGen-erated patterns in selectImpl. The added instruction selector code is just skipped.
Reviewer comment: Is this code actually used? Or is the TableGen pattern already used? I'm not sure how the predicates would work otherwise.
Author reply: I reverted the entire AArch64InstructionSelector.cpp file because we can reuse the TableGen patterns, so I think no change is needed here.
Reviewer comment (outdated): Formatting.
Reviewer comment (outdated): We will need truncate / zext / sext support eventually. Please format the entire patch.
Author reply: Should I add truncate / zext / sext in this patch?
Reviewer reply: That can be separate. One step at a time, and it would be good to get the basics in first :)
Reviewer comment: Please format the entire patch.

Author reply: All formatting done.
Reviewer comment (outdated): LLVM style prefers removing braces from single statements.
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp:

```diff
@@ -61,6 +61,79 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const LLT v2s64 = LLT::fixed_vector(2, 64);
   const LLT v2p0 = LLT::fixed_vector(2, p0);

+  // Scalable vector sizes range from 128 to 2048.
+  // Note that subtargets may not support the full range.
+  // See [ScalableVecTypes] below.
+  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
+  const LLT nxv32s8 = LLT::scalable_vector(32, s8);
+  const LLT nxv64s8 = LLT::scalable_vector(64, s8);
+  const LLT nxv128s8 = LLT::scalable_vector(128, s8);
+  const LLT nxv256s8 = LLT::scalable_vector(256, s8);
+
+  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
+  const LLT nxv16s16 = LLT::scalable_vector(16, s16);
+  const LLT nxv32s16 = LLT::scalable_vector(32, s16);
+  const LLT nxv64s16 = LLT::scalable_vector(64, s16);
+  const LLT nxv128s16 = LLT::scalable_vector(128, s16);
+
+  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
+  const LLT nxv8s32 = LLT::scalable_vector(8, s32);
+  const LLT nxv16s32 = LLT::scalable_vector(16, s32);
+  const LLT nxv32s32 = LLT::scalable_vector(32, s32);
+  const LLT nxv64s32 = LLT::scalable_vector(64, s32);
+
+  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
+  const LLT nxv4s64 = LLT::scalable_vector(4, s64);
+  const LLT nxv8s64 = LLT::scalable_vector(8, s64);
+  const LLT nxv16s64 = LLT::scalable_vector(16, s64);
+  const LLT nxv32s64 = LLT::scalable_vector(32, s64);
+
+  const LLT nxv2p0 = LLT::scalable_vector(2, p0);
+  const LLT nxv4p0 = LLT::scalable_vector(4, p0);
+  const LLT nxv8p0 = LLT::scalable_vector(8, p0);
+  const LLT nxv16p0 = LLT::scalable_vector(16, p0);
+  const LLT nxv32p0 = LLT::scalable_vector(32, p0);
+
+  const auto ScalableVec128 = {
+      nxv16s8, nxv8s16, nxv4s32, nxv2s64, nxv2p0,
+  };
+  const auto ScalableVec256 = {
+      nxv32s8, nxv16s16, nxv8s32, nxv4s64, nxv4p0,
+  };
+  const auto ScalableVec512 = {
+      nxv64s8, nxv32s16, nxv16s32, nxv8s64, nxv8p0,
+  };
+  const auto ScalableVec1024 = {
+      nxv128s8, nxv64s16, nxv32s32, nxv16s64, nxv16p0,
+  };
+  const auto ScalableVec2048 = {
+      nxv256s8, nxv128s16, nxv64s32, nxv32s64, nxv32p0,
+  };
+
+  /// Scalable vector types supported by the sub target.
+  /// Empty if SVE is not supported.
+  SmallVector<LLT> ScalableVecTypes;
+
+  if (ST.hasSVE()) {
+    // Add scalable vector types that are supported by the subtarget
+    const auto MinSize = ST.getMinSVEVectorSizeInBits();
+
+    auto MaxSize = ST.getMaxSVEVectorSizeInBits();
+    if (MaxSize == 0) {
+      // Unknown max size, assume the target supports all sizes.
+      MaxSize = 2048;
+    }
+    if (MinSize <= 128 && 128 <= MaxSize)
+      ScalableVecTypes.append(ScalableVec128);
+    if (MinSize <= 256 && 256 <= MaxSize)
+      ScalableVecTypes.append(ScalableVec256);
+    if (MinSize <= 512 && 512 <= MaxSize)
+      ScalableVecTypes.append(ScalableVec512);
+    if (MinSize <= 1024 && 1024 <= MaxSize)
+      ScalableVecTypes.append(ScalableVec1024);
+    if (MinSize <= 2048 && 2048 <= MaxSize)
+      ScalableVecTypes.append(ScalableVec2048);
+  }
+
   std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                         v16s8, v8s16, v4s32,
                                                         v2s64, v2p0,
@@ -329,6 +402,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
     return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
   };

+  const auto IsSameScalableVecTy = [=](const LegalityQuery &Query) {
+    // Legal if loading a scalable vector type
+    // into a scalable vector register of the exactly same type
+    if (!Query.Types[0].isScalableVector() || Query.Types[1] != p0)
+      return false;
+    if (Query.MMODescrs[0].MemoryTy != Query.Types[0])
+      return false;
+    if (Query.MMODescrs[0].AlignInBits < 128)
+      return false;
+    return is_contained(ScalableVecTypes, Query.Types[0]);
+  };
+
   getActionDefinitionsBuilder(G_LOAD)
       .customIf([=](const LegalityQuery &Query) {
         return HasRCPC3 && Query.Types[0] == s128 &&
@@ -354,6 +439,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       // These extends are also legal
       .legalForTypesWithMemDesc(
           {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
+      .legalIf(IsSameScalableVecTy)
       .widenScalarToNextPow2(0, /* MinSize = */ 8)
       .clampMaxNumElements(0, s8, 16)
       .clampMaxNumElements(0, s16, 8)
@@ -398,7 +484,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
           {s64, p0, s64, 8},   {s64, p0, s32, 8}, // truncstorei32 from s64
           {p0, p0, s64, 8},    {s128, p0, s128, 8},  {v16s8, p0, s128, 8},
           {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8},  {v8s16, p0, s128, 8},
-          {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
+          {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8},
+      })
+      .legalIf(IsSameScalableVecTy)
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
@@ -440,8 +528,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                                          {p0, v4s32, v4s32, 8},
                                          {p0, v2s64, v2s64, 8},
                                          {p0, v2p0, v2p0, 8},
-                                         {p0, s128, s128, 8},
-                                     })
+                                         {p0, s128, s128, 8}})
       .unsupported();

   auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
```
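As a sanity check on the types involved (a sketch using the LLT API, not part of the patch): every entry of ScalableVec128 has a known-minimum size of 128 bits, one full SVE register at vscale = 1, which is what the 128-bit alignment requirement in IsSameScalableVecTy lines up with.

```cpp
#include "llvm/CodeGen/LowLevelType.h" // header location varies across LLVM versions
#include <cassert>
using namespace llvm;

void scalableLLTSketch() {
  const LLT S32 = LLT::scalar(32);
  const LLT NxV4S32 = LLT::scalable_vector(4, S32); // <vscale x 4 x s32>

  assert(NxV4S32.isScalableVector());
  // The in-memory size is vscale x 128 bits; only the 128-bit minimum is
  // known at compile time, hence TypeSize rather than a plain integer.
  assert(NxV4S32.getSizeInBits().isScalable());
  assert(NxV4S32.getSizeInBits().getKnownMinValue() == 128);
}
```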
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp:

```diff
@@ -309,7 +309,7 @@ bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
   if (!Store.isSimple())
     return false;
   LLT ValTy = MRI.getType(Store.getValueReg());
-  if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
+  if (!ValTy.isVector() || ValTy.getSizeInBits().getKnownMinValue() != 128)
     return false;
   if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
     return false; // Don't split truncating stores.
@@ -657,8 +657,8 @@ bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
     Register PtrBaseReg;
     APInt Offset;
     LLT StoredValTy = MRI.getType(St->getValueReg());
-    unsigned ValSize = StoredValTy.getSizeInBits();
-    if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
+    const auto ValSize = StoredValTy.getSizeInBits();
+    if (ValSize.getKnownMinValue() < 32 || St->getMMO().getSizeInBits() != ValSize)
       continue;

     Register PtrReg = St->getPointerReg();
```
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp:

```diff
@@ -257,6 +257,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::QQRegClassID:
   case AArch64::QQQRegClassID:
   case AArch64::QQQQRegClassID:
+  case AArch64::ZPRRegClassID:
     return getRegBank(AArch64::FPRRegBankID);
   case AArch64::GPR32commonRegClassID:
   case AArch64::GPR32RegClassID:
@@ -740,11 +741,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     LLT Ty = MRI.getType(MO.getReg());
     if (!Ty.isValid())
       continue;
-    OpSize[Idx] = Ty.getSizeInBits();
+    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

-    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
+    // As a top-level guess, scalable vectors go in SVRs, non-scalable
+    // vectors go in FPRs, scalars and pointers in GPRs.
     // For floating-point instructions, scalars go in FPRs.
-    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
+    if (Ty.isScalableVector())
+      OpRegBankIdx[Idx] = PMI_FirstFPR;
+    else if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
         Ty.getSizeInBits() > 64)
       OpRegBankIdx[Idx] = PMI_FirstFPR;
     else
```
Reviewer comment: Use TypeSize.

Author reply: Done.