-
Notifications
You must be signed in to change notification settings - Fork 15.1k
AMDGPU: Start using RegClassByHwMode for wavesize operands #159884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/amdgpu/use-regclassbyhwmode-av-align-registers
Are you sure you want to change the base?
AMDGPU: Start using RegClassByHwMode for wavesize operands #159884
Conversation
Make sure we cannot be in a mode with both wavesizes. This prevents assertions in a future change. This should probably just be an error, but we do not have a good way to report errors from the MCSubtargetInfo constructor. This breaks the assembler test which enables both, but this behavior is not really useful. Maybe it's better to just delete the test.
This eliminates the pseudo register classes used to hack the wave register class, which are now replaced with RegClassByHwMode, so most of the diff is from register class ID renumbering.
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: This eliminates the pseudo register classes used to hack the wave register class, which are now replaced with RegClassByHwMode, so most of the diff is from register class ID renumbering. Patch is 214.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159884.diff 43 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c454154156ccf..dc4d1019fb6d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1238,6 +1238,19 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
+// Ugly hack to accommodate assembling modules with mixed
+// wavesizes. Ideally we would have a mapping symbol in assembly which
+// would keep track of which sections of code should be treated as
+// wave32 and wave64. Instead what users do is assemble with both
+// wavesizes enabled. We translate this into this special mode so this
+// only influences assembler behavior and nothing else.
+def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
+ "assembler-permissive-wavesize",
+ "AssemblerPermissiveWavesize",
+ "true",
+ "allow parsing wave32 and wave64 variants of instructions"
+>;
+
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
"max-private-element-size-"#size,
"MaxPrivateElementSize",
@@ -2932,15 +2945,46 @@ def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
+def NotNeedsAlignedVGPRs : Predicate<"!Subtarget->needsAlignedVGPRs()">,
+ AssemblerPredicate<(all_of (not FeatureRequiresAlignedVGPRs))>;
+
+def isWave32 : Predicate<"Subtarget->isWave32()">,
+ AssemblerPredicate <(any_of FeatureWavefrontSize32,
+ FeatureAssemblerPermissiveWavesize)>;
+def isWave64 : Predicate<"Subtarget->isWave64()">,
+ AssemblerPredicate <(any_of FeatureWavefrontSize64,
+ FeatureAssemblerPermissiveWavesize)>;
+
+def isWave32Strict : Predicate<"Subtarget->isWave32()">,
+ AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
+def isWave64Strict : Predicate<"Subtarget->isWave64()">,
+ AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
+
//===----------------------------------------------------------------------===//
// HwModes
//===----------------------------------------------------------------------===//
-// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement
+defvar DefaultMode_Wave64 = DefaultMode;
+defvar DefaultMode_Wave32 = HwMode<[isWave32, NotNeedsAlignedVGPRs]>;
+
+// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement. Implied
+// wave64.
def AVAlign2LoadStoreMode : HwMode<[HasMAIInsts, NeedsAlignedVGPRs]>;
// gfx1250, has alignment requirement but no AGPRs.
-def AlignedVGPRNoAGPRMode : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs]>;
+def AlignedVGPRNoAGPRMode_Wave32 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave32Strict]>;
+def AlignedVGPRNoAGPRMode_Wave64 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave64Strict]>;
+
+// FIXME: This should be able to only define a separate hwmode that
+// only depends on wavesize for just ValueTypes. These use different
+// HwMode namespaces. If we don't define the full set of modes used
+// for RegClassByHwMode, tablegen crashes for some reason
+def WaveSizeVT : ValueTypeByHwMode<[
+ DefaultMode_Wave64,
+ AVAlign2LoadStoreMode,
+ AlignedVGPRNoAGPRMode_Wave64,
+ DefaultMode_Wave32,
+ AlignedVGPRNoAGPRMode_Wave32], [i64, i64, i64, i32, i32]>;
// Include AMDGPU TD files
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 848d9a5a9eb98..37d4844002fd7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -287,9 +287,6 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
const RegisterBank &
AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT Ty) const {
- if (&RC == &AMDGPU::SReg_1RegClass)
- return AMDGPU::VCCRegBank;
-
// We promote real scalar booleans to SReg_32. Any SGPR using s1 is really a
// VCC-like use.
if (TRI->isSGPRClass(&RC)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 1c1a6dac75a17..c37d3096afd3e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -15,7 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
>;
// It is helpful to distinguish conditions from ordinary SGPRs.
-def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
+def VCCRegBank : RegisterBank<"VCC", [SReg_32, SReg_64]>;
def AGPRRegBank : RegisterBank <"AGPR",
[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index be265b56e7496..297991154b884 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1247,6 +1247,12 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
// AsmParser
//===----------------------------------------------------------------------===//
+// TODO: define GET_SUBTARGET_FEATURE_NAME
+#define GET_REGISTER_MATCHER
+#include "AMDGPUGenAsmMatcher.inc"
+#undef GET_REGISTER_MATCHER
+#undef GET_SUBTARGET_FEATURE_NAME
+
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
@@ -1545,6 +1551,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isGFX10_BEncoding(getSTI());
}
+ bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
+
+ bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
+
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -1608,6 +1618,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return &MII;
}
+ // FIXME: This should not be used. Instead, should use queries derived from
+ // getAvailableFeatures().
const FeatureBitset &getFeatureBits() const {
return getSTI().getFeatureBits();
}
@@ -2264,9 +2276,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
}
bool AMDGPUOperand::isBoolReg() const {
- auto FB = AsmParser->getFeatureBits();
- return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
- (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
+ return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
+ (AsmParser->isWave32() && isSCSrc_b32()));
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -4984,9 +4995,8 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
- auto FB = getFeatureBits();
- return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
- (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
+ return (Reg == AMDGPU::VCC && isWave64()) ||
+ (Reg == AMDGPU::VCC_LO && isWave32());
}
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
@@ -5671,7 +5681,7 @@ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
// Check if this instruction may be used with a different wavesize.
if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
-
+ // FIXME: Use getAvailableFeatures, and do not manually recompute
FeatureBitset FeaturesWS32 = getFeatureBits();
FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
.flip(AMDGPU::FeatureWavefrontSize32);
@@ -6426,10 +6436,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ if (!isWave32())
return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
} else {
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ if (!isWave64())
return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
}
}
@@ -6438,10 +6448,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (C.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ if (!isWave32())
return TokError("wavefront_size=5 requires +WavefrontSize32");
} else if (C.wavefront_size == 6) {
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ if (!isWave64())
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}
@@ -10344,7 +10354,6 @@ LLVMInitializeAMDGPUAsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
-#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 920a47b5afe07..02417a8877b0e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -99,6 +99,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;
+ bool AssemblerPermissiveWavesize = false;
// Subtarget properties statically set by tablegen
bool FP64 = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 7ee2eb24131c0..d13926d29b986 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -789,14 +789,24 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
// Intention: print disassembler message when invalid code is decoded,
// for example sgpr register used in VReg or VISrc(VReg or imm) operand.
const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
- int16_t RCID = MII.getOpRegClassID(
- OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
- if (RCID != -1) {
+ if (OpInfo.RegClass != -1) {
+ int16_t RCID = MII.getOpRegClassID(
+ OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
const MCRegisterClass &RC = MRI.getRegClass(RCID);
auto Reg = mc2PseudoReg(Op.getReg());
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
- O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
- << "\' register class*/";
+ bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
+ (OpInfo.RegClass == AMDGPU::SReg_1 ||
+ OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);
+ // Suppress this comment for a mismatched wavesize. Some users expect to
+ // be able to assemble and disassemble modules with mixed wavesizes, but
+ // we do not know the subtarget in different functions in MC.
+ //
+ // TODO: Should probably print it anyway, maybe a more specific version.
+ if (!IsWaveSizeOp) {
+ O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
+ << "\' register class*/";
+ }
}
}
} else if (Op.isImm()) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index f2e2d0ed3f8a6..281a65799bdb2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -82,20 +82,36 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *STI =
createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ bool IsWave64 = STI->hasFeature(AMDGPU::FeatureWavefrontSize64);
+ bool IsWave32 = STI->hasFeature(AMDGPU::FeatureWavefrontSize32);
+
// FIXME: We should error for the default target.
if (STI->getFeatureBits().none())
STI->ToggleFeature(AMDGPU::FeatureSouthernIslands);
- if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
- !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ if (!IsWave64 && !IsWave32) {
// If there is no default wave size it must be a generation before gfx10,
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
// set wave32 as a default.
STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
? AMDGPU::FeatureWavefrontSize32
: AMDGPU::FeatureWavefrontSize64);
+ } else if (IsWave64 && IsWave32) {
+ // The wave size is mutually exclusive. If both somehow end up set, wave32
+ // wins if supported (the wave64 feature bit is toggled off below).
+ STI->ToggleFeature(AMDGPU::supportsWave32(*STI)
+ ? AMDGPU::FeatureWavefrontSize64
+ : AMDGPU::FeatureWavefrontSize32);
+
+ // If both wavesizes were manually requested, hack in a feature to permit
+ // assembling modules with mixed wavesizes.
+ STI->ToggleFeature(AMDGPU::FeatureAssemblerPermissiveWavesize);
}
+ assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) !=
+ STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) &&
+ "wavesize features are mutually exclusive");
+
return STI;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1eae0de6f1b98..d25977c9f1b88 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -6,11 +6,6 @@
//
//===----------------------------------------------------------------------===//
-def isWave32 : Predicate<"Subtarget->isWave32()">,
- AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
-def isWave64 : Predicate<"Subtarget->isWave64()">,
- AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
-
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
: MnemonicAlias<From, To, VariantName>, PredicateControl;
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 96131bd591a17..9b710013a09ce 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -75,7 +75,7 @@ Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
assert(Changed || ConstrainRegs.empty());
for (Register Reg : ConstrainRegs)
- MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
+ MRI->constrainRegClass(Reg, TII->getRegisterInfo().getWaveMaskRegClass());
ConstrainRegs.clear();
return Changed;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c69b00accdb73..b019c98f15ee2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3897,13 +3897,10 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
: &AMDGPU::VReg_64RegClass;
}
+// FIXME: This should be deleted
const TargetRegisterClass *
SIRegisterInfo::getRegClass(unsigned RCID) const {
switch ((int)RCID) {
- case AMDGPU::SReg_1RegClassID:
- return getBoolRC();
- case AMDGPU::SReg_1_XEXECRegClassID:
- return getWaveMaskRegClass();
case -1:
return nullptr;
default:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 784ce1def457d..01dc1e3307387 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -915,20 +915,6 @@ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v
let Size = 64;
}
-def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
- (add SReg_64_XEXEC, SReg_32_XEXEC)> {
- let CopyCost = 1;
- let isAllocatable = 0;
- let HasSGPR = 1;
-}
-
-def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
- (add SReg_1_XEXEC, EXEC, EXEC_LO, EXEC_HI)> {
- let CopyCost = 1;
- let isAllocatable = 0;
- let HasSGPR = 1;
-}
-
multiclass SRegClass<int numRegs,
list<ValueType> regTypes,
SIRegisterTuples regList,
@@ -1208,6 +1194,34 @@ defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
}
+def SReg_1_XEXEC : SIRegisterClassLike<0, false, false, true>,
+ RegClassByHwMode<
+ [DefaultMode_Wave64,
+ AlignedVGPRNoAGPRMode_Wave64,
+ AVAlign2LoadStoreMode,
+ DefaultMode_Wave32,
+ AlignedVGPRNoAGPRMode_Wave32],
+ [SReg_64_XEXEC,
+ SReg_64_XEXEC,
+ SReg_64_XEXEC,
+ SReg_32_XM0_XEXEC, // FIXME: Why do the wave32 cases exclude m0?
+ SReg_32_XM0_XEXEC]
+>;
+
+def SReg_1 : SIRegisterClassLike<0, false, false, true>,
+ RegClassByHwMode<
+ [DefaultMode_Wave64,
+ AlignedVGPRNoAGPRMode_Wave64,
+ AVAlign2LoadStoreMode,
+ DefaultMode_Wave32,
+ AlignedVGPRNoAGPRMode_Wave32],
+ [SReg_64,
+ SReg_64,
+ SReg_64,
+ SReg_32,
+ SReg_32]
+>;
+
//===----------------------------------------------------------------------===//
//
// AlignTarget classes. Artificial classes to swap between
@@ -1215,17 +1229,36 @@ defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_102
//
//===----------------------------------------------------------------------===//
+// We have 3 orthogonal properties to consider. Unfortunately we need
+// to define the cross product of these states, minus unused
+// combinations.
+
def AV_LdSt_32_Target : RegClassByHwMode<
- [DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
- [VGPR_32, AV_32, VGPR_32]>, SIRegisterClassLike<32, true, true> {
+ [DefaultMode_Wave64,
+ DefaultMode_Wave32,
+ AVAlign2LoadStoreMode,
+ AlignedVGPRNoAGPRMode_Wave64,
+ AlignedVGPRNoAGPRMode_Wave32],
+ [VGPR_32,
+ VGPR_32,
+ AV_32,
+ VGPR_32,
+ VGPR_32]>,
+ SIRegisterClassLike<32, true, true> {
let DecoderMethod = "decodeAVLdSt";
}
foreach RegSize = [ 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512, 1024 ] in {
def VReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true>,
RegClassByHwMode<
- [DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
+ [DefaultMode_Wave64,
+ DefaultMode_Wave32,
+ AVAlign2LoadStoreMode,
+ AlignedVGPRNoAGPRMode_Wave64,
+ AlignedVGPRNoAGPRMode_Wave32],
[!cast<RegisterClass>("VReg_"#RegSize),
+ !cast<RegisterClass>("VReg_"#RegSize),
+ !cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
let DecoderMethod = "DecodeVReg_"#RegSize#"RegisterClass";
@@ -1233,45 +1266,59 @@ foreach RegSize = [ 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512, 10
def AReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, false, true>,
RegClassByHwMode<
- [DefaultMode, AVAlign2LoadStoreMode, /*Unused combination*/],
+ [DefaultMode_Wave64, /*unused combination*/ AVAlign2LoadStoreMode, /*Unused combination*/ /*Unused combination*/],
[!cast<RegisterClass>("AReg_"#RegSize),
+ /*unused combination*/
!cast<RegisterClass>("AReg_"#RegSize#_Align2)
+ /*Unused combination*/
/*Unused combination*/]> {
let DecoderMethod = "DecodeAReg_"#RegSize#"RegisterClass";
}
def AV_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
RegClassByHwMode<
- [DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
+ [DefaultMode_Wave32,
+ DefaultMode_Wave64,
+ AVAlign2LoadStoreMode,
+ AlignedVGPRNoAGPRMode_Wave64,
+ AlignedVGPRNoAGPRMode_Wa...
[truncated]
|
Depends on / includes #159234 |
// The wave size is mutually exclusive. If both somehow end up set, wave64 | ||
// wins if supported. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But from the code below it looks like wave32 wins if supported?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is from the other PR
This eliminates the pseudo register classes used to hack the
wave register class, which are now replaced with RegClassByHwMode,
so most of the diff is from register class ID renumbering.