Skip to content

Commit 7b3da7b

Browse files
author
Thorsten Schütt
authored
[GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE (#110561)
Credits: #72976

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)

;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
  %c = mul <vscale x 2 x i64> %a, %b
  store <vscale x 2 x i64> %c, ptr %p, align 16
  ret void
}

define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
  %c = mul <vscale x 4 x i32> %a, %b
  store <vscale x 4 x i32> %c, ptr %p, align 16
  ret void
}

define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
  %c = mul <vscale x 8 x i16> %a, %b
  store <vscale x 8 x i16> %c, ptr %p, align 16
  ret void
}

define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
  %c = mul <vscale x 16 x i8> %a, %b
  store <vscale x 16 x i8> %c, ptr %p, align 16
  ret void
}
1 parent 5621929 commit 7b3da7b

File tree

7 files changed

+257
-10
lines changed

7 files changed

+257
-10
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,7 @@ class LegalizeRuleSet {
998998
LegalizeAction::WidenScalar,
999999
[=](const LegalityQuery &Query) {
10001000
const LLT VecTy = Query.Types[TypeIdx];
1001-
return VecTy.isVector() && !VecTy.isScalable() &&
1002-
VecTy.getSizeInBits() < VectorSize;
1001+
return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
10031002
},
10041003
[=](const LegalityQuery &Query) {
10051004
const LLT VecTy = Query.Types[TypeIdx];
@@ -1172,7 +1171,7 @@ class LegalizeRuleSet {
11721171
LegalizeAction::MoreElements,
11731172
[=](const LegalityQuery &Query) {
11741173
LLT VecTy = Query.Types[TypeIdx];
1175-
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
1174+
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
11761175
VecTy.getNumElements() < MinElements;
11771176
},
11781177
[=](const LegalityQuery &Query) {
@@ -1190,7 +1189,7 @@ class LegalizeRuleSet {
11901189
LegalizeAction::MoreElements,
11911190
[=](const LegalityQuery &Query) {
11921191
LLT VecTy = Query.Types[TypeIdx];
1193-
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
1192+
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
11941193
(VecTy.getNumElements() % NumElts != 0);
11951194
},
11961195
[=](const LegalityQuery &Query) {
@@ -1210,7 +1209,7 @@ class LegalizeRuleSet {
12101209
LegalizeAction::FewerElements,
12111210
[=](const LegalityQuery &Query) {
12121211
LLT VecTy = Query.Types[TypeIdx];
1213-
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
1212+
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
12141213
VecTy.getNumElements() > MaxElements;
12151214
},
12161215
[=](const LegalityQuery &Query) {
@@ -1231,6 +1230,9 @@ class LegalizeRuleSet {
12311230
assert(MinTy.getElementType() == MaxTy.getElementType() &&
12321231
"Expected element types to agree");
12331232

1233+
assert((!MinTy.isScalableVector() && !MaxTy.isScalableVector()) &&
1234+
"Unexpected scalable vectors");
1235+
12341236
const LLT EltTy = MinTy.getElementType();
12351237
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
12361238
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());

llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
183183
const unsigned MinSize = Size.getKnownMinValue();
184184
assert((!Size.isScalable() || MinSize >= 128) &&
185185
"Scalable vector types should have size of at least 128 bits");
186+
if (Size.isScalable())
187+
return 3;
186188
if (MinSize <= 16)
187189
return 0;
188190
if (MinSize <= 32)

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
393393
// i1 is a special case because SDAG i1 true is naturally zero extended
394394
// when widened using ANYEXT. We need to do it explicitly here.
395395
auto &Flags = CurArgInfo.Flags[0];
396-
if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
397-
!Flags.isZExt()) {
396+
if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
397+
!Flags.isSExt() && !Flags.isZExt()) {
398398
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
399399
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
400400
1) {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
615615
unsigned RegBankID = RB.getID();
616616

617617
if (RegBankID == AArch64::GPRRegBankID) {
618+
assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
618619
if (SizeInBits <= 32)
619620
return GetAllRegSet ? &AArch64::GPR32allRegClass
620621
: &AArch64::GPR32RegClass;
@@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
626627
}
627628

628629
if (RegBankID == AArch64::FPRRegBankID) {
630+
if (SizeInBits.isScalable()) {
631+
assert(SizeInBits == TypeSize::getScalable(128) &&
632+
"Unexpected scalable register size");
633+
return &AArch64::ZPRRegClass;
634+
}
635+
629636
switch (SizeInBits) {
630637
default:
631638
return nullptr;
@@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
964971
// then we can pull it into the helpers that get the appropriate class for a
965972
// register bank. Or make a new helper that carries along some constraint
966973
// information.
967-
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
974+
if (SrcRegBank != DstRegBank &&
975+
(DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
968976
SrcSize = DstSize = TypeSize::getFixed(32);
969977

970978
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
9191

9292
const bool HasCSSC = ST.hasCSSC();
9393
const bool HasRCPC3 = ST.hasRCPC3();
94+
const bool HasSVE = ST.hasSVE();
9495

9596
getActionDefinitionsBuilder(
9697
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
@@ -127,7 +128,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
127128
.clampNumElements(0, v2s64, v2s64)
128129
.moreElementsToNextPow2(0);
129130

130-
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
131+
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
132+
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
133+
.legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
134+
.widenScalarToNextPow2(0)
135+
.clampScalar(0, s32, s64)
136+
.clampMaxNumElements(0, s8, 16)
137+
.clampMaxNumElements(0, s16, 8)
138+
.clampNumElements(0, v2s32, v4s32)
139+
.clampNumElements(0, v2s64, v2s64)
140+
.minScalarOrEltIf(
141+
[=](const LegalityQuery &Query) {
142+
return Query.Types[0].getNumElements() <= 2;
143+
},
144+
0, s32)
145+
.minScalarOrEltIf(
146+
[=](const LegalityQuery &Query) {
147+
return Query.Types[0].getNumElements() <= 4;
148+
},
149+
0, s16)
150+
.minScalarOrEltIf(
151+
[=](const LegalityQuery &Query) {
152+
return Query.Types[0].getNumElements() <= 16;
153+
},
154+
0, s8)
155+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
156+
.moreElementsToNextPow2(0);
157+
158+
getActionDefinitionsBuilder(G_MUL)
131159
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
132160
.widenScalarToNextPow2(0)
133161
.clampScalar(0, s32, s64)

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
2525
#
2626
# DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
27-
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
2827
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
2928
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
3029
#
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s
3+
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
4+
5+
;; add
6+
define <vscale x 2 x i64> @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
7+
; CHECK-LABEL: addnxv2i64:
8+
; CHECK: // %bb.0: // %entry
9+
; CHECK-NEXT: add z0.d, z0.d, z1.d
10+
; CHECK-NEXT: ret
11+
entry:
12+
%c = add <vscale x 2 x i64> %a, %b
13+
ret <vscale x 2 x i64> %c
14+
}
15+
16+
define <vscale x 4 x i32> @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
17+
; CHECK-LABEL: addnxv4i32:
18+
; CHECK: // %bb.0: // %entry
19+
; CHECK-NEXT: add z0.s, z0.s, z1.s
20+
; CHECK-NEXT: ret
21+
entry:
22+
%c = add <vscale x 4 x i32> %a, %b
23+
ret <vscale x 4 x i32> %c
24+
}
25+
26+
define <vscale x 8 x i16> @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
27+
; CHECK-LABEL: addnxv8i16:
28+
; CHECK: // %bb.0: // %entry
29+
; CHECK-NEXT: add z0.h, z0.h, z1.h
30+
; CHECK-NEXT: ret
31+
entry:
32+
%c = add <vscale x 8 x i16> %a, %b
33+
ret <vscale x 8 x i16> %c
34+
}
35+
36+
define <vscale x 16 x i8> @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
37+
; CHECK-LABEL: addnxv16i8:
38+
; CHECK: // %bb.0: // %entry
39+
; CHECK-NEXT: add z0.b, z0.b, z1.b
40+
; CHECK-NEXT: ret
41+
entry:
42+
%c = add <vscale x 16 x i8> %a, %b
43+
ret <vscale x 16 x i8> %c
44+
}
45+
46+
;; sub
47+
define <vscale x 2 x i64> @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
48+
; CHECK-LABEL: subnxv2i64:
49+
; CHECK: // %bb.0: // %entry
50+
; CHECK-NEXT: sub z0.d, z0.d, z1.d
51+
; CHECK-NEXT: ret
52+
entry:
53+
%c = sub <vscale x 2 x i64> %a, %b
54+
ret <vscale x 2 x i64> %c
55+
}
56+
57+
define <vscale x 4 x i32> @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
58+
; CHECK-LABEL: subnxv4i32:
59+
; CHECK: // %bb.0: // %entry
60+
; CHECK-NEXT: sub z0.s, z0.s, z1.s
61+
; CHECK-NEXT: ret
62+
entry:
63+
%c = sub <vscale x 4 x i32> %a, %b
64+
ret <vscale x 4 x i32> %c
65+
}
66+
67+
define <vscale x 8 x i16> @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
68+
; CHECK-LABEL: subnxv8i16:
69+
; CHECK: // %bb.0: // %entry
70+
; CHECK-NEXT: sub z0.h, z0.h, z1.h
71+
; CHECK-NEXT: ret
72+
entry:
73+
%c = sub <vscale x 8 x i16> %a, %b
74+
ret <vscale x 8 x i16> %c
75+
}
76+
77+
define <vscale x 16 x i8> @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
78+
; CHECK-LABEL: subnxv16i8:
79+
; CHECK: // %bb.0: // %entry
80+
; CHECK-NEXT: sub z0.b, z0.b, z1.b
81+
; CHECK-NEXT: ret
82+
entry:
83+
%c = sub <vscale x 16 x i8> %a, %b
84+
ret <vscale x 16 x i8> %c
85+
}
86+
87+
;; and
88+
define <vscale x 2 x i64> @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
89+
; CHECK-LABEL: andnxv2i64:
90+
; CHECK: // %bb.0: // %entry
91+
; CHECK-NEXT: and z0.d, z0.d, z1.d
92+
; CHECK-NEXT: ret
93+
entry:
94+
%c = and <vscale x 2 x i64> %a, %b
95+
ret <vscale x 2 x i64> %c
96+
}
97+
98+
define <vscale x 4 x i32> @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
99+
; CHECK-LABEL: andnxv4i32:
100+
; CHECK: // %bb.0: // %entry
101+
; CHECK-NEXT: and z0.d, z0.d, z1.d
102+
; CHECK-NEXT: ret
103+
entry:
104+
%c = and <vscale x 4 x i32> %a, %b
105+
ret <vscale x 4 x i32> %c
106+
}
107+
108+
define <vscale x 8 x i16> @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
109+
; CHECK-LABEL: andnxv8i16:
110+
; CHECK: // %bb.0: // %entry
111+
; CHECK-NEXT: and z0.d, z0.d, z1.d
112+
; CHECK-NEXT: ret
113+
entry:
114+
%c = and <vscale x 8 x i16> %a, %b
115+
ret <vscale x 8 x i16> %c
116+
}
117+
118+
define <vscale x 16 x i8> @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
119+
; CHECK-LABEL: andnxv16i8:
120+
; CHECK: // %bb.0: // %entry
121+
; CHECK-NEXT: and z0.d, z0.d, z1.d
122+
; CHECK-NEXT: ret
123+
entry:
124+
%c = and <vscale x 16 x i8> %a, %b
125+
ret <vscale x 16 x i8> %c
126+
}
127+
128+
;; or
129+
define <vscale x 2 x i64> @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
130+
; CHECK-LABEL: ornxv2i64:
131+
; CHECK: // %bb.0: // %entry
132+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
133+
; CHECK-NEXT: ret
134+
entry:
135+
%c = or <vscale x 2 x i64> %a, %b
136+
ret <vscale x 2 x i64> %c
137+
}
138+
139+
define <vscale x 4 x i32> @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
140+
; CHECK-LABEL: ornxv4i32:
141+
; CHECK: // %bb.0: // %entry
142+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
143+
; CHECK-NEXT: ret
144+
entry:
145+
%c = or <vscale x 4 x i32> %a, %b
146+
ret <vscale x 4 x i32> %c
147+
}
148+
149+
define <vscale x 8 x i16> @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
150+
; CHECK-LABEL: ornxv8i16:
151+
; CHECK: // %bb.0: // %entry
152+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
153+
; CHECK-NEXT: ret
154+
entry:
155+
%c = or <vscale x 8 x i16> %a, %b
156+
ret <vscale x 8 x i16> %c
157+
}
158+
159+
define <vscale x 16 x i8> @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
160+
; CHECK-LABEL: ornxv16i8:
161+
; CHECK: // %bb.0: // %entry
162+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
163+
; CHECK-NEXT: ret
164+
entry:
165+
%c = or <vscale x 16 x i8> %a, %b
166+
ret <vscale x 16 x i8> %c
167+
}
168+
169+
;; xor
170+
define <vscale x 2 x i64> @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
171+
; CHECK-LABEL: xornxv2i64:
172+
; CHECK: // %bb.0: // %entry
173+
; CHECK-NEXT: eor z0.d, z0.d, z1.d
174+
; CHECK-NEXT: ret
175+
entry:
176+
%c = xor <vscale x 2 x i64> %a, %b
177+
ret <vscale x 2 x i64> %c
178+
}
179+
180+
define <vscale x 4 x i32> @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
181+
; CHECK-LABEL: xornxv4i32:
182+
; CHECK: // %bb.0: // %entry
183+
; CHECK-NEXT: eor z0.d, z0.d, z1.d
184+
; CHECK-NEXT: ret
185+
entry:
186+
%c = xor <vscale x 4 x i32> %a, %b
187+
ret <vscale x 4 x i32> %c
188+
}
189+
190+
define <vscale x 8 x i16> @xornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
191+
; CHECK-LABEL: xornxv8i16:
192+
; CHECK: // %bb.0: // %entry
193+
; CHECK-NEXT: eor z0.d, z0.d, z1.d
194+
; CHECK-NEXT: ret
195+
entry:
196+
%c = xor <vscale x 8 x i16> %a, %b
197+
ret <vscale x 8 x i16> %c
198+
}
199+
200+
define <vscale x 16 x i8> @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
201+
; CHECK-LABEL: xornxv16i8:
202+
; CHECK: // %bb.0: // %entry
203+
; CHECK-NEXT: eor z0.d, z0.d, z1.d
204+
; CHECK-NEXT: ret
205+
entry:
206+
%c = xor <vscale x 16 x i8> %a, %b
207+
ret <vscale x 16 x i8> %c
208+
}

0 commit comments

Comments
 (0)