Skip to content

Commit 70568c9

Browse files
committed
Address comments and rebase patch
1 parent 4ddc200 commit 70568c9

File tree

3 files changed

+133 additions, -22 deletions (lines changed)

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -23660,20 +23660,21 @@ static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG,
2366023660
TargetLowering::DAGCombinerInfo &DCI,
2366123661
const AArch64Subtarget *Subtarget) {
2366223662
EVT MemVT = LD->getMemoryVT();
23663-
if (!DCI.isBeforeLegalize() || !Subtarget->hasNEON() ||
23664-
!MemVT.isScalableVector() || LD->getExtensionType() != ISD::NON_EXTLOAD ||
23663+
if (!DCI.isBeforeLegalize() || !Subtarget->isLittleEndian() ||
23664+
!Subtarget->hasNEON() || !MemVT.isScalableVector() ||
23665+
LD->getExtensionType() != ISD::NON_EXTLOAD ||
2366523666
MemVT.getSizeInBits().getKnownMinValue() != 128 ||
2366623667
Subtarget->getMaxSVEVectorSizeInBits() != 128)
2366723668
return SDValue();
2366823669

2366923670
SDLoc DL(LD);
23670-
MVT NewVT = MVT::getVectorVT(MemVT.getVectorElementType().getSimpleVT(),
23671+
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(),
2367123672
MemVT.getVectorMinNumElements());
2367223673
SDValue NewLoad = DAG.getLoad(
2367323674
NewVT, DL, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
2367423675
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
2367523676
SDValue Insert = convertToScalableVector(DAG, MemVT, NewLoad);
23676-
return DAG.getMergeValues({Insert, SDValue(cast<SDNode>(NewLoad), 1)}, DL);
23677+
return DAG.getMergeValues({Insert, NewLoad.getValue(1)}, DL);
2367723678
}
2367823679

2367923680
// Perform TBI simplification if supported by the target and try to break up
@@ -23980,21 +23981,21 @@ static SDValue combineVScale1Store(StoreSDNode *ST, SelectionDAG &DAG,
2398023981
const AArch64Subtarget *Subtarget) {
2398123982
SDValue Value = ST->getValue();
2398223983
EVT ValueVT = Value.getValueType();
23983-
if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
23984-
!DCI.isBeforeLegalize() || !Subtarget->hasNEON() ||
23985-
!ValueVT.isScalableVector() || ST->isTruncatingStore() ||
23984+
if (!DCI.isBeforeLegalize() || !Subtarget->isLittleEndian() ||
23985+
!Subtarget->hasNEON() || !ValueVT.isScalableVector() ||
23986+
ST->isTruncatingStore() ||
2398623987
ValueVT.getSizeInBits().getKnownMinValue() != 128 ||
2398723988
Subtarget->getMaxSVEVectorSizeInBits() != 128)
2398823989
return SDValue();
2398923990

2399023991
SDLoc DL(ST);
23991-
MVT NewVT = MVT::getVectorVT(ValueVT.getVectorElementType().getSimpleVT(),
23992-
ValueVT.getVectorMinNumElements());
23992+
EVT NewVT =
23993+
EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
23994+
ValueVT.getVectorMinNumElements());
2399323995
SDValue NewValue = convertFromScalableVector(DAG, NewVT, Value);
23994-
SDValue NewStore = DAG.getStore(
23995-
ST->getChain(), DL, NewValue, ST->getBasePtr(), ST->getPointerInfo(),
23996-
ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo());
23997-
return NewStore;
23996+
return DAG.getStore(ST->getChain(), DL, NewValue, ST->getBasePtr(),
23997+
ST->getPointerInfo(), ST->getOriginalAlign(),
23998+
ST->getMemOperand()->getFlags(), ST->getAAInfo());
2399823999
}
2399924000

2400024001
static unsigned getFPSubregForVT(EVT VT) {

llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ define void @nxv16i8(ptr %ldptr, ptr %stptr) {
1717
;
1818
; CHECK-128-LABEL: nxv16i8:
1919
; CHECK-128: // %bb.0:
20-
; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl]
21-
; CHECK-128-NEXT: str z0, [x1, #16, mul vl]
20+
; CHECK-128-NEXT: ldr q0, [x0, #256]
21+
; CHECK-128-NEXT: str q0, [x1, #256]
2222
; CHECK-128-NEXT: ret
2323
;
2424
; CHECK-256-LABEL: nxv16i8:
@@ -62,8 +62,8 @@ define void @nxv8i16(ptr %ldptr, ptr %stptr) {
6262
;
6363
; CHECK-128-LABEL: nxv8i16:
6464
; CHECK-128: // %bb.0:
65-
; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl]
66-
; CHECK-128-NEXT: str z0, [x1, #16, mul vl]
65+
; CHECK-128-NEXT: ldr q0, [x0, #256]
66+
; CHECK-128-NEXT: str q0, [x1, #256]
6767
; CHECK-128-NEXT: ret
6868
;
6969
; CHECK-256-LABEL: nxv8i16:
@@ -107,8 +107,8 @@ define void @nxv4i32(ptr %ldptr, ptr %stptr) {
107107
;
108108
; CHECK-128-LABEL: nxv4i32:
109109
; CHECK-128: // %bb.0:
110-
; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl]
111-
; CHECK-128-NEXT: str z0, [x1, #16, mul vl]
110+
; CHECK-128-NEXT: ldr q0, [x0, #256]
111+
; CHECK-128-NEXT: str q0, [x1, #256]
112112
; CHECK-128-NEXT: ret
113113
;
114114
; CHECK-256-LABEL: nxv4i32:
@@ -152,8 +152,8 @@ define void @nxv2i64(ptr %ldptr, ptr %stptr) {
152152
;
153153
; CHECK-128-LABEL: nxv2i64:
154154
; CHECK-128: // %bb.0:
155-
; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl]
156-
; CHECK-128-NEXT: str z0, [x1, #16, mul vl]
155+
; CHECK-128-NEXT: ldr q0, [x0, #256]
156+
; CHECK-128-NEXT: str q0, [x1, #256]
157157
; CHECK-128-NEXT: ret
158158
;
159159
; CHECK-256-LABEL: nxv2i64:

llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll

Lines changed: 111 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128
3-
; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | not grep -e ldr -e str
3+
; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-BE-128
44

55
define <vscale x 16 x i8> @ld_nxv16i8(ptr %0) {
66
; CHECK-128-LABEL: ld_nxv16i8:
77
; CHECK-128: // %bb.0:
88
; CHECK-128-NEXT: ldr q0, [x0]
99
; CHECK-128-NEXT: ret
10+
;
11+
; CHECK-BE-128-LABEL: ld_nxv16i8:
12+
; CHECK-BE-128: // %bb.0:
13+
; CHECK-BE-128-NEXT: ptrue p0.b
14+
; CHECK-BE-128-NEXT: ld1b { z0.b }, p0/z, [x0]
15+
; CHECK-BE-128-NEXT: ret
1016
%2 = load <vscale x 16 x i8>, ptr %0, align 1
1117
ret <vscale x 16 x i8> %2
1218
}
@@ -16,6 +22,12 @@ define void @st_nxv16i8(ptr %0, <vscale x 16 x i8> %1) {
1622
; CHECK-128: // %bb.0:
1723
; CHECK-128-NEXT: str q0, [x0]
1824
; CHECK-128-NEXT: ret
25+
;
26+
; CHECK-BE-128-LABEL: st_nxv16i8:
27+
; CHECK-BE-128: // %bb.0:
28+
; CHECK-BE-128-NEXT: ptrue p0.b
29+
; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x0]
30+
; CHECK-BE-128-NEXT: ret
1931
store <vscale x 16 x i8> %1, ptr %0, align 1
2032
ret void
2133
}
@@ -25,6 +37,12 @@ define <vscale x 8 x i16> @ld_nxv8i16(ptr %0) {
2537
; CHECK-128: // %bb.0:
2638
; CHECK-128-NEXT: ldr q0, [x0]
2739
; CHECK-128-NEXT: ret
40+
;
41+
; CHECK-BE-128-LABEL: ld_nxv8i16:
42+
; CHECK-BE-128: // %bb.0:
43+
; CHECK-BE-128-NEXT: ptrue p0.h
44+
; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0]
45+
; CHECK-BE-128-NEXT: ret
2846
%2 = load <vscale x 8 x i16>, ptr %0, align 2
2947
ret <vscale x 8 x i16> %2
3048
}
@@ -34,6 +52,12 @@ define void @st_nxv8i16(ptr %0, <vscale x 8 x i16> %1) {
3452
; CHECK-128: // %bb.0:
3553
; CHECK-128-NEXT: str q0, [x0]
3654
; CHECK-128-NEXT: ret
55+
;
56+
; CHECK-BE-128-LABEL: st_nxv8i16:
57+
; CHECK-BE-128: // %bb.0:
58+
; CHECK-BE-128-NEXT: ptrue p0.h
59+
; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0]
60+
; CHECK-BE-128-NEXT: ret
3761
store <vscale x 8 x i16> %1, ptr %0, align 2
3862
ret void
3963
}
@@ -43,6 +67,12 @@ define <vscale x 4 x i32> @ld_nxv4i32(ptr %0) {
4367
; CHECK-128: // %bb.0:
4468
; CHECK-128-NEXT: ldr q0, [x0]
4569
; CHECK-128-NEXT: ret
70+
;
71+
; CHECK-BE-128-LABEL: ld_nxv4i32:
72+
; CHECK-BE-128: // %bb.0:
73+
; CHECK-BE-128-NEXT: ptrue p0.s
74+
; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0]
75+
; CHECK-BE-128-NEXT: ret
4676
%2 = load <vscale x 4 x i32>, ptr %0, align 4
4777
ret <vscale x 4 x i32> %2
4878
}
@@ -52,6 +82,12 @@ define void @st_nxv4i32(ptr %0, <vscale x 4 x i32> %1) {
5282
; CHECK-128: // %bb.0:
5383
; CHECK-128-NEXT: str q0, [x0]
5484
; CHECK-128-NEXT: ret
85+
;
86+
; CHECK-BE-128-LABEL: st_nxv4i32:
87+
; CHECK-BE-128: // %bb.0:
88+
; CHECK-BE-128-NEXT: ptrue p0.s
89+
; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0]
90+
; CHECK-BE-128-NEXT: ret
5591
store <vscale x 4 x i32> %1, ptr %0, align 4
5692
ret void
5793
}
@@ -61,6 +97,12 @@ define <vscale x 2 x i64> @ld_nxv2i64(ptr %0) {
6197
; CHECK-128: // %bb.0:
6298
; CHECK-128-NEXT: ldr q0, [x0]
6399
; CHECK-128-NEXT: ret
100+
;
101+
; CHECK-BE-128-LABEL: ld_nxv2i64:
102+
; CHECK-BE-128: // %bb.0:
103+
; CHECK-BE-128-NEXT: ptrue p0.d
104+
; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0]
105+
; CHECK-BE-128-NEXT: ret
64106
%2 = load <vscale x 2 x i64>, ptr %0, align 8
65107
ret <vscale x 2 x i64> %2
66108
}
@@ -70,6 +112,12 @@ define void @st_nxv2i64(ptr %0, <vscale x 2 x i64> %1) {
70112
; CHECK-128: // %bb.0:
71113
; CHECK-128-NEXT: str q0, [x0]
72114
; CHECK-128-NEXT: ret
115+
;
116+
; CHECK-BE-128-LABEL: st_nxv2i64:
117+
; CHECK-BE-128: // %bb.0:
118+
; CHECK-BE-128-NEXT: ptrue p0.d
119+
; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0]
120+
; CHECK-BE-128-NEXT: ret
73121
store <vscale x 2 x i64> %1, ptr %0, align 8
74122
ret void
75123
}
@@ -79,6 +127,12 @@ define <vscale x 8 x half> @ld_nxv8f16(ptr %0) {
79127
; CHECK-128: // %bb.0:
80128
; CHECK-128-NEXT: ldr q0, [x0]
81129
; CHECK-128-NEXT: ret
130+
;
131+
; CHECK-BE-128-LABEL: ld_nxv8f16:
132+
; CHECK-BE-128: // %bb.0:
133+
; CHECK-BE-128-NEXT: ptrue p0.h
134+
; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0]
135+
; CHECK-BE-128-NEXT: ret
82136
%2 = load <vscale x 8 x half>, ptr %0, align 2
83137
ret <vscale x 8 x half> %2
84138
}
@@ -88,6 +142,12 @@ define void @st_nxv8f16(ptr %0, <vscale x 8 x half> %1) {
88142
; CHECK-128: // %bb.0:
89143
; CHECK-128-NEXT: str q0, [x0]
90144
; CHECK-128-NEXT: ret
145+
;
146+
; CHECK-BE-128-LABEL: st_nxv8f16:
147+
; CHECK-BE-128: // %bb.0:
148+
; CHECK-BE-128-NEXT: ptrue p0.h
149+
; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0]
150+
; CHECK-BE-128-NEXT: ret
91151
store <vscale x 8 x half> %1, ptr %0, align 2
92152
ret void
93153
}
@@ -97,6 +157,12 @@ define <vscale x 4 x float> @ld_nxv4f32(ptr %0) {
97157
; CHECK-128: // %bb.0:
98158
; CHECK-128-NEXT: ldr q0, [x0]
99159
; CHECK-128-NEXT: ret
160+
;
161+
; CHECK-BE-128-LABEL: ld_nxv4f32:
162+
; CHECK-BE-128: // %bb.0:
163+
; CHECK-BE-128-NEXT: ptrue p0.s
164+
; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0]
165+
; CHECK-BE-128-NEXT: ret
100166
%2 = load <vscale x 4 x float>, ptr %0, align 4
101167
ret <vscale x 4 x float> %2
102168
}
@@ -106,6 +172,12 @@ define void @st_nxv4f32(ptr %0, <vscale x 4 x float> %1) {
106172
; CHECK-128: // %bb.0:
107173
; CHECK-128-NEXT: str q0, [x0]
108174
; CHECK-128-NEXT: ret
175+
;
176+
; CHECK-BE-128-LABEL: st_nxv4f32:
177+
; CHECK-BE-128: // %bb.0:
178+
; CHECK-BE-128-NEXT: ptrue p0.s
179+
; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0]
180+
; CHECK-BE-128-NEXT: ret
109181
store <vscale x 4 x float> %1, ptr %0, align 4
110182
ret void
111183
}
@@ -115,6 +187,12 @@ define <vscale x 2 x double> @ld_nxv2f64(ptr %0) {
115187
; CHECK-128: // %bb.0:
116188
; CHECK-128-NEXT: ldr q0, [x0]
117189
; CHECK-128-NEXT: ret
190+
;
191+
; CHECK-BE-128-LABEL: ld_nxv2f64:
192+
; CHECK-BE-128: // %bb.0:
193+
; CHECK-BE-128-NEXT: ptrue p0.d
194+
; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0]
195+
; CHECK-BE-128-NEXT: ret
118196
%2 = load <vscale x 2 x double>, ptr %0, align 8
119197
ret <vscale x 2 x double> %2
120198
}
@@ -124,6 +202,38 @@ define void @st_nxv2f64(ptr %0, <vscale x 2 x double> %1) {
124202
; CHECK-128: // %bb.0:
125203
; CHECK-128-NEXT: str q0, [x0]
126204
; CHECK-128-NEXT: ret
205+
;
206+
; CHECK-BE-128-LABEL: st_nxv2f64:
207+
; CHECK-BE-128: // %bb.0:
208+
; CHECK-BE-128-NEXT: ptrue p0.d
209+
; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0]
210+
; CHECK-BE-128-NEXT: ret
127211
store <vscale x 2 x double> %1, ptr %0, align 8
128212
ret void
129213
}
214+
215+
; Test LDP/STP fold.
216+
define void @ldp_stp_nxv16i8_offset(ptr %ldptr, ptr %stptr) {
217+
; CHECK-128-LABEL: ldp_stp_nxv16i8_offset:
218+
; CHECK-128: // %bb.0:
219+
; CHECK-128-NEXT: ldp q0, q1, [x0, #-16]
220+
; CHECK-128-NEXT: stp q0, q1, [x1, #-16]
221+
; CHECK-128-NEXT: ret
222+
;
223+
; CHECK-BE-128-LABEL: ldp_stp_nxv16i8_offset:
224+
; CHECK-BE-128: // %bb.0:
225+
; CHECK-BE-128-NEXT: ptrue p0.b
226+
; CHECK-BE-128-NEXT: mov x8, #-16 // =0xfffffffffffffff0
227+
; CHECK-BE-128-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
228+
; CHECK-BE-128-NEXT: ld1b { z1.b }, p0/z, [x0]
229+
; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x1, x8]
230+
; CHECK-BE-128-NEXT: st1b { z1.b }, p0, [x1]
231+
; CHECK-BE-128-NEXT: ret
232+
%ldptr.1 = getelementptr inbounds i8, ptr %ldptr, i64 -16
233+
%ld1 = load <vscale x 16 x i8>, ptr %ldptr.1, align 1
234+
%ld2 = load <vscale x 16 x i8>, ptr %ldptr, align 1
235+
%stptr.1 = getelementptr inbounds i8, ptr %stptr, i64 -16
236+
store <vscale x 16 x i8> %ld1, ptr %stptr.1, align 1
237+
store <vscale x 16 x i8> %ld2, ptr %stptr, align 1
238+
ret void
239+
}

0 commit comments

Comments
 (0)