Skip to content

Commit 23ea98f

Browse files
authored
[AArch64][SVE2] Do not emit RSHRNB for large shifts (#66672)
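rshrnb's shift amount operand must be in the range [1, EltSizeInBits], where EltSizeInBits is the element size of the narrowed result type. This patch stops RSHRNB ISD nodes from being emitted when the shift amount falls outside that range.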
1 parent 3f78d6a commit 23ea98f

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20237,6 +20237,8 @@ static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG,
2023720237
if (!SrlOp1)
2023820238
return SDValue();
2023920239
unsigned ShiftValue = SrlOp1->getZExtValue();
20240+
if (ShiftValue < 1 || ShiftValue > ResVT.getScalarSizeInBits())
20241+
return SDValue();
2024020242

2024120243
SDValue Add = Srl->getOperand(0);
2024220244
if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())

llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,23 @@ define void @neg_add_lshr_rshrnb_h_0(ptr %ptr, ptr %dst, i64 %index){
100100
ret void
101101
}
102102

103+
; Negative test: the lshr amount is 0, which is below rshrnb's minimum shift
; amount of 1, so the add/lshr/trunc sequence must not be combined into rshrnb.
define void @neg_zero_shift(ptr %ptr, ptr %dst, i64 %index){
104+
; CHECK-LABEL: neg_zero_shift:
105+
; CHECK: // %bb.0:
106+
; CHECK-NEXT: ptrue p0.s
107+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
108+
; CHECK-NEXT: add z0.s, z0.s, #1 // =0x1
109+
; CHECK-NEXT: st1h { z0.s }, p0, [x1, x2, lsl #1]
110+
; CHECK-NEXT: ret
111+
%load = load <vscale x 4 x i32>, ptr %ptr, align 2
112+
%1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
113+
%2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
114+
%3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
115+
%4 = getelementptr inbounds i16, ptr %dst, i64 %index
116+
store <vscale x 4 x i16> %3, ptr %4, align 1
117+
ret void
118+
}
119+
103120
define void @wide_add_shift_add_rshrnb_b(ptr %dest, i64 %index, <vscale x 16 x i16> %arg1){
104121
; CHECK-LABEL: wide_add_shift_add_rshrnb_b:
105122
; CHECK: // %bb.0:
@@ -142,6 +159,52 @@ define void @wide_add_shift_add_rshrnb_h(ptr %dest, i64 %index, <vscale x 8 x i3
142159
ret void
143160
}
144161

162+
; Positive boundary test: the add constant 2147483648 is 1 << 31 and the lshr
; amount is 32, which equals the i32 destination element size, so rshrnb #32
; is still expected for each half of the wide input.
define void @wide_add_shift_add_rshrnb_d(ptr %dest, i64 %index, <vscale x 4 x i64> %arg1){
163+
; CHECK-LABEL: wide_add_shift_add_rshrnb_d:
164+
; CHECK: // %bb.0:
165+
; CHECK-NEXT: ptrue p0.s
166+
; CHECK-NEXT: rshrnb z1.s, z1.d, #32
167+
; CHECK-NEXT: rshrnb z0.s, z0.d, #32
168+
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
169+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x1, lsl #2]
170+
; CHECK-NEXT: add z0.s, z1.s, z0.s
171+
; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2]
172+
; CHECK-NEXT: ret
173+
%1 = add <vscale x 4 x i64> %arg1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2147483648, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
174+
%2 = lshr <vscale x 4 x i64> %1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 32, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
175+
%3 = getelementptr inbounds i32, ptr %dest, i64 %index
176+
%load = load <vscale x 4 x i32>, ptr %3, align 4
177+
%4 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
178+
%5 = add <vscale x 4 x i32> %load, %4
179+
store <vscale x 4 x i32> %5, ptr %3, align 4
180+
ret void
181+
}
182+
183+
; Do not emit rshrnb if the shift amount is larger than the destination
; element size in bits. Here the lshr amount is 48 while the truncated result
; element is i32, so a plain add followed by lsr #48 is expected instead.
184+
define void @neg_wide_add_shift_add_rshrnb_d(ptr %dest, i64 %index, <vscale x 4 x i64> %arg1){
185+
; CHECK-LABEL: neg_wide_add_shift_add_rshrnb_d:
186+
; CHECK: // %bb.0:
187+
; CHECK-NEXT: mov z2.d, #0x800000000000
188+
; CHECK-NEXT: ptrue p0.s
189+
; CHECK-NEXT: add z0.d, z0.d, z2.d
190+
; CHECK-NEXT: add z1.d, z1.d, z2.d
191+
; CHECK-NEXT: lsr z1.d, z1.d, #48
192+
; CHECK-NEXT: lsr z0.d, z0.d, #48
193+
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
194+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x1, lsl #2]
195+
; CHECK-NEXT: add z0.s, z1.s, z0.s
196+
; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2]
197+
; CHECK-NEXT: ret
198+
%1 = add <vscale x 4 x i64> %arg1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 140737488355328, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
199+
%2 = lshr <vscale x 4 x i64> %1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 48, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
200+
%3 = getelementptr inbounds i32, ptr %dest, i64 %index
201+
%load = load <vscale x 4 x i32>, ptr %3, align 4
202+
%4 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
203+
%5 = add <vscale x 4 x i32> %load, %4
204+
store <vscale x 4 x i32> %5, ptr %3, align 4
205+
ret void
206+
}
207+
145208
define void @neg_trunc_lsr_add_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, <vscale x 8 x i16> %add_op1){
146209
; CHECK-LABEL: neg_trunc_lsr_add_op1_not_splat:
147210
; CHECK: // %bb.0:

0 commit comments

Comments
 (0)