Commit 27bbe3e

Test cases for vscale immediates

1 parent 4349ffb

1 file changed, 344 insertions(+), 0 deletions(-)
@@ -0,0 +1,344 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc %s -o - | FileCheck %s --check-prefixes=COMMON,BASE
;; Additional RUN lines to exercise LSR code paths which AArch64 normally wouldn't use.
; RUN: llc %s -o - -lsr-preferred-addressing-mode=preindexed | FileCheck %s --check-prefixes=COMMON,PREINDEX
; RUN: llc %s -o - -lsr-preferred-addressing-mode=postindexed | FileCheck %s --check-prefixes=COMMON,POSTINDEX

target triple = "aarch64-unknown-linux-gnu"

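;; The tests below check how scalable offsets are folded into SVE's reg+imm
;; addressing mode, where e.g. [x0, #1, mul vl] addresses one vector-length's
;; worth of bytes past x0. The signed immediate for this form spans -8 to 7
;; multiples of VL, so wider gaps need a separate base register.
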
define void @mulvl123_addressing(ptr %src, ptr %dst, i64 %count) #0 {
; COMMON-LABEL: mulvl123_addressing:
; COMMON: // %bb.0: // %entry
; COMMON-NEXT: ptrue p0.b
; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: .LBB0_1: // %for.body
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; COMMON-NEXT: ld1b { z0.b }, p0/z, [x0]
; COMMON-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul vl]
; COMMON-NEXT: ld1b { z2.b }, p0/z, [x0, #2, mul vl]
; COMMON-NEXT: ld1b { z3.b }, p0/z, [x0, #3, mul vl]
; COMMON-NEXT: addvl x0, x0, #5
; COMMON-NEXT: umax z0.b, p0/m, z0.b, z1.b
; COMMON-NEXT: movprfx z1, z2
; COMMON-NEXT: umax z1.b, p0/m, z1.b, z3.b
; COMMON-NEXT: umax z0.b, p0/m, z0.b, z1.b
; COMMON-NEXT: st1b { z0.b }, p0, [x1, x8]
; COMMON-NEXT: addvl x8, x8, #1
; COMMON-NEXT: cmp x8, x2
; COMMON-NEXT: b.lo .LBB0_1
; COMMON-NEXT: // %bb.2: // %for.exit
; COMMON-NEXT: ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %2 = shl nuw nsw i64 %vscale, 4
  %mul = shl nuw nsw i64 %vscale, 6
  br label %for.body

for.body:
  %src.addr = phi ptr [ %src, %entry ], [ %src.addr.next, %for.body ]
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src.addr, i64 %idx
  %3 = load <vscale x 16 x i8>, ptr %arrayidx
  %4 = getelementptr <vscale x 16 x i8>, ptr %arrayidx, i64 1
  %5 = load <vscale x 16 x i8>, ptr %4
  %6 = getelementptr <vscale x 16 x i8>, ptr %arrayidx, i64 2
  %7 = load <vscale x 16 x i8>, ptr %6
  %8 = getelementptr <vscale x 16 x i8>, ptr %arrayidx, i64 3
  %9 = load <vscale x 16 x i8>, ptr %8
  %10 = tail call <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8> %3, <vscale x 16 x i8> %5)
  %11 = tail call <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8> %7, <vscale x 16 x i8> %9)
  %12 = tail call <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8> %10, <vscale x 16 x i8> %11)
  %src.addr.next = getelementptr inbounds i8, ptr %src.addr, i64 %mul
  %arrayidx4 = getelementptr inbounds i8, ptr %dst, i64 %idx
  store <vscale x 16 x i8> %12, ptr %arrayidx4
  %idx.next = add i64 %idx, %2
  %cmp = icmp ult i64 %idx.next, %count
  br i1 %cmp, label %for.body, label %for.exit

for.exit:
  ret void
}

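;; Four loads per iteration: two consecutive vectors from each of two source
;; rows %stride bytes apart, with both row pointers advanced by 2*VL bytes
;; per iteration.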
define void @many_mulvl1_addressing(ptr %src_rows, ptr %dst_rows, i64 %stride, i64 %count) #0 {
; COMMON-LABEL: many_mulvl1_addressing:
; COMMON: // %bb.0: // %entry
; COMMON-NEXT: mov x9, x1
; COMMON-NEXT: ptrue p0.b
; COMMON-NEXT: add x10, x0, x2
; COMMON-NEXT: inch x9
; COMMON-NEXT: ptrue p1.h
; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: addvl x11, x10, #1
; COMMON-NEXT: addvl x12, x0, #1
; COMMON-NEXT: .LBB1_1: // %for.body
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; COMMON-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; COMMON-NEXT: ld1b { z1.b }, p0/z, [x10, x8]
; COMMON-NEXT: subs x3, x3, #1
; COMMON-NEXT: ld1b { z2.b }, p0/z, [x12, x8]
; COMMON-NEXT: ld1b { z3.b }, p0/z, [x11, x8]
; COMMON-NEXT: add z0.b, z0.b, z1.b
; COMMON-NEXT: add z1.b, z2.b, z3.b
; COMMON-NEXT: st1b { z0.h }, p1, [x1, x8]
; COMMON-NEXT: st1b { z1.h }, p1, [x9, x8]
; COMMON-NEXT: addvl x8, x8, #2
; COMMON-NEXT: b.ne .LBB1_1
; COMMON-NEXT: // %bb.2: // %for.exit
; COMMON-NEXT: ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %mul = shl nuw nsw i64 %vscale, 5
  br label %for.body

for.body:
  %src_row_addr = phi ptr [ %src_rows, %entry ], [ %add_ptr_src, %for.body ]
  %dst_row_addr = phi ptr [ %dst_rows, %entry ], [ %add_ptr_dst, %for.body ]
  %idx = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %2 = load <vscale x 16 x i8>, ptr %src_row_addr
  %3 = getelementptr <vscale x 16 x i8>, ptr %src_row_addr, i64 1
  %4 = load <vscale x 16 x i8>, ptr %3
  %arrayidx2 = getelementptr inbounds i8, ptr %src_row_addr, i64 %stride
  %5 = load <vscale x 16 x i8>, ptr %arrayidx2
  %6 = getelementptr <vscale x 16 x i8>, ptr %arrayidx2, i64 1
  %7 = load <vscale x 16 x i8>, ptr %6
  %8 = add <vscale x 16 x i8> %2, %5
  %9 = add <vscale x 16 x i8> %4, %7
  %10 = bitcast <vscale x 16 x i8> %8 to <vscale x 8 x i16>
  %11 = trunc <vscale x 8 x i16> %10 to <vscale x 8 x i8>
  store <vscale x 8 x i8> %11, ptr %dst_row_addr
  %12 = bitcast <vscale x 16 x i8> %9 to <vscale x 8 x i16>
  %13 = getelementptr <vscale x 8 x i8>, ptr %dst_row_addr, i64 1
  %14 = trunc <vscale x 8 x i16> %12 to <vscale x 8 x i8>
  store <vscale x 8 x i8> %14, ptr %13
  %add_ptr_src = getelementptr inbounds i8, ptr %src_row_addr, i64 %mul
  %add_ptr_dst = getelementptr inbounds i8, ptr %dst_row_addr, i64 %mul
  %inc = add nuw i64 %idx, 1
  %exitcond = icmp eq i64 %inc, %count
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  ret void
}

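;; A fixed-increment IV (4 elements per iteration) with a scalable offset of
;; 4*VL bytes between the two loads.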
define void @fixed_iv_scalable_offset(ptr %src, ptr %dst, i64 %count) #0 {
; BASE-LABEL: fixed_iv_scalable_offset:
; BASE: // %bb.0: // %entry
; BASE-NEXT: ptrue p0.s
; BASE-NEXT: .LBB2_1: // %for.body
; BASE-NEXT: // =>This Inner Loop Header: Depth=1
; BASE-NEXT: ld1w { z0.s }, p0/z, [x0]
; BASE-NEXT: ld1w { z1.s }, p0/z, [x0, #4, mul vl]
; BASE-NEXT: subs x2, x2, #4
; BASE-NEXT: add x0, x0, #16
; BASE-NEXT: add z0.s, z0.s, z1.s
; BASE-NEXT: st1w { z0.s }, p0, [x1]
; BASE-NEXT: add x1, x1, #16
; BASE-NEXT: b.ne .LBB2_1
; BASE-NEXT: // %bb.2: // %for.exit
; BASE-NEXT: ret
;
; PREINDEX-LABEL: fixed_iv_scalable_offset:
; PREINDEX: // %bb.0: // %entry
; PREINDEX-NEXT: ptrue p0.s
; PREINDEX-NEXT: .LBB2_1: // %for.body
; PREINDEX-NEXT: // =>This Inner Loop Header: Depth=1
; PREINDEX-NEXT: ld1w { z0.s }, p0/z, [x0]
; PREINDEX-NEXT: ld1w { z1.s }, p0/z, [x0, #4, mul vl]
; PREINDEX-NEXT: subs x2, x2, #4
; PREINDEX-NEXT: add x0, x0, #16
; PREINDEX-NEXT: add z0.s, z0.s, z1.s
; PREINDEX-NEXT: st1w { z0.s }, p0, [x1]
; PREINDEX-NEXT: add x1, x1, #16
; PREINDEX-NEXT: b.ne .LBB2_1
; PREINDEX-NEXT: // %bb.2: // %for.exit
; PREINDEX-NEXT: ret
;
; POSTINDEX-LABEL: fixed_iv_scalable_offset:
; POSTINDEX: // %bb.0: // %entry
; POSTINDEX-NEXT: ptrue p0.s
; POSTINDEX-NEXT: addvl x8, x0, #4
; POSTINDEX-NEXT: .LBB2_1: // %for.body
; POSTINDEX-NEXT: // =>This Inner Loop Header: Depth=1
; POSTINDEX-NEXT: ld1w { z0.s }, p0/z, [x0]
; POSTINDEX-NEXT: ld1w { z1.s }, p0/z, [x8]
; POSTINDEX-NEXT: subs x2, x2, #4
; POSTINDEX-NEXT: add x8, x8, #16
; POSTINDEX-NEXT: add x0, x0, #16
; POSTINDEX-NEXT: add z0.s, z0.s, z1.s
; POSTINDEX-NEXT: st1w { z0.s }, p0, [x1]
; POSTINDEX-NEXT: add x1, x1, #16
; POSTINDEX-NEXT: b.ne .LBB2_1
; POSTINDEX-NEXT: // %bb.2: // %for.exit
; POSTINDEX-NEXT: ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %mul = shl nuw nsw i64 %vscale, 4
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %src.ptr = getelementptr inbounds i32, ptr %src, i64 %iv
  %data = load <vscale x 4 x i32>, ptr %src.ptr
  %src.ptr.offset = getelementptr inbounds i32, ptr %src.ptr, i64 %mul
  %data2 = load <vscale x 4 x i32>, ptr %src.ptr.offset
  %add = add <vscale x 4 x i32> %data, %data2
  %dst.ptr = getelementptr i32, ptr %dst, i64 %iv
  store <vscale x 4 x i32> %add, ptr %dst.ptr
  %inc = add nuw i64 %iv, 4
  %exit.cond = icmp eq i64 %inc, %count
  br i1 %exit.cond, label %for.exit, label %for.body

for.exit:
  ret void
}

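;; Mixed offsets from the IV-based address: a scalable part (4*VL bytes)
;; followed by a fixed part (32 bytes).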
define void @mixed_offsets_scalable_then_fixed(ptr %src, ptr %dst, i64 %count) #0 {
; COMMON-LABEL: mixed_offsets_scalable_then_fixed:
; COMMON: // %bb.0: // %entry
; COMMON-NEXT: ptrue p0.s
; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: addvl x9, x0, #4
; COMMON-NEXT: mov x10, #8 // =0x8
; COMMON-NEXT: .LBB3_1: // %for.body
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; COMMON-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; COMMON-NEXT: ld1w { z1.s }, p0/z, [x9, x8, lsl #2]
; COMMON-NEXT: add x11, x9, x8, lsl #2
; COMMON-NEXT: ld1w { z2.s }, p0/z, [x11, x10, lsl #2]
; COMMON-NEXT: add z0.s, z0.s, z1.s
; COMMON-NEXT: add z0.s, z0.s, z2.s
; COMMON-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; COMMON-NEXT: incw x8
; COMMON-NEXT: cmp x2, x8
; COMMON-NEXT: b.ne .LBB3_1
; COMMON-NEXT: // %bb.2: // %for.exit
; COMMON-NEXT: ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %mul = shl nuw nsw i64 %vscale, 4
  %vl = shl nuw nsw i64 %vscale, 2
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %src.ptr = getelementptr inbounds i32, ptr %src, i64 %iv
  %data = load <vscale x 4 x i32>, ptr %src.ptr
  %src.ptr.sc_off = getelementptr inbounds i32, ptr %src.ptr, i64 %mul
  %data2 = load <vscale x 4 x i32>, ptr %src.ptr.sc_off
  %src.ptr.fx_off = getelementptr inbounds i32, ptr %src.ptr.sc_off, i64 8
  %data3 = load <vscale x 4 x i32>, ptr %src.ptr.fx_off
  %add = add <vscale x 4 x i32> %data, %data2
  %add2 = add <vscale x 4 x i32> %add, %data3
  %dst.ptr = getelementptr i32, ptr %dst, i64 %iv
  store <vscale x 4 x i32> %add2, ptr %dst.ptr
  %inc = add nuw i64 %iv, %vl
  %exit.cond = icmp eq i64 %inc, %count
  br i1 %exit.cond, label %for.exit, label %for.body

for.exit:
  ret void
}

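;; As above, but with the fixed offset applied before the scalable one.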
define void @mixed_offsets_fixed_then_scalable(ptr %src, ptr %dst, i64 %count) #0 {
; COMMON-LABEL: mixed_offsets_fixed_then_scalable:
; COMMON: // %bb.0: // %entry
; COMMON-NEXT: addvl x9, x0, #4
; COMMON-NEXT: ptrue p0.s
; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: add x9, x9, #32
; COMMON-NEXT: mov x10, #8 // =0x8
; COMMON-NEXT: .LBB4_1: // %for.body
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; COMMON-NEXT: add x11, x0, x8, lsl #2
; COMMON-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; COMMON-NEXT: ld1w { z2.s }, p0/z, [x9, x8, lsl #2]
; COMMON-NEXT: ld1w { z1.s }, p0/z, [x11, x10, lsl #2]
; COMMON-NEXT: add z0.s, z0.s, z1.s
; COMMON-NEXT: add z0.s, z0.s, z2.s
; COMMON-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; COMMON-NEXT: incw x8
; COMMON-NEXT: cmp x2, x8
; COMMON-NEXT: b.ne .LBB4_1
; COMMON-NEXT: // %bb.2: // %for.exit
; COMMON-NEXT: ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %mul = shl nuw nsw i64 %vscale, 4
  %vl = shl nuw nsw i64 %vscale, 2
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %src.ptr = getelementptr inbounds i32, ptr %src, i64 %iv
  %data = load <vscale x 4 x i32>, ptr %src.ptr
  %src.ptr.fx_off = getelementptr inbounds i32, ptr %src.ptr, i64 8
  %data2 = load <vscale x 4 x i32>, ptr %src.ptr.fx_off
  %src.ptr.sc_off = getelementptr inbounds i32, ptr %src.ptr.fx_off, i64 %mul
  %data3 = load <vscale x 4 x i32>, ptr %src.ptr.sc_off
  %add = add <vscale x 4 x i32> %data, %data2
  %add2 = add <vscale x 4 x i32> %add, %data3
  %dst.ptr = getelementptr i32, ptr %dst, i64 %iv
  store <vscale x 4 x i32> %add2, ptr %dst.ptr
  %inc = add nuw i64 %iv, %vl
  %exit.cond = icmp eq i64 %inc, %count
  br i1 %exit.cond, label %for.exit, label %for.body

for.exit:
  ret void
}

;; FIXME: There's an opportunity here (that we currently miss) to define the phi
;; on the middle access, and use negative and positive scalable immediates.
;;
;; Currently we generate a scalable offset for the load that is in range of the
;; base, and a separate register holding the offset for the access that's out of
;; range of the base (but in range of the other).
;;
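;; One possible rebased shape (an illustrative sketch, not current output),
;; with a pointer phi defined on the middle access and advanced with addvl:
;;   ld1w { z0.s }, p0/z, [x9, #-4, mul vl]
;;   ld1w { z1.s }, p0/z, [x9]
;;   ld1w { z2.s }, p0/z, [x9, #4, mul vl]
;;   addvl x9, x9, #1
;;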
define void @three_access_wide_gap(ptr %src, ptr %dst, i64 %count) #0 {
; COMMON-LABEL: three_access_wide_gap:
; COMMON: // %bb.0: // %entry
; COMMON-NEXT: ptrue p0.s
; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: addvl x9, x0, #8
; COMMON-NEXT: addvl x10, x0, #4
; COMMON-NEXT: .LBB5_1: // %for.body
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; COMMON-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; COMMON-NEXT: ld1w { z1.s }, p0/z, [x10, x8, lsl #2]
; COMMON-NEXT: ld1w { z2.s }, p0/z, [x9, x8, lsl #2]
; COMMON-NEXT: add z0.s, z0.s, z1.s
; COMMON-NEXT: add z0.s, z0.s, z2.s
; COMMON-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; COMMON-NEXT: incw x8
; COMMON-NEXT: cmp x2, x8
; COMMON-NEXT: b.ne .LBB5_1
; COMMON-NEXT: // %bb.2: // %for.exit
; COMMON-NEXT: ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %mul = mul nuw nsw i64 %vscale, 16
  %mul2 = mul nuw nsw i64 %vscale, 16
  %vl = mul nuw nsw i64 %vscale, 4
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %src.ptr = getelementptr inbounds i32, ptr %src, i64 %iv
  %data = load <vscale x 4 x i32>, ptr %src.ptr
  %src.ptr.sc_off = getelementptr inbounds i32, ptr %src.ptr, i64 %mul
  %data2 = load <vscale x 4 x i32>, ptr %src.ptr.sc_off
  %src.ptr.sc_off2 = getelementptr inbounds i32, ptr %src.ptr.sc_off, i64 %mul2
  %data3 = load <vscale x 4 x i32>, ptr %src.ptr.sc_off2
  %add = add <vscale x 4 x i32> %data, %data2
  %add2 = add <vscale x 4 x i32> %add, %data3
  %dst.ptr = getelementptr i32, ptr %dst, i64 %iv
  store <vscale x 4 x i32> %add2, ptr %dst.ptr
  %inc = add nuw i64 %iv, %vl
  %exit.cond = icmp eq i64 %inc, %count
  br i1 %exit.cond, label %for.exit, label %for.body

for.exit:
  ret void
}

attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }
