1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128
3
- ; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | not grep -e ldr -e str
3
+ ; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-BE-128
4
4
5
5
; Loads a whole <vscale x 16 x i8> vector from %0 (byte-aligned) and returns it.
; The little-endian run expects a single "ldr q0, [x0]"; the big-endian run
; expects an all-true byte predicate (ptrue p0.b) followed by a predicated ld1b.
define <vscale x 16 x i8 > @ld_nxv16i8 (ptr %0 ) {
6
6
; CHECK-128-LABEL: ld_nxv16i8:
7
7
; CHECK-128: // %bb.0:
8
8
; CHECK-128-NEXT: ldr q0, [x0]
9
9
; CHECK-128-NEXT: ret
10
+ ;
11
+ ; CHECK-BE-128-LABEL: ld_nxv16i8:
12
+ ; CHECK-BE-128: // %bb.0:
13
+ ; CHECK-BE-128-NEXT: ptrue p0.b
14
+ ; CHECK-BE-128-NEXT: ld1b { z0.b }, p0/z, [x0]
15
+ ; CHECK-BE-128-NEXT: ret
10
16
%2 = load <vscale x 16 x i8 >, ptr %0 , align 1
11
17
ret <vscale x 16 x i8 > %2
12
18
}
@@ -16,6 +22,12 @@ define void @st_nxv16i8(ptr %0, <vscale x 16 x i8> %1) {
16
22
; CHECK-128: // %bb.0:
17
23
; CHECK-128-NEXT: str q0, [x0]
18
24
; CHECK-128-NEXT: ret
25
+ ;
26
+ ; CHECK-BE-128-LABEL: st_nxv16i8:
27
+ ; CHECK-BE-128: // %bb.0:
28
+ ; CHECK-BE-128-NEXT: ptrue p0.b
29
+ ; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x0]
30
+ ; CHECK-BE-128-NEXT: ret
19
31
store <vscale x 16 x i8 > %1 , ptr %0 , align 1
20
32
ret void
21
33
}
@@ -25,6 +37,12 @@ define <vscale x 8 x i16> @ld_nxv8i16(ptr %0) {
25
37
; CHECK-128: // %bb.0:
26
38
; CHECK-128-NEXT: ldr q0, [x0]
27
39
; CHECK-128-NEXT: ret
40
+ ;
41
+ ; CHECK-BE-128-LABEL: ld_nxv8i16:
42
+ ; CHECK-BE-128: // %bb.0:
43
+ ; CHECK-BE-128-NEXT: ptrue p0.h
44
+ ; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0]
45
+ ; CHECK-BE-128-NEXT: ret
28
46
%2 = load <vscale x 8 x i16 >, ptr %0 , align 2
29
47
ret <vscale x 8 x i16 > %2
30
48
}
@@ -34,6 +52,12 @@ define void @st_nxv8i16(ptr %0, <vscale x 8 x i16> %1) {
34
52
; CHECK-128: // %bb.0:
35
53
; CHECK-128-NEXT: str q0, [x0]
36
54
; CHECK-128-NEXT: ret
55
+ ;
56
+ ; CHECK-BE-128-LABEL: st_nxv8i16:
57
+ ; CHECK-BE-128: // %bb.0:
58
+ ; CHECK-BE-128-NEXT: ptrue p0.h
59
+ ; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0]
60
+ ; CHECK-BE-128-NEXT: ret
37
61
store <vscale x 8 x i16 > %1 , ptr %0 , align 2
38
62
ret void
39
63
}
@@ -43,6 +67,12 @@ define <vscale x 4 x i32> @ld_nxv4i32(ptr %0) {
43
67
; CHECK-128: // %bb.0:
44
68
; CHECK-128-NEXT: ldr q0, [x0]
45
69
; CHECK-128-NEXT: ret
70
+ ;
71
+ ; CHECK-BE-128-LABEL: ld_nxv4i32:
72
+ ; CHECK-BE-128: // %bb.0:
73
+ ; CHECK-BE-128-NEXT: ptrue p0.s
74
+ ; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0]
75
+ ; CHECK-BE-128-NEXT: ret
46
76
%2 = load <vscale x 4 x i32 >, ptr %0 , align 4
47
77
ret <vscale x 4 x i32 > %2
48
78
}
@@ -52,6 +82,12 @@ define void @st_nxv4i32(ptr %0, <vscale x 4 x i32> %1) {
52
82
; CHECK-128: // %bb.0:
53
83
; CHECK-128-NEXT: str q0, [x0]
54
84
; CHECK-128-NEXT: ret
85
+ ;
86
+ ; CHECK-BE-128-LABEL: st_nxv4i32:
87
+ ; CHECK-BE-128: // %bb.0:
88
+ ; CHECK-BE-128-NEXT: ptrue p0.s
89
+ ; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0]
90
+ ; CHECK-BE-128-NEXT: ret
55
91
store <vscale x 4 x i32 > %1 , ptr %0 , align 4
56
92
ret void
57
93
}
@@ -61,6 +97,12 @@ define <vscale x 2 x i64> @ld_nxv2i64(ptr %0) {
61
97
; CHECK-128: // %bb.0:
62
98
; CHECK-128-NEXT: ldr q0, [x0]
63
99
; CHECK-128-NEXT: ret
100
+ ;
101
+ ; CHECK-BE-128-LABEL: ld_nxv2i64:
102
+ ; CHECK-BE-128: // %bb.0:
103
+ ; CHECK-BE-128-NEXT: ptrue p0.d
104
+ ; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0]
105
+ ; CHECK-BE-128-NEXT: ret
64
106
%2 = load <vscale x 2 x i64 >, ptr %0 , align 8
65
107
ret <vscale x 2 x i64 > %2
66
108
}
@@ -70,6 +112,12 @@ define void @st_nxv2i64(ptr %0, <vscale x 2 x i64> %1) {
70
112
; CHECK-128: // %bb.0:
71
113
; CHECK-128-NEXT: str q0, [x0]
72
114
; CHECK-128-NEXT: ret
115
+ ;
116
+ ; CHECK-BE-128-LABEL: st_nxv2i64:
117
+ ; CHECK-BE-128: // %bb.0:
118
+ ; CHECK-BE-128-NEXT: ptrue p0.d
119
+ ; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0]
120
+ ; CHECK-BE-128-NEXT: ret
73
121
store <vscale x 2 x i64 > %1 , ptr %0 , align 8
74
122
ret void
75
123
}
@@ -79,6 +127,12 @@ define <vscale x 8 x half> @ld_nxv8f16(ptr %0) {
79
127
; CHECK-128: // %bb.0:
80
128
; CHECK-128-NEXT: ldr q0, [x0]
81
129
; CHECK-128-NEXT: ret
130
+ ;
131
+ ; CHECK-BE-128-LABEL: ld_nxv8f16:
132
+ ; CHECK-BE-128: // %bb.0:
133
+ ; CHECK-BE-128-NEXT: ptrue p0.h
134
+ ; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0]
135
+ ; CHECK-BE-128-NEXT: ret
82
136
%2 = load <vscale x 8 x half >, ptr %0 , align 2
83
137
ret <vscale x 8 x half > %2
84
138
}
@@ -88,6 +142,12 @@ define void @st_nxv8f16(ptr %0, <vscale x 8 x half> %1) {
88
142
; CHECK-128: // %bb.0:
89
143
; CHECK-128-NEXT: str q0, [x0]
90
144
; CHECK-128-NEXT: ret
145
+ ;
146
+ ; CHECK-BE-128-LABEL: st_nxv8f16:
147
+ ; CHECK-BE-128: // %bb.0:
148
+ ; CHECK-BE-128-NEXT: ptrue p0.h
149
+ ; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0]
150
+ ; CHECK-BE-128-NEXT: ret
91
151
store <vscale x 8 x half > %1 , ptr %0 , align 2
92
152
ret void
93
153
}
@@ -97,6 +157,12 @@ define <vscale x 4 x float> @ld_nxv4f32(ptr %0) {
97
157
; CHECK-128: // %bb.0:
98
158
; CHECK-128-NEXT: ldr q0, [x0]
99
159
; CHECK-128-NEXT: ret
160
+ ;
161
+ ; CHECK-BE-128-LABEL: ld_nxv4f32:
162
+ ; CHECK-BE-128: // %bb.0:
163
+ ; CHECK-BE-128-NEXT: ptrue p0.s
164
+ ; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0]
165
+ ; CHECK-BE-128-NEXT: ret
100
166
%2 = load <vscale x 4 x float >, ptr %0 , align 4
101
167
ret <vscale x 4 x float > %2
102
168
}
@@ -106,6 +172,12 @@ define void @st_nxv4f32(ptr %0, <vscale x 4 x float> %1) {
106
172
; CHECK-128: // %bb.0:
107
173
; CHECK-128-NEXT: str q0, [x0]
108
174
; CHECK-128-NEXT: ret
175
+ ;
176
+ ; CHECK-BE-128-LABEL: st_nxv4f32:
177
+ ; CHECK-BE-128: // %bb.0:
178
+ ; CHECK-BE-128-NEXT: ptrue p0.s
179
+ ; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0]
180
+ ; CHECK-BE-128-NEXT: ret
109
181
store <vscale x 4 x float > %1 , ptr %0 , align 4
110
182
ret void
111
183
}
@@ -115,6 +187,12 @@ define <vscale x 2 x double> @ld_nxv2f64(ptr %0) {
115
187
; CHECK-128: // %bb.0:
116
188
; CHECK-128-NEXT: ldr q0, [x0]
117
189
; CHECK-128-NEXT: ret
190
+ ;
191
+ ; CHECK-BE-128-LABEL: ld_nxv2f64:
192
+ ; CHECK-BE-128: // %bb.0:
193
+ ; CHECK-BE-128-NEXT: ptrue p0.d
194
+ ; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0]
195
+ ; CHECK-BE-128-NEXT: ret
118
196
%2 = load <vscale x 2 x double >, ptr %0 , align 8
119
197
ret <vscale x 2 x double > %2
120
198
}
@@ -124,6 +202,38 @@ define void @st_nxv2f64(ptr %0, <vscale x 2 x double> %1) {
124
202
; CHECK-128: // %bb.0:
125
203
; CHECK-128-NEXT: str q0, [x0]
126
204
; CHECK-128-NEXT: ret
205
+ ;
206
+ ; CHECK-BE-128-LABEL: st_nxv2f64:
207
+ ; CHECK-BE-128: // %bb.0:
208
+ ; CHECK-BE-128-NEXT: ptrue p0.d
209
+ ; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0]
210
+ ; CHECK-BE-128-NEXT: ret
127
211
store <vscale x 2 x double > %1 , ptr %0 , align 8
128
212
ret void
129
213
}
214
+
215
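+ ; Test that adjacent SVE loads and stores fold into LDP/STP of Q registers when the vector length is capped at 128 bits (little-endian only).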
216
; Loads two <vscale x 16 x i8> values from %ldptr-16 and %ldptr, then stores
; them to %stptr-16 and %stptr (all accesses byte-aligned).
; The little-endian run expects each pair of accesses to merge into one
; ldp/stp of q0, q1 at offset #-16; the big-endian run expects them to stay as
; separate predicated ld1b/st1b accesses, with -16 materialised in x8.
+ define void @ldp_stp_nxv16i8_offset (ptr %ldptr , ptr %stptr ) {
217
+ ; CHECK-128-LABEL: ldp_stp_nxv16i8_offset:
218
+ ; CHECK-128: // %bb.0:
219
+ ; CHECK-128-NEXT: ldp q0, q1, [x0, #-16]
220
+ ; CHECK-128-NEXT: stp q0, q1, [x1, #-16]
221
+ ; CHECK-128-NEXT: ret
222
+ ;
223
+ ; CHECK-BE-128-LABEL: ldp_stp_nxv16i8_offset:
224
+ ; CHECK-BE-128: // %bb.0:
225
+ ; CHECK-BE-128-NEXT: ptrue p0.b
226
+ ; CHECK-BE-128-NEXT: mov x8, #-16 // =0xfffffffffffffff0
227
+ ; CHECK-BE-128-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
228
+ ; CHECK-BE-128-NEXT: ld1b { z1.b }, p0/z, [x0]
229
+ ; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x1, x8]
230
+ ; CHECK-BE-128-NEXT: st1b { z1.b }, p0, [x1]
231
+ ; CHECK-BE-128-NEXT: ret
232
+ %ldptr.1 = getelementptr inbounds i8 , ptr %ldptr , i64 -16
233
+ %ld1 = load <vscale x 16 x i8 >, ptr %ldptr.1 , align 1
234
+ %ld2 = load <vscale x 16 x i8 >, ptr %ldptr , align 1
235
+ %stptr.1 = getelementptr inbounds i8 , ptr %stptr , i64 -16
236
+ store <vscale x 16 x i8 > %ld1 , ptr %stptr.1 , align 1
237
+ store <vscale x 16 x i8 > %ld2 , ptr %stptr , align 1
238
+ ret void
239
+ }
0 commit comments