@@ -236,22 +236,20 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
236
236
; CHECK-NEXT: sunpklo z4.d, z2.s
237
237
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
238
238
; CHECK-NEXT: sunpklo z0.s, z0.h
239
- ; CHECK-NEXT: mov z7.d, z1.d
240
- ; CHECK-NEXT: sunpklo z2.d, z2.s
239
+ ; CHECK-NEXT: sunpklo z7.d, z1.s
240
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
241
241
; CHECK-NEXT: sunpklo z5.d, z3.s
242
242
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
243
- ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
243
+ ; CHECK-NEXT: sunpklo z2.d, z2.s
244
244
; CHECK-NEXT: sunpklo z1.d, z1.s
245
- ; CHECK-NEXT: mov z6.d, z0.d
245
+ ; CHECK-NEXT: sunpklo z6.d, z0.s
246
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
246
247
; CHECK-NEXT: sunpklo z3.d, z3.s
247
248
; CHECK-NEXT: stp q4, q2, [x0]
248
- ; CHECK-NEXT: sunpklo z4.d, z7.s
249
- ; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
250
249
; CHECK-NEXT: sunpklo z0.d, z0.s
250
+ ; CHECK-NEXT: stp q7, q1, [x0, #32]
251
251
; CHECK-NEXT: stp q5, q3, [x0, #64]
252
- ; CHECK-NEXT: sunpklo z2.d, z6.s
253
- ; CHECK-NEXT: stp q1, q4, [x0, #32]
254
- ; CHECK-NEXT: stp q0, q2, [x0, #96]
252
+ ; CHECK-NEXT: stp q6, q0, [x0, #96]
255
253
; CHECK-NEXT: ret
256
254
%b = sext <16 x i8> %a to <16 x i64>
257
255
store <16 x i64> %b, ptr %out
@@ -263,63 +261,57 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
263
261
; CHECK: // %bb.0:
264
262
; CHECK-NEXT: ldp q1, q0, [x0]
265
263
; CHECK-NEXT: add z0.b, z0.b, z0.b
266
- ; CHECK-NEXT: add z1.b, z1.b, z1.b
267
- ; CHECK-NEXT: mov z2.d, z0.d
268
- ; CHECK-NEXT: sunpklo z0.h, z0.b
269
- ; CHECK-NEXT: mov z3.d, z1.d
270
- ; CHECK-NEXT: sunpklo z1.h, z1.b
264
+ ; CHECK-NEXT: add z2.b, z1.b, z1.b
265
+ ; CHECK-NEXT: sunpklo z3.h, z0.b
266
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
267
+ ; CHECK-NEXT: sunpklo z1.h, z2.b
271
268
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
269
+ ; CHECK-NEXT: sunpklo z0.h, z0.b
270
+ ; CHECK-NEXT: sunpklo z4.s, z3.h
272
271
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
273
- ; CHECK-NEXT: sunpklo z4.s, z0.h
274
- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
275
272
; CHECK-NEXT: sunpklo z5.s, z1.h
276
- ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
277
273
; CHECK-NEXT: sunpklo z2.h, z2.b
278
- ; CHECK-NEXT: sunpklo z3.h, z3.b
279
- ; CHECK-NEXT: sunpklo z0.s, z0.h
280
- ; CHECK-NEXT: sunpklo z16.d, z4.s
274
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
275
+ ; CHECK-NEXT: sunpklo z6.s, z0.h
276
+ ; CHECK-NEXT: sunpklo z3.s, z3.h
277
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
278
+ ; CHECK-NEXT: sunpklo z7.d, z4.s
281
279
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
282
- ; CHECK-NEXT: sunpklo z1.s, z1.h
283
- ; CHECK-NEXT: sunpklo z17.d, z5.s
280
+ ; CHECK-NEXT: sunpklo z16.d, z5.s
284
281
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
285
- ; CHECK-NEXT: sunpklo z6.s, z2.h
286
- ; CHECK-NEXT: sunpklo z7.s, z3.h
282
+ ; CHECK-NEXT: sunpklo z17.s, z2.h
287
283
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
288
- ; CHECK-NEXT: sunpklo z4.d, z4.s
284
+ ; CHECK-NEXT: sunpklo z1.s, z1.h
285
+ ; CHECK-NEXT: sunpklo z0.s, z0.h
286
+ ; CHECK-NEXT: sunpklo z18.d, z6.s
287
+ ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
288
+ ; CHECK-NEXT: sunpklo z19.d, z3.s
289
289
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
290
- ; CHECK-NEXT: sunpklo z19.d, z0.s
290
+ ; CHECK-NEXT: sunpklo z4.d, z4.s
291
291
; CHECK-NEXT: sunpklo z5.d, z5.s
292
- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
293
292
; CHECK-NEXT: sunpklo z2.s, z2.h
294
- ; CHECK-NEXT: sunpklo z18.d, z6.s
295
- ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
296
- ; CHECK-NEXT: sunpklo z3.s, z3.h
297
- ; CHECK-NEXT: stp q16, q4, [x1, #128]
298
- ; CHECK-NEXT: mov z16.d, z7.d
299
- ; CHECK-NEXT: sunpklo z0.d, z0.s
300
- ; CHECK-NEXT: stp q17, q5, [x1]
301
- ; CHECK-NEXT: sunpklo z5.d, z7.s
302
- ; CHECK-NEXT: sunpklo z4.d, z6.s
303
- ; CHECK-NEXT: mov z6.d, z1.d
304
- ; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
305
- ; CHECK-NEXT: mov z7.d, z2.d
306
- ; CHECK-NEXT: stp q19, q0, [x1, #160]
307
- ; CHECK-NEXT: sunpklo z0.d, z2.s
308
- ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
309
- ; CHECK-NEXT: sunpklo z1.d, z1.s
310
- ; CHECK-NEXT: stp q18, q4, [x1, #192]
311
- ; CHECK-NEXT: mov z4.d, z3.d
312
- ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
313
- ; CHECK-NEXT: sunpklo z16.d, z16.s
314
293
; CHECK-NEXT: sunpklo z6.d, z6.s
315
- ; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
316
- ; CHECK-NEXT: sunpklo z2.d, z7.s
317
294
; CHECK-NEXT: sunpklo z3.d, z3.s
318
- ; CHECK-NEXT: stp q5, q16, [x1, #64]
319
- ; CHECK-NEXT: stp q1, q6, [x1, #32]
320
- ; CHECK-NEXT: sunpklo z1.d, z4.s
321
- ; CHECK-NEXT: stp q0, q2, [x1, #224]
322
- ; CHECK-NEXT: stp q3, q1, [x1, #96]
295
+ ; CHECK-NEXT: stp q16, q5, [x1]
296
+ ; CHECK-NEXT: sunpklo z5.d, z1.s
297
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
298
+ ; CHECK-NEXT: stp q7, q4, [x1, #128]
299
+ ; CHECK-NEXT: sunpklo z4.d, z17.s
300
+ ; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
301
+ ; CHECK-NEXT: stp q18, q6, [x1, #192]
302
+ ; CHECK-NEXT: sunpklo z6.d, z0.s
303
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
304
+ ; CHECK-NEXT: stp q19, q3, [x1, #160]
305
+ ; CHECK-NEXT: sunpklo z3.d, z2.s
306
+ ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
307
+ ; CHECK-NEXT: sunpklo z7.d, z17.s
308
+ ; CHECK-NEXT: sunpklo z1.d, z1.s
309
+ ; CHECK-NEXT: sunpklo z0.d, z0.s
310
+ ; CHECK-NEXT: sunpklo z2.d, z2.s
311
+ ; CHECK-NEXT: stp q5, q1, [x1, #32]
312
+ ; CHECK-NEXT: stp q4, q7, [x1, #64]
313
+ ; CHECK-NEXT: stp q3, q2, [x1, #96]
314
+ ; CHECK-NEXT: stp q6, q0, [x1, #224]
323
315
; CHECK-NEXT: ret
324
316
%a = load <32 x i8>, ptr %in
325
317
%b = add <32 x i8> %a, %a
@@ -661,22 +653,20 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
661
653
; CHECK-NEXT: uunpklo z4.d, z2.s
662
654
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
663
655
; CHECK-NEXT: uunpklo z0.s, z0.h
664
- ; CHECK-NEXT: mov z7.d, z1.d
665
- ; CHECK-NEXT: uunpklo z2.d, z2.s
656
+ ; CHECK-NEXT: uunpklo z7.d, z1.s
657
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
666
658
; CHECK-NEXT: uunpklo z5.d, z3.s
667
659
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
668
- ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
660
+ ; CHECK-NEXT: uunpklo z2.d, z2.s
669
661
; CHECK-NEXT: uunpklo z1.d, z1.s
670
- ; CHECK-NEXT: mov z6.d, z0.d
662
+ ; CHECK-NEXT: uunpklo z6.d, z0.s
663
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
671
664
; CHECK-NEXT: uunpklo z3.d, z3.s
672
665
; CHECK-NEXT: stp q4, q2, [x0]
673
- ; CHECK-NEXT: uunpklo z4.d, z7.s
674
- ; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
675
666
; CHECK-NEXT: uunpklo z0.d, z0.s
667
+ ; CHECK-NEXT: stp q7, q1, [x0, #32]
676
668
; CHECK-NEXT: stp q5, q3, [x0, #64]
677
- ; CHECK-NEXT: uunpklo z2.d, z6.s
678
- ; CHECK-NEXT: stp q1, q4, [x0, #32]
679
- ; CHECK-NEXT: stp q0, q2, [x0, #96]
669
+ ; CHECK-NEXT: stp q6, q0, [x0, #96]
680
670
; CHECK-NEXT: ret
681
671
%b = zext <16 x i8> %a to <16 x i64>
682
672
store <16 x i64> %b, ptr %out
@@ -688,63 +678,57 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
688
678
; CHECK: // %bb.0:
689
679
; CHECK-NEXT: ldp q1, q0, [x0]
690
680
; CHECK-NEXT: add z0.b, z0.b, z0.b
691
- ; CHECK-NEXT: add z1.b, z1.b, z1.b
692
- ; CHECK-NEXT: mov z2.d, z0.d
693
- ; CHECK-NEXT: uunpklo z0.h, z0.b
694
- ; CHECK-NEXT: mov z3.d, z1.d
695
- ; CHECK-NEXT: uunpklo z1.h, z1.b
681
+ ; CHECK-NEXT: add z2.b, z1.b, z1.b
682
+ ; CHECK-NEXT: uunpklo z3.h, z0.b
683
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
684
+ ; CHECK-NEXT: uunpklo z1.h, z2.b
696
685
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
686
+ ; CHECK-NEXT: uunpklo z0.h, z0.b
687
+ ; CHECK-NEXT: uunpklo z4.s, z3.h
697
688
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
698
- ; CHECK-NEXT: uunpklo z4.s, z0.h
699
- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
700
689
; CHECK-NEXT: uunpklo z5.s, z1.h
701
- ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
702
690
; CHECK-NEXT: uunpklo z2.h, z2.b
703
- ; CHECK-NEXT: uunpklo z3.h, z3.b
704
- ; CHECK-NEXT: uunpklo z0.s, z0.h
705
- ; CHECK-NEXT: uunpklo z16.d, z4.s
691
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
692
+ ; CHECK-NEXT: uunpklo z6.s, z0.h
693
+ ; CHECK-NEXT: uunpklo z3.s, z3.h
694
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
695
+ ; CHECK-NEXT: uunpklo z7.d, z4.s
706
696
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
707
- ; CHECK-NEXT: uunpklo z1.s, z1.h
708
- ; CHECK-NEXT: uunpklo z17.d, z5.s
697
+ ; CHECK-NEXT: uunpklo z16.d, z5.s
709
698
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
710
- ; CHECK-NEXT: uunpklo z6.s, z2.h
711
- ; CHECK-NEXT: uunpklo z7.s, z3.h
699
+ ; CHECK-NEXT: uunpklo z17.s, z2.h
712
700
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
713
- ; CHECK-NEXT: uunpklo z4.d, z4.s
701
+ ; CHECK-NEXT: uunpklo z1.s, z1.h
702
+ ; CHECK-NEXT: uunpklo z0.s, z0.h
703
+ ; CHECK-NEXT: uunpklo z18.d, z6.s
704
+ ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
705
+ ; CHECK-NEXT: uunpklo z19.d, z3.s
714
706
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
715
- ; CHECK-NEXT: uunpklo z19.d, z0.s
707
+ ; CHECK-NEXT: uunpklo z4.d, z4.s
716
708
; CHECK-NEXT: uunpklo z5.d, z5.s
717
- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
718
709
; CHECK-NEXT: uunpklo z2.s, z2.h
719
- ; CHECK-NEXT: uunpklo z18.d, z6.s
720
- ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
721
- ; CHECK-NEXT: uunpklo z3.s, z3.h
722
- ; CHECK-NEXT: stp q16, q4, [x1, #128]
723
- ; CHECK-NEXT: mov z16.d, z7.d
724
- ; CHECK-NEXT: uunpklo z0.d, z0.s
725
- ; CHECK-NEXT: stp q17, q5, [x1]
726
- ; CHECK-NEXT: uunpklo z5.d, z7.s
727
- ; CHECK-NEXT: uunpklo z4.d, z6.s
728
- ; CHECK-NEXT: mov z6.d, z1.d
729
- ; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
730
- ; CHECK-NEXT: mov z7.d, z2.d
731
- ; CHECK-NEXT: stp q19, q0, [x1, #160]
732
- ; CHECK-NEXT: uunpklo z0.d, z2.s
733
- ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
734
- ; CHECK-NEXT: uunpklo z1.d, z1.s
735
- ; CHECK-NEXT: stp q18, q4, [x1, #192]
736
- ; CHECK-NEXT: mov z4.d, z3.d
737
- ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
738
- ; CHECK-NEXT: uunpklo z16.d, z16.s
739
710
; CHECK-NEXT: uunpklo z6.d, z6.s
740
- ; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
741
- ; CHECK-NEXT: uunpklo z2.d, z7.s
742
711
; CHECK-NEXT: uunpklo z3.d, z3.s
743
- ; CHECK-NEXT: stp q5, q16, [x1, #64]
744
- ; CHECK-NEXT: stp q1, q6, [x1, #32]
745
- ; CHECK-NEXT: uunpklo z1.d, z4.s
746
- ; CHECK-NEXT: stp q0, q2, [x1, #224]
747
- ; CHECK-NEXT: stp q3, q1, [x1, #96]
712
+ ; CHECK-NEXT: stp q16, q5, [x1]
713
+ ; CHECK-NEXT: uunpklo z5.d, z1.s
714
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
715
+ ; CHECK-NEXT: stp q7, q4, [x1, #128]
716
+ ; CHECK-NEXT: uunpklo z4.d, z17.s
717
+ ; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
718
+ ; CHECK-NEXT: stp q18, q6, [x1, #192]
719
+ ; CHECK-NEXT: uunpklo z6.d, z0.s
720
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
721
+ ; CHECK-NEXT: stp q19, q3, [x1, #160]
722
+ ; CHECK-NEXT: uunpklo z3.d, z2.s
723
+ ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
724
+ ; CHECK-NEXT: uunpklo z7.d, z17.s
725
+ ; CHECK-NEXT: uunpklo z1.d, z1.s
726
+ ; CHECK-NEXT: uunpklo z0.d, z0.s
727
+ ; CHECK-NEXT: uunpklo z2.d, z2.s
728
+ ; CHECK-NEXT: stp q5, q1, [x1, #32]
729
+ ; CHECK-NEXT: stp q4, q7, [x1, #64]
730
+ ; CHECK-NEXT: stp q3, q2, [x1, #96]
731
+ ; CHECK-NEXT: stp q6, q0, [x1, #224]
748
732
; CHECK-NEXT: ret
749
733
%a = load <32 x i8>, ptr %in
750
734
%b = add <32 x i8> %a, %a
0 commit comments