Skip to content

Commit 46ff959

Browse files
committed
[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64
Now that hardware has progressed, the compile-time cost that motivated the old limit of 10 is less of a concern, so we can raise the limit substantially (to 64), leaving room for more optimization.
1 parent f4c1e87 commit 46ff959

32 files changed

+16344
-16571
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
921921
// Debug or pseudo instructions cannot be counted against the limit.
922922
if (OtherMI.isDebugOrPseudoInstr())
923923
continue;
924-
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
924+
if (NumVisited > 64)
925925
return false;
926926
++NumVisited;
927927
if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
@@ -1094,7 +1094,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
10941094
// Debug or pseudo instructions cannot be counted against the limit.
10951095
if (OtherMI.isDebugOrPseudoInstr())
10961096
continue;
1097-
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
1097+
if (NumVisited > 64)
10981098
return false;
10991099
++NumVisited;
11001100
if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll

+92-108
Original file line numberDiff line numberDiff line change
@@ -236,22 +236,20 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
236236
; CHECK-NEXT: sunpklo z4.d, z2.s
237237
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
238238
; CHECK-NEXT: sunpklo z0.s, z0.h
239-
; CHECK-NEXT: mov z7.d, z1.d
240-
; CHECK-NEXT: sunpklo z2.d, z2.s
239+
; CHECK-NEXT: sunpklo z7.d, z1.s
240+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
241241
; CHECK-NEXT: sunpklo z5.d, z3.s
242242
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
243-
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
243+
; CHECK-NEXT: sunpklo z2.d, z2.s
244244
; CHECK-NEXT: sunpklo z1.d, z1.s
245-
; CHECK-NEXT: mov z6.d, z0.d
245+
; CHECK-NEXT: sunpklo z6.d, z0.s
246+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
246247
; CHECK-NEXT: sunpklo z3.d, z3.s
247248
; CHECK-NEXT: stp q4, q2, [x0]
248-
; CHECK-NEXT: sunpklo z4.d, z7.s
249-
; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
250249
; CHECK-NEXT: sunpklo z0.d, z0.s
250+
; CHECK-NEXT: stp q7, q1, [x0, #32]
251251
; CHECK-NEXT: stp q5, q3, [x0, #64]
252-
; CHECK-NEXT: sunpklo z2.d, z6.s
253-
; CHECK-NEXT: stp q1, q4, [x0, #32]
254-
; CHECK-NEXT: stp q0, q2, [x0, #96]
252+
; CHECK-NEXT: stp q6, q0, [x0, #96]
255253
; CHECK-NEXT: ret
256254
%b = sext <16 x i8> %a to <16 x i64>
257255
store <16 x i64> %b, ptr %out
@@ -263,63 +261,57 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
263261
; CHECK: // %bb.0:
264262
; CHECK-NEXT: ldp q1, q0, [x0]
265263
; CHECK-NEXT: add z0.b, z0.b, z0.b
266-
; CHECK-NEXT: add z1.b, z1.b, z1.b
267-
; CHECK-NEXT: mov z2.d, z0.d
268-
; CHECK-NEXT: sunpklo z0.h, z0.b
269-
; CHECK-NEXT: mov z3.d, z1.d
270-
; CHECK-NEXT: sunpklo z1.h, z1.b
264+
; CHECK-NEXT: add z2.b, z1.b, z1.b
265+
; CHECK-NEXT: sunpklo z3.h, z0.b
266+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
267+
; CHECK-NEXT: sunpklo z1.h, z2.b
271268
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
269+
; CHECK-NEXT: sunpklo z0.h, z0.b
270+
; CHECK-NEXT: sunpklo z4.s, z3.h
272271
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
273-
; CHECK-NEXT: sunpklo z4.s, z0.h
274-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
275272
; CHECK-NEXT: sunpklo z5.s, z1.h
276-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
277273
; CHECK-NEXT: sunpklo z2.h, z2.b
278-
; CHECK-NEXT: sunpklo z3.h, z3.b
279-
; CHECK-NEXT: sunpklo z0.s, z0.h
280-
; CHECK-NEXT: sunpklo z16.d, z4.s
274+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
275+
; CHECK-NEXT: sunpklo z6.s, z0.h
276+
; CHECK-NEXT: sunpklo z3.s, z3.h
277+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
278+
; CHECK-NEXT: sunpklo z7.d, z4.s
281279
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
282-
; CHECK-NEXT: sunpklo z1.s, z1.h
283-
; CHECK-NEXT: sunpklo z17.d, z5.s
280+
; CHECK-NEXT: sunpklo z16.d, z5.s
284281
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
285-
; CHECK-NEXT: sunpklo z6.s, z2.h
286-
; CHECK-NEXT: sunpklo z7.s, z3.h
282+
; CHECK-NEXT: sunpklo z17.s, z2.h
287283
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
288-
; CHECK-NEXT: sunpklo z4.d, z4.s
284+
; CHECK-NEXT: sunpklo z1.s, z1.h
285+
; CHECK-NEXT: sunpklo z0.s, z0.h
286+
; CHECK-NEXT: sunpklo z18.d, z6.s
287+
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
288+
; CHECK-NEXT: sunpklo z19.d, z3.s
289289
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
290-
; CHECK-NEXT: sunpklo z19.d, z0.s
290+
; CHECK-NEXT: sunpklo z4.d, z4.s
291291
; CHECK-NEXT: sunpklo z5.d, z5.s
292-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
293292
; CHECK-NEXT: sunpklo z2.s, z2.h
294-
; CHECK-NEXT: sunpklo z18.d, z6.s
295-
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
296-
; CHECK-NEXT: sunpklo z3.s, z3.h
297-
; CHECK-NEXT: stp q16, q4, [x1, #128]
298-
; CHECK-NEXT: mov z16.d, z7.d
299-
; CHECK-NEXT: sunpklo z0.d, z0.s
300-
; CHECK-NEXT: stp q17, q5, [x1]
301-
; CHECK-NEXT: sunpklo z5.d, z7.s
302-
; CHECK-NEXT: sunpklo z4.d, z6.s
303-
; CHECK-NEXT: mov z6.d, z1.d
304-
; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
305-
; CHECK-NEXT: mov z7.d, z2.d
306-
; CHECK-NEXT: stp q19, q0, [x1, #160]
307-
; CHECK-NEXT: sunpklo z0.d, z2.s
308-
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
309-
; CHECK-NEXT: sunpklo z1.d, z1.s
310-
; CHECK-NEXT: stp q18, q4, [x1, #192]
311-
; CHECK-NEXT: mov z4.d, z3.d
312-
; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
313-
; CHECK-NEXT: sunpklo z16.d, z16.s
314293
; CHECK-NEXT: sunpklo z6.d, z6.s
315-
; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
316-
; CHECK-NEXT: sunpklo z2.d, z7.s
317294
; CHECK-NEXT: sunpklo z3.d, z3.s
318-
; CHECK-NEXT: stp q5, q16, [x1, #64]
319-
; CHECK-NEXT: stp q1, q6, [x1, #32]
320-
; CHECK-NEXT: sunpklo z1.d, z4.s
321-
; CHECK-NEXT: stp q0, q2, [x1, #224]
322-
; CHECK-NEXT: stp q3, q1, [x1, #96]
295+
; CHECK-NEXT: stp q16, q5, [x1]
296+
; CHECK-NEXT: sunpklo z5.d, z1.s
297+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
298+
; CHECK-NEXT: stp q7, q4, [x1, #128]
299+
; CHECK-NEXT: sunpklo z4.d, z17.s
300+
; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
301+
; CHECK-NEXT: stp q18, q6, [x1, #192]
302+
; CHECK-NEXT: sunpklo z6.d, z0.s
303+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
304+
; CHECK-NEXT: stp q19, q3, [x1, #160]
305+
; CHECK-NEXT: sunpklo z3.d, z2.s
306+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
307+
; CHECK-NEXT: sunpklo z7.d, z17.s
308+
; CHECK-NEXT: sunpklo z1.d, z1.s
309+
; CHECK-NEXT: sunpklo z0.d, z0.s
310+
; CHECK-NEXT: sunpklo z2.d, z2.s
311+
; CHECK-NEXT: stp q5, q1, [x1, #32]
312+
; CHECK-NEXT: stp q4, q7, [x1, #64]
313+
; CHECK-NEXT: stp q3, q2, [x1, #96]
314+
; CHECK-NEXT: stp q6, q0, [x1, #224]
323315
; CHECK-NEXT: ret
324316
%a = load <32 x i8>, ptr %in
325317
%b = add <32 x i8> %a, %a
@@ -661,22 +653,20 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
661653
; CHECK-NEXT: uunpklo z4.d, z2.s
662654
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
663655
; CHECK-NEXT: uunpklo z0.s, z0.h
664-
; CHECK-NEXT: mov z7.d, z1.d
665-
; CHECK-NEXT: uunpklo z2.d, z2.s
656+
; CHECK-NEXT: uunpklo z7.d, z1.s
657+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
666658
; CHECK-NEXT: uunpklo z5.d, z3.s
667659
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
668-
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
660+
; CHECK-NEXT: uunpklo z2.d, z2.s
669661
; CHECK-NEXT: uunpklo z1.d, z1.s
670-
; CHECK-NEXT: mov z6.d, z0.d
662+
; CHECK-NEXT: uunpklo z6.d, z0.s
663+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
671664
; CHECK-NEXT: uunpklo z3.d, z3.s
672665
; CHECK-NEXT: stp q4, q2, [x0]
673-
; CHECK-NEXT: uunpklo z4.d, z7.s
674-
; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
675666
; CHECK-NEXT: uunpklo z0.d, z0.s
667+
; CHECK-NEXT: stp q7, q1, [x0, #32]
676668
; CHECK-NEXT: stp q5, q3, [x0, #64]
677-
; CHECK-NEXT: uunpklo z2.d, z6.s
678-
; CHECK-NEXT: stp q1, q4, [x0, #32]
679-
; CHECK-NEXT: stp q0, q2, [x0, #96]
669+
; CHECK-NEXT: stp q6, q0, [x0, #96]
680670
; CHECK-NEXT: ret
681671
%b = zext <16 x i8> %a to <16 x i64>
682672
store <16 x i64> %b, ptr %out
@@ -688,63 +678,57 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
688678
; CHECK: // %bb.0:
689679
; CHECK-NEXT: ldp q1, q0, [x0]
690680
; CHECK-NEXT: add z0.b, z0.b, z0.b
691-
; CHECK-NEXT: add z1.b, z1.b, z1.b
692-
; CHECK-NEXT: mov z2.d, z0.d
693-
; CHECK-NEXT: uunpklo z0.h, z0.b
694-
; CHECK-NEXT: mov z3.d, z1.d
695-
; CHECK-NEXT: uunpklo z1.h, z1.b
681+
; CHECK-NEXT: add z2.b, z1.b, z1.b
682+
; CHECK-NEXT: uunpklo z3.h, z0.b
683+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
684+
; CHECK-NEXT: uunpklo z1.h, z2.b
696685
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
686+
; CHECK-NEXT: uunpklo z0.h, z0.b
687+
; CHECK-NEXT: uunpklo z4.s, z3.h
697688
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
698-
; CHECK-NEXT: uunpklo z4.s, z0.h
699-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
700689
; CHECK-NEXT: uunpklo z5.s, z1.h
701-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
702690
; CHECK-NEXT: uunpklo z2.h, z2.b
703-
; CHECK-NEXT: uunpklo z3.h, z3.b
704-
; CHECK-NEXT: uunpklo z0.s, z0.h
705-
; CHECK-NEXT: uunpklo z16.d, z4.s
691+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
692+
; CHECK-NEXT: uunpklo z6.s, z0.h
693+
; CHECK-NEXT: uunpklo z3.s, z3.h
694+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
695+
; CHECK-NEXT: uunpklo z7.d, z4.s
706696
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
707-
; CHECK-NEXT: uunpklo z1.s, z1.h
708-
; CHECK-NEXT: uunpklo z17.d, z5.s
697+
; CHECK-NEXT: uunpklo z16.d, z5.s
709698
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
710-
; CHECK-NEXT: uunpklo z6.s, z2.h
711-
; CHECK-NEXT: uunpklo z7.s, z3.h
699+
; CHECK-NEXT: uunpklo z17.s, z2.h
712700
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
713-
; CHECK-NEXT: uunpklo z4.d, z4.s
701+
; CHECK-NEXT: uunpklo z1.s, z1.h
702+
; CHECK-NEXT: uunpklo z0.s, z0.h
703+
; CHECK-NEXT: uunpklo z18.d, z6.s
704+
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
705+
; CHECK-NEXT: uunpklo z19.d, z3.s
714706
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
715-
; CHECK-NEXT: uunpklo z19.d, z0.s
707+
; CHECK-NEXT: uunpklo z4.d, z4.s
716708
; CHECK-NEXT: uunpklo z5.d, z5.s
717-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
718709
; CHECK-NEXT: uunpklo z2.s, z2.h
719-
; CHECK-NEXT: uunpklo z18.d, z6.s
720-
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
721-
; CHECK-NEXT: uunpklo z3.s, z3.h
722-
; CHECK-NEXT: stp q16, q4, [x1, #128]
723-
; CHECK-NEXT: mov z16.d, z7.d
724-
; CHECK-NEXT: uunpklo z0.d, z0.s
725-
; CHECK-NEXT: stp q17, q5, [x1]
726-
; CHECK-NEXT: uunpklo z5.d, z7.s
727-
; CHECK-NEXT: uunpklo z4.d, z6.s
728-
; CHECK-NEXT: mov z6.d, z1.d
729-
; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
730-
; CHECK-NEXT: mov z7.d, z2.d
731-
; CHECK-NEXT: stp q19, q0, [x1, #160]
732-
; CHECK-NEXT: uunpklo z0.d, z2.s
733-
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
734-
; CHECK-NEXT: uunpklo z1.d, z1.s
735-
; CHECK-NEXT: stp q18, q4, [x1, #192]
736-
; CHECK-NEXT: mov z4.d, z3.d
737-
; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
738-
; CHECK-NEXT: uunpklo z16.d, z16.s
739710
; CHECK-NEXT: uunpklo z6.d, z6.s
740-
; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
741-
; CHECK-NEXT: uunpklo z2.d, z7.s
742711
; CHECK-NEXT: uunpklo z3.d, z3.s
743-
; CHECK-NEXT: stp q5, q16, [x1, #64]
744-
; CHECK-NEXT: stp q1, q6, [x1, #32]
745-
; CHECK-NEXT: uunpklo z1.d, z4.s
746-
; CHECK-NEXT: stp q0, q2, [x1, #224]
747-
; CHECK-NEXT: stp q3, q1, [x1, #96]
712+
; CHECK-NEXT: stp q16, q5, [x1]
713+
; CHECK-NEXT: uunpklo z5.d, z1.s
714+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
715+
; CHECK-NEXT: stp q7, q4, [x1, #128]
716+
; CHECK-NEXT: uunpklo z4.d, z17.s
717+
; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
718+
; CHECK-NEXT: stp q18, q6, [x1, #192]
719+
; CHECK-NEXT: uunpklo z6.d, z0.s
720+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
721+
; CHECK-NEXT: stp q19, q3, [x1, #160]
722+
; CHECK-NEXT: uunpklo z3.d, z2.s
723+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
724+
; CHECK-NEXT: uunpklo z7.d, z17.s
725+
; CHECK-NEXT: uunpklo z1.d, z1.s
726+
; CHECK-NEXT: uunpklo z0.d, z0.s
727+
; CHECK-NEXT: uunpklo z2.d, z2.s
728+
; CHECK-NEXT: stp q5, q1, [x1, #32]
729+
; CHECK-NEXT: stp q4, q7, [x1, #64]
730+
; CHECK-NEXT: stp q3, q2, [x1, #96]
731+
; CHECK-NEXT: stp q6, q0, [x1, #224]
748732
; CHECK-NEXT: ret
749733
%a = load <32 x i8>, ptr %in
750734
%b = add <32 x i8> %a, %a

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll

+34-44
Original file line numberDiff line numberDiff line change
@@ -207,36 +207,31 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
207207
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
208208
; CHECK-NEXT: uunpklo z0.s, z0.h
209209
; CHECK-NEXT: uunpklo z1.s, z1.h
210-
; CHECK-NEXT: mov z4.d, z2.d
211-
; CHECK-NEXT: mov z7.d, z3.d
212-
; CHECK-NEXT: mov z5.d, z0.d
213-
; CHECK-NEXT: ext z4.b, z4.b, z2.b, #8
210+
; CHECK-NEXT: uunpklo z4.d, z2.s
211+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
212+
; CHECK-NEXT: uunpklo z7.d, z3.s
213+
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
214+
; CHECK-NEXT: uunpklo z5.d, z0.s
215+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
214216
; CHECK-NEXT: uunpklo z2.d, z2.s
215-
; CHECK-NEXT: mov z6.d, z1.d
216-
; CHECK-NEXT: ext z7.b, z7.b, z3.b, #8
217+
; CHECK-NEXT: uunpklo z6.d, z1.s
218+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
217219
; CHECK-NEXT: uunpklo z3.d, z3.s
218-
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
219-
; CHECK-NEXT: uunpklo z4.d, z4.s
220+
; CHECK-NEXT: ucvtf z4.d, p0/m, z4.d
220221
; CHECK-NEXT: uunpklo z0.d, z0.s
221-
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
222222
; CHECK-NEXT: uunpklo z1.d, z1.s
223223
; CHECK-NEXT: ucvtf z2.d, p0/m, z2.d
224+
; CHECK-NEXT: ucvtf z5.d, p0/m, z5.d
225+
; CHECK-NEXT: ucvtf z6.d, p0/m, z6.d
224226
; CHECK-NEXT: ucvtf z3.d, p0/m, z3.d
225-
; CHECK-NEXT: uunpklo z7.d, z7.s
226-
; CHECK-NEXT: uunpklo z5.d, z5.s
227-
; CHECK-NEXT: ucvtf z4.d, p0/m, z4.d
228227
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
229-
; CHECK-NEXT: uunpklo z6.d, z6.s
230228
; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d
231-
; CHECK-NEXT: ucvtf z5.d, p0/m, z5.d
232-
; CHECK-NEXT: stp q2, q4, [x1, #64]
233-
; CHECK-NEXT: movprfx z2, z6
234-
; CHECK-NEXT: ucvtf z2.d, p0/m, z6.d
235-
; CHECK-NEXT: stp q1, q2, [x1, #32]
236-
; CHECK-NEXT: stp q0, q5, [x1, #96]
237-
; CHECK-NEXT: movprfx z0, z7
238-
; CHECK-NEXT: ucvtf z0.d, p0/m, z7.d
239-
; CHECK-NEXT: stp q3, q0, [x1]
229+
; CHECK-NEXT: stp q4, q2, [x1, #64]
230+
; CHECK-NEXT: movprfx z2, z7
231+
; CHECK-NEXT: ucvtf z2.d, p0/m, z7.d
232+
; CHECK-NEXT: stp q2, q3, [x1]
233+
; CHECK-NEXT: stp q5, q0, [x1, #96]
234+
; CHECK-NEXT: stp q6, q1, [x1, #32]
240235
; CHECK-NEXT: ret
241236
%op1 = load <16 x i16>, ptr %a
242237
%res = uitofp <16 x i16> %op1 to <16 x double>
@@ -780,36 +775,31 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) {
780775
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
781776
; CHECK-NEXT: sunpklo z0.s, z0.h
782777
; CHECK-NEXT: sunpklo z1.s, z1.h
783-
; CHECK-NEXT: mov z4.d, z2.d
784-
; CHECK-NEXT: mov z7.d, z3.d
785-
; CHECK-NEXT: mov z5.d, z0.d
786-
; CHECK-NEXT: ext z4.b, z4.b, z2.b, #8
778+
; CHECK-NEXT: sunpklo z4.d, z2.s
779+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
780+
; CHECK-NEXT: sunpklo z7.d, z3.s
781+
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
782+
; CHECK-NEXT: sunpklo z5.d, z0.s
783+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
787784
; CHECK-NEXT: sunpklo z2.d, z2.s
788-
; CHECK-NEXT: mov z6.d, z1.d
789-
; CHECK-NEXT: ext z7.b, z7.b, z3.b, #8
785+
; CHECK-NEXT: sunpklo z6.d, z1.s
786+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
790787
; CHECK-NEXT: sunpklo z3.d, z3.s
791-
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
792-
; CHECK-NEXT: sunpklo z4.d, z4.s
788+
; CHECK-NEXT: scvtf z4.d, p0/m, z4.d
793789
; CHECK-NEXT: sunpklo z0.d, z0.s
794-
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
795790
; CHECK-NEXT: sunpklo z1.d, z1.s
796791
; CHECK-NEXT: scvtf z2.d, p0/m, z2.d
792+
; CHECK-NEXT: scvtf z5.d, p0/m, z5.d
793+
; CHECK-NEXT: scvtf z6.d, p0/m, z6.d
797794
; CHECK-NEXT: scvtf z3.d, p0/m, z3.d
798-
; CHECK-NEXT: sunpklo z7.d, z7.s
799-
; CHECK-NEXT: sunpklo z5.d, z5.s
800-
; CHECK-NEXT: scvtf z4.d, p0/m, z4.d
801795
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
802-
; CHECK-NEXT: sunpklo z6.d, z6.s
803796
; CHECK-NEXT: scvtf z1.d, p0/m, z1.d
804-
; CHECK-NEXT: scvtf z5.d, p0/m, z5.d
805-
; CHECK-NEXT: stp q2, q4, [x1, #64]
806-
; CHECK-NEXT: movprfx z2, z6
807-
; CHECK-NEXT: scvtf z2.d, p0/m, z6.d
808-
; CHECK-NEXT: stp q1, q2, [x1, #32]
809-
; CHECK-NEXT: stp q0, q5, [x1, #96]
810-
; CHECK-NEXT: movprfx z0, z7
811-
; CHECK-NEXT: scvtf z0.d, p0/m, z7.d
812-
; CHECK-NEXT: stp q3, q0, [x1]
797+
; CHECK-NEXT: stp q4, q2, [x1, #64]
798+
; CHECK-NEXT: movprfx z2, z7
799+
; CHECK-NEXT: scvtf z2.d, p0/m, z7.d
800+
; CHECK-NEXT: stp q2, q3, [x1]
801+
; CHECK-NEXT: stp q5, q0, [x1, #96]
802+
; CHECK-NEXT: stp q6, q1, [x1, #32]
813803
; CHECK-NEXT: ret
814804
%op1 = load <16 x i16>, ptr %a
815805
%res = sitofp <16 x i16> %op1 to <16 x double>

0 commit comments

Comments
 (0)