[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64 #80627

AZero13 · 2024-02-05T02:01:38Z

Now that hardware has progressed, we can greatly increase the limit to something larger, allowing room for more optimization.

llvmbot · 2024-02-05T02:02:14Z

@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-backend-aarch64

Author: AtariDreams (AtariDreams)

Changes

Now that hardware has progressed, we do not need an arbitrary limit anymore.

Full diff: https://github.com/llvm/llvm-project/pull/80627.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/TwoAddressInstructionPass.cpp (-8)
(modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+88-96)
(modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+32-34)

diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 74d7904aee33a..9e466391385cd 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -914,16 +914,12 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
   }
 
   // Check if the reschedule will not break dependencies.
-  unsigned NumVisited = 0;
   MachineBasicBlock::iterator KillPos = KillMI;
   ++KillPos;
   for (MachineInstr &OtherMI : make_range(End, KillPos)) {
     // Debug or pseudo instructions cannot be counted against the limit.
     if (OtherMI.isDebugOrPseudoInstr())
       continue;
-    if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
-      return false;
-    ++NumVisited;
     if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
         OtherMI.isBranch() || OtherMI.isTerminator())
       // Don't move pass calls, etc.
@@ -1088,15 +1084,11 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
   }
 
   // Check if the reschedule will not break depedencies.
-  unsigned NumVisited = 0;
   for (MachineInstr &OtherMI :
        make_range(mi, MachineBasicBlock::iterator(KillMI))) {
     // Debug or pseudo instructions cannot be counted against the limit.
     if (OtherMI.isDebugOrPseudoInstr())
       continue;
-    if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
-      return false;
-    ++NumVisited;
     if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
         OtherMI.isBranch() || OtherMI.isTerminator())
       // Don't move pass calls, etc.
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index c7a89612d278f..68f09bf0e5932 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -236,22 +236,20 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    sunpklo z4.d, z2.s
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    mov z7.d, z1.d
-; CHECK-NEXT:    sunpklo z2.d, z2.s
+; CHECK-NEXT:    sunpklo z7.d, z1.s
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z5.d, z3.s
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z2.d, z2.s
 ; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    mov z6.d, z0.d
+; CHECK-NEXT:    sunpklo z6.d, z0.s
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z3.d, z3.s
 ; CHECK-NEXT:    stp q4, q2, [x0]
-; CHECK-NEXT:    sunpklo z4.d, z7.s
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    stp q7, q1, [x0, #32]
 ; CHECK-NEXT:    stp q5, q3, [x0, #64]
-; CHECK-NEXT:    sunpklo z2.d, z6.s
-; CHECK-NEXT:    stp q1, q4, [x0, #32]
-; CHECK-NEXT:    stp q0, q2, [x0, #96]
+; CHECK-NEXT:    stp q6, q0, [x0, #96]
 ; CHECK-NEXT:    ret
   %b = sext <16 x i8> %a to <16 x i64>
   store <16 x i64> %b, ptr %out
@@ -264,62 +262,60 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    sunpklo z2.h, z0.b
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z3.h, z1.b
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    sunpklo z4.s, z2.h
 ; CHECK-NEXT:    sunpklo z1.h, z1.b
+; CHECK-NEXT:    sunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    sunpklo z4.s, z0.h
+; CHECK-NEXT:    sunpklo z6.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z5.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z2.h, z2.b
-; CHECK-NEXT:    sunpklo z3.h, z3.b
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z16.d, z4.s
+; CHECK-NEXT:    sunpklo z7.d, z4.s
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    sunpklo z2.s, z2.h
+; CHECK-NEXT:    sunpklo z3.s, z3.h
+; CHECK-NEXT:    sunpklo z16.s, z1.h
 ; CHECK-NEXT:    sunpklo z17.d, z5.s
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT:    sunpklo z6.s, z2.h
-; CHECK-NEXT:    sunpklo z7.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z4.d, z4.s
-; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    sunpklo z19.d, z0.s
-; CHECK-NEXT:    sunpklo z5.d, z5.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z2.s, z2.h
 ; CHECK-NEXT:    sunpklo z18.d, z6.s
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    stp q16, q4, [x1, #128]
-; CHECK-NEXT:    mov z16.d, z7.d
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q17, q5, [x1]
-; CHECK-NEXT:    sunpklo z5.d, z7.s
-; CHECK-NEXT:    sunpklo z4.d, z6.s
-; CHECK-NEXT:    mov z6.d, z1.d
-; CHECK-NEXT:    ext z16.b, z16.b, z7.b, #8
+; CHECK-NEXT:    sunpklo z5.d, z5.s
+; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    sunpklo z19.d, z16.s
+; CHECK-NEXT:    sunpklo z6.d, z6.s
+; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
+; CHECK-NEXT:    stp q7, q4, [x1, #128]
 ; CHECK-NEXT:    mov z7.d, z2.d
-; CHECK-NEXT:    stp q19, q0, [x1, #160]
-; CHECK-NEXT:    sunpklo z0.d, z2.s
-; CHECK-NEXT:    ext z6.b, z6.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q18, q4, [x1, #192]
 ; CHECK-NEXT:    mov z4.d, z3.d
-; CHECK-NEXT:    ext z7.b, z7.b, z2.b, #8
+; CHECK-NEXT:    stp q17, q5, [x1]
+; CHECK-NEXT:    mov z5.d, z0.d
 ; CHECK-NEXT:    sunpklo z16.d, z16.s
-; CHECK-NEXT:    sunpklo z6.d, z6.s
+; CHECK-NEXT:    ext z7.b, z7.b, z2.b, #8
 ; CHECK-NEXT:    ext z4.b, z4.b, z3.b, #8
-; CHECK-NEXT:    sunpklo z2.d, z7.s
+; CHECK-NEXT:    stp q18, q6, [x1, #192]
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    sunpklo z2.d, z2.s
 ; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    stp q5, q16, [x1, #64]
-; CHECK-NEXT:    stp q1, q6, [x1, #32]
-; CHECK-NEXT:    sunpklo z1.d, z4.s
+; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    sunpklo z7.d, z7.s
+; CHECK-NEXT:    sunpklo z4.d, z4.s
+; CHECK-NEXT:    stp q19, q16, [x1, #64]
+; CHECK-NEXT:    ext z6.b, z6.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z1.d, z1.s
+; CHECK-NEXT:    stp q3, q4, [x1, #32]
+; CHECK-NEXT:    sunpklo z3.d, z6.s
+; CHECK-NEXT:    stp q2, q7, [x1, #160]
+; CHECK-NEXT:    sunpklo z2.d, z5.s
+; CHECK-NEXT:    stp q1, q3, [x1, #96]
 ; CHECK-NEXT:    stp q0, q2, [x1, #224]
-; CHECK-NEXT:    stp q3, q1, [x1, #96]
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -661,22 +657,20 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    mov z7.d, z1.d
-; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    uunpklo z7.d, z1.s
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z5.d, z3.s
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    mov z6.d, z0.d
+; CHECK-NEXT:    uunpklo z6.d, z0.s
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z3.d, z3.s
 ; CHECK-NEXT:    stp q4, q2, [x0]
-; CHECK-NEXT:    uunpklo z4.d, z7.s
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    stp q7, q1, [x0, #32]
 ; CHECK-NEXT:    stp q5, q3, [x0, #64]
-; CHECK-NEXT:    uunpklo z2.d, z6.s
-; CHECK-NEXT:    stp q1, q4, [x0, #32]
-; CHECK-NEXT:    stp q0, q2, [x0, #96]
+; CHECK-NEXT:    stp q6, q0, [x0, #96]
 ; CHECK-NEXT:    ret
   %b = zext <16 x i8> %a to <16 x i64>
   store <16 x i64> %b, ptr %out
@@ -689,62 +683,60 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    uunpklo z2.h, z0.b
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z3.h, z1.b
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    uunpklo z4.s, z2.h
 ; CHECK-NEXT:    uunpklo z1.h, z1.b
+; CHECK-NEXT:    uunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    uunpklo z4.s, z0.h
+; CHECK-NEXT:    uunpklo z6.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z5.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z2.h, z2.b
-; CHECK-NEXT:    uunpklo z3.h, z3.b
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z16.d, z4.s
+; CHECK-NEXT:    uunpklo z7.d, z4.s
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uunpklo z3.s, z3.h
+; CHECK-NEXT:    uunpklo z16.s, z1.h
 ; CHECK-NEXT:    uunpklo z17.d, z5.s
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT:    uunpklo z6.s, z2.h
-; CHECK-NEXT:    uunpklo z7.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z4.d, z4.s
-; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    uunpklo z19.d, z0.s
-; CHECK-NEXT:    uunpklo z5.d, z5.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    uunpklo z18.d, z6.s
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    stp q16, q4, [x1, #128]
-; CHECK-NEXT:    mov z16.d, z7.d
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q17, q5, [x1]
-; CHECK-NEXT:    uunpklo z5.d, z7.s
-; CHECK-NEXT:    uunpklo z4.d, z6.s
-; CHECK-NEXT:    mov z6.d, z1.d
-; CHECK-NEXT:    ext z16.b, z16.b, z7.b, #8
+; CHECK-NEXT:    uunpklo z5.d, z5.s
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpklo z19.d, z16.s
+; CHECK-NEXT:    uunpklo z6.d, z6.s
+; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
+; CHECK-NEXT:    stp q7, q4, [x1, #128]
 ; CHECK-NEXT:    mov z7.d, z2.d
-; CHECK-NEXT:    stp q19, q0, [x1, #160]
-; CHECK-NEXT:    uunpklo z0.d, z2.s
-; CHECK-NEXT:    ext z6.b, z6.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q18, q4, [x1, #192]
 ; CHECK-NEXT:    mov z4.d, z3.d
-; CHECK-NEXT:    ext z7.b, z7.b, z2.b, #8
+; CHECK-NEXT:    stp q17, q5, [x1]
+; CHECK-NEXT:    mov z5.d, z0.d
 ; CHECK-NEXT:    uunpklo z16.d, z16.s
-; CHECK-NEXT:    uunpklo z6.d, z6.s
+; CHECK-NEXT:    ext z7.b, z7.b, z2.b, #8
 ; CHECK-NEXT:    ext z4.b, z4.b, z3.b, #8
-; CHECK-NEXT:    uunpklo z2.d, z7.s
+; CHECK-NEXT:    stp q18, q6, [x1, #192]
+; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    stp q5, q16, [x1, #64]
-; CHECK-NEXT:    stp q1, q6, [x1, #32]
-; CHECK-NEXT:    uunpklo z1.d, z4.s
+; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uunpklo z7.d, z7.s
+; CHECK-NEXT:    uunpklo z4.d, z4.s
+; CHECK-NEXT:    stp q19, q16, [x1, #64]
+; CHECK-NEXT:    ext z6.b, z6.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    stp q3, q4, [x1, #32]
+; CHECK-NEXT:    uunpklo z3.d, z6.s
+; CHECK-NEXT:    stp q2, q7, [x1, #160]
+; CHECK-NEXT:    uunpklo z2.d, z5.s
+; CHECK-NEXT:    stp q1, q3, [x1, #96]
 ; CHECK-NEXT:    stp q0, q2, [x1, #224]
-; CHECK-NEXT:    stp q3, q1, [x1, #96]
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index c110e89326cc0..9d84af1c60cdd 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -207,36 +207,35 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    mov z4.d, z2.d
+; CHECK-NEXT:    uunpklo z4.d, z2.s
+; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    mov z7.d, z3.d
-; CHECK-NEXT:    mov z5.d, z0.d
-; CHECK-NEXT:    ext z4.b, z4.b, z2.b, #8
+; CHECK-NEXT:    uunpklo z5.d, z0.s
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    ucvtf z4.d, p0/m, z4.d
 ; CHECK-NEXT:    ext z7.b, z7.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z4.d, z4.s
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ext z6.b, z6.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    ucvtf z2.d, p0/m, z2.d
-; CHECK-NEXT:    ucvtf z3.d, p0/m, z3.d
+; CHECK-NEXT:    ucvtf z5.d, p0/m, z5.d
 ; CHECK-NEXT:    uunpklo z7.d, z7.s
-; CHECK-NEXT:    uunpklo z5.d, z5.s
-; CHECK-NEXT:    ucvtf z4.d, p0/m, z4.d
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uunpklo z6.d, z6.s
 ; CHECK-NEXT:    ucvtf z1.d, p0/m, z1.d
-; CHECK-NEXT:    ucvtf z5.d, p0/m, z5.d
-; CHECK-NEXT:    stp q2, q4, [x1, #64]
-; CHECK-NEXT:    movprfx z2, z6
-; CHECK-NEXT:    ucvtf z2.d, p0/m, z6.d
-; CHECK-NEXT:    stp q1, q2, [x1, #32]
-; CHECK-NEXT:    stp q0, q5, [x1, #96]
-; CHECK-NEXT:    movprfx z0, z7
-; CHECK-NEXT:    ucvtf z0.d, p0/m, z7.d
-; CHECK-NEXT:    stp q3, q0, [x1]
+; CHECK-NEXT:    stp q4, q2, [x1, #64]
+; CHECK-NEXT:    movprfx z4, z6
+; CHECK-NEXT:    ucvtf z4.d, p0/m, z6.d
+; CHECK-NEXT:    movprfx z2, z3
+; CHECK-NEXT:    ucvtf z2.d, p0/m, z3.d
+; CHECK-NEXT:    movprfx z3, z7
+; CHECK-NEXT:    ucvtf z3.d, p0/m, z7.d
+; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    stp q5, q0, [x1, #96]
+; CHECK-NEXT:    stp q1, q4, [x1, #32]
 ; CHECK-NEXT:    ret
   %op1 = load <16 x i16>, ptr %a
   %res = uitofp <16 x i16> %op1 to <16 x double>
@@ -780,36 +779,35 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    mov z4.d, z2.d
+; CHECK-NEXT:    sunpklo z4.d, z2.s
+; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    mov z7.d, z3.d
-; CHECK-NEXT:    mov z5.d, z0.d
-; CHECK-NEXT:    ext z4.b, z4.b, z2.b, #8
+; CHECK-NEXT:    sunpklo z5.d, z0.s
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z2.d, z2.s
 ; CHECK-NEXT:    mov z6.d, z1.d
+; CHECK-NEXT:    scvtf z4.d, p0/m, z4.d
 ; CHECK-NEXT:    ext z7.b, z7.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z4.d, z4.s
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    ext z6.b, z6.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    scvtf z2.d, p0/m, z2.d
-; CHECK-NEXT:    scvtf z3.d, p0/m, z3.d
+; CHECK-NEXT:    scvtf z5.d, p0/m, z5.d
 ; CHECK-NEXT:    sunpklo z7.d, z7.s
-; CHECK-NEXT:    sunpklo z5.d, z5.s
-; CHECK-NEXT:    scvtf z4.d, p0/m, z4.d
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
 ; CHECK-NEXT:    sunpklo z6.d, z6.s
 ; CHECK-NEXT:    scvtf z1.d, p0/m, z1.d
-; CHECK-NEXT:    scvtf z5.d, p0/m, z5.d
-; CHECK-NEXT:    stp q2, q4, [x1, #64]
-; CHECK-NEXT:    movprfx z2, z6
-; CHECK-NEXT:    scvtf z2.d, p0/m, z6.d
-; CHECK-NEXT:    stp q1, q2, [x1, #32]
-; CHECK-NEXT:    stp q0, q5, [x1, #96]
-; CHECK-NEXT:    movprfx z0, z7
-; CHECK-NEXT:    scvtf z0.d, p0/m, z7.d
-; CHECK-NEXT:    stp q3, q0, [x1]
+; CHECK-NEXT:    stp q4, q2, [x1, #64]
+; CHECK-NEXT:    movprfx z4, z6
+; CHECK-NEXT:    scvtf z4.d, p0/m, z6.d
+; CHECK-NEXT:    movprfx z2, z3
+; CHECK-NEXT:    scvtf z2.d, p0/m, z3.d
+; CHECK-NEXT:    movprfx z3, z7
+; CHECK-NEXT:    scvtf z3.d, p0/m, z7.d
+; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    stp q5, q0, [x1, #96]
+; CHECK-NEXT:    stp q1, q4, [x1, #32]
 ; CHECK-NEXT:    ret
   %op1 = load <16 x i16>, ptr %a
   %res = sitofp <16 x i16> %op1 to <16 x double>

github-actions · 2024-02-05T03:41:03Z

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off the "Keep my email addresses private" setting in your account.

github-actions · 2024-02-05T03:47:48Z

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off the "Keep my email addresses private" setting in your account.

github-actions · 2024-02-05T04:02:05Z

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off the "Keep my email addresses private" setting in your account.

RKSimon · 2024-02-05T14:43:10Z

@AtariDreams please can you work with @nikic to investigate the effect on compile time : https://llvm-compile-time-tracker.com/

AZero13 · 2024-02-05T18:06:49Z

> @AtariDreams please can you work with @nikic to investigate the effect on compile time : https://llvm-compile-time-tracker.com/

Maybe I should find out where the returns diminish enough and set that as the limit.
Across the 32 changed tests, I see a net saving of around 250 instructions.

github-actions · 2024-02-05T18:14:59Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

AZero13 · 2024-03-03T16:52:31Z

> Should the bound maybe be a cl::opt?

I don't think that is needed.

AZero13 · 2024-03-03T16:52:41Z

@topperc Thoughts?

efriedma-quic · 2024-03-04T07:09:53Z

I'm skeptical it's a good idea to remove the limit completely. The reason we have thresholds like this is that it allows us to use simple algorithms that would otherwise be O(n^2) or worse. Usually what happens is that the code appears to work fine on common benchmarks, but then someone files a bug report saying the compiler times out in specific cases.

nikic · 2024-03-04T08:56:26Z

The previous version of this PR that just raised the limits a bit looked fine to me.

> > Should the bound maybe be a cl::opt?
>
> I don't think that is needed.

It's indeed not needed, but it's pretty common to use a cl::opt for such cutoffs, so it's easier to test different values for them.

Now that hardware has progressed, we can greatly increase the limit to something larger, allowing room for more optimization.

…mmand line option Pulled out of comment made on llvm#80627

…mand line option Pulled out of comment made on llvm#80627 - to simplify further investigation into visit limits. Since 10 was the limit over a decade ago, I have decided to increase it by 10-fold because that is around the number where compile time vs. benefit starts to wear off for the tests that changed codegen.

llvmbot added the backend:AArch64 label Feb 5, 2024

AZero13 force-pushed the increase branch from 2ac5a83 to f677a7d Compare February 5, 2024 02:02

asl force-pushed the increase branch from f677a7d to e2d410f Compare February 5, 2024 03:40

asl mentioned this pull request Feb 5, 2024

Add github workflow that checks if a private email address was used to contribute to the repo and warn in this case #80514

Merged

AZero13 force-pushed the increase branch from e2d410f to d0813b9 Compare February 5, 2024 03:46

llvmbot added backend:ARM backend:X86 labels Feb 5, 2024

AZero13 force-pushed the increase branch from d0813b9 to edb1cc6 Compare February 5, 2024 04:01

RKSimon requested review from arsenm, nikic and topperc February 5, 2024 14:42

AZero13 force-pushed the increase branch 5 times, most recently from c217f64 to f995e46 Compare February 5, 2024 19:47

AZero13 changed the title ~~Remove NumVisited~~ Increase NumVisited limit to 16 Feb 5, 2024

AZero13 force-pushed the increase branch 2 times, most recently from 35846cb to 069afb0 Compare February 5, 2024 19:50

AZero13 changed the title ~~Increase NumVisited limit to 16~~ Increase NumVisited limit to 18 Feb 5, 2024

nikic reviewed Feb 5, 2024

View reviewed changes

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp Outdated Show resolved Hide resolved

AZero13 force-pushed the increase branch from 069afb0 to 7626c6c Compare February 5, 2024 20:14

AZero13 requested a review from nikic February 5, 2024 21:25

AZero13 force-pushed the increase branch from 6632d5e to a02a3d6 Compare March 3, 2024 16:51

AZero13 force-pushed the increase branch from a02a3d6 to 680ec2c Compare March 3, 2024 17:08

AZero13 force-pushed the increase branch from 680ec2c to 4749188 Compare March 4, 2024 16:43

AZero13 changed the title ~~[CodeGen] Remove NumVisited limit in TwoAddressInstructionPass~~ [CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64 Mar 4, 2024

AZero13 force-pushed the increase branch 4 times, most recently from a32bec2 to 56b3903 Compare March 5, 2024 19:24

AZero13 force-pushed the increase branch from 56b3903 to 4004181 Compare March 11, 2024 18:08

[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64

46ff959

Now that hardware has progressed, we can greatly increase the limit to something larger, allowing room for more optimization.

AZero13 force-pushed the increase branch from 4004181 to 46ff959 Compare March 11, 2024 20:29

RKSimon added a commit to RKSimon/llvm-project that referenced this pull request Mar 11, 2024

[CodeGen] TwoAddressInstructionPass - Control NumVisited limit via co…

89cd569

…mmand line option Pulled out of comment made on llvm#80627

RKSimon mentioned this pull request Mar 11, 2024

[CodeGen] TwoAddressInstructionPass - Control NumVisited limit via command line option #84845

Open

AZero13 closed this Jul 23, 2024

AZero13 deleted the increase branch July 23, 2024 00:53

AZero13 mentioned this pull request Jul 23, 2024

[CodeGen] TwoAddressInstructionPass: Update default option #100046

Closed

AZero13 mentioned this pull request Jul 23, 2024

[CodeGen] TwoAddressInstructionPass: Control NumVisited limit via command line option #100125

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64 #80627

[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64 #80627

Uh oh!

AZero13 commented Feb 5, 2024 •

edited

Loading

Uh oh!

llvmbot commented Feb 5, 2024 •

edited

Loading

Uh oh!

github-actions bot commented Feb 5, 2024

Uh oh!

github-actions bot commented Feb 5, 2024

Uh oh!

github-actions bot commented Feb 5, 2024

Uh oh!

RKSimon commented Feb 5, 2024

Uh oh!

AZero13 commented Feb 5, 2024

Uh oh!

github-actions bot commented Feb 5, 2024 •

edited

Loading

Uh oh!

Uh oh!

AZero13 commented Mar 3, 2024

Uh oh!

AZero13 commented Mar 3, 2024

Uh oh!

efriedma-quic commented Mar 4, 2024

Uh oh!

nikic commented Mar 4, 2024

Uh oh!

Uh oh!

[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64 #80627

[CodeGen] Increase NumVisited limit in TwoAddressInstructionPass to 64 #80627

Uh oh!

Conversation

AZero13 commented Feb 5, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Feb 5, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Feb 5, 2024

Uh oh!

github-actions bot commented Feb 5, 2024

Uh oh!

github-actions bot commented Feb 5, 2024

Uh oh!

RKSimon commented Feb 5, 2024

Uh oh!

AZero13 commented Feb 5, 2024

Uh oh!

github-actions bot commented Feb 5, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

AZero13 commented Mar 3, 2024

Uh oh!

AZero13 commented Mar 3, 2024

Uh oh!

efriedma-quic commented Mar 4, 2024

Uh oh!

nikic commented Mar 4, 2024

Uh oh!

Uh oh!

AZero13 commented Feb 5, 2024 •

edited

Loading

llvmbot commented Feb 5, 2024 •

edited

Loading

github-actions bot commented Feb 5, 2024 •

edited

Loading