Skip to content

Commit fd64f2c

Browse files
committed
[CodeGen] TwoAddressInstructionPass: Control NumVisited limit via command line option
Pulled out of a comment made on #80627, to simplify further investigation into visit limits. Since the limit of 10 was chosen over a decade ago, I have decided to increase the default 10-fold (to 100), because that is roughly the point at which the benefit, weighed against the extra compile time, starts to wear off for the tests whose codegen changed.
1 parent d6905ea commit fd64f2c

35 files changed

+19633
-19825
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

+10-3
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ EnableRescheduling("twoaddr-reschedule",
8080
cl::desc("Coalesce copies by rescheduling (default=true)"),
8181
cl::init(true), cl::Hidden);
8282

83+
// Limit the number of rescheduling visits to dependent instructions.
84+
// FIXME: Arbitrary limit to reduce compile time cost.
85+
static cl::opt<unsigned>
86+
MaxVisits("twoaddr-visit-limit", cl::Hidden, cl::init(100),
87+
cl::desc("Maximum number of rescheduling visits to dependent "
88+
"instructions (0 = no limit)"));
89+
8390
// Limit the number of dataflow edges to traverse when evaluating the benefit
8491
// of commuting operands.
8592
static cl::opt<unsigned> MaxDataFlowEdge(
@@ -994,7 +1001,7 @@ bool TwoAddressInstructionImpl::rescheduleMIBelowKill(
9941001
// Debug or pseudo instructions cannot be counted against the limit.
9951002
if (OtherMI.isDebugOrPseudoInstr())
9961003
continue;
997-
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
1004+
if (MaxVisits && NumVisited > MaxVisits)
9981005
return false;
9991006
++NumVisited;
10001007
if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
@@ -1160,14 +1167,14 @@ bool TwoAddressInstructionImpl::rescheduleKillAboveMI(
11601167
}
11611168
}
11621169

1163-
// Check if the reschedule will not break depedencies.
1170+
// Check if the reschedule will not break dependencies.
11641171
unsigned NumVisited = 0;
11651172
for (MachineInstr &OtherMI :
11661173
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
11671174
// Debug or pseudo instructions cannot be counted against the limit.
11681175
if (OtherMI.isDebugOrPseudoInstr())
11691176
continue;
1170-
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
1177+
if (MaxVisits && NumVisited > MaxVisits)
11711178
return false;
11721179
++NumVisited;
11731180
if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll

+78-90
Original file line numberDiff line numberDiff line change
@@ -1148,63 +1148,57 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
11481148
; CHECK: // %bb.0:
11491149
; CHECK-NEXT: ldp q1, q0, [x0]
11501150
; CHECK-NEXT: add z0.b, z0.b, z0.b
1151-
; CHECK-NEXT: add z1.b, z1.b, z1.b
1152-
; CHECK-NEXT: mov z2.d, z0.d
1153-
; CHECK-NEXT: sunpklo z0.h, z0.b
1154-
; CHECK-NEXT: mov z3.d, z1.d
1155-
; CHECK-NEXT: sunpklo z1.h, z1.b
1151+
; CHECK-NEXT: add z2.b, z1.b, z1.b
1152+
; CHECK-NEXT: sunpklo z3.h, z0.b
1153+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
1154+
; CHECK-NEXT: sunpklo z1.h, z2.b
11561155
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
1156+
; CHECK-NEXT: sunpklo z0.h, z0.b
1157+
; CHECK-NEXT: sunpklo z4.s, z3.h
11571158
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
1158-
; CHECK-NEXT: sunpklo z4.s, z0.h
1159-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
11601159
; CHECK-NEXT: sunpklo z5.s, z1.h
1161-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
11621160
; CHECK-NEXT: sunpklo z2.h, z2.b
1163-
; CHECK-NEXT: sunpklo z3.h, z3.b
1164-
; CHECK-NEXT: sunpklo z0.s, z0.h
1165-
; CHECK-NEXT: sunpklo z16.d, z4.s
1161+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
1162+
; CHECK-NEXT: sunpklo z6.s, z0.h
1163+
; CHECK-NEXT: sunpklo z3.s, z3.h
1164+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
1165+
; CHECK-NEXT: sunpklo z7.d, z4.s
11661166
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
1167-
; CHECK-NEXT: sunpklo z1.s, z1.h
1168-
; CHECK-NEXT: sunpklo z17.d, z5.s
1167+
; CHECK-NEXT: sunpklo z16.d, z5.s
11691168
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
1170-
; CHECK-NEXT: sunpklo z6.s, z2.h
1171-
; CHECK-NEXT: sunpklo z7.s, z3.h
1169+
; CHECK-NEXT: sunpklo z17.s, z2.h
11721170
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
1173-
; CHECK-NEXT: sunpklo z4.d, z4.s
1171+
; CHECK-NEXT: sunpklo z1.s, z1.h
1172+
; CHECK-NEXT: sunpklo z0.s, z0.h
1173+
; CHECK-NEXT: sunpklo z18.d, z6.s
1174+
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
1175+
; CHECK-NEXT: sunpklo z19.d, z3.s
11741176
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
1175-
; CHECK-NEXT: sunpklo z19.d, z0.s
1177+
; CHECK-NEXT: sunpklo z4.d, z4.s
11761178
; CHECK-NEXT: sunpklo z5.d, z5.s
1177-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
11781179
; CHECK-NEXT: sunpklo z2.s, z2.h
1179-
; CHECK-NEXT: sunpklo z18.d, z6.s
1180-
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
1181-
; CHECK-NEXT: sunpklo z3.s, z3.h
1182-
; CHECK-NEXT: stp q16, q4, [x1, #128]
1183-
; CHECK-NEXT: mov z16.d, z7.d
1184-
; CHECK-NEXT: sunpklo z0.d, z0.s
1185-
; CHECK-NEXT: stp q17, q5, [x1]
1186-
; CHECK-NEXT: sunpklo z5.d, z7.s
1187-
; CHECK-NEXT: sunpklo z4.d, z6.s
1188-
; CHECK-NEXT: mov z6.d, z1.d
1189-
; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
1190-
; CHECK-NEXT: mov z7.d, z2.d
1191-
; CHECK-NEXT: stp q19, q0, [x1, #160]
1192-
; CHECK-NEXT: sunpklo z0.d, z2.s
1193-
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
1194-
; CHECK-NEXT: sunpklo z1.d, z1.s
1195-
; CHECK-NEXT: stp q18, q4, [x1, #192]
1196-
; CHECK-NEXT: mov z4.d, z3.d
1197-
; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
1198-
; CHECK-NEXT: sunpklo z16.d, z16.s
11991180
; CHECK-NEXT: sunpklo z6.d, z6.s
1200-
; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
1201-
; CHECK-NEXT: sunpklo z2.d, z7.s
12021181
; CHECK-NEXT: sunpklo z3.d, z3.s
1203-
; CHECK-NEXT: stp q5, q16, [x1, #64]
1204-
; CHECK-NEXT: stp q1, q6, [x1, #32]
1205-
; CHECK-NEXT: sunpklo z1.d, z4.s
1206-
; CHECK-NEXT: stp q0, q2, [x1, #224]
1207-
; CHECK-NEXT: stp q3, q1, [x1, #96]
1182+
; CHECK-NEXT: stp q16, q5, [x1]
1183+
; CHECK-NEXT: sunpklo z5.d, z1.s
1184+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
1185+
; CHECK-NEXT: stp q7, q4, [x1, #128]
1186+
; CHECK-NEXT: sunpklo z4.d, z17.s
1187+
; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
1188+
; CHECK-NEXT: stp q18, q6, [x1, #192]
1189+
; CHECK-NEXT: sunpklo z6.d, z0.s
1190+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
1191+
; CHECK-NEXT: stp q19, q3, [x1, #160]
1192+
; CHECK-NEXT: sunpklo z3.d, z2.s
1193+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
1194+
; CHECK-NEXT: sunpklo z7.d, z17.s
1195+
; CHECK-NEXT: sunpklo z1.d, z1.s
1196+
; CHECK-NEXT: sunpklo z0.d, z0.s
1197+
; CHECK-NEXT: sunpklo z2.d, z2.s
1198+
; CHECK-NEXT: stp q5, q1, [x1, #32]
1199+
; CHECK-NEXT: stp q4, q7, [x1, #64]
1200+
; CHECK-NEXT: stp q3, q2, [x1, #96]
1201+
; CHECK-NEXT: stp q6, q0, [x1, #224]
12081202
; CHECK-NEXT: ret
12091203
;
12101204
; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
@@ -3133,63 +3127,57 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
31333127
; CHECK: // %bb.0:
31343128
; CHECK-NEXT: ldp q1, q0, [x0]
31353129
; CHECK-NEXT: add z0.b, z0.b, z0.b
3136-
; CHECK-NEXT: add z1.b, z1.b, z1.b
3137-
; CHECK-NEXT: mov z2.d, z0.d
3138-
; CHECK-NEXT: uunpklo z0.h, z0.b
3139-
; CHECK-NEXT: mov z3.d, z1.d
3140-
; CHECK-NEXT: uunpklo z1.h, z1.b
3130+
; CHECK-NEXT: add z2.b, z1.b, z1.b
3131+
; CHECK-NEXT: uunpklo z3.h, z0.b
3132+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
3133+
; CHECK-NEXT: uunpklo z1.h, z2.b
31413134
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
3135+
; CHECK-NEXT: uunpklo z0.h, z0.b
3136+
; CHECK-NEXT: uunpklo z4.s, z3.h
31423137
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
3143-
; CHECK-NEXT: uunpklo z4.s, z0.h
3144-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
31453138
; CHECK-NEXT: uunpklo z5.s, z1.h
3146-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
31473139
; CHECK-NEXT: uunpklo z2.h, z2.b
3148-
; CHECK-NEXT: uunpklo z3.h, z3.b
3149-
; CHECK-NEXT: uunpklo z0.s, z0.h
3150-
; CHECK-NEXT: uunpklo z16.d, z4.s
3140+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
3141+
; CHECK-NEXT: uunpklo z6.s, z0.h
3142+
; CHECK-NEXT: uunpklo z3.s, z3.h
3143+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
3144+
; CHECK-NEXT: uunpklo z7.d, z4.s
31513145
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
3152-
; CHECK-NEXT: uunpklo z1.s, z1.h
3153-
; CHECK-NEXT: uunpklo z17.d, z5.s
3146+
; CHECK-NEXT: uunpklo z16.d, z5.s
31543147
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
3155-
; CHECK-NEXT: uunpklo z6.s, z2.h
3156-
; CHECK-NEXT: uunpklo z7.s, z3.h
3148+
; CHECK-NEXT: uunpklo z17.s, z2.h
31573149
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
3158-
; CHECK-NEXT: uunpklo z4.d, z4.s
3150+
; CHECK-NEXT: uunpklo z1.s, z1.h
3151+
; CHECK-NEXT: uunpklo z0.s, z0.h
3152+
; CHECK-NEXT: uunpklo z18.d, z6.s
3153+
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
3154+
; CHECK-NEXT: uunpklo z19.d, z3.s
31593155
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
3160-
; CHECK-NEXT: uunpklo z19.d, z0.s
3156+
; CHECK-NEXT: uunpklo z4.d, z4.s
31613157
; CHECK-NEXT: uunpklo z5.d, z5.s
3162-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
31633158
; CHECK-NEXT: uunpklo z2.s, z2.h
3164-
; CHECK-NEXT: uunpklo z18.d, z6.s
3165-
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
3166-
; CHECK-NEXT: uunpklo z3.s, z3.h
3167-
; CHECK-NEXT: stp q16, q4, [x1, #128]
3168-
; CHECK-NEXT: mov z16.d, z7.d
3169-
; CHECK-NEXT: uunpklo z0.d, z0.s
3170-
; CHECK-NEXT: stp q17, q5, [x1]
3171-
; CHECK-NEXT: uunpklo z5.d, z7.s
3172-
; CHECK-NEXT: uunpklo z4.d, z6.s
3173-
; CHECK-NEXT: mov z6.d, z1.d
3174-
; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
3175-
; CHECK-NEXT: mov z7.d, z2.d
3176-
; CHECK-NEXT: stp q19, q0, [x1, #160]
3177-
; CHECK-NEXT: uunpklo z0.d, z2.s
3178-
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
3179-
; CHECK-NEXT: uunpklo z1.d, z1.s
3180-
; CHECK-NEXT: stp q18, q4, [x1, #192]
3181-
; CHECK-NEXT: mov z4.d, z3.d
3182-
; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
3183-
; CHECK-NEXT: uunpklo z16.d, z16.s
31843159
; CHECK-NEXT: uunpklo z6.d, z6.s
3185-
; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
3186-
; CHECK-NEXT: uunpklo z2.d, z7.s
31873160
; CHECK-NEXT: uunpklo z3.d, z3.s
3188-
; CHECK-NEXT: stp q5, q16, [x1, #64]
3189-
; CHECK-NEXT: stp q1, q6, [x1, #32]
3190-
; CHECK-NEXT: uunpklo z1.d, z4.s
3191-
; CHECK-NEXT: stp q0, q2, [x1, #224]
3192-
; CHECK-NEXT: stp q3, q1, [x1, #96]
3161+
; CHECK-NEXT: stp q16, q5, [x1]
3162+
; CHECK-NEXT: uunpklo z5.d, z1.s
3163+
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
3164+
; CHECK-NEXT: stp q7, q4, [x1, #128]
3165+
; CHECK-NEXT: uunpklo z4.d, z17.s
3166+
; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
3167+
; CHECK-NEXT: stp q18, q6, [x1, #192]
3168+
; CHECK-NEXT: uunpklo z6.d, z0.s
3169+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
3170+
; CHECK-NEXT: stp q19, q3, [x1, #160]
3171+
; CHECK-NEXT: uunpklo z3.d, z2.s
3172+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
3173+
; CHECK-NEXT: uunpklo z7.d, z17.s
3174+
; CHECK-NEXT: uunpklo z1.d, z1.s
3175+
; CHECK-NEXT: uunpklo z0.d, z0.s
3176+
; CHECK-NEXT: uunpklo z2.d, z2.s
3177+
; CHECK-NEXT: stp q5, q1, [x1, #32]
3178+
; CHECK-NEXT: stp q4, q7, [x1, #64]
3179+
; CHECK-NEXT: stp q3, q2, [x1, #96]
3180+
; CHECK-NEXT: stp q6, q0, [x1, #224]
31933181
; CHECK-NEXT: ret
31943182
;
31953183
; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:

0 commit comments

Comments
 (0)