Skip to content

Commit 02afcbf

Browse files
JonPsson1 authored and tstellar committed
[SystemZ] Fix compile time regression in adjustInliningThreshold(). (llvm#137527)
Instead of always iterating over all GlobalVariables in the Module to find the case where both Caller and Callee are using the same GV heavily, first scan Callee (only if it has fewer than 200 instructions) for all GVs used more than 10 times, and then do the counting in the Caller for just those relevant GVs. The limit of 200 instructions makes sense as this aims to inline a relatively small function that uses a GV more than 10 times. This resolves the compile-time problem with zig: on main the heuristic causes a 380% increase (compared to removing the heuristic), whereas with this change the increase is <0.5% (total user compile time with opt). Fixes llvm#134714. (cherry picked from commit 98b895d)
1 parent c877757 commit 02afcbf

File tree

1 file changed

+30
-18
lines changed

1 file changed

+30
-18
lines changed

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

+30-18
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/CodeGen/BasicTTIImpl.h"
1919
#include "llvm/CodeGen/TargetLowering.h"
2020
#include "llvm/IR/DerivedTypes.h"
21+
#include "llvm/IR/InstIterator.h"
2122
#include "llvm/IR/IntrinsicInst.h"
2223
#include "llvm/IR/Intrinsics.h"
2324
#include "llvm/Support/Debug.h"
@@ -80,7 +81,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
8081
const Function *Callee = CB->getCalledFunction();
8182
if (!Callee)
8283
return 0;
83-
const Module *M = Caller->getParent();
8484

8585
// Increase the threshold if an incoming argument is used only as a memcpy
8686
// source.
@@ -92,25 +92,37 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
9292
}
9393
}
9494

95-
// Give bonus for globals used much in both caller and callee.
96-
std::set<const GlobalVariable *> CalleeGlobals;
97-
std::set<const GlobalVariable *> CallerGlobals;
98-
for (const GlobalVariable &Global : M->globals())
99-
for (const User *U : Global.users())
100-
if (const Instruction *User = dyn_cast<Instruction>(U)) {
101-
if (User->getParent()->getParent() == Callee)
102-
CalleeGlobals.insert(&Global);
103-
if (User->getParent()->getParent() == Caller)
104-
CallerGlobals.insert(&Global);
95+
// Give bonus for globals used much in both caller and a relatively small
96+
// callee.
97+
unsigned InstrCount = 0;
98+
SmallDenseMap<const Value *, unsigned> Ptr2NumUses;
99+
for (auto &I : instructions(Callee)) {
100+
if (++InstrCount == 200) {
101+
Ptr2NumUses.clear();
102+
break;
103+
}
104+
if (const auto *SI = dyn_cast<StoreInst>(&I)) {
105+
if (!SI->isVolatile())
106+
if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
107+
Ptr2NumUses[GV]++;
108+
} else if (const auto *LI = dyn_cast<LoadInst>(&I)) {
109+
if (!LI->isVolatile())
110+
if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
111+
Ptr2NumUses[GV]++;
112+
} else if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
113+
if (auto *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand())) {
114+
unsigned NumStores = 0, NumLoads = 0;
115+
countNumMemAccesses(GEP, NumStores, NumLoads, Callee);
116+
Ptr2NumUses[GV] += NumLoads + NumStores;
105117
}
106-
for (auto *GV : CalleeGlobals)
107-
if (CallerGlobals.count(GV)) {
108-
unsigned CalleeStores = 0, CalleeLoads = 0;
118+
}
119+
}
120+
121+
for (auto [Ptr, NumCalleeUses] : Ptr2NumUses)
122+
if (NumCalleeUses > 10) {
109123
unsigned CallerStores = 0, CallerLoads = 0;
110-
countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee);
111-
countNumMemAccesses(GV, CallerStores, CallerLoads, Caller);
112-
if ((CalleeStores + CalleeLoads) > 10 &&
113-
(CallerStores + CallerLoads) > 10) {
124+
countNumMemAccesses(Ptr, CallerStores, CallerLoads, Caller);
125+
if (CallerStores + CallerLoads > 10) {
114126
Bonus = 1000;
115127
break;
116128
}

0 commit comments

Comments
 (0)