Skip to content

Commit d2a7280

Browse files
committed
[CGData][MachineOutliner] Global Outlining2
This commit introduces support for outlining functions across modules using codegen data generated by a previous codegen run. The codegen data currently manages the outlined hash tree, which records outlining instances that occurred locally in the past. The machine outliner now operates in one of three modes: 1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module. 2. CGDataMode::Write (`codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this tree into the `__llvm_outline` section, which will be dead-stripped at link time. 3. CGDataMode::Read (`codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file is produced by post-processing the raw `__llvm_outline` sections from all native objects with the `llvm-cgdata` tool (or, later, with a linker such as `LLD` or a new ThinLTO pipeline).
1 parent f9ad249 commit d2a7280

16 files changed

+897
-4
lines changed

llvm/include/llvm/ADT/StableHashing.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
5353
// Removes suffixes introduced by LLVM from the name to enhance stability and
5454
// maintain closeness to the original name across different builds.
5555
inline StringRef get_stable_name(StringRef Name) {
56+
// Return the part after ".content." that represents contents.
57+
auto [P0, S0] = Name.rsplit(".content.");
58+
if (!S0.empty())
59+
return S0;
60+
61+
// Ignore these suffixes.
5662
auto [P1, S1] = Name.rsplit(".llvm.");
5763
auto [P2, S2] = P1.rsplit(".__uniq.");
5864
return P2;

llvm/include/llvm/CodeGen/MachineOutliner.h

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/CodeGen/LiveRegUnits.h"
1919
#include "llvm/CodeGen/MachineFunction.h"
2020
#include "llvm/CodeGen/MachineRegisterInfo.h"
21+
#include "llvm/CodeGen/MachineStableHash.h"
2122
#include <initializer_list>
2223

2324
namespace llvm {
@@ -234,11 +235,11 @@ struct OutlinedFunction {
234235
unsigned FrameConstructionID = 0;
235236

236237
/// Return the number of candidates for this \p OutlinedFunction.
237-
unsigned getOccurrenceCount() const { return Candidates.size(); }
238+
virtual unsigned getOccurrenceCount() const { return Candidates.size(); }
238239

239240
/// Return the number of bytes it would take to outline this
240241
/// function.
241-
unsigned getOutliningCost() const {
242+
virtual unsigned getOutliningCost() const {
242243
unsigned CallOverhead = 0;
243244
for (const Candidate &C : Candidates)
244245
CallOverhead += C.getCallOverhead();
@@ -272,7 +273,42 @@ struct OutlinedFunction {
272273
}
273274

274275
OutlinedFunction() = delete;
276+
virtual ~OutlinedFunction() = default;
275277
};
278+
279+
/// The information necessary to create an outlined function that is matched
280+
/// globally.
281+
struct GlobalOutlinedFunction : public OutlinedFunction {
282+
explicit GlobalOutlinedFunction(std::unique_ptr<OutlinedFunction> OF,
283+
unsigned GlobalOccurrenceCount)
284+
: OutlinedFunction(*OF), GlobalOccurrenceCount(GlobalOccurrenceCount) {}
285+
286+
unsigned GlobalOccurrenceCount;
287+
288+
/// Return the global occurrence count, or 0 when no candidate remains.
289+
/// A global outlining candidate is created uniquely for each match, but it
290+
/// might be erased when it overlaps with a previous outlining
291+
/// instance.
292+
unsigned getOccurrenceCount() const override {
293+
assert(Candidates.size() <= 1);
294+
return Candidates.empty() ? 0 : GlobalOccurrenceCount;
295+
}
296+
297+
/// Return the outlining cost, charging the call overhead of the first
298+
/// (unique) candidate once per global occurrence.
299+
unsigned getOutliningCost() const override {
300+
assert(Candidates.size() <= 1);
301+
unsigned CallOverhead =
302+
Candidates.empty()
303+
? 0
304+
: Candidates[0].getCallOverhead() * getOccurrenceCount();
305+
return CallOverhead + SequenceSize + FrameOverhead;
306+
}
307+
308+
GlobalOutlinedFunction() = delete;
309+
~GlobalOutlinedFunction() = default;
310+
};
311+
276312
} // namespace outliner
277313
} // namespace llvm
278314

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@
2424
using namespace llvm;
2525
using namespace cgdata;
2626

27+
cl::opt<bool>
28+
CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden,
29+
cl::desc("Emit CodeGen Data into custom sections"));
30+
cl::opt<std::string>
31+
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
32+
cl::desc("File path to where .cgdata file is read"));
33+
2734
static std::string getCGDataErrString(cgdata_error Err,
2835
const std::string &ErrMsg = "") {
2936
std::string Msg;
@@ -132,7 +139,24 @@ CodeGenData &CodeGenData::getInstance() {
132139
std::call_once(CodeGenData::OnceFlag, []() {
133140
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
134141

135-
// TODO: Initialize writer or reader mode for the client optimization.
142+
if (CodeGenDataGenerate)
143+
Instance->EmitCGData = true;
144+
else if (!CodeGenDataUsePath.empty()) {
145+
// Initialize the global CGData if the input file name is given.
146+
// We do not error-out when failing to parse the input file.
147+
// Instead, just emit a warning message and fall back as if no CGData
148+
// were available.
149+
auto FS = vfs::getRealFileSystem();
150+
auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
151+
if (Error E = ReaderOrErr.takeError()) {
152+
warn(std::move(E), CodeGenDataUsePath);
153+
return;
154+
}
155+
// Publish each CGData based on the data type in the header.
156+
auto Reader = ReaderOrErr->get();
157+
if (Reader->hasOutlinedHashTree())
158+
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
159+
}
136160
});
137161
return *(Instance.get());
138162
}

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ add_llvm_component_library(LLVMCodeGen
267267
Analysis
268268
BitReader
269269
BitWriter
270+
CGData
270271
CodeGenTypes
271272
Core
272273
MC

0 commit comments

Comments
 (0)