Skip to content

Commit cb23c67

Browse files
committed
[CGData][MachineOutliner] Global Outlining2
This commit introduces support for outlining functions across modules using codegen data generated from previous codegen. The codegen data currently manages the outlined hash tree, which records outlining instances that occurred locally in the past. The machine outliner now operates in one of three modes: 1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module. 2. CGDataMode::Write (`codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this into the `__llvm_outline` section, which will be dead-stripped at link time. 3. CGDataMode::Read (`codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file has been post-processed with the raw `__llvm_outline` sections from all native objects using the `llvm-cgdata` tool (or a linker, `LLD`, or a new ThinLTO pipeline later).
1 parent 3ed5913 commit cb23c67

14 files changed

+750
-19
lines changed

llvm/include/llvm/CodeGen/MachineOutliner.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/CodeGen/LiveRegUnits.h"
1919
#include "llvm/CodeGen/MachineFunction.h"
2020
#include "llvm/CodeGen/MachineRegisterInfo.h"
21+
#include "llvm/CodeGen/MachineStableHash.h"
2122
#include <initializer_list>
2223

2324
namespace llvm {
@@ -274,6 +275,41 @@ struct OutlinedFunction {
274275
OutlinedFunction() = delete;
275276
virtual ~OutlinedFunction() = default;
276277
};
278+
279+
/// The information necessary to create an outlined function that is matched
280+
/// globally. It reuses the data of an existing OutlinedFunction and overrides
/// the occurrence count and cost queries to use a separately supplied global
/// occurrence count.
struct GlobalOutlinedFunction : public OutlinedFunction {
282+
/// Construct from \p OF by copying its candidates, sequence size, frame
/// overhead and frame construction ID, and record \p GlobalOccurrenceCount.
GlobalOutlinedFunction(OutlinedFunction &OF, unsigned GlobalOccurrenceCount)
283+
: OutlinedFunction(OF.Candidates, OF.SequenceSize, OF.FrameOverhead,
284+
OF.FrameConstructionID),
285+
GlobalOccurrenceCount(GlobalOccurrenceCount) {}
286+
287+
/// The occurrence count supplied at construction; reported by
/// getOccurrenceCount() while a candidate is still present.
unsigned GlobalOccurrenceCount;
288+
289+
/// Return the number of times this outlined sequence appears globally.
290+
/// A global outlining candidate is created uniquely for each match, but it
291+
/// may be erased when it overlaps with a previous outlining
292+
/// instance. In that case (no candidates left) the count is 0.
293+
unsigned getOccurrenceCount() const override {
294+
// At most one candidate is expected for a global outlined function.
assert(Candidates.size() <= 1);
295+
return Candidates.empty() ? 0 : GlobalOccurrenceCount;
296+
}
297+
298+
/// Return the outlining cost: the call overhead of the first (unique)
299+
/// candidate multiplied by the global occurrence count, plus the sequence
/// size and the frame overhead. With no candidate the call overhead is 0.
unsigned getOutliningCost() const override {
301+
// At most one candidate is expected for a global outlined function.
assert(Candidates.size() <= 1);
302+
unsigned CallOverhead =
303+
Candidates.empty()
304+
? 0
305+
: Candidates[0].getCallOverhead() * getOccurrenceCount();
306+
return CallOverhead + SequenceSize + FrameOverhead;
307+
}
308+
309+
GlobalOutlinedFunction() = delete;
310+
~GlobalOutlinedFunction() = default;
311+
};
312+
277313
} // namespace outliner
278314
} // namespace llvm
279315

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@
2424
using namespace llvm;
2525
using namespace cgdata;
2626

27+
/// Hidden boolean option `codegen-data-generate` (default: false).
cl::opt<bool>
28+
CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden,
29+
cl::desc("Emit CodeGen Data into custom sections"));
30+
/// Hidden string option `codegen-data-use-path` (default: empty string).
cl::opt<std::string>
31+
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
32+
cl::desc("File path to where .cgdata file is read"));
33+
2734
static std::string getCGDataErrString(cgdata_error Err,
2835
const std::string &ErrMsg = "") {
2936
std::string Msg;
@@ -132,7 +139,24 @@ CodeGenData &CodeGenData::getInstance() {
132139
std::call_once(CodeGenData::OnceFlag, []() {
133140
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
134141

135-
// TODO: Initialize writer or reader mode for the client optimization.
142+
if (CodeGenDataGenerate)
143+
Instance->EmitCGData = true;
144+
else if (!CodeGenDataUsePath.empty()) {
145+
// Initialize the global CGData if the input file name is given.
146+
// We do not error-out when failing to parse the input file.
147+
// Instead, just emit a warning message and fall back as if no CGData
148+
// were available.
149+
auto FS = vfs::getRealFileSystem();
150+
auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
151+
if (Error E = ReaderOrErr.takeError()) {
152+
warn(std::move(E), CodeGenDataUsePath);
153+
return;
154+
}
155+
// Publish each CGData based on the data type in the header.
156+
auto Reader = ReaderOrErr->get();
157+
if (Reader->hasOutlinedHashTree())
158+
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
159+
}
136160
});
137161
return *(Instance.get());
138162
}

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ add_llvm_component_library(LLVMCodeGen
267267
Analysis
268268
BitReader
269269
BitWriter
270+
CGData
270271
CodeGenTypes
271272
Core
272273
MC

0 commit comments

Comments
 (0)