Skip to content

Commit 433d63a

Browse files
kyulee-com authored and DanielCChen committed
[CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (llvm#90933)
This feature is enabled by `-codegen-data-thinlto-two-rounds`, which effectively runs `-codegen-data-generate` and `-codegen-data-use` in two rounds to enable global outlining with ThinLTO.
1. The first round: run both optimization and codegen with a scratch output. Before running codegen, the optimized bitcode modules are serialized to a temporary path.
2. The scratch object files are merged into the codegen data.
3. The second round: read the optimized bitcode modules back and run codegen only. Using the codegen data, the machine outliner effectively performs the global outlining.
Depends on llvm#90934, llvm#110461 and llvm#110463. This is a patch for https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753.
1 parent 8184a6d commit 433d63a

14 files changed

+689
-16
lines changed

clang/lib/CodeGen/BackendUtil.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -1321,10 +1321,11 @@ static void runThinLTOBackend(
13211321
Conf.CGFileType = getCodeGenFileType(Action);
13221322
break;
13231323
}
1324-
if (Error E = thinBackend(
1325-
Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
1326-
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
1327-
/* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) {
1324+
if (Error E =
1325+
thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
1326+
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
1327+
/*ModuleMap=*/nullptr, Conf.CodeGenOnly,
1328+
/*IRAddStream=*/nullptr, CGOpts.CmdArgs)) {
13281329
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
13291330
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
13301331
});

llvm/include/llvm/CGData/CodeGenData.h

+70
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
#define LLVM_CGDATA_CODEGENDATA_H
1616

1717
#include "llvm/ADT/BitmaskEnum.h"
18+
#include "llvm/ADT/StableHashing.h"
1819
#include "llvm/Bitcode/BitcodeReader.h"
1920
#include "llvm/CGData/OutlinedHashTree.h"
2021
#include "llvm/CGData/OutlinedHashTreeRecord.h"
2122
#include "llvm/IR/Module.h"
2223
#include "llvm/Object/ObjectFile.h"
24+
#include "llvm/Support/Caching.h"
2325
#include "llvm/Support/ErrorHandling.h"
2426
#include "llvm/TargetParser/Triple.h"
2527
#include <mutex>
@@ -164,6 +166,74 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164166
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165167
}
166168

169+
struct StreamCacheData {
170+
/// Backing buffer for serialized data stream.
171+
SmallVector<SmallString<0>> Outputs;
172+
/// Callback function to add serialized data to the stream.
173+
AddStreamFn AddStream;
174+
/// Backing buffer for cached data.
175+
SmallVector<std::unique_ptr<MemoryBuffer>> Files;
176+
/// Cache mechanism for storing data.
177+
FileCache Cache;
178+
179+
StreamCacheData(unsigned Size, const FileCache &OrigCache,
180+
const Twine &CachePrefix)
181+
: Outputs(Size), Files(Size) {
182+
AddStream = [&](size_t Task, const Twine &ModuleName) {
183+
return std::make_unique<CachedFileStream>(
184+
std::make_unique<raw_svector_ostream>(Outputs[Task]));
185+
};
186+
187+
if (OrigCache.isValid()) {
188+
auto CGCacheOrErr =
189+
localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
190+
[&](size_t Task, const Twine &ModuleName,
191+
std::unique_ptr<MemoryBuffer> MB) {
192+
Files[Task] = std::move(MB);
193+
});
194+
if (Error Err = CGCacheOrErr.takeError())
195+
report_fatal_error(std::move(Err));
196+
Cache = std::move(*CGCacheOrErr);
197+
}
198+
}
199+
StreamCacheData() = delete;
200+
201+
/// Retrieve results from either the cache or the stream.
202+
std::unique_ptr<SmallVector<StringRef>> getResult() {
203+
unsigned NumOutputs = Outputs.size();
204+
auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
205+
for (unsigned I = 0; I < NumOutputs; ++I)
206+
if (Files[I])
207+
(*Result)[I] = Files[I]->getBuffer();
208+
else
209+
(*Result)[I] = Outputs[I];
210+
return Result;
211+
}
212+
};
213+
214+
/// Save \p TheModule before the first codegen round.
215+
/// \p Task represents the partition number in the parallel code generation
216+
/// process. \p AddStream is the callback used to add the serialized module to
217+
/// the stream.
218+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
219+
AddStreamFn AddStream);
220+
221+
/// Load the optimized bitcode module for the second codegen round.
222+
/// \p OrigModule is the original bitcode module.
223+
/// \p Task identifies the partition number in the parallel code generation
224+
/// process. \p Context provides the environment settings for module operations.
225+
/// \p IRFiles contains optimized bitcode module files needed for loading.
226+
/// \return A unique_ptr to the loaded Module; a parsing failure is fatal.
227+
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
228+
unsigned Task,
229+
LLVMContext &Context,
230+
ArrayRef<StringRef> IRFiles);
231+
232+
/// Merge the codegen data from the scratch objects \p ObjectFiles from the
233+
/// first codegen round.
234+
/// \return the combined hash of the merged codegen data.
235+
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjectFiles);
236+
167237
void warn(Error E, StringRef Whence = "");
168238
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
169239

llvm/include/llvm/CGData/CodeGenDataReader.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@ class CodeGenDataReader {
5454
/// Extract the cgdata embedded in sections from the given object file and
5555
/// merge them into the GlobalOutlineRecord. This is a static helper that
5656
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
57+
/// Optionally, \p CombinedHash can be used to compute the combined hash of
58+
/// the merged data.
5759
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
58-
OutlinedHashTreeRecord &GlobalOutlineRecord);
60+
OutlinedHashTreeRecord &GlobalOutlineRecord,
61+
stable_hash *CombinedHash = nullptr);
5962

6063
protected:
6164
/// The outlined hash tree that has been read. When it's released by

llvm/include/llvm/LTO/LTO.h

+3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ std::string computeLTOCacheKey(
7575
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs = {},
7676
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {});
7777

78+
/// Recomputes the LTO cache key for a given key with an extra identifier.
79+
std::string recomputeLTOCacheKey(const std::string &Key, StringRef ExtraID);
80+
7881
namespace lto {
7982

8083
StringLiteral getThinLTODefaultCPU(const Triple &TheTriple);

llvm/include/llvm/LTO/LTOBackend.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ Error backend(const Config &C, AddStreamFn AddStream,
5151
/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
5252
/// be mapped to memory on demand and at any given time during importing, only
5353
/// one source module will be kept open at the most. If \p CodeGenOnly is true,
54-
/// the backend will skip optimization and only perform code generation.
54+
/// the backend will skip optimization and only perform code generation. If
55+
/// \p IRAddStream is not nullptr, it will be called just before code generation
56+
/// to serialize the optimized IR.
5557
Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
5658
Module &M, const ModuleSummaryIndex &CombinedIndex,
5759
const FunctionImporter::ImportMapTy &ImportList,
5860
const GVSummaryMapTy &DefinedGlobals,
5961
MapVector<StringRef, BitcodeModule> *ModuleMap,
60-
bool CodeGenOnly,
62+
bool CodeGenOnly, AddStreamFn IRAddStream = nullptr,
6163
const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
6264

6365
Error finalizeOptimizationRemarks(

llvm/lib/CGData/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ add_llvm_component_library(LLVMCGData
1212
intrinsics_gen
1313

1414
LINK_COMPONENTS
15+
BitReader
16+
BitWriter
1517
Core
1618
Support
1719
Object

llvm/lib/CGData/CodeGenData.cpp

+66-1
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
#include "llvm/CGData/CodeGenDataReader.h"
1616
#include "llvm/CGData/OutlinedHashTreeRecord.h"
1717
#include "llvm/Object/ObjectFile.h"
18+
#include "llvm/Support/Caching.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include "llvm/Support/FileSystem.h"
21+
#include "llvm/Support/Path.h"
2022
#include "llvm/Support/WithColor.h"
2123

2224
#define DEBUG_TYPE "cg-data"
@@ -30,6 +32,11 @@ cl::opt<bool>
3032
cl::opt<std::string>
3133
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
3234
cl::desc("File path to where .cgdata file is read"));
35+
cl::opt<bool> CodeGenDataThinLTOTwoRounds(
36+
"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
37+
cl::desc("Enable two-round ThinLTO code generation. The first round "
38+
"emits codegen data, while the second round uses the emitted "
39+
"codegen data for further optimizations."));
3340

3441
static std::string getCGDataErrString(cgdata_error Err,
3542
const std::string &ErrMsg = "") {
@@ -139,7 +146,7 @@ CodeGenData &CodeGenData::getInstance() {
139146
std::call_once(CodeGenData::OnceFlag, []() {
140147
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
141148

142-
if (CodeGenDataGenerate)
149+
if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
143150
Instance->EmitCGData = true;
144151
else if (!CodeGenDataUsePath.empty()) {
145152
// Initialize the global CGData if the input file name is given.
@@ -215,6 +222,64 @@ void warn(Error E, StringRef Whence) {
215222
}
216223
}
217224

225+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
226+
AddStreamFn AddStream) {
227+
LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
228+
<< " in Task " << Task << "\n");
229+
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
230+
AddStream(Task, TheModule.getModuleIdentifier());
231+
if (Error Err = StreamOrErr.takeError())
232+
report_fatal_error(std::move(Err));
233+
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
234+
235+
WriteBitcodeToFile(TheModule, *Stream->OS,
236+
/*ShouldPreserveUseListOrder=*/true);
237+
}
238+
239+
// Parse the bitcode stored in IRFiles[Task] into a new Module in \p Context
// and give it the module identifier of \p OrigModule.
//
// The bytes in \p IRFiles are wrapped, not copied, so they must stay alive
// for the duration of the call. A parse failure is reported as a fatal error
// that includes the parser's own diagnostic; the function does not return in
// that case.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context,
                                               ArrayRef<StringRef> IRFiles) {
  LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
                    << " in Task " << Task << "\n");
  auto FileBuffer = MemoryBuffer::getMemBuffer(
      IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
  auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
  if (!RestoredModule)
    // Surface the underlying reason instead of dropping the Error.
    report_fatal_error(
        Twine("Failed to parse optimized bitcode loaded for Task: ") +
        Twine(Task) + ": " + toString(RestoredModule.takeError()) + "\n");

  // Restore the original module identifier.
  (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
  return std::move(*RestoredModule);
}
257+
258+
// Merge the codegen data found in each in-memory object of \p ObjFiles into a
// single outlined hash tree record, and publish the resulting hash tree if the
// record is not empty. Empty entries are skipped. Returns the hash accumulated
// across the merged objects, or the first error encountered while opening or
// merging an object.
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
  stable_hash MergedHash = 0;
  OutlinedHashTreeRecord MergedRecord;

  for (StringRef Contents : ObjFiles) {
    // A slot without any bytes has nothing to contribute.
    if (Contents.empty())
      continue;

    // Wrap the bytes without copying and view them as an object file.
    auto ObjBuffer = MemoryBuffer::getMemBuffer(
        Contents, "in-memory object file", /*RequiresNullTerminator=*/false);
    auto ObjOrErr =
        object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
    if (!ObjOrErr)
      return ObjOrErr.takeError();

    const object::ObjectFile *Obj = ObjOrErr->get();
    if (Error MergeErr = CodeGenDataReader::mergeFromObjectFile(
            Obj, MergedRecord, &MergedHash))
      return std::move(MergeErr);
  }

  if (MergedRecord.empty())
    return MergedHash;

  cgdata::publishOutlinedHashTree(std::move(MergedRecord.HashTree));
  return MergedHash;
}
282+
218283
} // end namespace cgdata
219284

220285
} // end namespace llvm

llvm/lib/CGData/CodeGenDataReader.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
3131
}
3232

3333
Error CodeGenDataReader::mergeFromObjectFile(
34-
const object::ObjectFile *Obj,
35-
OutlinedHashTreeRecord &GlobalOutlineRecord) {
34+
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35+
stable_hash *CombinedHash) {
3636
Triple TT = Obj->makeTriple();
3737
auto CGOutLineName =
3838
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
@@ -48,6 +48,9 @@ Error CodeGenDataReader::mergeFromObjectFile(
4848
auto *EndData = Data + ContentsOrErr->size();
4949

5050
if (*NameOrErr == CGOutLineName) {
51+
if (CombinedHash)
52+
*CombinedHash =
53+
stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
5154
// In case dealing with an executable that has concatenated cgdata,
5255
// we want to merge them into a single cgdata.
5356
// Although it's not a typical workflow, we support this scenario.

llvm/lib/LTO/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMLTO
2121
BinaryFormat
2222
BitReader
2323
BitWriter
24+
CGData
2425
CodeGen
2526
CodeGenTypes
2627
Core

0 commit comments

Comments
 (0)