diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 210b79e3a0adc..6c4f4001aab35 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -19,7 +19,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/AutoUpgrade.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalObject.h"
@@ -30,6 +29,7 @@
 #include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Linker/IRMover.h"
+#include "llvm/ProfileData/PGOCtxProfReader.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -185,6 +185,10 @@ static cl::opt<bool> ImportAssumeUniqueLocal(
         "user specify the full module path."),
     cl::Hidden);
 
+static cl::opt<std::string>
+    ContextualProfile("thinlto-pgo-ctx-prof",
+                      cl::desc("Path to a contextual profile."), cl::Hidden);
+
 namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
 }
@@ -604,13 +608,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
   }
 
-public:
-  WorkloadImportsManager(
-      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
-          IsPrevailing,
-      const ModuleSummaryIndex &Index,
-      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
-      : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
+  void loadFromJson() {
     // Since the workload def uses names, we need a quick lookup
     // name->ValueInfo.
     StringMap<ValueInfo> NameToValueInfo;
@@ -680,15 +678,80 @@ class WorkloadImportsManager : public ModuleImportsManager {
         }
         Set.insert(ElemIt->second);
       }
-      LLVM_DEBUG({
+    }
+  }
+
+  /// Populate Workloads from the contextual profile given by
+  /// -thinlto-pgo-ctx-prof. Each root with exactly one summary in the index
+  /// maps its defining module to the contained GUIDs that the index knows.
+  void loadFromCtxProf() {
+    auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ContextualProfile);
+    if (!BufferOrErr)
+      report_fatal_error("Failed to open contextual profile file");
+    auto Buffer = std::move(BufferOrErr.get());
+
+    PGOCtxProfileReader Reader(Buffer->getBuffer());
+    auto Ctx = Reader.loadContexts();
+    if (!Ctx)
+      report_fatal_error("Failed to parse contextual profiles");
+    const auto &CtxMap = *Ctx;
+    DenseSet<GlobalValue::GUID> ContainedGUIDs;
+    for (const auto &[RootGuid, Root] : CtxMap) {
+      // ContainedGUIDs lives outside the loop so its memory is reused for
+      // subsequent roots; only its contents are cleared.
+      ContainedGUIDs.clear();
+
+      auto RootVI = Index.getValueInfo(RootGuid);
+      if (!RootVI) {
+        LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
+                          << " not found in this linkage unit.\n");
+        continue;
+      }
+      if (RootVI.getSummaryList().size() != 1) {
+        LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
+                          << " should have exactly one summary, but has "
+                          << RootVI.getSummaryList().size() << ". Skipping.\n");
+        continue;
+      }
+      StringRef RootDefiningModule =
+          RootVI.getSummaryList().front()->modulePath();
+      LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
+                        << " is : " << RootDefiningModule << "\n");
+      auto &Set = Workloads[RootDefiningModule];
+      Root.getContainedGuids(ContainedGUIDs);
+      for (auto Guid : ContainedGUIDs)
+        if (auto VI = Index.getValueInfo(Guid))
+          Set.insert(VI);
+    }
+  }
+
+public:
+  /// Exactly one of -thinlto-pgo-ctx-prof or -thinlto-workload-def must be
+  /// set; the workloads are loaded from it, otherwise it is a fatal error.
+  WorkloadImportsManager(
+      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+          IsPrevailing,
+      const ModuleSummaryIndex &Index,
+      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
+      : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
+    if (ContextualProfile.empty() == WorkloadDefinitions.empty()) {
+      report_fatal_error(
+          "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");
+    }
+    if (!ContextualProfile.empty())
+      loadFromCtxProf();
+    else
+      loadFromJson();
+    LLVM_DEBUG({
+      for (const auto &[Root, Set] : Workloads) {
         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
                << " distinct callees.\n";
         for (const auto &VI : Set) {
           dbgs() << "[Workload] Root: " << Root
                  << " Would include: " << VI.getGUID() << "\n";
         }
-      });
-    }
+      }
+    });
   }
 };
 
@@ -697,7 +760,7 @@ std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
         IsPrevailing,
     const ModuleSummaryIndex &Index,
     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
-  if (WorkloadDefinitions.empty()) {
+  if (WorkloadDefinitions.empty() && ContextualProfile.empty()) {
     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
     return std::unique_ptr<ModuleImportsManager>(
         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
diff --git a/llvm/test/ThinLTO/X86/ctxprof.ll b/llvm/test/ThinLTO/X86/ctxprof.ll
new file mode 100644
index 0000000000000..4c86ec9f4c479
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/ctxprof.ll
@@ -0,0 +1,73 @@
+; Test workload based importing via -thinlto-pgo-ctx-prof
+; Use external linkage symbols so we don't depend on module paths which are
+; used when computing the GUIDs of internal linkage symbols.
+; The functionality is shared with what workload.ll tests, so here we only care
+; about testing the ctx profile is loaded and handled correctly.
+;
+; Set up
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: split-file %s %t
+;
+; RUN: opt -module-summary %t/m1.ll -o %t/m1.bc
+; RUN: opt -module-summary %t/m2.ll -o %t/m2.bc
+; RUN: llvm-dis %t/m1.bc -o - | FileCheck %s --check-prefix=GUIDS-1
+; RUN: llvm-dis %t/m2.bc -o - | FileCheck %s --check-prefix=GUIDS-2
+;
+; GUIDS-1: name: "m1_f1"
+; GUIDS-1-SAME: guid = 6019442868614718803
+; GUIDS-2: name: "m2_f1"
+; GUIDS-2-SAME: guid = 15593096274670919754
+;
+; RUN: rm -rf %t_baseline
+; RUN: rm -rf %t_exp
+; RUN: mkdir -p %t_baseline
+; RUN: mkdir -p %t_exp
+;
+; Normal run. m1 shouldn't get m2_f1 because it's not referenced from there, and
+; m1_f1 shouldn't go to m2.
+;
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
+; RUN:  -o %t_baseline/result.o -save-temps \
+; RUN:  -r %t/m1.bc,m1_f1,plx \
+; RUN:  -r %t/m2.bc,m2_f1,plx
+; RUN: llvm-dis %t_baseline/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF-1
+; RUN: llvm-dis %t_baseline/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF-2
+;
+; NOPROF-1-NOT: m2_f1()
+; NOPROF-2-NOT: m1_f1()
+;
+; The run with the contextual profile - same other options.
+;
+; RUN: echo '[ \
+; RUN:   {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \
+; RUN:   {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \
+; RUN:  ]' > %t_exp/ctxprof.json
+; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
+; RUN:  -o %t_exp/result.o -save-temps \
+; RUN:  -thinlto-pgo-ctx-prof=%t_exp/ctxprof.bitstream \
+; RUN:  -r %t/m1.bc,m1_f1,plx \
+; RUN:  -r %t/m2.bc,m2_f1,plx
+; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
+; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
+;
+;
+; FIRST: m2_f1()
+; SECOND: m1_f1()
+;
+;--- m1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define dso_local void @m1_f1() {
+  ret void
+}
+
+;--- m2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define dso_local void @m2_f1() {
+  ret void
+}