diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index 10aef6f6067b6..80edd19ea8f8f 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -15,6 +15,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/PGOCtxProfReader.h" +#include namespace llvm { @@ -63,6 +64,16 @@ class PGOContextualProfile { return getDefinedFunctionGUID(F) != 0; } + uint32_t getNumCounters(const Function &F) const { + assert(isFunctionKnown(F)); + return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex; + } + + uint32_t getNumCallsites(const Function &F) const { + assert(isFunctionKnown(F)); + return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex; + } + uint32_t allocateNextCounterIndex(const Function &F) { assert(isFunctionKnown(F)); return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++; @@ -91,11 +102,11 @@ class PGOContextualProfile { }; class CtxProfAnalysis : public AnalysisInfoMixin { - StringRef Profile; + const std::optional Profile; public: static AnalysisKey Key; - explicit CtxProfAnalysis(StringRef Profile = ""); + explicit CtxProfAnalysis(std::optional Profile = std::nullopt); using Result = PGOContextualProfile; @@ -113,9 +124,7 @@ class CtxProfAnalysisPrinterPass : public PassInfoMixin { public: enum class PrintMode { Everything, JSON }; - explicit CtxProfAnalysisPrinterPass(raw_ostream &OS, - PrintMode Mode = PrintMode::Everything) - : OS(OS), Mode(Mode) {} + explicit CtxProfAnalysisPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); static bool isRequired() { return true; } diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 71a96e0671c2f..fc8d1b3d1947e 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1516,6 +1516,8 @@ class InstrProfInstBase : public IntrinsicInst { return const_cast(getArgOperand(0))->stripPointerCasts(); } + void setNameValue(Value *V) { setArgOperand(0, V); } + // The hash of the CFG for the instrumented function. ConstantInt *getHash() const { return cast(const_cast(getArgOperand(1))); diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h index f7f88966f7573..e03481916dd48 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h @@ -74,6 +74,13 @@ class PGOCtxProfContext final { Iter->second.emplace(Other.guid(), std::move(Other)); } + void ingestAllContexts(uint32_t CSId, CallTargetMapTy &&Other) { + auto [_, Inserted] = callsites().try_emplace(CSId, std::move(Other)); + (void)Inserted; + assert(Inserted && + "CSId was expected to be newly created as result of e.g. inlining"); + } + void resizeCounters(uint32_t Size) { Counters.resize(Size); } bool hasCallsite(uint32_t I) const { diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 6226062dd713f..2ddcfeb1501e2 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/ValueHandle.h" @@ -270,6 +271,17 @@ InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool InsertLifetime = true, Function *ForwardVarArgsTo = nullptr); +/// Same as above, but it will update the contextual profile. If the contextual +/// profile is invalid (i.e. not loaded because it is not present), it defaults +/// to the behavior of the non-contextual profile updating variant above. This +/// makes it easy to drop-in replace uses of the non-contextual overload. +InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, + CtxProfAnalysis::Result &CtxProf, + bool MergeAttributes = false, + AAResults *CalleeAAR = nullptr, + bool InsertLifetime = true, + Function *ForwardVarArgsTo = nullptr); + /// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. /// diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 2cd3f2114397e..457a4dcc79684 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -29,6 +29,15 @@ cl::opt UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file")); +static cl::opt PrintLevel( + "ctx-profile-printer-level", + cl::init(CtxProfAnalysisPrinterPass::PrintMode::JSON), cl::Hidden, + cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, + "everything", "print everything - most verbose"), + clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::JSON, "json", + "just the json representation of the profile")), + cl::desc("Verbosity level of the contextual profile printer pass.")); + namespace llvm { namespace json { Value toJSON(const PGOCtxProfContext &P) { @@ -96,12 +105,20 @@ GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) { } AnalysisKey CtxProfAnalysis::Key; -CtxProfAnalysis::CtxProfAnalysis(StringRef Profile) - : Profile(Profile.empty() ? UseCtxProfile : Profile) {} +CtxProfAnalysis::CtxProfAnalysis(std::optional Profile) + : Profile([&]() -> std::optional { + if (Profile) + return *Profile; + if (UseCtxProfile.getNumOccurrences()) + return UseCtxProfile; + return std::nullopt; + }()) {} PGOContextualProfile CtxProfAnalysis::run(Module &M, ModuleAnalysisManager &MAM) { - ErrorOr> MB = MemoryBuffer::getFile(Profile); + if (!Profile) + return {}; + ErrorOr> MB = MemoryBuffer::getFile(*Profile); if (auto EC = MB.getError()) { M.getContext().emitError("could not open contextual profile file: " + EC.message()); @@ -150,7 +167,6 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M, // If we made it this far, the Result is valid - which we mark by setting // .Profiles. // Trim first the roots that aren't in this module. - DenseSet ProfiledGUIDs; for (auto &[RootGuid, _] : llvm::make_early_inc_range(*MaybeCtx)) if (!Result.FuncInfo.contains(RootGuid)) MaybeCtx->erase(RootGuid); @@ -165,11 +181,14 @@ PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const { return 0; } +CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream &OS) + : OS(OS), Mode(PrintLevel) {} + PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { CtxProfAnalysis::Result &C = MAM.getResult(M); if (!C) { - M.getContext().emitError("Invalid CtxProfAnalysis"); + OS << "No contextual profile was provided.\n"; return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 5e91ab80d7505..b7e4531c8e390 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" @@ -113,6 +114,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, return PreservedAnalyses::all(); } + auto &CtxProf = MAM.getResult(M); + bool Changed = false; ProfileSummaryInfo *PSI = MAM.getCachedResult(M); @@ -213,7 +216,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, &FAM.getResult(Callee)); InlineResult IR = - InlineFunction(*CB, IFI, /*MergeAttributes=*/true, + InlineFunction(*CB, IFI, CtxProf, /*MergeAttributes=*/true, &FAM.getResult(*CB->getCaller())); if (!IR.isSuccess()) { Advice->recordUnsuccessfulInlining(IR); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 94e87656a192c..799ef3ab021d3 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryProfileInfo.h" @@ -46,6 +47,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" @@ -71,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -2116,6 +2119,240 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, } } +// In contextual profiling, when an inline succeeds, we want to remap the +// indices of the callee into the index space of the caller. We can't just leave +// them as-is because the same callee may appear in other places in this caller +// (other callsites), and its (callee's) counters and sub-contextual profile +// tree would be potentially different. +// Not all BBs of the callee may survive the opportunistic DCE InlineFunction +// does (same goes for callsites in the callee). +// We will return a pair of vectors, one for basic block IDs and one for +// callsites. For such a vector V, V[Idx] will be -1 if the callee +// instrumentation with index Idx did not survive inlining, and a new value +// otherwise. +// This function will update the caller's instrumentation intrinsics +// accordingly, mapping indices as described above. We also replace the "name" +// operand because we use it to distinguish between "own" instrumentation and +// "from callee" instrumentation when performing the traversal of the CFG of the +// caller. We traverse depth-first from the callsite's BB and up to the point we +// hit BBs owned by the caller. +// The return values will be then used to update the contextual +// profile. Note: we only update the "name" and "index" operands in the +// instrumentation intrinsics, we leave the hash and total nr of indices as-is, +// it's not worth updating those. +static const std::pair, std::vector> +remapIndices(Function &Caller, BasicBlock *StartBB, + CtxProfAnalysis::Result &CtxProf, uint32_t CalleeCounters, + uint32_t CalleeCallsites) { + // We'll allocate a new ID to imported callsite counters and callsites. We're + // using -1 to indicate a counter we delete. Most likely the entry ID, for + // example, will be deleted - we don't want 2 IDs in the same BB, and the + // entry would have been cloned in the callsite's old BB. + std::vector CalleeCounterMap; + std::vector CalleeCallsiteMap; + CalleeCounterMap.resize(CalleeCounters, -1); + CalleeCallsiteMap.resize(CalleeCallsites, -1); + + auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool { + if (Ins.getNameValue() == &Caller) + return false; + const auto OldID = static_cast(Ins.getIndex()->getZExtValue()); + if (CalleeCounterMap[OldID] == -1) + CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(Caller); + const auto NewID = static_cast(CalleeCounterMap[OldID]); + + Ins.setNameValue(&Caller); + Ins.setIndex(NewID); + return true; + }; + + auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool { + if (Ins.getNameValue() == &Caller) + return false; + const auto OldID = static_cast(Ins.getIndex()->getZExtValue()); + if (CalleeCallsiteMap[OldID] == -1) + CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(Caller); + const auto NewID = static_cast(CalleeCallsiteMap[OldID]); + + Ins.setNameValue(&Caller); + Ins.setIndex(NewID); + return true; + }; + + std::deque Worklist; + DenseSet Seen; + // We will traverse the BBs starting from the callsite BB. The callsite BB + // will have at least a BB ID - maybe its own, and in any case the one coming + // from the cloned function's entry BB. The other BBs we'll start seeing from + // there on may or may not have BB IDs. BBs with IDs belonging to our caller + // are definitely not coming from the imported function and form a boundary + // past which we don't need to traverse anymore. BBs may have no + // instrumentation (because we originally inserted instrumentation as per + // MST), in which case we'll traverse past them. An invariant we'll keep is + // that a BB will have at most 1 BB ID. For example, in the callsite BB, we + // will delete the callee BB's instrumentation. This doesn't result in + // information loss: the entry BB of the callee will have the same count as + // the callsite's BB. At the end of this traversal, all the callee's + // instrumentation would be mapped into the caller's instrumentation index + // space. Some of the callee's counters may be deleted (as mentioned, this + // should result in no loss of information). + Worklist.push_back(StartBB); + while (!Worklist.empty()) { + auto *BB = Worklist.front(); + Worklist.pop_front(); + bool Changed = false; + auto *BBID = CtxProfAnalysis::getBBInstrumentation(*BB); + if (BBID) { + Changed |= RewriteInstrIfNeeded(*BBID); + // this may be the entryblock from the inlined callee, coming into a BB + // that didn't have instrumentation because of MST decisions. Let's make + // sure it's placed accordingly. This is a noop elsewhere. + BBID->moveBefore(&*BB->getFirstInsertionPt()); + } + for (auto &I : llvm::make_early_inc_range(*BB)) { + if (auto *Inc = dyn_cast(&I)) { + if (Inc != BBID) { + // If we're here it means that the BB had more than 1 IDs, presumably + // some coming from the callee. We "made up our mind" to keep the + // first one (which may or may not have been originally the caller's). + // All the others are superfluous and we delete them. + Inc->eraseFromParent(); + Changed = true; + } + } else if (auto *CS = dyn_cast(&I)) { + Changed |= RewriteCallsiteInsIfNeeded(*CS); + } + } + if (!BBID || Changed) + for (auto *Succ : successors(BB)) + if (Seen.insert(Succ).second) + Worklist.push_back(Succ); + } + + assert( + llvm::all_of(CalleeCounterMap, [&](const auto &V) { return V != 0; }) && + "Counter index mapping should be either to -1 or to non-zero index, " + "because the 0 " + "index corresponds to the entry BB of the caller"); + assert( + llvm::all_of(CalleeCallsiteMap, [&](const auto &V) { return V != 0; }) && + "Callsite index mapping should be either to -1 or to non-zero index, " + "because there should have been at least a callsite - the inlined one " + "- which would have had a 0 index."); + + return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)}; +} + +// Inline. If successful, update the contextual profile (if a valid one is +// given). +// The contextual profile data is organized in trees, as follows: +// - each node corresponds to a function +// - the root of each tree corresponds to an "entrypoint" - e.g. +// RPC handler for server side +// - the path from the root to a node is a particular call path +// - the counters stored in a node are counter values observed in that +// particular call path ("context") +// - the edges between nodes are annotated with callsite IDs. +// +// Updating the contextual profile after an inlining means, at a high level, +// copying over the data of the callee, **intentionally without any value +// scaling**, and copying over the callees of the inlined callee. +llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, + CtxProfAnalysis::Result &CtxProf, + bool MergeAttributes, + AAResults *CalleeAAR, + bool InsertLifetime, + Function *ForwardVarArgsTo) { + if (!CtxProf) + return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, + ForwardVarArgsTo); + + auto &Caller = *CB.getCaller(); + auto &Callee = *CB.getCalledFunction(); + auto *StartBB = CB.getParent(); + + // Get some preliminary data about the callsite before it might get inlined. + // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to + // get this data upfront and rely less on InlineFunction's behavior. + const auto CalleeGUID = AssignGUIDPass::getGUID(Callee); + auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB); + const auto CallsiteID = + static_cast(CallsiteIDIns->getIndex()->getZExtValue()); + + const auto NumCalleeCounters = CtxProf.getNumCounters(Callee); + const auto NumCalleeCallsites = CtxProf.getNumCallsites(Callee); + + auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, + ForwardVarArgsTo); + if (!Ret.isSuccess()) + return Ret; + + // Inlining succeeded, we don't need the instrumentation of the inlined + // callsite. + CallsiteIDIns->eraseFromParent(); + + // Assinging Maps and then capturing references into it in the lambda because + // captured structured bindings are a C++20 extension. We do also need a + // capture here, though. + const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf, + NumCalleeCounters, NumCalleeCallsites); + const uint32_t NewCountersSize = CtxProf.getNumCounters(Caller); + + auto Updater = [&](PGOCtxProfContext &Ctx) { + assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); + const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps; + assert( + (Ctx.counters().size() + + llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) == + NewCountersSize) && + "The caller's counters size should have grown by the number of new " + "distinct counters inherited from the inlined callee."); + Ctx.resizeCounters(NewCountersSize); + // If the callsite wasn't exercised in this context, the value of the + // counters coming from it is 0 - which it is right now, after resizing them + // - and so we're done. + auto CSIt = Ctx.callsites().find(CallsiteID); + if (CSIt == Ctx.callsites().end()) + return; + auto CalleeCtxIt = CSIt->second.find(CalleeGUID); + // The callsite was exercised, but not with this callee (so presumably this + // is an indirect callsite). Again, we're done here. + if (CalleeCtxIt == CSIt->second.end()) + return; + + // Let's pull in the counter values and the subcontexts coming from the + // inlined callee. + auto &CalleeCtx = CalleeCtxIt->second; + assert(CalleeCtx.guid() == CalleeGUID); + + for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) { + const int64_t NewIndex = CalleeCounterMap[I]; + if (NewIndex >= 0) { + assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 " + "index, that's the caller's entry BB"); + Ctx.counters()[NewIndex] = CalleeCtx.counters()[I]; + } + } + for (auto &[I, OtherSet] : CalleeCtx.callsites()) { + const int64_t NewCSIdx = CalleeCallsiteMap[I]; + if (NewCSIdx >= 0) { + assert(NewCSIdx != 0 && + "callsite index mapping shouldn't happen to a 0 index, the " + "caller must've had at least one callsite (with such an index)"); + Ctx.ingestAllContexts(NewCSIdx, std::move(OtherSet)); + } + } + // We know the traversal is preorder, so it wouldn't have yet looked at the + // sub-contexts of this context that it's currently visiting. Meaning, the + // erase below invalidates no iterators. + auto Deleted = Ctx.callsites().erase(CallsiteID); + assert(Deleted); + (void)Deleted; + }; + CtxProf.update(Updater, &Caller); + return Ret; +} + /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. /// The program is still in a well defined state if this occurs though. diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll index 06ba8b3542f7d..5284f3a3c7c4e 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll @@ -24,7 +24,7 @@ ; RUN: -r %t/m2.bc,f1 \ ; RUN: -r %t/m2.bc,f3 \ ; RUN: -r %t/m2.bc,entrypoint,plx -; RUN: opt --passes='function-import,require,print' \ +; RUN: opt --passes='function-import,require,print' -ctx-profile-printer-level=everything \ ; RUN: -summary-file=%t/m2.bc.thinlto.bc -use-ctx-profile=%t/profile.ctxprofdata %t/m2.bc \ ; RUN: -S -o %t/m2.post.ll 2> %t/profile.txt ; RUN: diff %t/expected.txt %t/profile.txt diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll new file mode 100644 index 0000000000000..875bc4938653b --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll @@ -0,0 +1,109 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata + +; RUN: opt -passes='module-inline,print' -ctx-profile-printer-level=everything %t/module.ll -S \ +; RUN: -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=json \ +; RUN: -o - 2> %t/profile-final.txt | FileCheck %s +; RUN: %python %S/json_equals.py %t/profile-final.txt %t/expected.json + +; There are 2 calls to @a from @entrypoint. We only inline the one callsite +; marked as alwaysinline, the rest are blocked (marked noinline). After the inline, +; the updated contextual profile should still have the same tree for the non-inlined case. +; For the inlined case, we should observe, for the @entrypoint context: +; - an empty callsite where the inlined one was (first one, i.e. 0) +; - more counters appended to the old counter list (because we ingested the +; ones from @a). The values are copied. +; - a new callsite to @b +; CHECK-LABEL: @entrypoint +; CHECK-LABEL: yes: +; CHECK: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 1) +; CHECK-NEXT: br label %loop.i +; CHECK-LABEL: loop.i: +; CHECK-NEXT: %indvar.i = phi i32 [ %indvar.next.i, %loop.i ], [ 0, %yes ] +; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 2, i32 3) +; CHECK-NEXT: %b.i = add i32 %x, %indvar.i +; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 1, i32 2, ptr @b) +; CHECK-NEXT: %call3.i = call i32 @b() #1 +; CHECK-LABEL: no: +; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 2) +; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 1, ptr @a) +; CHECK-NEXT: %call2 = call i32 @a(i32 %x) #1 +; CHECK-NEXT: br label %exit + + +;--- module.ll +define i32 @entrypoint(i32 %x) !guid !0 { + call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 0) + %t = icmp eq i32 %x, 0 + br i1 %t, label %yes, label %no +yes: + call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 1) + call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 0, ptr @a) + %call1 = call i32 @a(i32 %x) alwaysinline + br label %exit +no: + call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 2) + call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 1, ptr @a) + %call2 = call i32 @a(i32 %x) noinline + br label %exit +exit: + %ret = phi i32 [%call1, %yes], [%call2, %no] + ret i32 %ret +} + +define i32 @a(i32 %x) !guid !1 { +entry: + call void @llvm.instrprof.increment(ptr @a, i64 0, i32 2, i32 0) + br label %loop +loop: + %indvar = phi i32 [%indvar.next, %loop], [0, %entry] + call void @llvm.instrprof.increment(ptr @a, i64 0, i32 2, i32 1) + %b = add i32 %x, %indvar + call void @llvm.instrprof.callsite(ptr @a, i64 0, i32 1, i32 0, ptr @b) + %call3 = call i32 @b() noinline + %indvar.next = add i32 %indvar, %call3 + %cond = icmp slt i32 %indvar.next, %x + br i1 %cond, label %loop, label %exit +exit: + ret i32 8 +} + +define i32 @b() !guid !2 { + call void @llvm.instrprof.increment(ptr @b, i64 0, i32 1, i32 0) + ret i32 1 +} + +!0 = !{i64 1000} +!1 = !{i64 1001} +!2 = !{i64 1002} +;--- profile.json +[ + { "Guid": 1000, + "Counters": [10, 2, 8], + "Callsites": [ + [ { "Guid": 1001, + "Counters": [2, 100], + "Callsites": [[{"Guid": 1002, "Counters": [100]}]]} + ], + [ { "Guid": 1001, + "Counters": [8, 500], + "Callsites": [[{"Guid": 1002, "Counters": [500]}]]} + ] + ] + } +] +;--- expected.json +[ + { "Guid": 1000, + "Counters": [10, 2, 8, 100], + "Callsites": [ + [], + [ { "Guid": 1001, + "Counters": [8, 500], + "Callsites": [[{"Guid": 1002, "Counters": [500]}]]} + ], + [{ "Guid": 1002, "Counters": [100]}] + ] + } +] diff --git a/llvm/test/Analysis/CtxProfAnalysis/json_equals.py b/llvm/test/Analysis/CtxProfAnalysis/json_equals.py new file mode 100644 index 0000000000000..8b94dda5528c5 --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/json_equals.py @@ -0,0 +1,15 @@ +import json +import sys + + +def to_json(fname: str): + with open(fname) as f: + return json.load(f) + + +a = to_json(sys.argv[1]) +b = to_json(sys.argv[2]) + +if a == b: + exit(0) +exit(1) diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll index fa09474f43315..7d92f9678e7c3 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/load.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll @@ -3,10 +3,10 @@ ; RUN: rm -rf %t ; RUN: split-file %s %t ; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata -; RUN: not opt -passes='require,print' \ -; RUN: %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE +; RUN: opt -passes='require,print' -ctx-profile-printer-level=everything \ +; RUN: %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-CTX -; RUN: not opt -passes='require,print' \ +; RUN: not opt -passes='require,print' -ctx-profile-printer-level=everything \ ; RUN: -use-ctx-profile=does_not_exist.ctxprofdata %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE ; RUN: opt -module-summary -passes='thinlto-pre-link' \ @@ -14,11 +14,12 @@ ; RUN: opt -module-summary -passes='thinlto-pre-link' -use-ctx-profile=%t/profile.ctxprofdata \ ; RUN: %t/example.ll -S -o %t/prelink.ll -; RUN: opt -passes='require,print' \ +; RUN: opt -passes='require,print' -ctx-profile-printer-level=everything \ ; RUN: -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S 2> %t/output.txt ; RUN: diff %t/expected-profile-output.txt %t/output.txt ; NO-FILE: error: could not open contextual profile file +; NO-CTX: No contextual profile was provided ; ; This is the reference profile, laid out in the format the json formatter will ; output it from opt. diff --git a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp index 36c64b9f333d7..dcb1c10433ccf 100644 --- a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp @@ -570,8 +570,7 @@ define i32 @f4() !guid !3 { std::string Str; raw_string_ostream OS(Str); - CtxProfAnalysisPrinterPass Printer( - OS, CtxProfAnalysisPrinterPass::PrintMode::JSON); + CtxProfAnalysisPrinterPass Printer(OS); Printer.run(*M, MAM); const char *Expected = R"json( [