diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a6118726945e8..adebbb5eeba32 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -304,6 +304,10 @@ static cl::opt<bool> UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); +static cl::opt<std::string> + UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, + cl::desc("Use the specified contextual profile file")); + namespace llvm { extern cl::opt<bool> EnableMemProfContextDisambiguation; @@ -1176,8 +1180,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Enable contextual profiling instrumentation. const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink && PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt && + Phase == ThinOrFullLTOPhase::ThinLTOPreLink; - if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen) + if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen || + IsCtxProfUse) addPreInlinerPasses(MPM, Level, Phase); // Add all the requested passes for instrumentation PGO, if requested. @@ -1187,8 +1194,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, PGOOpt->FS); - } else if (IsCtxProfGen) { + } else if (IsCtxProfGen || IsCtxProfUse) { MPM.addPass(PGOInstrumentationGen(false)); + // In pre-link, we just want the instrumented IR. We use the contextual + // profile in the post-thinlink phase. + // The instrumentation will be removed in post-thinlink after IPO. + if (IsCtxProfUse) + return MPM; addPostPGOLoopRotation(MPM, Level); MPM.addPass(PGOCtxProfLoweringPass()); } @@ -1655,6 +1667,11 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // can.
MPM.addPass(buildModuleSimplificationPipeline( Level, ThinOrFullLTOPhase::ThinLTOPreLink)); + // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let + // ThinLTO use the contextual info to perform imports, then use the contextual + // profile in the post-thinlink phase. + if (!UseCtxProfile.empty() && !PGOOpt) + return MPM; // Run partial inlining pass to partially inline functions that have // large bodies. diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll new file mode 100644 index 0000000000000..b50a815be5abf --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; There is no profile file, but that's OK because the pre-link phase does not +; care about the content of the profile, just that we intend to use one. +; There is currently no scenario for using a contextual profile without ThinLTO.
+; +; RUN: opt -passes='thinlto-pre-link' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s + +declare void @bar() + +define void @foo(i32 %a, ptr %fct) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]] +; CHECK: [[YES]]: +; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64 +; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0) +; CHECK-NEXT: call void [[FCT]](i32 0) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[NO]]: +; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0) +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) ; indirect call: the expected output has a value-profile intrinsic on the callee pointer before it + br label %exit +no: + call void @bar() ; direct call: the expected output has only the block counter increment before it + br label %exit +exit: + ret void +}