diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e765bbf637a66..64f0020a170aa 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -643,7 +643,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, // Ensure we lower KCFI operand bundles with -O0. PB.registerOptimizerLastEPCallback( - [&](ModulePassManager &MPM, OptimizationLevel Level) { + [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) { if (Level == OptimizationLevel::O0 && LangOpts.Sanitize.has(SanitizerKind::KCFI)) MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass())); @@ -662,8 +662,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { - auto SanitizersCallback = [&](ModulePassManager &MPM, - OptimizationLevel Level) { + auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level, + ThinOrFullLTOPhase) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); MPM.addPass(SanitizerCoveragePass( @@ -749,7 +749,7 @@ static void addSanitizers(const Triple &TargetTriple, PB.registerOptimizerEarlyEPCallback( [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) { ModulePassManager NewMPM; - SanitizersCallback(NewMPM, Level); + SanitizersCallback(NewMPM, Level, ThinOrFullLTOPhase::None); if (!NewMPM.isEmpty()) { // Sanitizers can abandon. NewMPM.addPass(RequireAnalysisPass()); @@ -1018,11 +1018,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // TODO: Consider passing the MemoryProfileOutput to the pass builder via // the PGOOptions, and set this up there. if (!CodeGenOpts.MemoryProfileOutput.empty()) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel Level) { - MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); - MPM.addPass(ModuleMemProfilerPass()); - }); + PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM, + OptimizationLevel Level, + ThinOrFullLTOPhase) { + MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); + MPM.addPass(ModuleMemProfilerPass()); + }); } if (CodeGenOpts.FatLTO) { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 474a19531ff5d..4c2763404ff05 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -497,7 +497,8 @@ class PassBuilder { /// This extension point allows adding optimizations at the very end of the /// function optimization pipeline. void registerOptimizerLastEPCallback( - const std::function &C) { + const std::function &C) { OptimizerLastEPCallbacks.push_back(C); } @@ -630,7 +631,8 @@ class PassBuilder { void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level); void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level); + OptimizationLevel Level, + ThinOrFullLTOPhase Phase); void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level); void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, @@ -755,7 +757,9 @@ class PassBuilder { // Module callbacks SmallVector, 2> OptimizerEarlyEPCallbacks; - SmallVector, 2> + SmallVector, + 2> OptimizerLastEPCallbacks; SmallVector, 2> FullLinkTimeOptimizationEarlyEPCallbacks; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 757b20dcd6693..5827a485879d3 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -365,9 +365,10 @@ void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, C(MPM, Level); } void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level) { + OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { for (auto &C : OptimizerLastEPCallbacks) - C(MPM, Level); + C(MPM, Level, Phase); } void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( ModulePassManager &MPM, OptimizationLevel Level) { @@ -1524,7 +1525,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), PTO.EagerlyInvalidateAnalyses)); - invokeOptimizerLastEPCallbacks(MPM, Level); + invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase); // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff @@ -1671,7 +1672,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // optimization is going to be done in PostLink stage, but clang can't add // callbacks there in case of in-process ThinLTO called by linker. invokeOptimizerEarlyEPCallbacks(MPM, Level); - invokeOptimizerLastEPCallbacks(MPM, Level); + invokeOptimizerLastEPCallbacks(MPM, Level, + ThinOrFullLTOPhase::ThinLTOPreLink); // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -2159,7 +2161,7 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, CoroPM.addPass(GlobalDCEPass()); MPM.addPass(CoroConditionalWrapper(std::move(CoroPM))); - invokeOptimizerLastEPCallbacks(MPM, Level); + invokeOptimizerLastEPCallbacks(MPM, Level, ThinOrFullLTOPhase::None); if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 46cc5f349555a..50aef36724f70 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -287,8 +287,13 @@ class AMDGPUAttributorPass : public PassInfoMixin { private: TargetMachine &TM; + /// Asserts whether we can assume whole program visibility. + bool HasWholeProgramVisibility = false; + public: - AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){}; + AMDGPUAttributorPass(TargetMachine &TM, + bool HasWholeProgramVisibility = false) + : TM(TM), HasWholeProgramVisibility(HasWholeProgramVisibility) {}; PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 9d3c9e1e2ef9f..ab98da31b050f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1023,7 +1023,8 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) { } } -static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { +static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, + bool HasWholeProgramVisibility) { SetVector Functions; for (Function &F : M) { if (!F.isIntrinsic()) @@ -1038,9 +1039,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, - &AAUnderlyingObjects::ID}); + &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID}); AttributorConfig AC(CGUpdater); + AC.IsClosedWorldModule = HasWholeProgramVisibility; AC.Allowed = &Allowed; AC.IsModulePass = true; AC.DefaultInitializeLiveInternals = false; @@ -1086,7 +1088,7 @@ class AMDGPUAttributorLegacy : public ModulePass { bool runOnModule(Module &M) override { AnalysisGetter AG(this); - return runImpl(M, AG, *TM); + return runImpl(M, AG, *TM, /*HasWholeProgramVisibility=*/false); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -1107,8 +1109,9 @@ PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M, AnalysisGetter AG(FAM); // TODO: Probably preserves CFG - return runImpl(M, AG, TM) ? PreservedAnalyses::none() - : PreservedAnalyses::all(); + return runImpl(M, AG, TM, HasWholeProgramVisibility) + ? PreservedAnalyses::none() + : PreservedAnalyses::all(); } char AMDGPUAttributorLegacy::ID = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index c8fb68d1c0b0c..50cc2d871d4ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -735,12 +735,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { }); // FIXME: Why is AMDGPUAttributor not in CGSCC? - PB.registerOptimizerLastEPCallback( - [this](ModulePassManager &MPM, OptimizationLevel Level) { - if (Level != OptimizationLevel::O0) { - MPM.addPass(AMDGPUAttributorPass(*this)); - } - }); + PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM, + OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { + if (Level != OptimizationLevel::O0) { + MPM.addPass(AMDGPUAttributorPass( + *this, Phase == ThinOrFullLTOPhase::FullLTOPostLink || + Phase == ThinOrFullLTOPhase::ThinLTOPostLink)); + } + }); PB.registerFullLinkTimeOptimizationLastEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index 33b1cc65dc569..e5d440b96349f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -231,7 +231,19 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) { ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr( ; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty -; CHECK-NEXT: call void [[FPTR]]() +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: call void @also_empty() +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 3: +; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]] +; CHECK: 4: +; CHECK-NEXT: call void @empty() +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 5: +; CHECK-NEXT: unreachable +; CHECK: 6: ; CHECK-NEXT: ret void ; %fptr = select i1 %cond, ptr @empty, ptr @also_empty diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 374698083763b..522a8c06d83c0 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -310,7 +310,7 @@ static void registerEPCallbacks(PassBuilder &PB) { }); if (tryParsePipelineText(PB, OptimizerLastEPPipeline)) PB.registerOptimizerLastEPCallback( - [&PB](ModulePassManager &PM, OptimizationLevel) { + [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) { ExitOnError Err("Unable to parse OptimizerLastEP pipeline: "); Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline)); });