diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 17710eb94b6de..2569d2c34a68f 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1828,6 +1828,15 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); + // Promoting by-reference arguments to by-value exposes more constants to + // IPSCCP. + CGSCCPassManager CGPM; + CGPM.addPass(PostOrderFunctionAttrsPass()); + CGPM.addPass(ArgumentPromotionPass()); + CGPM.addPass( + createCGSCCToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. @@ -1840,10 +1849,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(CalledValuePropagationPass()); } - // Now deduce any function attributes based in the current code. - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); - // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. // FIXME: Is this really an optimization rather than a canonicalization? 
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 5543472df685b..36a4f4784f6b7 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -41,24 +41,24 @@ ; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion ; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass +; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC +; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> +; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass +; CHECK-O23SZ-NEXT: Running analysis: AAManager +; CHECK-O23SZ-NEXT: Running analysis: BasicAA ; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: ScopedNoAliasAA +; CHECK-O23SZ-NEXT: Running analysis: TypeBasedAA +; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O23SZ-NEXT: Running pass: SROAPass +; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass ; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis -; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> -; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass -; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O1-NEXT: Running analysis: AssumptionAnalysis on foo -; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis -; CHECK-O1-NEXT: 
Running analysis: DominatorTreeAnalysis -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass +; CHECK-O1-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: GlobalSplitPass ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass diff --git a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll new file mode 100644 index 0000000000000..72921acba5969 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='lto<O3>' -S < %s | FileCheck %s + +; We should be able to propagate the constants from @parent to @child. 
+ +define void @parent(ptr %p) { +; CHECK-LABEL: define void @parent( +; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: tail call fastcc void @child(ptr [[P]]) +; CHECK-NEXT: ret void +; + %c = alloca i32 + store i32 5, ptr %c + %n = alloca i32 + store i32 1024, ptr %n + call void @child(ptr %p, ptr %n, ptr %c) + ret void +} + +define internal void @child(ptr %p, ptr %n, ptr %c) noinline { +; CHECK-LABEL: define internal fastcc void @child( +; CHECK-SAME: ptr nocapture [[P:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_COND:.*]] +; CHECK: [[FOR_COND]]: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], 1024 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], 5 +; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label %[[FOR_COND]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %n.val = load i32, ptr %n + %cmp = icmp ne i32 %i.0, %n.val + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + br label %for.end + +for.body: + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + %c.val = load i32, ptr %c + %mul = mul i32 %0, %c.val + store i32 %mul, ptr %arrayidx, align 4 + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret void +} +