From f9abf531d4b3405c117e24f91af99db7364373ca Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Thu, 17 Oct 2024 22:42:36 +0000 Subject: [PATCH 1/6] Precommit test for "[LTO] Run Argument Promotion before IPSCCP" --- .../PhaseOrdering/lto-argpromotion-ipsccp.ll | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll diff --git a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll new file mode 100644 index 0000000000000..1ed523dfa7a90 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='lto' -S < %s | FileCheck %s + +; FIXME: We should be able to propagate the constants from @parent to @child. + +define void @parent(ptr %p) { +; CHECK-LABEL: define void @parent( +; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: tail call fastcc void @child(ptr [[P]], i32 1024, i32 5) +; CHECK-NEXT: ret void +; + %c = alloca i32 + store i32 5, ptr %c + %n = alloca i32 + store i32 1024, ptr %n + call void @child(ptr %p, ptr %n, ptr %c) + ret void +} + +define internal void @child(ptr %p, ptr %n, ptr %c) noinline { +; CHECK-LABEL: define internal fastcc void @child( +; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[N_0_VAL:%.*]], i32 [[C_0_VAL:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_COND:.*]] +; CHECK: [[FOR_COND]]: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[N_0_VAL]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i32, ptr [[P]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], [[C_0_VAL]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label %[[FOR_COND]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %n.val = load i32, ptr %n + %cmp = icmp ne i32 %i.0, %n.val + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + br label %for.end + +for.body: + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + %c.val = load i32, ptr %c + %mul = mul i32 %0, %c.val + store i32 %mul, ptr %arrayidx, align 4 + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret void +} + From e94b2b9fe6efd2bedf659867a357193e61a2b216 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 24 Sep 2024 16:12:29 +0000 Subject: [PATCH 2/6] Reland "[LTO] Run Argument Promotion before IPSCCP" (#111839) Run ArgumentPromotion before IPSCCP in the LTO pipeline, to expose more constants to be propagated. We also run PostOrderFunctionAttrs to improve the information available to ArgumentPromotion's alias analysis, and SROA to clean up allocas. 
--- llvm/lib/Passes/PassBuilderPipelines.cpp | 9 +++++++++ llvm/test/Other/new-pm-lto-defaults.ll | 9 ++++++--- .../PhaseOrdering/lto-argpromotion-ipsccp.ll | 10 +++++----- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 17710eb94b6de..0a9a40d297883 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1828,6 +1828,15 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); + // Promoting by-reference arguments to by-value exposes more constants to + // IPSCCP. + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true))); + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); + MPM.addPass( + createModuleToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. 
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 5543472df685b..2dd754ecef4d7 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -41,14 +41,17 @@ ; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion ; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass -; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo -; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> +; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass +; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O23SZ-NEXT: Running pass: SROAPass +; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo +; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass +; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: AAManager ; CHECK-O-NEXT: Running analysis: BasicAA diff --git a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll index 1ed523dfa7a90..72921acba5969 100644 --- a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll +++ b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes='lto' -S < %s | FileCheck %s -; FIXME: We should be able to propagate the constants from @parent to @child. 
+; We should be able to propagate the constants from @parent to @child. define void @parent(ptr %p) { ; CHECK-LABEL: define void @parent( ; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: tail call fastcc void @child(ptr [[P]], i32 1024, i32 5) +; CHECK-NEXT: tail call fastcc void @child(ptr [[P]]) ; CHECK-NEXT: ret void ; %c = alloca i32 @@ -19,18 +19,18 @@ define void @parent(ptr %p) { define internal void @child(ptr %p, ptr %n, ptr %c) noinline { ; CHECK-LABEL: define internal fastcc void @child( -; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[N_0_VAL:%.*]], i32 [[C_0_VAL:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ptr nocapture [[P:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[FOR_COND:.*]] ; CHECK: [[FOR_COND]]: ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC:.*]] ] -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[N_0_VAL]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], 1024 ; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]] ; CHECK: [[FOR_INC]]: ; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], [[C_0_VAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], 5 ; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label %[[FOR_COND]] From b776bb36a4c7bf1afa4b98c04e49fe3b2395fa9c Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Fri, 18 Oct 2024 23:14:13 +0000 Subject: [PATCH 3/6] Move POFA earlier instead of running twice (For >O1) --- llvm/lib/Passes/PassBuilderPipelines.cpp | 13 ++++++++----- llvm/test/Other/new-pm-lto-defaults.ll | 12 +++++------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git 
a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 0a9a40d297883..651a16e1f2ae6 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1830,8 +1830,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Promoting by-reference arguments to by-value exposes more constants to // IPSCCP. - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true))); + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); MPM.addPass( @@ -1849,9 +1849,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(CalledValuePropagationPass()); } - // Now deduce any function attributes based in the current code. - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); + // For higher optimization levels this Pass has just run, so don't repeat it. + if (Level.getSpeedupLevel() == 1) { + // Now deduce any function attributes based on the current code. + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); + } // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. 
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 2dd754ecef4d7..3a49903be429c 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -46,21 +46,19 @@ ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> -; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass -; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass -; CHECK-O23SZ-NEXT: Running pass: SROAPass -; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo -; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass -; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: AAManager ; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O1-NEXT: Running analysis: AssumptionAnalysis on foo +; CHECK-O: Running analysis: AssumptionAnalysis on foo ; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O1-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA ; CHECK-O-NEXT: Running analysis: TypeBasedAA ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O23SZ-NEXT: Running pass: SROAPass +; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass +; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: GlobalSplitPass ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass From 8466049b4e7018774de5f4c2b5668a733a5270e5 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 5 Nov 2024 11:13:39 +0000 Subject: [PATCH 4/6] Don't bother running POFA Pass at -O1 --- llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ------ llvm/test/Other/new-pm-lto-defaults.ll | 27 
++++++++++++------------ 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 651a16e1f2ae6..93175568cfb47 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1849,13 +1849,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(CalledValuePropagationPass()); } - // For higher optimization levels this Pass has just run, so don't repeat it. - if (Level.getSpeedupLevel() == 1) { - // Now deduce any function attributes based on the current code. - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); - } - // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. // FIXME: Is this really an optimization rather than a canonicalization? diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 3a49903be429c..36a4f4784f6b7 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -41,25 +41,24 @@ ; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion ; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis -; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> -; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass -; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O: Running analysis: AssumptionAnalysis on foo -; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis -; CHECK-O1-NEXT: Running analysis: DominatorTreeAnalysis -; CHECK-O-NEXT: 
Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC +; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> +; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass +; CHECK-O23SZ-NEXT: Running analysis: AAManager +; CHECK-O23SZ-NEXT: Running analysis: BasicAA +; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: ScopedNoAliasAA +; CHECK-O23SZ-NEXT: Running analysis: TypeBasedAA +; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O23SZ-NEXT: Running pass: SROAPass ; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass ; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass +; CHECK-O1-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: GlobalSplitPass ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass From b2e6b6ef78037dd51a7c03abd6ddb67ab4cb7507 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 5 Nov 2024 14:27:46 +0000 Subject: [PATCH 5/6] Use a single CGSCCPassManager --- llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 93175568cfb47..4c61f9d192093 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1830,10 +1830,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Promoting by-reference 
arguments to by-value exposes more constants to // IPSCCP. - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); + CGSCCPassManager CGPM; + CGPM.addPass(PostOrderFunctionAttrsPass()); + CGPM.addPass(ArgumentPromotionPass()); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); MPM.addPass( createModuleToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); From 034acb1642677614572a04eaebd5eed1fdf9fbbc Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 5 Nov 2024 15:40:36 +0000 Subject: [PATCH 6/6] Also run SROA in the CGSCCPassManager --- llvm/lib/Passes/PassBuilderPipelines.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 4c61f9d192093..2569d2c34a68f 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1833,9 +1833,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, CGSCCPassManager CGPM; CGPM.addPass(PostOrderFunctionAttrsPass()); CGPM.addPass(ArgumentPromotionPass()); + CGPM.addPass( + createCGSCCToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); - MPM.addPass( - createModuleToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function