diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index ff5af5988656a..e4972cb1cef84 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -176,7 +176,7 @@ class InstCostVisitor : public InstVisitor { SCCPSolver &Solver) : GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {} - bool isBlockExecutable(BasicBlock *BB) { + bool isBlockExecutable(BasicBlock *BB) const { return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB); } @@ -189,8 +189,9 @@ class InstCostVisitor : public InstVisitor { private: friend class InstVisitor; - static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ, - DenseSet &DeadBlocks); + Constant *findConstantFor(Value *V) const; + + bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const; Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr, Constant *C = nullptr); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 17d8283255d24..96956481df2f6 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -90,13 +90,12 @@ static cl::opt SpecializeLiteralConstant( "Enable specialization of functions that take a literal constant as an " "argument")); -bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ, - DenseSet &DeadBlocks) { +bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, + BasicBlock *Succ) const { unsigned I = 0; - return all_of(predecessors(Succ), - [&I, BB, Succ, &DeadBlocks] (BasicBlock *Pred) { + return all_of(predecessors(Succ), [&I, BB, Succ, this](BasicBlock *Pred) { return I++ < MaxBlockPredecessors && - (Pred == BB || Pred == Succ || DeadBlocks.contains(Pred)); + (Pred == BB || Pred == Succ || !isBlockExecutable(Pred)); }); } @@ -116,6 +115,7 @@ Cost InstCostVisitor::estimateBasicBlocks( // These blocks are considered dead as far as the InstCostVisitor // is concerned. They haven't been proven dead yet by the Solver, // but may become if we propagate the specialization arguments. + assert(Solver.isBlockExecutable(BB) && "BB already found dead by IPSCCP!"); if (!DeadBlocks.insert(BB).second) continue; @@ -134,16 +134,17 @@ Cost InstCostVisitor::estimateBasicBlocks( // Keep adding dead successors to the list as long as they are // executable and only reachable from dead blocks. for (BasicBlock *SuccBB : successors(BB)) - if (isBlockExecutable(SuccBB) && - canEliminateSuccessor(BB, SuccBB, DeadBlocks)) + if (isBlockExecutable(SuccBB) && canEliminateSuccessor(BB, SuccBB)) WorkList.push_back(SuccBB); } return CodeSize; } -static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) { +Constant *InstCostVisitor::findConstantFor(Value *V) const { if (auto *C = dyn_cast(V)) return C; + if (auto *C = Solver.getConstantOrNull(V)) + return C; return KnownConstants.lookup(V); } @@ -266,7 +267,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { for (const auto &Case : I.cases()) { BasicBlock *BB = Case.getCaseSuccessor(); if (BB != Succ && isBlockExecutable(BB) && - canEliminateSuccessor(I.getParent(), BB, DeadBlocks)) + canEliminateSuccessor(I.getParent(), BB)) WorkList.push_back(BB); } @@ -283,8 +284,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { // Initialize the worklist with the dead successor as long as // it is executable and has a unique predecessor. SmallVector WorkList; - if (isBlockExecutable(Succ) && - canEliminateSuccessor(I.getParent(), Succ, DeadBlocks)) + if (isBlockExecutable(Succ) && canEliminateSuccessor(I.getParent(), Succ)) WorkList.push_back(Succ); return estimateBasicBlocks(WorkList); @@ -312,10 +312,10 @@ bool InstCostVisitor::discoverTransitivelyIncomingValues( // Disregard self-references and dead incoming values. if (auto *Inst = dyn_cast(V)) - if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I))) + if (Inst == PN || !isBlockExecutable(PN->getIncomingBlock(I))) continue; - if (Constant *C = findConstantFor(V, KnownConstants)) { + if (Constant *C = findConstantFor(V)) { // Not all incoming values are the same constant. Bail immediately. if (C != Const) return false; @@ -347,10 +347,10 @@ Constant *InstCostVisitor::visitPHINode(PHINode &I) { // Disregard self-references and dead incoming values. if (auto *Inst = dyn_cast(V)) - if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx))) + if (Inst == &I || !isBlockExecutable(I.getIncomingBlock(Idx))) continue; - if (Constant *C = findConstantFor(V, KnownConstants)) { + if (Constant *C = findConstantFor(V)) { if (!Const) Const = C; // Not all incoming values are the same constant. Bail immediately. @@ -415,7 +415,7 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) { for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) { Value *V = I.getOperand(Idx); - Constant *C = findConstantFor(V, KnownConstants); + Constant *C = findConstantFor(V); if (!C) return nullptr; Operands.push_back(C); @@ -439,7 +439,7 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) { Value *V = I.getOperand(Idx); - Constant *C = findConstantFor(V, KnownConstants); + Constant *C = findConstantFor(V); if (!C) return nullptr; Operands.push_back(C); @@ -455,9 +455,9 @@ Constant *InstCostVisitor::visitSelectInst(SelectInst &I) { if (I.getCondition() == LastVisited->first) { Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue() : I.getTrueValue(); - return findConstantFor(V, KnownConstants); + return findConstantFor(V); } - if (Constant *Condition = findConstantFor(I.getCondition(), KnownConstants)) + if (Constant *Condition = findConstantFor(I.getCondition())) if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) || (I.getFalseValue() == LastVisited->first && Condition->isZeroValue())) return LastVisited->second; @@ -475,7 +475,7 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { Constant *Const = LastVisited->second; bool ConstOnRHS = I.getOperand(1) == LastVisited->first; Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1); - Constant *Other = findConstantFor(V, KnownConstants); + Constant *Other = findConstantFor(V); if (Other) { if (ConstOnRHS) @@ -503,7 +503,7 @@ Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) { bool ConstOnRHS = I.getOperand(1) == LastVisited->first; Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1); - Constant *Other = findConstantFor(V, KnownConstants); + Constant *Other = findConstantFor(V); Value *OtherVal = Other ? Other : V; Value *ConstVal = LastVisited->second; diff --git a/llvm/test/Transforms/FunctionSpecialization/solver-constants.ll b/llvm/test/Transforms/FunctionSpecialization/solver-constants.ll new file mode 100644 index 0000000000000..516fd5cb49bb7 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/solver-constants.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ +; RUN: -funcspec-for-literal-constant=true \ +; RUN: -funcspec-min-codesize-savings=50 \ +; RUN: -funcspec-min-latency-savings=0 \ +; RUN: -S < %s | FileCheck %s + +; Verify that we are able to estimate the codesize savings arising from a branch +; based on a binary operator, where one operand is already found constant by +; IPSCCP. +define i32 @main(i1 %flag) { + %notspec = call i32 @test(i1 %flag, i1 false) + %spec = call i32 @test(i1 false, i1 false) + %sum = add i32 %notspec, %spec + ret i32 %sum +} + +define internal i32 @test(i1 %argflag, i1 %constflag) { +entry: + %cond = or i1 %argflag, %constflag + br i1 %cond, label %if.then, label %if.end + +if.then: + call void @do_something() + call void @do_something() + call void @do_something() + call void @do_something() + br label %if.end + +if.end: + %res = phi i32 [ 0, %entry ], [ 1, %if.then] + ret i32 %res +} + +declare void @do_something() +; CHECK-LABEL: define range(i32 0, 2) i32 @main( +; CHECK-SAME: i1 [[FLAG:%.*]]) { +; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @test(i1 [[FLAG]], i1 false) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @test.specialized.1(i1 false, i1 false) +; CHECK-NEXT: [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0 +; CHECK-NEXT: ret i32 [[SUM]] +; +; +; CHECK-LABEL: define internal range(i32 0, 2) i32 @test( +; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[COND:%.*]] = or i1 [[ARGFLAG]], false +; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +; +; CHECK-LABEL: define internal i32 @test.specialized.1( +; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[IF_END:.*]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: ret i32 poison +; diff --git a/llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll b/llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll new file mode 100644 index 0000000000000..05368e934ebb6 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ +; RUN: -funcspec-for-literal-constant=true \ +; RUN: -funcspec-min-codesize-savings=50 \ +; RUN: -funcspec-min-latency-savings=0 \ +; RUN: -S < %s | FileCheck %s + +; Verify that we are able to estimate the codesize savings arising from a block +; which is found dead, where the block has a predecessor that was found dead by +; IPSCCP. +define i32 @main(i1 %flag) { + %notspec = call i32 @test(i1 %flag, i1 true) + %spec = call i32 @test(i1 true, i1 true) + %sum = add i32 %notspec, %spec + ret i32 %sum +} + +define internal i32 @test(i1 %argflag, i1 %constflag) { +entry: + br i1 %argflag, label %block1, label %block3 + +block1: + br i1 %constflag, label %end, label %block2 + +block2: + br label %block3 + +block3: + call void @do_something() + call void @do_something() + call void @do_something() + call void @do_something() + br label %end + +end: + %res = phi i32 [ 0, %block1 ], [ 1, %block3] + ret i32 %res +} + +declare void @do_something() +; CHECK-LABEL: define range(i32 0, 2) i32 @main( +; CHECK-SAME: i1 [[FLAG:%.*]]) { +; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @test(i1 [[FLAG]], i1 true) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @test.specialized.1(i1 true, i1 true) +; CHECK-NEXT: [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0 +; CHECK-NEXT: ret i32 [[SUM]] +; +; +; CHECK-LABEL: define internal range(i32 0, 2) i32 @test( +; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[ARGFLAG]], label %[[BLOCK1:.*]], label %[[BLOCK3:.*]] +; CHECK: [[BLOCK1]]: +; CHECK-NEXT: br label %[[END:.*]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, %[[BLOCK1]] ], [ 1, %[[BLOCK3]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +; +; CHECK-LABEL: define internal i32 @test.specialized.1( +; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[BLOCK1:.*]] +; CHECK: [[BLOCK1]]: +; CHECK-NEXT: br label %[[END:.*]] +; CHECK: [[END]]: +; CHECK-NEXT: ret i32 poison +;