Skip to content

Commit 21e3a21

Browse files
authored
[InstCombine] Replace an integer comparison of a phi node with multiple ucmp/scmp operands and a constant with phi of individual comparisons of original intrinsic's arguments (#107769)
When a `phi` instruction has more than one incoming value that is a call to `ucmp` or `scmp`, and that `phi` is then compared with an integer constant, we can move the comparison through the `phi` into the incoming basic blocks, because we know that a comparison of `ucmp`/`scmp` with a constant will be simplified by the next iteration of InstCombine. There's a high chance that other similar patterns can be identified, in which case they can easily be handled by the same code by moving the check for "simplifiable" instructions into a lambda.
1 parent 29e5fe7 commit 21e3a21

File tree

2 files changed

+185
-38
lines changed

2 files changed

+185
-38
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+50-38
Original file line number | Diff line number | Diff line change
@@ -1809,8 +1809,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
18091809
// Check to see whether the instruction can be folded into each phi operand.
18101810
// If there is one operand that does not fold, remember the BB it is in.
18111811
SmallVector<Value *> NewPhiValues;
1812-
BasicBlock *NonSimplifiedBB = nullptr;
1813-
Value *NonSimplifiedInVal = nullptr;
1812+
SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1813+
bool SeenNonSimplifiedInVal = false;
18141814
for (unsigned i = 0; i != NumPHIValues; ++i) {
18151815
Value *InVal = PN->getIncomingValue(i);
18161816
BasicBlock *InBB = PN->getIncomingBlock(i);
@@ -1820,35 +1820,64 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
18201820
continue;
18211821
}
18221822

1823-
if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1823+
// If the only use of phi is comparing it with a constant then we can
1824+
// put this comparison in the incoming BB directly after a ucmp/scmp call
1825+
// because we know that it will simplify to a single icmp.
1826+
// NOTE: the single-use check here is not only to ensure that the
1827+
// optimization is profitable, but also to avoid creating a potentially
1828+
// invalid phi node when we have a multi-edge in the CFG.
1829+
const APInt *Ignored;
1830+
if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUse() &&
1831+
match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
1832+
OpsToMoveUseToIncomingBB.push_back(i);
1833+
NewPhiValues.push_back(nullptr);
1834+
continue;
1835+
}
1836+
1837+
if (SeenNonSimplifiedInVal)
1838+
return nullptr; // More than one non-simplified value.
1839+
SeenNonSimplifiedInVal = true;
1840+
1841+
// If there is exactly one non-simplified value, we can insert a copy of the
1842+
// operation in that block. However, if this is a critical edge, we would
1843+
// be inserting the computation on some other paths (e.g. inside a loop).
1844+
// Only do this if the pred block is unconditionally branching into the phi
1845+
// block. Also, make sure that the pred block is not dead code.
1846+
BranchInst *BI = dyn_cast<BranchInst>(InBB->getTerminator());
1847+
if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(InBB))
1848+
return nullptr;
18241849

1825-
NonSimplifiedBB = InBB;
1826-
NonSimplifiedInVal = InVal;
18271850
NewPhiValues.push_back(nullptr);
1851+
OpsToMoveUseToIncomingBB.push_back(i);
18281852

18291853
// If the InVal is an invoke at the end of the pred block, then we can't
18301854
// insert a computation after it without breaking the edge.
18311855
if (isa<InvokeInst>(InVal))
1832-
if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1856+
if (cast<Instruction>(InVal)->getParent() == InBB)
18331857
return nullptr;
18341858

18351859
// Do not push the operation across a loop backedge. This could result in
18361860
// an infinite combine loop, and is generally non-profitable (especially
18371861
// if the operation was originally outside the loop).
1838-
if (isBackEdge(NonSimplifiedBB, PN->getParent()))
1862+
if (isBackEdge(InBB, PN->getParent()))
18391863
return nullptr;
18401864
}
18411865

1842-
// If there is exactly one non-simplified value, we can insert a copy of the
1843-
// operation in that block. However, if this is a critical edge, we would be
1844-
// inserting the computation on some other paths (e.g. inside a loop). Only
1845-
// do this if the pred block is unconditionally branching into the phi block.
1846-
// Also, make sure that the pred block is not dead code.
1847-
if (NonSimplifiedBB != nullptr) {
1848-
BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1849-
if (!BI || !BI->isUnconditional() ||
1850-
!DT.isReachableFromEntry(NonSimplifiedBB))
1851-
return nullptr;
1866+
// Clone the instruction that uses the phi node and move it into the incoming
1867+
// BB because we know that the next iteration of InstCombine will simplify it.
1868+
for (auto OpIndex : OpsToMoveUseToIncomingBB) {
1869+
Value *Op = PN->getIncomingValue(OpIndex);
1870+
BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
1871+
1872+
Instruction *Clone = I.clone();
1873+
for (Use &U : Clone->operands()) {
1874+
if (U == PN)
1875+
U = Op;
1876+
else
1877+
U = U->DoPHITranslation(PN->getParent(), OpBB);
1878+
}
1879+
Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
1880+
NewPhiValues[OpIndex] = Clone;
18521881
}
18531882

18541883
// Okay, we can do the transformation: create the new PHI node.
@@ -1857,30 +1886,13 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
18571886
NewPN->takeName(PN);
18581887
NewPN->setDebugLoc(PN->getDebugLoc());
18591888

1860-
// If we are going to have to insert a new computation, do so right before the
1861-
// predecessor's terminator.
1862-
Instruction *Clone = nullptr;
1863-
if (NonSimplifiedBB) {
1864-
Clone = I.clone();
1865-
for (Use &U : Clone->operands()) {
1866-
if (U == PN)
1867-
U = NonSimplifiedInVal;
1868-
else
1869-
U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1870-
}
1871-
InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1872-
}
1873-
1874-
for (unsigned i = 0; i != NumPHIValues; ++i) {
1875-
if (NewPhiValues[i])
1876-
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1877-
else
1878-
NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1879-
}
1889+
for (unsigned i = 0; i != NumPHIValues; ++i)
1890+
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
18801891

18811892
for (User *U : make_early_inc_range(PN->users())) {
18821893
Instruction *User = cast<Instruction>(U);
1883-
if (User == &I) continue;
1894+
if (User == &I)
1895+
continue;
18841896
replaceInstUsesWith(*User, NewPN);
18851897
eraseInstFromFunction(*User);
18861898
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,135 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
declare void @use(i8 %value);
5+
6+
; Since we know that any comparison of ucmp/scmp with a constant will result in
7+
; a comparison of ucmp/scmp's operands, we can propagate such a comparison
8+
; through the phi node and let the next iteration of instcombine simplify it.
9+
define i1 @icmp_of_phi_of_scmp_with_constant(i1 %c, i16 %x, i16 %y)
10+
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant(
11+
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
14+
; CHECK: [[TRUE]]:
15+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
16+
; CHECK-NEXT: br label %[[EXIT:.*]]
17+
; CHECK: [[FALSE]]:
18+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y]], [[X]]
19+
; CHECK-NEXT: br label %[[EXIT]]
20+
; CHECK: [[EXIT]]:
21+
; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
22+
; CHECK-NEXT: ret i1 [[R]]
23+
;
24+
{
25+
entry:
26+
br i1 %c, label %true, label %false
27+
true:
28+
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
29+
br label %exit
30+
false:
31+
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
32+
br label %exit
33+
exit:
34+
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
35+
%r = icmp slt i8 %phi, 0
36+
ret i1 %r
37+
}
38+
39+
; When one of the incoming values is ucmp/scmp and the other is not we can still perform the transformation
40+
define i1 @icmp_of_phi_of_one_scmp_with_constant(i1 %c, i16 %x, i16 %y, i8 %false_val)
41+
; CHECK-LABEL: define i1 @icmp_of_phi_of_one_scmp_with_constant(
42+
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) {
43+
; CHECK-NEXT: [[ENTRY:.*:]]
44+
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
45+
; CHECK: [[TRUE]]:
46+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
47+
; CHECK-NEXT: br label %[[EXIT:.*]]
48+
; CHECK: [[FALSE]]:
49+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[FALSE_VAL]], 0
50+
; CHECK-NEXT: br label %[[EXIT]]
51+
; CHECK: [[EXIT]]:
52+
; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
53+
; CHECK-NEXT: ret i1 [[PHI]]
54+
;
55+
{
56+
entry:
57+
br i1 %c, label %true, label %false
58+
true:
59+
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
60+
br label %exit
61+
false:
62+
br label %exit
63+
exit:
64+
%phi = phi i8 [%cmp1, %true], [%false_val, %false]
65+
%r = icmp slt i8 %phi, 0
66+
ret i1 %r
67+
}
68+
69+
; Negative test: the RHS of comparison that uses the phi node is not constant
70+
define i1 @icmp_of_phi_of_scmp_with_non_constant(i1 %c, i16 %x, i16 %y, i8 %cmp)
71+
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_non_constant(
72+
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[CMP:%.*]]) {
73+
; CHECK-NEXT: [[ENTRY:.*:]]
74+
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
75+
; CHECK: [[TRUE]]:
76+
; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
77+
; CHECK-NEXT: br label %[[EXIT:.*]]
78+
; CHECK: [[FALSE]]:
79+
; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
80+
; CHECK-NEXT: br label %[[EXIT]]
81+
; CHECK: [[EXIT]]:
82+
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
83+
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], [[CMP]]
84+
; CHECK-NEXT: ret i1 [[R]]
85+
;
86+
{
87+
entry:
88+
br i1 %c, label %true, label %false
89+
true:
90+
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
91+
br label %exit
92+
false:
93+
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
94+
br label %exit
95+
exit:
96+
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
97+
%r = icmp slt i8 %phi, %cmp
98+
ret i1 %r
99+
}
100+
101+
; Negative test: more than one incoming value of the phi node is not one-use
102+
define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(i1 %c, i16 %x, i16 %y)
103+
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(
104+
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
105+
; CHECK-NEXT: [[ENTRY:.*:]]
106+
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
107+
; CHECK: [[TRUE]]:
108+
; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
109+
; CHECK-NEXT: call void @use(i8 [[CMP1]])
110+
; CHECK-NEXT: br label %[[EXIT:.*]]
111+
; CHECK: [[FALSE]]:
112+
; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
113+
; CHECK-NEXT: call void @use(i8 [[CMP2]])
114+
; CHECK-NEXT: br label %[[EXIT]]
115+
; CHECK: [[EXIT]]:
116+
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
117+
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], 0
118+
; CHECK-NEXT: ret i1 [[R]]
119+
;
120+
{
121+
entry:
122+
br i1 %c, label %true, label %false
123+
true:
124+
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
125+
call void @use(i8 %cmp1)
126+
br label %exit
127+
false:
128+
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
129+
call void @use(i8 %cmp2)
130+
br label %exit
131+
exit:
132+
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
133+
%r = icmp slt i8 %phi, 0
134+
ret i1 %r
135+
}

0 commit comments

Comments
 (0)