-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Relax the conditons of fold of ucmp
/scmp
into phi by allowing the phi node to use the result of ucmp
/scmp
more than once
#109593
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Volodymyr Vasylkun (Poseydon42) ChangesThis extends the optimisation implemented in #107769 by relaxing the condtions to make it happen. Now, the value produced by Full diff: https://github.com/llvm/llvm-project/pull/109593.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f6a0f5880cd5c7..0627f76f10891f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1830,7 +1830,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// optimization is profitable, but also to avoid creating a potentially
// invalid phi node when we have a multi-edge in the CFG.
const APInt *Ignored;
- if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUse() &&
+ if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUser() &&
match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
OpsToMoveUseToIncomingBB.push_back(i);
NewPhiValues.push_back(nullptr);
@@ -1868,18 +1868,25 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// Clone the instruction that uses the phi node and move it into the incoming
// BB because we know that the next iteration of InstCombine will simplify it.
+ SmallDenseMap<BasicBlock *, Instruction *> Clones;
for (auto OpIndex : OpsToMoveUseToIncomingBB) {
Value *Op = PN->getIncomingValue(OpIndex);
BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
- Instruction *Clone = I.clone();
- for (Use &U : Clone->operands()) {
- if (U == PN)
- U = Op;
- else
- U = U->DoPHITranslation(PN->getParent(), OpBB);
+ auto CloneIt = Clones.find(OpBB);
+ if (CloneIt == Clones.end()) {
+ Instruction *Clone = I.clone();
+ for (Use &U : Clone->operands()) {
+ if (U == PN)
+ U = Op;
+ else
+ U = U->DoPHITranslation(PN->getParent(), OpBB);
+ }
+ Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
+ CloneIt = Clones.insert(std::make_pair(OpBB, Clone)).first;
}
- Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
+
+ Instruction *Clone = CloneIt->second;
NewPhiValues[OpIndex] = Clone;
}
diff --git a/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll
index 2b75d5c5475117..cd40aa92ed4fd1 100644
--- a/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll
+++ b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll
@@ -133,3 +133,35 @@ exit:
%r = icmp slt i8 %phi, 0
ret i1 %r
}
+
+; Same as the first transformation, but the phi node uses the result of scmp twice. This verifies that we don't clone values more than once per block
+define i1 @icmp_of_phi_of_scmp_with_constant_one_user_two_uses(i8 %c, i16 %x, i16 %y, i8 %false_val) {
+; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_one_user_two_uses(
+; CHECK-SAME: i8 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
+; CHECK-NEXT: switch i8 [[C]], label %[[BB_2:.*]] [
+; CHECK-NEXT: i8 0, label %[[BB:.*]]
+; CHECK-NEXT: i8 1, label %[[BB]]
+; CHECK-NEXT: ]
+; CHECK: [[BB_2]]:
+; CHECK-NEXT: br label %[[BB]]
+; CHECK: [[BB]]:
+; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP0]], %[[ENTRY]] ], [ false, %[[BB_2]] ]
+; CHECK-NEXT: ret i1 [[R]]
+;
+entry:
+ %cmp = call i8 @llvm.scmp(i16 %x, i16 %y)
+ switch i8 %c, label %bb_2 [
+ i8 0, label %bb
+ i8 1, label %bb
+ ]
+
+bb_2:
+ br label %bb
+
+bb:
+ %phi = phi i8 [ %cmp, %entry ], [ %cmp, %entry ], [ 0, %bb_2 ]
+ %r = icmp slt i8 %phi, 0
+ ret i1 %r
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Confirmed that the original regression is fixed by this patch + #107314. Thank you!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This extends the optimisation implemented in #107769 by relaxing the condtions to make it happen. Now, the value produced by
ucmp
/scmp
doesn't need to be one-use, but only one-user, meaning it can be present in a single phi node more than once.