-
Notifications
You must be signed in to change notification settings - Fork 14k
[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge #144342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: woruyu (woruyu) Changes: Summary: This PR resolves #143864. Add (~a | x) & (a | y) -> (a & (x ^ y)) ^ y to the foldMaskedMerge function using SDPatternMatch. After adding this pattern, ninja check-llvm-codegen was run; all other cases remain unchanged, so a test case (fold-masked-merge-demorgan.ll) was added for it. Full diff: https://github.com/llvm/llvm-project/pull/144342.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..f131822e11468 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7204,6 +7204,38 @@ static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
}
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
+/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`
+/// pattern. This is typically a better representation for targets without a
+/// fused "and-not" operation.
+///
+/// \param Node the ISD::OR or ISD::AND node to inspect.
+/// \param DAG  used to build the replacement XOR/AND/XOR nodes.
+/// \param TLI  queried via hasAndNot(); the fold is skipped when it is true.
+/// \param DL   debug location attached to the new nodes.
+/// \returns the rewritten value, or an empty SDValue if nothing was folded.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
+                               const TargetLowering &TLI, const SDLoc &DL) {
+  // Note that masked-merge variants using XOR or ADD expressions are
+  // normalized to OR by InstCombine so we only check for OR or AND.
+  // The opcode disjunction is parenthesized so that the message string is
+  // and'ed with the whole condition rather than only with the ISD::AND test.
+  assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
+         "Must be called with ISD::OR or ISD::AND node");
+
+  // If the target supports and-not, don't fold this.
+  if (TLI.hasAndNot(SDValue(Node, 0)))
+    return SDValue();
+
+  SDValue M, X, Y;
+
+  // OR form:  (~M & Y) | (M & X)
+  // AND form: (~M | X) & (M | Y)
+  // In both forms every bit of the result is X where M is set and Y where M
+  // is clear, which is what ((X ^ Y) & M) ^ Y computes.
+  // NOTE(review): the OR form requires the NOT to be single-use, the AND form
+  // does not -- confirm that this difference is intended.
+  if (sd_match(Node,
+               m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
+                    m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
+      sd_match(Node, m_And(m_OneUse(m_Or(m_Not(m_Value(M)), m_Value(X))),
+                           m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
+    EVT VT = M.getValueType();
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
+    SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
+    return DAG.getNode(ISD::XOR, DL, VT, And, Y);
+  }
+
+  return SDValue();
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -7644,6 +7676,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
return R;
+ if (VT.isScalarInteger() && VT != MVT::i1)
+ if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
+ return R;
+
return SDValue();
}
@@ -8128,32 +8164,6 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
return SDValue();
}
-/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
-/// equivalent `((x ^ y) & m) ^ y)` pattern.
-/// This is typically a better representation for targets without a fused
-/// "and-not" operation.
-static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
- const TargetLowering &TLI, const SDLoc &DL) {
- // Note that masked-merge variants using XOR or ADD expressions are
- // normalized to OR by InstCombine so we only check for OR.
- assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
-
- // If the target supports and-not, don't fold this.
- if (TLI.hasAndNot(SDValue(Node, 0)))
- return SDValue();
-
- SDValue M, X, Y;
- if (sd_match(Node,
- m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
- m_OneUse(m_And(m_Deferred(M), m_Value(X)))))) {
- EVT VT = M.getValueType();
- SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
- SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
- return DAG.getNode(ISD::XOR, DL, VT, And, Y);
- }
- return SDValue();
-}
-
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll
new file mode 100644
index 0000000000000..6357680f7586e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc -o - %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI
+;
+; Test that masked-merge code is generated as an "xor; and; xor" sequence, or
+; with and-not ("andn") instructions if and-not is available.
+
+; i32 DeMorgan masked merge: (~a0 | a1) & (a0 | a2).
+define i32 @masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; NOBMI-LABEL: masked_merge0_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movl %esi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: andl %edi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: masked_merge0_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: orl %edi, %edx
+; BMI-NEXT: andnl %edi, %esi, %eax
+; BMI-NEXT: andnl %edx, %eax, %eax
+; BMI-NEXT: retq
+ %not = xor i32 %a0, -1
+ %or0 = or i32 %not, %a1
+ %or1 = or i32 %a0, %a2
+ %and = and i32 %or0, %or1
+ ret i32 %and
+}
+
+; i16 DeMorgan masked merge: (~a0 | a1) & (a0 | a2).
+define i16 @masked_merge1_demorgan(i16 %a0, i16 %a1, i16 %a2) {
+; NOBMI-LABEL: masked_merge1_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movl %esi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: andl %edi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: masked_merge1_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: andnl %edx, %edi, %eax
+; BMI-NEXT: andl %edi, %esi
+; BMI-NEXT: orl %esi, %eax
+; BMI-NEXT: # kill: def $ax killed $ax killed $eax
+; BMI-NEXT: retq
+ %not = xor i16 %a0, -1
+ %or0 = or i16 %not, %a1
+ %or1 = or i16 %a0, %a2
+ %and = and i16 %or0, %or1
+ ret i16 %and
+}
+
+; i8 variant where both `or`s use %a1: (~a0 | a1) & (a0 | a1); %a2 is unused.
+; The expected code is a single register move.
+define i8 @masked_merge2_demorgan(i8 %a0, i8 %a1, i8 %a2) {
+; CHECK-LABEL: masked_merge2_demorgan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %not = xor i8 %a0, -1
+ %or0 = or i8 %not, %a1
+ %or1 = or i8 %a0, %a1
+ %and = and i8 %or0, %or1
+ ret i8 %and
+}
+
+; i64 DeMorgan masked merge with inverted data operands:
+; (~a0 | ~a1) & (a0 | ~a2).
+define i64 @masked_merge3_demorgan(i64 %a0, i64 %a1, i64 %a2) {
+; NOBMI-LABEL: masked_merge3_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movq %rsi, %rax
+; NOBMI-NEXT: notq %rdx
+; NOBMI-NEXT: xorq %rdx, %rax
+; NOBMI-NEXT: notq %rax
+; NOBMI-NEXT: andq %rdi, %rax
+; NOBMI-NEXT: xorq %rdx, %rax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: masked_merge3_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: andnq %rdx, %rdi, %rax
+; BMI-NEXT: andq %rdi, %rsi
+; BMI-NEXT: notq %rsi
+; BMI-NEXT: andnq %rsi, %rax, %rax
+; BMI-NEXT: retq
+ %not_a0 = xor i64 %a0, -1
+ %not_a1 = xor i64 %a1, -1
+ %not_a2 = xor i64 %a2, -1
+ %or0 = or i64 %not_a0, %not_a1
+ %or1 = or i64 %a0, %not_a2
+ %and = and i64 %or0, %or1
+ ret i64 %and
+}
+
+; not a masked merge: %a0 is negated (`sub 0, %a0`) rather than inverted.
+define i32 @not_a_masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: not_a_masked_merge0_demorgan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl %edi, %edx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: andl %edx, %eax
+; CHECK-NEXT: retq
+ %not_a_not = sub i32 0, %a0
+ %or0 = or i32 %not_a_not, %a1
+ %or1 = or i32 %a0, %a2
+ %and = and i32 %or0, %or1
+ ret i32 %and
+}
+
+; not a masked merge: the `not` operand (%a3) does not match any operand of
+; the other `or`.
+define i32 @not_a_masked_merge1_demorgan(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; NOBMI-LABEL: not_a_masked_merge1_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movl %ecx, %eax
+; NOBMI-NEXT: orl %edx, %edi
+; NOBMI-NEXT: notl %eax
+; NOBMI-NEXT: orl %esi, %eax
+; NOBMI-NEXT: andl %edi, %eax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: not_a_masked_merge1_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: orl %edx, %edi
+; BMI-NEXT: andnl %ecx, %esi, %eax
+; BMI-NEXT: andnl %edi, %eax, %eax
+; BMI-NEXT: retq
+ %or1 = or i32 %a0, %a2
+ %not = xor i32 %a3, -1
+ %or0 = or i32 %not, %a1
+ %and = and i32 %or0, %or1
+ ret i32 %and
+}
+
+; not a masked merge: one of the operands of the outer `and` is itself an
+; `and` (%not_an_or1), not an `or`.
+define i32 @not_a_masked_merge2_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; NOBMI-LABEL: not_a_masked_merge2_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movl %edi, %eax
+; NOBMI-NEXT: andl %edi, %edx
+; NOBMI-NEXT: notl %eax
+; NOBMI-NEXT: orl %esi, %eax
+; NOBMI-NEXT: andl %edx, %eax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: not_a_masked_merge2_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: andl %edi, %edx
+; BMI-NEXT: andnl %edi, %esi, %eax
+; BMI-NEXT: andnl %edx, %eax, %eax
+; BMI-NEXT: retq
+ %not_an_or1 = and i32 %a0, %a2
+ %not = xor i32 %a0, -1
+ %or0 = or i32 %not, %a1
+ %and = and i32 %or0, %not_an_or1
+ ret i32 %and
+}
+
+; not a masked merge: one of the operands of `and` is an `xor`
+; (%not_an_or0), not an `or`.
+define i32 @not_a_masked_merge3_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; NOBMI-LABEL: not_a_masked_merge3_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movl %esi, %eax
+; NOBMI-NEXT: orl %edi, %edx
+; NOBMI-NEXT: xorl %edi, %eax
+; NOBMI-NEXT: notl %eax
+; NOBMI-NEXT: andl %edx, %eax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: not_a_masked_merge3_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: orl %edi, %edx
+; BMI-NEXT: xorl %edi, %esi
+; BMI-NEXT: andnl %edx, %esi, %eax
+; BMI-NEXT: retq
+ %or1 = or i32 %a0, %a2
+ %not = xor i32 %a0, -1
+ %not_an_or0 = xor i32 %not, %a1
+ %and = and i32 %not_an_or0, %or1
+ ret i32 %and
+}
+
+; not a masked merge: the `not` operand (%a1) is or'ed with itself in the same
+; `or`, and %a0 in the other `or` is never inverted.
+define i32 @not_a_masked_merge4_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: not_a_masked_merge4_demorgan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: retq
+ %or1 = or i32 %a0, %a2
+ %not = xor i32 %a1, -1
+ %or0 = or i32 %not, %a1
+ %and = and i32 %or0, %or1
+ ret i32 %and
+}
+
+; should not transform when operands have multiple users: %or1 is also stored
+; to %p1.
+define i32 @masked_merge_no_transform0_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; NOBMI-LABEL: masked_merge_no_transform0_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: orl %edi, %edx
+; NOBMI-NEXT: movl %edi, %eax
+; NOBMI-NEXT: notl %eax
+; NOBMI-NEXT: orl %esi, %eax
+; NOBMI-NEXT: andl %edx, %eax
+; NOBMI-NEXT: movl %edx, (%rcx)
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: masked_merge_no_transform0_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: orl %edi, %edx
+; BMI-NEXT: andnl %edi, %esi, %eax
+; BMI-NEXT: andnl %edx, %eax, %eax
+; BMI-NEXT: movl %edx, (%rcx)
+; BMI-NEXT: retq
+ %not = xor i32 %a0, -1
+ %or0 = or i32 %not, %a1
+ %or1 = or i32 %a0, %a2
+ %and = and i32 %or0, %or1
+ store i32 %or1, ptr %p1
+ ret i32 %and
+}
+
+; %not has a second user here (it is also stored to %p1).
+; NOTE(review): the NOBMI checks below still show the xor/and/xor sequence,
+; i.e. the fold is applied in this case -- confirm that this is intended.
+define i32 @masked_merge_no_transform1_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; NOBMI-LABEL: masked_merge_no_transform1_demorgan:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: movl %esi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: andl %edi, %eax
+; NOBMI-NEXT: notl %edi
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: movl %edi, (%rcx)
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: masked_merge_no_transform1_demorgan:
+; BMI: # %bb.0:
+; BMI-NEXT: orl %edi, %edx
+; BMI-NEXT: andnl %edi, %esi, %eax
+; BMI-NEXT: notl %edi
+; BMI-NEXT: andnl %edx, %eax, %eax
+; BMI-NEXT: movl %edi, (%rcx)
+; BMI-NEXT: retq
+ %not = xor i32 %a0, -1
+ %or0 = or i32 %not, %a1
+ %or1 = or i32 %a0, %a2
+ %and = and i32 %or0, %or1
+ store i32 %not, ptr %p1
+ ret i32 %and
+}
+
+; should not transform when operands have multiple users: %or0 is also stored
+; to %p1.
+define i32 @masked_merge_no_transform2_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; CHECK-LABEL: masked_merge_no_transform2_demorgan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: orl %esi, %edi
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movl %edi, (%rcx)
+; CHECK-NEXT: retq
+ %not = xor i32 %a0, -1
+ %or0 = or i32 %not, %a1
+ %or1 = or i32 %a0, %a2
+ %and = and i32 %or0, %or1
+ store i32 %or0, ptr %p1
+ ret i32 %and
+}
\ No newline at end of file
|
Based on my previous experience (#143855), I know these test cases are related to (m & x) | (~m & y).
In order to write the (~a | x) & (a | y) test cases, I used fold-masked-merge.ll as a reference for writing fold-masked-merge-demorgan.ll. Any suggestions on test case coverage here? |
@RKSimon, hello, any suggestions for modifications? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you pull out the new tests into a pre-commit patch against current trunk codegen? I'll get that committed and then you can update this patch to show the codegen changes.
8610ae3
to
dd1ad26
Compare
@RKSimon, hello, the pre-commit patch is in the first commit and the codegen changes are in the second. |
Thanks @woruyu - I've moved foldMaskedMerge into place so the diff is clearer - please can you merge against trunk? |
Reduces diff in llvm#144342
dd1ad26
to
436993e
Compare
@RKSimon, sorry for the delay, I have updated to trunk! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Reduces diff in llvm#144342
Summary
This PR resolves #143864
Add (~a | x) & (a | y) -> (a & (x ^ y)) ^ y to the foldMaskedMerge function using SDPatternMatch.
After adding this pattern, I ran
ninja check-llvm-codegen
and all other cases remain unchanged, so I added a test case (fold-masked-merge-demorgan.ll) for it.