diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 934199e414c7b..0e078f9dd88b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7206,24 +7206,30 @@ static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
   return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
 }
 
-/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
-/// equivalent `((x ^ y) & m) ^ y)` pattern.
-/// This is typically a better representation for targets without a fused
-/// "and-not" operation.
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
+/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`
+/// pattern. This is typically a better representation for targets without a
+/// fused "and-not" operation.
 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
                                const TargetLowering &TLI, const SDLoc &DL) {
   // Note that masked-merge variants using XOR or ADD expressions are
-  // normalized to OR by InstCombine so we only check for OR.
-  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+  // normalized to OR by InstCombine so we only check for OR or AND.
+  assert((Node->getOpcode() == ISD::OR ||
+          Node->getOpcode() == ISD::AND) &&
+         "Must be called with ISD::OR or ISD::AND node");
 
   // If the target supports and-not, don't fold this.
   if (TLI.hasAndNot(SDValue(Node, 0)))
     return SDValue();
 
   SDValue M, X, Y;
+
   if (sd_match(Node,
                m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
-                    m_OneUse(m_And(m_Deferred(M), m_Value(X)))))) {
+                    m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
+      sd_match(Node,
+               m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
+                     m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
     EVT VT = M.getValueType();
     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
     SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
@@ -7678,6 +7684,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
     return R;
 
+  if (VT.isScalarInteger() && VT != MVT::i1)
+    if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
+      return R;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll
new file mode 100644
index 0000000000000..fe27b3c73be08
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc -o - %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI
+;
+; test that masked-merge code is generated as a "xor;and;xor" sequence, or as
+; "andn; and; or" if and-not is available.
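+;
+; For orientation (an editorial illustration, not produced by the update
+; script): the DeMorgan form matched here is bitwise-equivalent to the plain
+; masked merge and to the and-not-free form the combine emits:
+;   (~m | x) & (m | y)  ==  (m & x) | (~m & y)  ==  ((x ^ y) & m) ^ y
+; e.g. with m = 0b1100, x = 0b1010, y = 0b0101 each form evaluates to 0b1001.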
+ +define i32 @masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; NOBMI-LABEL: masked_merge0_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %esi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: andl %edi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge0_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + ret i32 %and +} + +define i16 @masked_merge1_demorgan(i16 %a0, i16 %a1, i16 %a2) { +; NOBMI-LABEL: masked_merge1_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %esi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: andl %edi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge1_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: andnl %edx, %edi, %eax +; BMI-NEXT: andl %edi, %esi +; BMI-NEXT: orl %esi, %eax +; BMI-NEXT: # kill: def $ax killed $ax killed $eax +; BMI-NEXT: retq + %not = xor i16 %a0, -1 + %or0 = or i16 %not, %a1 + %or1 = or i16 %a0, %a2 + %and = and i16 %or0, %or1 + ret i16 %and +} + +define i8 @masked_merge2_demorgan(i8 %a0, i8 %a1, i8 %a2) { +; CHECK-LABEL: masked_merge2_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %not = xor i8 %a0, -1 + %or0 = or i8 %not, %a1 + %or1 = or i8 %a0, %a1 + %and = and i8 %or0, %or1 + ret i8 %and +} + +define i64 @masked_merge3_demorgan(i64 %a0, i64 %a1, i64 %a2) { +; NOBMI-LABEL: masked_merge3_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movq %rsi, %rax +; NOBMI-NEXT: notq %rdx +; NOBMI-NEXT: xorq %rdx, %rax +; NOBMI-NEXT: notq %rax +; NOBMI-NEXT: andq %rdi, %rax +; NOBMI-NEXT: xorq %rdx, %rax +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge3_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: andnq %rdx, %rdi, %rax +; BMI-NEXT: andq %rdi, %rsi +; BMI-NEXT: notq %rsi +; BMI-NEXT: andnq %rsi, %rax, %rax +; BMI-NEXT: retq + %not_a0 = xor i64 %a0, -1 + %not_a1 = xor i64 %a1, -1 + %not_a2 = xor i64 %a2, -1 + %or0 = or i64 %not_a0, %not_a1 + %or1 = or i64 %a0, %not_a2 + %and = and i64 %or0, %or1 + ret i64 %and +} + +define i32 @not_a_masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; CHECK-LABEL: not_a_masked_merge0_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: orl %edi, %edx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: retq + %not_a_not = sub i32 0, %a0 + %or0 = or i32 %not_a_not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + ret i32 %and +} + +; not a masked merge: `not` operand does not match another `and`-operand. +define i32 @not_a_masked_merge1_demorgan(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { +; NOBMI-LABEL: not_a_masked_merge1_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %ecx, %eax +; NOBMI-NEXT: orl %edx, %edi +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: orl %esi, %eax +; NOBMI-NEXT: andl %edi, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: not_a_masked_merge1_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edx, %edi +; BMI-NEXT: andnl %ecx, %esi, %eax +; BMI-NEXT: andnl %edi, %eax, %eax +; BMI-NEXT: retq + %or1 = or i32 %a0, %a2 + %not = xor i32 %a3, -1 + %or0 = or i32 %not, %a1 + %and = and i32 %or0, %or1 + ret i32 %and +} + +; not a masked merge: one of the operands of `and` is not an `or`. 
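+; (here the second operand of the final `and` is itself an `and` of %a0 and
+; %a2, so the DeMorgan pattern cannot match)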
+define i32 @not_a_masked_merge2_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; NOBMI-LABEL: not_a_masked_merge2_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %edi, %eax +; NOBMI-NEXT: andl %edi, %edx +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: orl %esi, %eax +; NOBMI-NEXT: andl %edx, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: not_a_masked_merge2_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: andl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: retq + %not_an_or1 = and i32 %a0, %a2 + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %and = and i32 %or0, %not_an_or1 + ret i32 %and +} + +define i32 @not_a_masked_merge3_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; NOBMI-LABEL: not_a_masked_merge3_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %esi, %eax +; NOBMI-NEXT: orl %edi, %edx +; NOBMI-NEXT: xorl %edi, %eax +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: andl %edx, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: not_a_masked_merge3_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: xorl %edi, %esi +; BMI-NEXT: andnl %edx, %esi, %eax +; BMI-NEXT: retq + %or1 = or i32 %a0, %a2 + %not = xor i32 %a0, -1 + %not_an_or0 = xor i32 %not, %a1 + %and = and i32 %not_an_or0, %or1 + ret i32 %and +} + +; not a masked merge: `not` operand must not be on same `or`. +define i32 @not_a_masked_merge4_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; CHECK-LABEL: not_a_masked_merge4_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: retq + %or1 = or i32 %a0, %a2 + %not = xor i32 %a1, -1 + %or0 = or i32 %not, %a1 + %and = and i32 %or0, %or1 + ret i32 %and +} + +; should not transform when operands have multiple users. +define i32 @masked_merge_no_transform0_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { +; NOBMI-LABEL: masked_merge_no_transform0_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: orl %edi, %edx +; NOBMI-NEXT: movl %edi, %eax +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: orl %esi, %eax +; NOBMI-NEXT: andl %edx, %eax +; NOBMI-NEXT: movl %edx, (%rcx) +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge_no_transform0_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: movl %edx, (%rcx) +; BMI-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + store i32 %or1, ptr %p1 + ret i32 %and +} + +; should not transform when operands have multiple users. +define i32 @masked_merge_no_transform1_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { +; NOBMI-LABEL: masked_merge_no_transform1_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %edx, %eax +; NOBMI-NEXT: orl %edi, %eax +; NOBMI-NEXT: notl %edi +; NOBMI-NEXT: orl %edi, %esi +; NOBMI-NEXT: andl %esi, %eax +; NOBMI-NEXT: movl %edi, (%rcx) +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge_no_transform1_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: notl %edi +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: movl %edi, (%rcx) +; BMI-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + store i32 %not, ptr %p1 + ret i32 %and +} + +; should not transform when operands have multiple users. 
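+; (in this case %or0 is also stored to %p1, so its extra use blocks the fold)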
+define i32 @masked_merge_no_transform2_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { +; CHECK-LABEL: masked_merge_no_transform2_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: notl %edi +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: andl %edi, %eax +; CHECK-NEXT: movl %edi, (%rcx) +; CHECK-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + store i32 %or0, ptr %p1 + ret i32 %and +}
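+
+; Note (editorial): the no-transform cases above are rejected because
+; foldMaskedMerge wraps the matched `or` and `not` operands in m_OneUse();
+; with an extra user the original nodes stay live, so rewriting them would
+; duplicate work instead of reducing the instruction count.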