Skip to content

Commit 8610ae3

Browse files
committed
[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge
1 parent 01f9dff commit 8610ae3

File tree

2 files changed

+303
-26
lines changed

2 files changed

+303
-26
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7204,6 +7204,38 @@ static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
72047204
return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
72057205
}
72067206

7207+
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7208+
/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
7209+
/// pattern. This is typically a better representation for targets without a
7210+
/// fused "and-not" operation.
7211+
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7212+
const TargetLowering &TLI, const SDLoc &DL) {
7213+
// Note that masked-merge variants using XOR or ADD expressions are
7214+
// normalized to OR by InstCombine so we only check for OR or AND.
7215+
assert(Node->getOpcode() == ISD::OR ||
7216+
Node->getOpcode() == ISD::AND &&
7217+
"Must be called with ISD::OR or ISD::AND node");
7218+
7219+
// If the target supports and-not, don't fold this.
7220+
if (TLI.hasAndNot(SDValue(Node, 0)))
7221+
return SDValue();
7222+
7223+
SDValue M, X, Y;
7224+
7225+
if (sd_match(Node,
7226+
m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7227+
m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7228+
sd_match(Node, m_And(m_OneUse(m_Or(m_Not(m_Value(M)), m_Value(X))),
7229+
m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7230+
EVT VT = M.getValueType();
7231+
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7232+
SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7233+
return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7234+
}
7235+
7236+
return SDValue();
7237+
}
7238+
72077239
SDValue DAGCombiner::visitAND(SDNode *N) {
72087240
SDValue N0 = N->getOperand(0);
72097241
SDValue N1 = N->getOperand(1);
@@ -7644,6 +7676,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
76447676
if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
76457677
return R;
76467678

7679+
if (VT.isScalarInteger() && VT != MVT::i1)
7680+
if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7681+
return R;
7682+
76477683
return SDValue();
76487684
}
76497685

@@ -8128,32 +8164,6 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
81288164
return SDValue();
81298165
}
81308166

8131-
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
8132-
/// equivalent `((x ^ y) & m) ^ y` pattern.
8133-
/// This is typically a better representation for targets without a fused
8134-
/// "and-not" operation.
/// Returns the replacement value, or an empty SDValue when no fold is done.
8135-
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
8136-
const TargetLowering &TLI, const SDLoc &DL) {
8137-
// Note that masked-merge variants using XOR or ADD expressions are
8138-
// normalized to OR by InstCombine so we only check for OR.
8139-
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
8140-
8141-
// If the target supports and-not, don't fold this.
8142-
if (TLI.hasAndNot(SDValue(Node, 0)))
8143-
return SDValue();
8144-
8145-
// M is the mask; X is selected where M is set, Y where M is clear.
SDValue M, X, Y;
8146-
// Match (~M & Y) | (M & X); both ANDs and the NOT must be single-use.
if (sd_match(Node,
8147-
m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
8148-
m_OneUse(m_And(m_Deferred(M), m_Value(X)))))) {
8149-
EVT VT = M.getValueType();
8150-
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
8151-
SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
8152-
return DAG.getNode(ISD::XOR, DL, VT, And, Y);
8153-
}
8154-
return SDValue();
8155-
}
8156-
81578167
SDValue DAGCombiner::visitOR(SDNode *N) {
81588168
SDValue N0 = N->getOperand(0);
81598169
SDValue N1 = N->getOperand(1);
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,NOBMI
3+
; RUN: llc -o - %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI
4+
;
5+
; Test that the De Morgan form of a masked merge, (~m | x) & (m | y), is
6+
; generated as an "xor; and; xor" sequence, or with "andn" if and-not is available.
7+
8+
define i32 @masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) {
9+
; NOBMI-LABEL: masked_merge0_demorgan:
10+
; NOBMI: # %bb.0:
11+
; NOBMI-NEXT: movl %esi, %eax
12+
; NOBMI-NEXT: xorl %edx, %eax
13+
; NOBMI-NEXT: andl %edi, %eax
14+
; NOBMI-NEXT: xorl %edx, %eax
15+
; NOBMI-NEXT: retq
16+
;
17+
; BMI-LABEL: masked_merge0_demorgan:
18+
; BMI: # %bb.0:
19+
; BMI-NEXT: orl %edi, %edx
20+
; BMI-NEXT: andnl %edi, %esi, %eax
21+
; BMI-NEXT: andnl %edx, %eax, %eax
22+
; BMI-NEXT: retq
23+
%not = xor i32 %a0, -1
24+
%or0 = or i32 %not, %a1
25+
%or1 = or i32 %a0, %a2
26+
%and = and i32 %or0, %or1
27+
ret i32 %and
28+
}
29+
30+
define i16 @masked_merge1_demorgan(i16 %a0, i16 %a1, i16 %a2) {
31+
; NOBMI-LABEL: masked_merge1_demorgan:
32+
; NOBMI: # %bb.0:
33+
; NOBMI-NEXT: movl %esi, %eax
34+
; NOBMI-NEXT: xorl %edx, %eax
35+
; NOBMI-NEXT: andl %edi, %eax
36+
; NOBMI-NEXT: xorl %edx, %eax
37+
; NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
38+
; NOBMI-NEXT: retq
39+
;
40+
; BMI-LABEL: masked_merge1_demorgan:
41+
; BMI: # %bb.0:
42+
; BMI-NEXT: andnl %edx, %edi, %eax
43+
; BMI-NEXT: andl %edi, %esi
44+
; BMI-NEXT: orl %esi, %eax
45+
; BMI-NEXT: # kill: def $ax killed $ax killed $eax
46+
; BMI-NEXT: retq
47+
%not = xor i16 %a0, -1
48+
%or0 = or i16 %not, %a1
49+
%or1 = or i16 %a0, %a2
50+
%and = and i16 %or0, %or1
51+
ret i16 %and
52+
}
53+
54+
define i8 @masked_merge2_demorgan(i8 %a0, i8 %a1, i8 %a2) {
55+
; CHECK-LABEL: masked_merge2_demorgan:
56+
; CHECK: # %bb.0:
57+
; CHECK-NEXT: movl %esi, %eax
58+
; CHECK-NEXT: # kill: def $al killed $al killed $eax
59+
; CHECK-NEXT: retq
60+
%not = xor i8 %a0, -1
61+
%or0 = or i8 %not, %a1
62+
%or1 = or i8 %a0, %a1
63+
%and = and i8 %or0, %or1
64+
ret i8 %and
65+
}
66+
67+
define i64 @masked_merge3_demorgan(i64 %a0, i64 %a1, i64 %a2) {
68+
; NOBMI-LABEL: masked_merge3_demorgan:
69+
; NOBMI: # %bb.0:
70+
; NOBMI-NEXT: movq %rsi, %rax
71+
; NOBMI-NEXT: notq %rdx
72+
; NOBMI-NEXT: xorq %rdx, %rax
73+
; NOBMI-NEXT: notq %rax
74+
; NOBMI-NEXT: andq %rdi, %rax
75+
; NOBMI-NEXT: xorq %rdx, %rax
76+
; NOBMI-NEXT: retq
77+
;
78+
; BMI-LABEL: masked_merge3_demorgan:
79+
; BMI: # %bb.0:
80+
; BMI-NEXT: andnq %rdx, %rdi, %rax
81+
; BMI-NEXT: andq %rdi, %rsi
82+
; BMI-NEXT: notq %rsi
83+
; BMI-NEXT: andnq %rsi, %rax, %rax
84+
; BMI-NEXT: retq
85+
%not_a0 = xor i64 %a0, -1
86+
%not_a1 = xor i64 %a1, -1
87+
%not_a2 = xor i64 %a2, -1
88+
%or0 = or i64 %not_a0, %not_a1
89+
%or1 = or i64 %a0, %not_a2
90+
%and = and i64 %or0, %or1
91+
ret i64 %and
92+
}
93+
94+
define i32 @not_a_masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) {
95+
; CHECK-LABEL: not_a_masked_merge0_demorgan:
96+
; CHECK: # %bb.0:
97+
; CHECK-NEXT: orl %edi, %edx
98+
; CHECK-NEXT: movl %edi, %eax
99+
; CHECK-NEXT: negl %eax
100+
; CHECK-NEXT: orl %esi, %eax
101+
; CHECK-NEXT: andl %edx, %eax
102+
; CHECK-NEXT: retq
103+
%not_a_not = sub i32 0, %a0
104+
%or0 = or i32 %not_a_not, %a1
105+
%or1 = or i32 %a0, %a2
106+
%and = and i32 %or0, %or1
107+
ret i32 %and
108+
}
109+
110+
; not a masked merge: the `not` operand does not match any operand of the other `or`.
111+
define i32 @not_a_masked_merge1_demorgan(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
112+
; NOBMI-LABEL: not_a_masked_merge1_demorgan:
113+
; NOBMI: # %bb.0:
114+
; NOBMI-NEXT: movl %ecx, %eax
115+
; NOBMI-NEXT: orl %edx, %edi
116+
; NOBMI-NEXT: notl %eax
117+
; NOBMI-NEXT: orl %esi, %eax
118+
; NOBMI-NEXT: andl %edi, %eax
119+
; NOBMI-NEXT: retq
120+
;
121+
; BMI-LABEL: not_a_masked_merge1_demorgan:
122+
; BMI: # %bb.0:
123+
; BMI-NEXT: orl %edx, %edi
124+
; BMI-NEXT: andnl %ecx, %esi, %eax
125+
; BMI-NEXT: andnl %edi, %eax, %eax
126+
; BMI-NEXT: retq
127+
%or1 = or i32 %a0, %a2
128+
%not = xor i32 %a3, -1
129+
%or0 = or i32 %not, %a1
130+
%and = and i32 %or0, %or1
131+
ret i32 %and
132+
}
133+
134+
; not a masked merge: one of the operands of `and` is not an `or`.
135+
define i32 @not_a_masked_merge2_demorgan(i32 %a0, i32 %a1, i32 %a2) {
136+
; NOBMI-LABEL: not_a_masked_merge2_demorgan:
137+
; NOBMI: # %bb.0:
138+
; NOBMI-NEXT: movl %edi, %eax
139+
; NOBMI-NEXT: andl %edi, %edx
140+
; NOBMI-NEXT: notl %eax
141+
; NOBMI-NEXT: orl %esi, %eax
142+
; NOBMI-NEXT: andl %edx, %eax
143+
; NOBMI-NEXT: retq
144+
;
145+
; BMI-LABEL: not_a_masked_merge2_demorgan:
146+
; BMI: # %bb.0:
147+
; BMI-NEXT: andl %edi, %edx
148+
; BMI-NEXT: andnl %edi, %esi, %eax
149+
; BMI-NEXT: andnl %edx, %eax, %eax
150+
; BMI-NEXT: retq
151+
%not_an_or1 = and i32 %a0, %a2
152+
%not = xor i32 %a0, -1
153+
%or0 = or i32 %not, %a1
154+
%and = and i32 %or0, %not_an_or1
155+
ret i32 %and
156+
}
157+
158+
define i32 @not_a_masked_merge3_demorgan(i32 %a0, i32 %a1, i32 %a2) {
159+
; NOBMI-LABEL: not_a_masked_merge3_demorgan:
160+
; NOBMI: # %bb.0:
161+
; NOBMI-NEXT: movl %esi, %eax
162+
; NOBMI-NEXT: orl %edi, %edx
163+
; NOBMI-NEXT: xorl %edi, %eax
164+
; NOBMI-NEXT: notl %eax
165+
; NOBMI-NEXT: andl %edx, %eax
166+
; NOBMI-NEXT: retq
167+
;
168+
; BMI-LABEL: not_a_masked_merge3_demorgan:
169+
; BMI: # %bb.0:
170+
; BMI-NEXT: orl %edi, %edx
171+
; BMI-NEXT: xorl %edi, %esi
172+
; BMI-NEXT: andnl %edx, %esi, %eax
173+
; BMI-NEXT: retq
174+
%or1 = or i32 %a0, %a2
175+
%not = xor i32 %a0, -1
176+
%not_an_or0 = xor i32 %not, %a1
177+
%and = and i32 %not_an_or0, %or1
178+
ret i32 %and
179+
}
180+
181+
; not a masked merge: `not` operand must not be on same `or`.
182+
define i32 @not_a_masked_merge4_demorgan(i32 %a0, i32 %a1, i32 %a2) {
183+
; CHECK-LABEL: not_a_masked_merge4_demorgan:
184+
; CHECK: # %bb.0:
185+
; CHECK-NEXT: movl %edi, %eax
186+
; CHECK-NEXT: orl %edx, %eax
187+
; CHECK-NEXT: retq
188+
%or1 = or i32 %a0, %a2
189+
%not = xor i32 %a1, -1
190+
%or0 = or i32 %not, %a1
191+
%and = and i32 %or0, %or1
192+
ret i32 %and
193+
}
194+
195+
; should not transform: `%or1` has a second user (the store).
196+
define i32 @masked_merge_no_transform0_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
197+
; NOBMI-LABEL: masked_merge_no_transform0_demorgan:
198+
; NOBMI: # %bb.0:
199+
; NOBMI-NEXT: orl %edi, %edx
200+
; NOBMI-NEXT: movl %edi, %eax
201+
; NOBMI-NEXT: notl %eax
202+
; NOBMI-NEXT: orl %esi, %eax
203+
; NOBMI-NEXT: andl %edx, %eax
204+
; NOBMI-NEXT: movl %edx, (%rcx)
205+
; NOBMI-NEXT: retq
206+
;
207+
; BMI-LABEL: masked_merge_no_transform0_demorgan:
208+
; BMI: # %bb.0:
209+
; BMI-NEXT: orl %edi, %edx
210+
; BMI-NEXT: andnl %edi, %esi, %eax
211+
; BMI-NEXT: andnl %edx, %eax, %eax
212+
; BMI-NEXT: movl %edx, (%rcx)
213+
; BMI-NEXT: retq
214+
%not = xor i32 %a0, -1
215+
%or0 = or i32 %not, %a1
216+
%or1 = or i32 %a0, %a2
217+
%and = and i32 %or0, %or1
218+
store i32 %or1, ptr %p1
219+
ret i32 %and
220+
}
221+
222+
; `%not` has a second user (the store); the expected output without and-not still uses the xor/and/xor form, so the fold is not blocked in this case.
223+
define i32 @masked_merge_no_transform1_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
224+
; NOBMI-LABEL: masked_merge_no_transform1_demorgan:
225+
; NOBMI: # %bb.0:
226+
; NOBMI-NEXT: movl %esi, %eax
227+
; NOBMI-NEXT: xorl %edx, %eax
228+
; NOBMI-NEXT: andl %edi, %eax
229+
; NOBMI-NEXT: notl %edi
230+
; NOBMI-NEXT: xorl %edx, %eax
231+
; NOBMI-NEXT: movl %edi, (%rcx)
232+
; NOBMI-NEXT: retq
233+
;
234+
; BMI-LABEL: masked_merge_no_transform1_demorgan:
235+
; BMI: # %bb.0:
236+
; BMI-NEXT: orl %edi, %edx
237+
; BMI-NEXT: andnl %edi, %esi, %eax
238+
; BMI-NEXT: notl %edi
239+
; BMI-NEXT: andnl %edx, %eax, %eax
240+
; BMI-NEXT: movl %edi, (%rcx)
241+
; BMI-NEXT: retq
242+
%not = xor i32 %a0, -1
243+
%or0 = or i32 %not, %a1
244+
%or1 = or i32 %a0, %a2
245+
%and = and i32 %or0, %or1
246+
store i32 %not, ptr %p1
247+
ret i32 %and
248+
}
249+
250+
; should not transform: `%or0` has a second user (the store).
251+
define i32 @masked_merge_no_transform2_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
252+
; CHECK-LABEL: masked_merge_no_transform2_demorgan:
253+
; CHECK: # %bb.0:
254+
; CHECK-NEXT: movl %edx, %eax
255+
; CHECK-NEXT: orl %edi, %eax
256+
; CHECK-NEXT: notl %edi
257+
; CHECK-NEXT: orl %esi, %edi
258+
; CHECK-NEXT: andl %edi, %eax
259+
; CHECK-NEXT: movl %edi, (%rcx)
260+
; CHECK-NEXT: retq
261+
%not = xor i32 %a0, -1
262+
%or0 = or i32 %not, %a1
263+
%or1 = or i32 %a0, %a2
264+
%and = and i32 %or0, %or1
265+
store i32 %or0, ptr %p1
266+
ret i32 %and
267+
}

0 commit comments

Comments
 (0)