Skip to content

Commit 58d4afa

Browse files
committed
[DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)
Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all ones" or "constant" into something that depends on freeze(undef), as that would destroy those properties. Instead, we replace the undef elements of such vectors with 0 or -1, making it possible to fold away the freeze. We use -1 if the BUILD_VECTOR would be identified as "all ones", and 0 otherwise.
1 parent 5012ee3 commit 58d4afa

File tree

2 files changed

+26
-50
lines changed

2 files changed

+26
-50
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -15451,6 +15451,26 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1545115451
N0.getOpcode() == ISD::BUILD_PAIR ||
1545215452
N0.getOpcode() == ISD::CONCAT_VECTORS;
1545315453

15454+
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15455+
// ones" or "constant" into something that depends on FrozenUndef. We can
15456+
// instead pick values for the undef elements that keep those properties,
15457+
// while at the same time folding away the freeze.
15458+
// If we implement a more general solution for folding away freeze(undef) in
15459+
// the future, then this special handling can be removed.
15460+
if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15461+
SDLoc DL(N0);
15462+
MVT VT = N0.getSimpleValueType();
15463+
if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
15464+
return DAG.getAllOnesConstant(DL, VT);
15465+
if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15466+
SmallVector<SDValue, 8> NewVecC;
15467+
for (const SDValue &Op : N0->op_values())
15468+
NewVecC.push_back(
15469+
Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15470+
return DAG.getBuildVector(VT, DL, NewVecC);
15471+
}
15472+
}
15473+
1545415474
SmallSetVector<SDValue, 8> MaybePoisonOperands;
1545515475
for (SDValue Op : N0->ops()) {
1545615476
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,

llvm/test/CodeGen/X86/freeze-binary.ll

+6-50
Original file line numberDiff line numberDiff line change
@@ -202,27 +202,13 @@ define <4 x i32> @freeze_add_vec(<4 x i32> %a0) nounwind {
202202
define <4 x i32> @freeze_add_vec_undef(<4 x i32> %a0) nounwind {
203203
; X86-LABEL: freeze_add_vec_undef:
204204
; X86: # %bb.0:
205-
; X86-NEXT: pushl %ebp
206-
; X86-NEXT: movl %esp, %ebp
207-
; X86-NEXT: andl $-16, %esp
208-
; X86-NEXT: subl $32, %esp
209-
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
210-
; X86-NEXT: movl $3, {{[0-9]+}}(%esp)
211-
; X86-NEXT: movl $2, {{[0-9]+}}(%esp)
212-
; X86-NEXT: movl $1, (%esp)
213-
; X86-NEXT: paddd (%esp), %xmm0
214205
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
215-
; X86-NEXT: movl %ebp, %esp
216-
; X86-NEXT: popl %ebp
206+
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
217207
; X86-NEXT: retl
218208
;
219209
; X64-LABEL: freeze_add_vec_undef:
220210
; X64: # %bb.0:
221-
; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
222-
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
223-
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
224-
; X64-NEXT: movl $3, -{{[0-9]+}}(%rsp)
225-
; X64-NEXT: vpaddd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
211+
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
226212
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
227213
; X64-NEXT: retq
228214
%x = add <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 undef>
@@ -287,27 +273,13 @@ define <4 x i32> @freeze_sub_vec(<4 x i32> %a0) nounwind {
287273
define <4 x i32> @freeze_sub_vec_undef(<4 x i32> %a0) nounwind {
288274
; X86-LABEL: freeze_sub_vec_undef:
289275
; X86: # %bb.0:
290-
; X86-NEXT: pushl %ebp
291-
; X86-NEXT: movl %esp, %ebp
292-
; X86-NEXT: andl $-16, %esp
293-
; X86-NEXT: subl $32, %esp
294-
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
295-
; X86-NEXT: movl $3, {{[0-9]+}}(%esp)
296-
; X86-NEXT: movl $2, {{[0-9]+}}(%esp)
297-
; X86-NEXT: movl $1, (%esp)
298-
; X86-NEXT: psubd (%esp), %xmm0
299276
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
300-
; X86-NEXT: movl %ebp, %esp
301-
; X86-NEXT: popl %ebp
277+
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
302278
; X86-NEXT: retl
303279
;
304280
; X64-LABEL: freeze_sub_vec_undef:
305281
; X64: # %bb.0:
306-
; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
307-
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
308-
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
309-
; X64-NEXT: movl $3, -{{[0-9]+}}(%rsp)
310-
; X64-NEXT: vpsubd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
282+
; X64-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
311283
; X64-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
312284
; X64-NEXT: retq
313285
%x = sub <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 undef>
@@ -373,29 +345,13 @@ define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind {
373345
define <8 x i16> @freeze_mul_vec_undef(<8 x i16> %a0) nounwind {
374346
; X86-LABEL: freeze_mul_vec_undef:
375347
; X86: # %bb.0:
376-
; X86-NEXT: pushl %ebp
377-
; X86-NEXT: movl %esp, %ebp
378-
; X86-NEXT: andl $-16, %esp
379-
; X86-NEXT: subl $32, %esp
380-
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
381-
; X86-NEXT: movw $1, {{[0-9]+}}(%esp)
382-
; X86-NEXT: movl $196612, {{[0-9]+}}(%esp) # imm = 0x30004
383-
; X86-NEXT: movl $262147, {{[0-9]+}}(%esp) # imm = 0x40003
384-
; X86-NEXT: movl $131073, (%esp) # imm = 0x20001
385-
; X86-NEXT: pmullw (%esp), %xmm0
386348
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
387-
; X86-NEXT: movl %ebp, %esp
388-
; X86-NEXT: popl %ebp
349+
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
389350
; X86-NEXT: retl
390351
;
391352
; X64-LABEL: freeze_mul_vec_undef:
392353
; X64: # %bb.0:
393-
; X64-NEXT: movabsq $1125912791875585, %rax # imm = 0x4000300020001
394-
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
395-
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
396-
; X64-NEXT: movw $1, -{{[0-9]+}}(%rsp)
397-
; X64-NEXT: movl $196612, -{{[0-9]+}}(%rsp) # imm = 0x30004
398-
; X64-NEXT: vpmullw -{{[0-9]+}}(%rsp), %xmm0, %xmm0
354+
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
399355
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
400356
; X64-NEXT: retq
401357
%x = mul <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 4, i16 3, i16 undef, i16 1>

0 commit comments

Comments
 (0)