Skip to content

[DAGCombiner] Freeze maybe poison operands when folding select to logic #84924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11501,28 +11501,28 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
return SDValue();

// select Cond, Cond, F --> or Cond, F
// select Cond, 1, F --> or Cond, F
// select Cond, Cond, F --> or Cond, freeze(F)
// select Cond, 1, F --> or Cond, freeze(F)
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
return matcher.getNode(ISD::OR, DL, VT, Cond, F);
return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));

// select Cond, T, Cond --> and Cond, T
// select Cond, T, 0 --> and Cond, T
// select Cond, T, Cond --> and Cond, freeze(T)
// select Cond, T, 0 --> and Cond, freeze(T)
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
return matcher.getNode(ISD::AND, DL, VT, Cond, T);
return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));

// select Cond, T, 1 --> or (not Cond), T
// select Cond, T, 1 --> or (not Cond), freeze(T)
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
SDValue NotCond =
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
return matcher.getNode(ISD::OR, DL, VT, NotCond, T);
return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
}

// select Cond, 0, F --> and (not Cond), F
// select Cond, 0, F --> and (not Cond), freeze(F)
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
SDValue NotCond =
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
return matcher.getNode(ISD::AND, DL, VT, NotCond, F);
return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
}

return SDValue();
Expand Down Expand Up @@ -11550,37 +11550,37 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
else
return SDValue();

// (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
// (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
if (isNullOrNullSplat(N2)) {
SDLoc DL(N);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
}

// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
if (isAllOnesOrAllOnesSplat(N1)) {
SDLoc DL(N);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
}

// If we have to invert the sign bit mask, only do that transform if the
// target has a bitwise 'and not' instruction (the invert is free).
// (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
// (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
SDLoc DL(N);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
SDValue Not = DAG.getNOT(DL, Sra, VT);
return DAG.getNode(ISD::AND, DL, VT, Not, N2);
return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
}

// TODO: There's another pattern in this family, but it may require
// implementing hasOrNot() to check for profitability:
// (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
// (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)

return SDValue();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x2, x2, x11
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
Expand Down
28 changes: 9 additions & 19 deletions llvm/test/CodeGen/AArch64/fast-isel-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -500,31 +500,20 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {

; Test peephole optimizations for select.
define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
; CHECK-SDAGISEL-LABEL: select_opt1:
; CHECK-SDAGISEL: ; %bb.0:
; CHECK-SDAGISEL-NEXT: orr w0, w0, w1
; CHECK-SDAGISEL-NEXT: ret
;
; CHECK-FASTISEL-LABEL: select_opt1:
; CHECK-FASTISEL: ; %bb.0:
; CHECK-FASTISEL-NEXT: orr w8, w0, w1
; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
; CHECK-FASTISEL-NEXT: ret
;
; CHECK-GISEL-LABEL: select_opt1:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: orr w8, w0, w1
; CHECK-GISEL-NEXT: and w0, w8, #0x1
; CHECK-GISEL-NEXT: ret
; CHECK-LABEL: select_opt1:
; CHECK: ; %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = select i1 %c, i1 true, i1 %a
ret i1 %1
}

define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
; CHECK-SDAGISEL-LABEL: select_opt2:
; CHECK-SDAGISEL: ; %bb.0:
; CHECK-SDAGISEL-NEXT: eor w8, w0, #0x1
; CHECK-SDAGISEL-NEXT: orr w0, w8, w1
; CHECK-SDAGISEL-NEXT: orn w8, w1, w0
; CHECK-SDAGISEL-NEXT: and w0, w8, #0x1
; CHECK-SDAGISEL-NEXT: ret
;
; CHECK-FASTISEL-LABEL: select_opt2:
Expand All @@ -547,7 +536,8 @@ define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
; CHECK-SDAGISEL-LABEL: select_opt3:
; CHECK-SDAGISEL: ; %bb.0:
; CHECK-SDAGISEL-NEXT: bic w0, w1, w0
; CHECK-SDAGISEL-NEXT: eor w8, w0, #0x1
; CHECK-SDAGISEL-NEXT: and w0, w8, w1
; CHECK-SDAGISEL-NEXT: ret
;
; CHECK-FASTISEL-LABEL: select_opt3:
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,9 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ctz_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
; CHECK-NEXT: add z0.d, z2.d, z1.d
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov z0.d, p2/m, z2.d
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
; CHECK-NEXT: uaddv d0, p0, z0.d
Expand Down
64 changes: 24 additions & 40 deletions llvm/test/CodeGen/AMDGPU/div_i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -482,28 +482,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
Expand All @@ -514,7 +507,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: s_mov_b32 s14, s13
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
Expand Down Expand Up @@ -1048,10 +1040,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -2695,28 +2687,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
Expand All @@ -2727,7 +2712,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: s_mov_b32 s14, s13
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
Expand Down Expand Up @@ -3261,10 +3245,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
Expand Down
Loading
Loading