Skip to content

Commit b3c55b7

Browse files
authored
[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)
[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison. Handle SELECT_CC similarly to SETCC. Handle these operations, which only propagate poison/undef based on the input operands: SADDSAT, UADDSAT, SSUBSAT, USUBSAT, MULHU, MULHS, SMIN, SMAX, UMIN, UMAX. These operations may create poison based on the shift amount and on the exact flag being violated: SRL, SRA. One goal here is to allow pushing freeze through these operations when allowed, as well as keeping analyses such as isGuaranteedNotToBeUndefOrPoison from breaking on such operations. Since some problems have been observed with pushing freeze through SRA/SRL, we now block that explicitly in DAGCombiner::visitFreeze. That way we can still model SRA/SRL properly in SelectionDAG::canCreateUndefOrPoison, e.g. when it is used by isGuaranteedNotToBeUndefOrPoison, even if we do not want to push freeze through those instructions.
1 parent 6cd6bde commit b3c55b7

File tree

7 files changed

+556
-563
lines changed

7 files changed

+556
-563
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -15459,6 +15459,12 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1545915459
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
1546015460
return N0;
1546115461

15462+
// We currently avoid folding freeze over SRA/SRL, due to the problems seen
15463+
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15464+
// example https://reviews.llvm.org/D136529#4120959.
15465+
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15466+
return SDValue();
15467+
1546215468
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1546315469
// Try to push freeze through instructions that propagate but don't produce
1546415470
// poison as far as possible. If an operand of freeze follows three

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+15-1
Original file line numberDiff line numberDiff line change
@@ -5137,6 +5137,16 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51375137
case ISD::FREEZE:
51385138
case ISD::CONCAT_VECTORS:
51395139
case ISD::INSERT_SUBVECTOR:
5140+
case ISD::SADDSAT:
5141+
case ISD::UADDSAT:
5142+
case ISD::SSUBSAT:
5143+
case ISD::USUBSAT:
5144+
case ISD::MULHU:
5145+
case ISD::MULHS:
5146+
case ISD::SMIN:
5147+
case ISD::SMAX:
5148+
case ISD::UMIN:
5149+
case ISD::UMAX:
51405150
case ISD::AND:
51415151
case ISD::XOR:
51425152
case ISD::ROTL:
@@ -5157,6 +5167,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51575167
case ISD::BUILD_PAIR:
51585168
return false;
51595169

5170+
case ISD::SELECT_CC:
51605171
case ISD::SETCC: {
51615172
// Integer setcc cannot create undef or poison.
51625173
if (Op.getOperand(0).getValueType().isInteger())
@@ -5166,7 +5177,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51665177
// based on options and flags. The options and flags also cause special
51675178
// nonan condition codes to be used. Those condition codes may be preserved
51685179
// even if the nonan flag is dropped somewhere.
5169-
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5180+
unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
5181+
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
51705182
if (((unsigned)CCCode & 0x10U))
51715183
return true;
51725184

@@ -5183,6 +5195,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51835195
return false;
51845196

51855197
case ISD::SHL:
5198+
case ISD::SRL:
5199+
case ISD::SRA:
51865200
// If the max shift amount isn't in range, then the shift can create poison.
51875201
return !getValidMaximumShiftAmountConstant(Op, DemandedElts);
51885202

llvm/test/CodeGen/AMDGPU/div_i128.ll

+53-43
Original file line numberDiff line numberDiff line change
@@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
282282
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
283283
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
284284
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
285-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v16
285+
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
286286
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
287-
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
287+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
288288
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
289-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
290-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291289
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
290+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
292292
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
293293
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
294-
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
295-
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
294+
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
295+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
296296
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
297297
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
298-
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
299-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1
298+
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
299+
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v1
300300
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
301301
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
302302
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
@@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
312312
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
313313
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
314314
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
315-
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
316-
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
315+
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
316+
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
317317
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
318318
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
319-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
320-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v3, vcc
321-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
319+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
320+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
321+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
322322
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
323323
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
324-
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
325-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
324+
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
325+
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
326326
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
327327
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
328-
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
329-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
328+
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
329+
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
330330
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
331331
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
332332
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
@@ -339,18 +339,26 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
339339
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
340340
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
341341
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
342-
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
342+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
343+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
344+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
343345
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
344-
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
345-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
346+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
347+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
348+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
349+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
346350
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
347-
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
348-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
351+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
352+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
353+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
354+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
349355
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
350-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
351-
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
356+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
357+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
358+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
359+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
352360
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
353-
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
361+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
354362
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
355363
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
356364
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
@@ -403,7 +411,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
403411
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
404412
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
405413
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
406-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
414+
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
415+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
407416
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
408417
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
409418
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
@@ -439,7 +448,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
439448
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
440449
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
441450
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
442-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
451+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
452+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
443453
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
444454
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
445455
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
@@ -690,10 +700,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
690700
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
691701
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
692702
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
693-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
694-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
695-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
696-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
703+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
704+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
705+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
706+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
697707
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
698708
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
699709
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
@@ -903,14 +913,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
903913
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
904914
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
905915
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
906-
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
907-
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
908-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
909-
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
910-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
911-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
912-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
913-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
916+
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
917+
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
918+
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
919+
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
920+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
921+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
922+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
923+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
914924
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
915925
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
916926
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -1028,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10281038
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
10291039
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
10301040
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
1031-
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1032-
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1033-
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1034-
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1041+
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1042+
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1043+
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1044+
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
10351045
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
10361046
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10371047
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload

0 commit comments

Comments
 (0)