Skip to content

Commit 0a8acd2

Browse files
authored
[DAG] ComputeNumSignBits - ISD::EXTRACT_ELEMENT needs to return at least 1 (#155455)
When going through the ISD::EXTRACT_ELEMENT case, `KnownSign - rIndex * BitWidth` could produce a negative value. When it does, `std::clamp` returns its lower bound, which was previously 0. Change that lower bound to 1 to avoid potential underflows, because `ComputeNumSignBits` is expected to always return at least 1. Fixes #155452.
1 parent 2c920a1 commit 0a8acd2

File tree

2 files changed

+85
-1
lines changed

2 files changed

+85
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5127,7 +5127,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
51275127

51285128
// If the sign portion ends in our element the subtraction gives correct
51295129
// result. Otherwise it gives either negative or > bitwidth result
5130-
return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth);
5130+
return std::clamp(KnownSign - rIndex * BitWidth, 1, BitWidth);
51315131
}
51325132
case ISD::INSERT_VECTOR_ELT: {
51335133
if (VT.isScalableVector())
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc %s -march=amdgcn -o - | FileCheck %s
3+
4+
target triple = "amdgcn-amd-amdhsa"
5+
6+
define amdgpu_kernel void @my_kernel(i64 %foo, i32 %bar) {
7+
; CHECK-LABEL: my_kernel:
8+
; CHECK: ; %bb.0: ; %entry
9+
; CHECK-NEXT: s_mov_b32 flat_scratch_lo, s13
10+
; CHECK-NEXT: s_add_i32 s12, s12, s17
11+
; CHECK-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
12+
; CHECK-NEXT: s_load_dword s0, s[8:9], 0x2
13+
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
14+
; CHECK-NEXT: s_mov_b64 s[4:5], 1
15+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
16+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
17+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
18+
; CHECK-NEXT: s_ashr_i32 s6, s0, 31
19+
; CHECK-NEXT: s_abs_i32 s7, s0
20+
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s7
21+
; CHECK-NEXT: s_sub_i32 s0, 0, s7
22+
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
23+
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
24+
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
25+
; CHECK-NEXT: v_mul_lo_u32 v3, s0, v2
26+
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
27+
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
28+
; CHECK-NEXT: s_and_b64 s[0:1], exec, -1
29+
; CHECK-NEXT: .LBB0_1: ; %loop
30+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
31+
; CHECK-NEXT: v_mov_b32_e32 v3, s4
32+
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[8:9], s2, v3, 1
33+
; CHECK-NEXT: s_mul_i32 s4, s3, s4
34+
; CHECK-NEXT: s_mul_i32 s5, s2, s5
35+
; CHECK-NEXT: v_add_i32_e32 v4, vcc, s4, v4
36+
; CHECK-NEXT: v_readfirstlane_b32 s4, v3
37+
; CHECK-NEXT: v_add_i32_e32 v4, vcc, s5, v4
38+
; CHECK-NEXT: s_ashr_i32 s5, s4, 31
39+
; CHECK-NEXT: s_abs_i32 s8, s4
40+
; CHECK-NEXT: s_xor_b32 s5, s5, s6
41+
; CHECK-NEXT: v_mul_hi_u32 v3, s8, v2
42+
; CHECK-NEXT: v_readfirstlane_b32 s9, v3
43+
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3
44+
; CHECK-NEXT: s_mul_i32 s9, s9, s7
45+
; CHECK-NEXT: s_sub_i32 s8, s8, s9
46+
; CHECK-NEXT: s_sub_i32 s9, s8, s7
47+
; CHECK-NEXT: s_cmp_ge_u32 s8, s7
48+
; CHECK-NEXT: s_cselect_b64 vcc, -1, 0
49+
; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
50+
; CHECK-NEXT: s_cselect_b32 s8, s9, s8
51+
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3
52+
; CHECK-NEXT: s_cmp_ge_u32 s8, s7
53+
; CHECK-NEXT: s_cselect_b64 vcc, -1, 0
54+
; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
55+
; CHECK-NEXT: v_xor_b32_e32 v3, s5, v3
56+
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s5, v3
57+
; CHECK-NEXT: v_ashrrev_i32_e32 v5, 31, v3
58+
; CHECK-NEXT: v_or_b32_e32 v3, s4, v3
59+
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
60+
; CHECK-NEXT: flat_load_dwordx2 v[3:4], v[3:4]
61+
; CHECK-NEXT: s_waitcnt vmcnt(0)
62+
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
63+
; CHECK-NEXT: s_mov_b64 s[4:5], 0
64+
; CHECK-NEXT: s_mov_b64 vcc, s[0:1]
65+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
66+
; CHECK-NEXT: ; %bb.2: ; %DummyReturnBlock
67+
; CHECK-NEXT: s_endpgm
68+
entry:
69+
br label %loop
70+
71+
loop: ; preds = %entry, %loop
72+
%i = phi i64 [ 1, %entry ], [ 0, %loop ]
73+
%mul = mul i64 %foo, %i
74+
%add = add i64 %mul, 1
75+
%trunc = trunc i64 %add to i32
76+
%div = sdiv i32 %trunc, %bar
77+
%sext = sext i32 %div to i64
78+
%or = or i64 %add, %sext
79+
%inttoptr = inttoptr i64 %or to ptr
80+
%addrspacecast = addrspacecast ptr %inttoptr to ptr addrspace(1)
81+
%val = load double, ptr addrspace(1) %addrspacecast, align 8
82+
store double %val, ptr addrspace(1) null, align 8
83+
br label %loop
84+
}

0 commit comments

Comments
 (0)