Skip to content

Commit deefe3f

Browse files
author
Thorsten Schütt
authored
[GlobalIsel] Post-review combine ADDO (#85961)
#82927
1 parent 07a5e31 commit deefe3f

File tree

5 files changed

+98
-46
lines changed

5 files changed

+98
-46
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+1 -9
Original file line number | Diff line number | Diff line change
@@ -6945,10 +6945,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
69456945
LLT DstTy = MRI.getType(Dst);
69466946
LLT CarryTy = MRI.getType(Carry);
69476947

6948-
// We want do fold the [u|s]addo.
6949-
if (!MRI.hasOneNonDBGUse(Dst))
6950-
return false;
6951-
69526948
// Fold addo, if the carry is dead -> add, undef.
69536949
if (MRI.use_nodbg_empty(Carry) &&
69546950
isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
@@ -6959,10 +6955,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
69596955
return true;
69606956
}
69616957

6962-
// We want do fold the [u|s]addo.
6963-
if (!MRI.hasOneNonDBGUse(Carry))
6964-
return false;
6965-
69666958
// Canonicalize constant to RHS.
69676959
if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
69686960
if (IsSigned) {
@@ -6994,7 +6986,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
69946986
return true;
69956987
}
69966988

6997-
// Fold (addo x, 0) -> x, no borrow
6989+
// Fold (addo x, 0) -> x, no carry
69986990
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
69996991
MatchInfo = [=](MachineIRBuilder &B) {
70006992
B.buildCopy(Dst, LHS);

llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir

+84
Original file line number | Diff line number | Diff line change
@@ -92,3 +92,87 @@ body: |
9292
$w1 = COPY %o_wide
9393
RET_ReallyLR implicit $w0
9494
...
95+
---
96+
name: add_multiuse
97+
body: |
98+
bb.0:
99+
liveins: $w0, $w1
100+
; CHECK-LABEL: name: add_multiuse
101+
; CHECK: liveins: $w0, $w1
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
104+
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 0
105+
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
106+
; CHECK-NEXT: $w1 = COPY [[COPY]](s32)
107+
; CHECK-NEXT: $w2 = COPY %const(s32)
108+
; CHECK-NEXT: RET_ReallyLR implicit $w0
109+
%0:_(s32) = COPY $w0
110+
%const:_(s32) = G_CONSTANT i32 0
111+
%add:_(s32), %o:_(s1) = G_SADDO %0, %const
112+
%o_wide:_(s32) = G_ZEXT %o(s1)
113+
$w0 = COPY %add(s32)
114+
$w1 = COPY %add(s32)
115+
$w2 = COPY %o_wide
116+
RET_ReallyLR implicit $w0
117+
...
118+
---
119+
name: add_vector
120+
body: |
121+
bb.0:
122+
liveins: $w0, $w1
123+
; CHECK-LABEL: name: add_vector
124+
; CHECK: liveins: $w0, $w1
125+
; CHECK-NEXT: {{ $}}
126+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
127+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
128+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
129+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
130+
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
131+
; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY2]](s32), [[COPY3]](s32)
132+
; CHECK-NEXT: %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
133+
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
134+
; CHECK-NEXT: $q0 = COPY %add(<4 x s32>)
135+
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
136+
; CHECK-NEXT: RET_ReallyLR implicit $w0
137+
%0:_(s32) = COPY $w0
138+
%1:_(s32) = COPY $w1
139+
%2:_(s32) = COPY $w2
140+
%3:_(s32) = COPY $w3
141+
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
142+
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %3:_(s32), %2:_(s32), %3:_(s32)
143+
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
144+
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
145+
$q0 = COPY %add(<4 x s32>)
146+
$q1 = COPY %o_wide
147+
RET_ReallyLR implicit $w0
148+
...
149+
---
150+
name: add_splat_vector
151+
body: |
152+
bb.0:
153+
liveins: $w0, $w1
154+
; CHECK-LABEL: name: add_splat_vector
155+
; CHECK: liveins: $w0, $w1
156+
; CHECK-NEXT: {{ $}}
157+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
158+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
159+
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
160+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
161+
; CHECK-NEXT: %o:_(<4 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1), [[C]](s1), [[C]](s1)
162+
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
163+
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
164+
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
165+
; CHECK-NEXT: RET_ReallyLR implicit $w0
166+
%0:_(s32) = COPY $w0
167+
%1:_(s32) = COPY $w1
168+
%2:_(s32) = COPY $w2
169+
%3:_(s32) = COPY $w3
170+
%const:_(s32) = G_CONSTANT i32 0
171+
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
172+
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %const:_(s32), %const:_(s32), %const:_(s32), %const:_(s32)
173+
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_SADDO %bv0, %bv1
174+
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
175+
$q0 = COPY %add(<4 x s32>)
176+
$q1 = COPY %o_wide
177+
RET_ReallyLR implicit $w0
178+
...

llvm/test/CodeGen/AArch64/arm64-xaluo.ll

+1 -2
Original file line number | Diff line number | Diff line change
@@ -2643,8 +2643,7 @@ define i8 @pr60530() {
26432643
;
26442644
; GISEL-LABEL: pr60530:
26452645
; GISEL: // %bb.0:
2646-
; GISEL-NEXT: mov w8, #1 // =0x1
2647-
; GISEL-NEXT: sbfx w0, w8, #0, #1
2646+
; GISEL-NEXT: mov w0, #255 // =0xff
26482647
; GISEL-NEXT: ret
26492648
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
26502649
%2 = extractvalue { i8, i1 } %1, 1

llvm/test/CodeGen/AArch64/overflow.ll

+8 -31
Original file line number | Diff line number | Diff line change
@@ -64,21 +64,10 @@ entry:
6464
}
6565

6666
define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
67-
; SDAG-LABEL: saddo.select.i64:
68-
; SDAG: // %bb.0: // %entry
69-
; SDAG-NEXT: mov w0, w1
70-
; SDAG-NEXT: ret
71-
;
72-
; GISEL-LABEL: saddo.select.i64:
73-
; GISEL: // %bb.0: // %entry
74-
; GISEL-NEXT: mov w8, #13 // =0xd
75-
; GISEL-NEXT: and x9, x3, #0xc
76-
; GISEL-NEXT: and x8, x4, x8
77-
; GISEL-NEXT: cmn x9, x8
78-
; GISEL-NEXT: cset w8, vs
79-
; GISEL-NEXT: tst w8, #0x1
80-
; GISEL-NEXT: csel w0, w0, w1, ne
81-
; GISEL-NEXT: ret
67+
; CHECK-LABEL: saddo.select.i64:
68+
; CHECK: // %bb.0: // %entry
69+
; CHECK-NEXT: mov w0, w1
70+
; CHECK-NEXT: ret
8271
entry:
8372
%lhs = and i64 %v4, 12
8473
%rhs = and i64 %v5, 13
@@ -89,22 +78,10 @@ entry:
8978
}
9079

9180
define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
92-
; SDAG-LABEL: uaddo.select.i64:
93-
; SDAG: // %bb.0: // %entry
94-
; SDAG-NEXT: mov w0, w1
95-
; SDAG-NEXT: ret
96-
;
97-
; GISEL-LABEL: uaddo.select.i64:
98-
; GISEL: // %bb.0: // %entry
99-
; GISEL-NEXT: mov w8, #9 // =0x9
100-
; GISEL-NEXT: mov w9, #10 // =0xa
101-
; GISEL-NEXT: and x8, x3, x8
102-
; GISEL-NEXT: and x9, x4, x9
103-
; GISEL-NEXT: cmn x8, x9
104-
; GISEL-NEXT: cset w8, hs
105-
; GISEL-NEXT: tst w8, #0x1
106-
; GISEL-NEXT: csel w0, w0, w1, ne
107-
; GISEL-NEXT: ret
81+
; CHECK-LABEL: uaddo.select.i64:
82+
; CHECK: // %bb.0: // %entry
83+
; CHECK-NEXT: mov w0, w1
84+
; CHECK-NEXT: ret
10885
entry:
10986
%lhs = and i64 %v4, 9
11087
%rhs = and i64 %v5, 10

llvm/test/CodeGen/AMDGPU/fptoi.i128.ll

+4 -4
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
238238
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
239239
; GISEL-NEXT: s_cbranch_execz .LBB0_4
240240
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
241-
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
241+
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
242242
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
243243
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
244244
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -612,7 +612,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
612612
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
613613
; GISEL-NEXT: s_cbranch_execz .LBB1_4
614614
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
615-
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
615+
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
616616
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
617617
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
618618
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -978,7 +978,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
978978
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
979979
; GISEL-NEXT: s_cbranch_execz .LBB2_4
980980
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
981-
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
981+
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
982982
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
983983
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
984984
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -1338,7 +1338,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
13381338
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
13391339
; GISEL-NEXT: s_cbranch_execz .LBB3_4
13401340
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
1341-
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
1341+
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
13421342
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
13431343
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
13441344
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc

0 commit comments

Comments
 (0)