[SelectionDAG] Treat CopyFromReg as freezing the value #85932
Conversation
@llvm/pr-subscribers-backend-loongarch @llvm/pr-subscribers-llvm-selectiondag

Author: Björn Pettersson (bjope)

Changes

The description of CopyFromReg in ISDOpcodes.h says that the input value is defined outside the scope of the current SelectionDAG. I think that means that we basically can treat it as a FREEZE in the sense that it can be seen as neither being undef nor poison.

Being able to fold freeze(CopyFromReg) into CopyFromReg seems useful to avoid regressions if we start to introduce freeze instructions in DAGCombiner/foldBoolSelectToLogic, e.g. to solve #84653.

Things not dealt with in this patch:
- Depending on calling convention an input argument can be passed also on the stack and not in a register. If it is allowed to treat an argument received in a register as not being poison, then I think we want to treat arguments received on the stack the same way. But then we need to attribute load instructions, or add explicit FREEZE when lowering formal arguments.
- A common pattern is that there is an AssertZext or AssertSext just after CopyFromReg. I think that if we treat CopyFromReg as never being poison, then it should be allowed to fold freeze(AssertZext(CopyFromReg)) -> AssertZext(CopyFromReg).
Patch is 160.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85932.diff 39 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 49d51a27e3c0f6..800967cbfd2e7b 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -205,6 +205,7 @@ enum NodeType {
/// CopyFromReg - This node indicates that the input value is a virtual or
/// physical register that is defined outside of the scope of this
/// SelectionDAG. The register is available from the RegisterSDNode object.
+ /// Not that CopyFromReg is considered as also freezing the value.
CopyFromReg,
/// UNDEF - An undefined node.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9d73a42df2a479..1996b998852e29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5012,6 +5012,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::TargetFrameIndex:
return true;
+ case ISD::CopyFromReg:
+ // Treat CopyFromReg as freezing the value.
+ return true;
+
case ISD::UNDEF:
return PoisonOnly;
diff --git a/llvm/test/CodeGen/AArch64/combine-mul.ll b/llvm/test/CodeGen/AArch64/combine-mul.ll
index a2b0425308093d..c49e5ae6620a9e 100644
--- a/llvm/test/CodeGen/AArch64/combine-mul.ll
+++ b/llvm/test/CodeGen/AArch64/combine-mul.ll
@@ -44,8 +44,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
define i64 @combine_mul_self_demandedbits(i64 %x) {
; CHECK-LABEL: combine_mul_self_demandedbits:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x0, x0
-; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
+; CHECK-NEXT: mul x0, x0, x0
; CHECK-NEXT: ret
%1 = mul i64 %x, %x
%2 = and i64 %1, -3
@@ -77,7 +76,7 @@ define i8 @one_demanded_bit(i8 %x) {
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
; CHECK-LABEL: one_demanded_bit_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32
+; CHECK-NEXT: mov w8, #32 // =0x20
; CHECK-NEXT: shl v0.2d, v0.2d, #5
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -131,7 +130,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-2
+; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index d2ee80e6aa9513..f032756e007b68 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -57,8 +57,8 @@ define i64 @sltiu(i64 %a) nounwind {
;
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
-; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sltiu a0, a0, 3
+; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index 5914e45a153302..aa962d68fc5285 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
-; RV32IA-NEXT: addi a4, a2, 1
-; RV32IA-NEXT: sltu a2, a2, a1
-; RV32IA-NEXT: neg a2, a2
-; RV32IA-NEXT: and a4, a2, a4
+; RV32IA-NEXT: addi a2, a2, 1
+; RV32IA-NEXT: sltu a4, a3, a1
+; RV32IA-NEXT: neg a4, a4
+; RV32IA-NEXT: and a4, a4, a2
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
@@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
-; RV64IA-NEXT: addi a4, a2, 1
-; RV64IA-NEXT: sltu a2, a2, a1
-; RV64IA-NEXT: neg a2, a2
-; RV64IA-NEXT: and a4, a2, a4
+; RV64IA-NEXT: addi a2, a2, 1
+; RV64IA-NEXT: sltu a4, a3, a1
+; RV64IA-NEXT: neg a4, a4
+; RV64IA-NEXT: and a4, a4, a2
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 3700a18bafc612..932682a89e28fb 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -771,9 +771,8 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB12_2
+; RV32IFD-NEXT: beqz s2, .LBB12_2
; RV32IFD-NEXT: # %bb.1: # %start
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB12_2: # %start
@@ -807,31 +806,29 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
-; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
-; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixdfdi
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s3, s2
-; RV32IZFINXZDINX-NEXT: call __fixdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_1)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_1+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_1)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s3, a0
-; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI12_1)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI12_1+4)(a4)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI12_1)(a4)
+; RV32IZFINXZDINX-NEXT: fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a6
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
; RV32IZFINXZDINX-NEXT: neg a2, a3
; RV32IZFINXZDINX-NEXT: or a0, a2, a0
; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
; RV32IZFINXZDINX-NEXT: neg a2, a2
; RV32IZFINXZDINX-NEXT: lui a5, 524288
-; RV32IZFINXZDINX-NEXT: li a6, 1
+; RV32IZFINXZDINX-NEXT: li a7, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB12_2
+; RV32IZFINXZDINX-NEXT: bne a6, a7, .LBB12_2
; RV32IZFINXZDINX-NEXT: # %bb.1: # %start
; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB12_2: # %start
@@ -844,8 +841,6 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -868,33 +863,32 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lui a3, 278016
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: li a2, -1
+; RV32I-NEXT: call __gtdf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: lui a3, 802304
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfdi
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: lui s6, 524288
-; RV32I-NEXT: bgez s2, .LBB12_2
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: lui a0, 524288
+; RV32I-NEXT: bgez s3, .LBB12_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: lui s4, 524288
+; RV32I-NEXT: lui s5, 524288
; RV32I-NEXT: .LBB12_2: # %start
-; RV32I-NEXT: lui a3, 278016
-; RV32I-NEXT: addi a3, a3, -1
-; RV32I-NEXT: li a2, -1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __gtdf2
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: blez a0, .LBB12_4
+; RV32I-NEXT: blez s2, .LBB12_4
; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: addi s4, s6, -1
+; RV32I-NEXT: addi s5, a0, -1
; RV32I-NEXT: .LBB12_4: # %start
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
@@ -903,11 +897,11 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: slti a2, s2, 0
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: slti a2, s3, 0
; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a2, a2, s3
-; RV32I-NEXT: sgtz a3, s5
+; RV32I-NEXT: and a2, a2, s4
+; RV32I-NEXT: sgtz a3, s2
; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: or a2, a3, a2
; RV32I-NEXT: and a0, a0, a2
@@ -918,7 +912,6 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -1027,22 +1020,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
-; RV32IFD-NEXT: flt.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: call __fixunsdfdi
-; RV32IFD-NEXT: and a0, s1, a0
-; RV32IFD-NEXT: or a0, s0, a0
-; RV32IFD-NEXT: and a1, s1, a1
-; RV32IFD-NEXT: or a1, s0, a1
+; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0)
+; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a2)
+; RV32IFD-NEXT: and a0, s0, a0
+; RV32IFD-NEXT: flt.d a2, fa5, fs0
+; RV32IFD-NEXT: neg a2, a2
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a1, s0, a1
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -1061,28 +1055,26 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
+; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4)
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI14_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI14_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI14_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
index 7cdf18e2fea9c0..a3a35ba1ccc8d9 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -73,9 +73,8 @@ define i64 @test_floor_si64(double %x) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB1_2
+; RV32IFD-NEXT: beqz s2, .LBB1_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB1_2:
@@ -353,9 +352,8 @@ define i64 @test_ceil_si64(double %x) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB5_2
+; RV32IFD-NEXT: beqz s2, .LBB5_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB5_2:
@@ -633,9 +631,8 @@ define i64 @test_trunc_si64(double %x) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB9_2
+; RV32IFD-NEXT: beqz s2, .LBB9_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB9_2:
@@ -913,9 +910,8 @@ define i64 @test_round_si64(double %x) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB13_2
+; RV32IFD-NEXT: beqz s2, .LBB13_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB13_2:
@@ -1193,9 +1189,8 @@ define i64 @test_roundeven_si64(double %x) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB17_2
+; RV32IFD-NEXT: beqz s2, .LBB17_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB17_2:
@@ -1473,9 +1468,8 @@ define i64 @test_rint_si64(double %x) nounwind {
; RV32IFD-NEXT: feq.d a2, fs0, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: li a5, 1
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: bne s2, a5, .LBB21_2
+; RV32IFD-NEXT: beqz s2, .LBB21_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB21_2:
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 9fb78d4c4d5210..ee54b45afa4357 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -275,26 +275,24 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lui a1, 325632
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: call __gtsf2
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg s1, a0
-; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: addi s2, a0, -1
+; RV32I-NEXT: addi s1, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfsi
-; RV32I-NEXT: and a0, s2, a0
-; RV32I-NEXT: or a0, s1, a0
+; RV32I-NEXT: and s1, s1, a0
+; RV32I-NEXT: lui a1, 325632
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __gtsf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: or a0, a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@@ -869,22 +867,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0)
-; RV32IF-NEXT: flt.s a0, fa5, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x fa5, zero
; RV32IF-NEXT: fle.s a0, fa5, fa0
-; RV32IF-NEXT: neg s1, a0
+; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: and a0, s1, a0
-; RV32IF-NEXT: or a0, s0, a0
-; RV32IF-NEXT: and a1, s1, a1
-; RV32IF-NEXT: or a1, s0, a1
+; RV32IF-NEXT: lui a2, %hi(.LCPI14_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2)
+; RV32IF-NEXT: and a0, s0, a0
+; RV32IF-NEXT: flt.s a2, fa5, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a1, s0, a1
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -903,17 +902,19 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0)
-; RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1)
-; RV32IZFINX-NEXT: flt.s a1, a1, a0
-; RV32IZFINX-NEXT: neg s0, a1
-; RV32IZFINX-NEXT: fle.s a1, zero, a0
-; RV32IZFINX-NEXT: neg s1, a1
+; RV32IZFINX-NEXT: mv s0, a0
+; RV32IZFINX-NEXT: fle.s a0, zero, a0
+; RV32IZFINX-NEXT: neg s1, a0
+; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
+; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0)
+; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2)
; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: or a0, s0, a0
+; RV32IZFINX-NEXT: flt.s a2, a2, s0
+; RV32IZFINX-NEXT: neg a2, a2
+; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, s0, a1
+; RV32IZFINX-NEXT: or a1, a2, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-...
[truncated]
@@ -205,6 +205,7 @@ enum NodeType {
  /// CopyFromReg - This node indicates that the input value is a virtual or
  /// physical register that is defined outside of the scope of this
  /// SelectionDAG. The register is available from the RegisterSDNode object.
  /// Not that CopyFromReg is considered as also freezing the value.
Note?
Yes, thanks! It should say "Note ...".
@@ -5012,6 +5012,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
  case ISD::TargetFrameIndex:
    return true;

  case ISD::CopyFromReg:
    // Treat CopyFromReg as freezing the value.
Just add to the case above handling FrameIndex and co? Don't need the special comment?
I believe this is correct.
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
Regression?
Seems like it can be avoided by a patch like this (and suddenly there are floats in the constant pool, but the asm instructions no longer diff):
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 35f756ea5e1d..d7539b2d942d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8771,7 +8771,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// and blend the FREEZE-UNDEF operands back in.
// FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?
if (unsigned NumFrozenUndefElts = FrozenUndefMask.popcount();
- NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
+ NumFrozenUndefElts >= 1 && NumFrozenUndefElts < NumElems) {
SmallVector<int, 16> BlendMask(NumElems, -1);
SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
for (unsigned i = 0; i < NumElems; ++i) {
I think the more general problem here is that we now fold freeze over BUILD_VECTOR more often.
Right here, with this patch, we get
t2: v4i32,ch = CopyFromReg t0, Register:v4i32 %0
t18: i32 = freeze undef:i32
t19: v4i32 = BUILD_VECTOR Constant:i32<1>, Constant:i32<2>, Constant:i32<3>, t18
t8: v4i32 = add t2, t19
instead of
t2: v4i32,ch = CopyFromReg t0, Register:v4i32 %0
t7: v4i32 = BUILD_VECTOR Constant:i32<1>, Constant:i32<2>, Constant:i32<3>, undef:i32
t8: v4i32 = add t2, t7
t9: v4i32 = freeze t8
And then I guess there might be several places where we do handle undef arguments to a BUILD_VECTOR, but we do not handle "isFreezeUndef" operands equally well. For example isBuildVectorOfConstantSDNodes would fail when there is a frozen undef operand, while it would return true if some operands are just undef.
Seems really complicated (and lots of work) to fix all regressions that would pop up if merging #84924, but considering that different users have suffered from miscompiles I think we might need to accept some regressions ("slow but correct" is often better than "fast but wrong" IMHO).
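For illustration, here is a simplified sketch of why such a helper rejects frozen-undef lanes (names and the exact checks in the real SelectionDAG.cpp differ):

// Simplified sketch: plain undef lanes are tolerated, but a
// freeze(undef) lane is a FREEZE node, not a ConstantSDNode or
// ConstantFPSDNode, so the "constant build vector" check fails.
static bool isConstantBuildVectorSketch(const SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;                     // undef lane: still "constant"
    if (!isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      return false;                 // freeze(undef) lane lands here
  }
  return true;
}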
I think the right place to fix this would be in generic BUILD_VECTOR lowering to constant pool. In that case, we'd want to lower a BUILD_VECTOR that is constant apart from one-use freeze undef to a constant pool load where the freeze undef has been replaced by zero.
A more aggressive alternative would be to just always replace freeze undef with zero. This is essentially what InstCombine does in the middle-end (well, it's a tad smarter and sometimes uses -1 based on usage). But I suspect that just always doing this may not work as well on the SDAG layer.
The discussion here https://discourse.llvm.org/t/what-are-we-allowed-to-do-with-freeze-undef-in-selectiondag/78175/11 was started when I looked at this.
This is one hack to get rid of those regressions: e2419b6
Maybe that should go into a separate pull request, if it isn't considered too hacky a solution.
If we say that violating a zeroext/signext attribute is undefined behavior rather than producing a poison value, then yes -- with the caveat that we can't just match the AssertZExt + CopyFromReg pattern. The AssertZExt could have come from somewhere other than the ABI.
We had that earlier attempt that classified AssertZext/AssertSext as not producing poison. It was reverted due to AssertSext being used in some fptosi legalization (see #66603). So yes, we need to consider that as well somehow.
This would kind of render AssertSext/AssertZext useless.
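For illustration, the pattern discussed here looks roughly like this in the DAG (node numbers invented):

t2: i64,ch = CopyFromReg t0, Register:i64 %0
t3: i64 = AssertZext t2, ValueType:ch:i32
t4: i64 = freeze t3

With this patch t2 is known not to be undef or poison, but t4 still cannot be folded away, since AssertZext itself may produce poison if its assertion is violated (per the zeroext/signext discussion above).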
Ping.
I'd like a mitigation for the BUILD_VECTOR issue, but otherwise this looks good to me.
Reverse ping. At least you can rebase to resolve conflicts.
LGTM after a rebase.
A couple of minors
MVT VT = N0.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
  return DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, EltVT));
Use DAG.getAllOnesConstant()?
if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
  SmallVector<SDValue, 8> NewVecC;
  for (const SDValue &Op : N0->op_values())
    NewVecC.push_back(Op.isUndef() ? DAG.getConstant(0, DL, EltVT) : Op);
Using EltVT doesn't account for BUILD_VECTOR implicit truncation - it needs to be:
DAG.getConstant(0, DL, Op.getValueType())
LGTM
The description of CopyFromReg in ISDOpcodes.h says that the input value is defined outside the scope of the current SelectionDAG. I think that means that we basically can treat it as a FREEZE in the sense that it can be seen as neither being undef nor poison. Being able to fold freeze(CopyFromReg) into CopyFromReg seems useful to avoid regressions if we start to introduce freeze instructions in DAGCombiner/foldBoolSelectToLogic, e.g. to solve llvm#84653. Things _not_ dealt with in this patch: - Depending on calling convention an input argument can be passed also on the stack and not in a register. If it is allowed to treat an argument received in a register as not being poison, then I think we want to treat arguments received on the stack the same way. But then we need to attribute load instructions, or add explicit FREEZE when lowering formal arguments. - A common pattern is that there is an AssertZext or AssertSext just after CopyFromReg. I think that if we treat CopyFromReg as never being poison, then it should be allowed to fold freeze(AssertZext(CopyFromReg)) -> AssertZext(CopyFromReg).
Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all ones" or "constant" into something that depends on freeze(undef), as that would destroy those properties. Instead we replace undef by 0/-1 in such vectors, making it possible to fold away the freeze. We typically use -1 if the BUILD_VECTOR would identify as "all ones", and otherwise we use the value 0.
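Folding in the two review suggestions above (DAG.getAllOnesConstant, and Op.getValueType() so BUILD_VECTOR implicit truncation is respected) plus the EVT fix discussed further down, this special case might end up looking roughly like the sketch below; the committed code checks more conditions than shown here.

// Sketch of the BUILD_VECTOR special case in DAGCombiner::visitFREEZE.
if (N0.getOpcode() == ISD::BUILD_VECTOR) {
  SDLoc DL(N0);
  EVT VT = N0.getValueType(); // EVT, not MVT: irregular/scalable types
                              // have no simple value type
  // An "all ones" vector with undef lanes stays "all ones" if the
  // undef lanes become -1.
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return DAG.getAllOnesConstant(DL, VT);
  // Otherwise a constant vector keeps its "constant" property if the
  // undef lanes become 0.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> NewVecC;
    for (const SDValue &Op : N0->op_values())
      NewVecC.push_back(
          Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
    return DAG.getBuildVector(VT, DL, NewVecC);
  }
}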
…84921)" and more... This reverts: b3c55b7 - "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)" (because it updates a test case that I don't know how to resolve the conflict for) 8e2f649 - "[DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)" 73472c5 - "[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)" Due to a test suite failure on AArch64 when compiling for SVE. https://lab.llvm.org/buildbot/#/builders/197/builds/13955 clang: ../llvm/llvm/include/llvm/CodeGen/ValueTypes.h:307: MVT llvm::EVT::getSimpleVT() const: Assertion `isSimple() && "Expected a SimpleValueType!"' failed.
I have had to revert this due to a test suite failure when compiling for AArch64 Scalable Vectors (SVE), for both specific vector lengths and length agnostic code.
Reproducer: sve_repro.tar.gz
This should reproduce on any machine as long as you target AArch64 with SVE. Our AArch64 builder without SVE did not have this problem.
I met a similar issue: dtcxzyw/llvm-codegen-benchmark#6 (comment). But it was caused by an irregular type.
// the future, then this special handling can be removed.
if (N0.getOpcode() == ISD::BUILD_VECTOR) {
  SDLoc DL(N0);
  MVT VT = N0.getSimpleValueType();
I'm guessing these need to be EVT VT = N0.getValueType();
creduced example:
Thanks for the revert, and sorry for the inconvenience.
…rPoison (#84921)" and more..."

This reverts commit 16bd10a. Re-applies:
b3c55b7 - "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)"
8e2f649 - "[DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)"
73472c5 - "[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)"
with a fix in DAGCombiner::visitFREEZE.
Re-applied in 55c6bda
The description of CopyFromReg in ISDOpcodes.h says that the input
value is defined outside the scope of the current SelectionDAG. I
think that means that we basically can treat it as a FREEZE in the
sense that it can be seen as neither being undef nor poison.
Being able to fold freeze(CopyFromReg) into CopyFromReg seems
useful to avoid regressions if we start to introduce freeze
instructions in DAGCombiner/foldBoolSelectToLogic, e.g. to solve
#84653
Things not dealt with in this patch:
- Depending on calling convention an input argument can be passed
  also on the stack and not in a register. If it is allowed to treat
  an argument received in a register as not being poison, then I think
  we want to treat arguments received on the stack the same way. But
  then we need to attribute load instructions, or add explicit FREEZE
  when lowering formal arguments.
- A common pattern is that there is an AssertZext or AssertSext just
  after CopyFromReg. I think that if we treat CopyFromReg as never
  being poison, then it should be allowed to fold
  freeze(AssertZext(CopyFromReg)) -> AssertZext(CopyFromReg).
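As a hypothetical illustration of the basic fold, a freeze of a value that reaches the DAG through CopyFromReg (for example an integer argument passed in a register) can now simply be elided:

define i64 @freeze_of_arg(i64 %x) {
  ; %x is lowered via CopyFromReg; with this patch that node is treated
  ; as neither undef nor poison, so the freeze folds to %x itself.
  %f = freeze i64 %x
  %r = add i64 %f, 1
  ret i64 %r
}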