Skip to content

Commit 335f2ff

Browse files
committed
[RISCV] Set riscv-fpimm-cost threshold to 3 by default
`-riscv-fpimm-cost` controls the threshold at which the constant pool is used for float constants rather than generating them directly (typically into a GPR followed by an `fmv`). The value used for this knob indicates the number of instructions that can be used to produce the value (otherwise we fall back to the constant pool). Upping it to 3 covers a huge number of additional constants (see <llvm#153402>), e.g. most whole numbers, which can be generated through lui+shift+fmv. As in general we struggle with efficient code generation for constant pool accesses, reducing the number of constant pool accesses is beneficial. We are typically replacing a two-instruction sequence (which includes a load) with a three-instruction sequence (two simple arithmetic operations plus an fmv). The CHECK prefixes for various tests had to be updated to avoid conflicts leading to check lines being dropped altogether (see <llvm#159321> for a change to update_llc_test_checks to aid diagnosing this).
1 parent d8e1c20 commit 335f2ff

File tree

72 files changed

+19694
-6771
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+19694
-6771
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ static cl::opt<int>
7979
FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
8080
cl::desc("Give the maximum number of instructions that we will "
8181
"use for creating a floating-point immediate value"),
82-
cl::init(2));
82+
cl::init(3));
8383

8484
static cl::opt<bool>
8585
ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,

llvm/test/CodeGen/RISCV/bfloat-convert.ll

Lines changed: 130 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
5151
; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat:
5252
; CHECK32ZFBFMIN: # %bb.0: # %start
5353
; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
54-
; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0)
55-
; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5
56-
; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0)
5754
; CHECK32ZFBFMIN-NEXT: lui a0, 815104
58-
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a0
59-
; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3
60-
; CHECK32ZFBFMIN-NEXT: neg a0, a1
55+
; CHECK32ZFBFMIN-NEXT: lui a1, 290816
56+
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0
57+
; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5
58+
; CHECK32ZFBFMIN-NEXT: addi a1, a1, -512
59+
; CHECK32ZFBFMIN-NEXT: neg a0, a0
60+
; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
61+
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a1
6162
; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
6263
; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz
6364
; CHECK32ZFBFMIN-NEXT: and a0, a0, a1
@@ -68,12 +69,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
6869
; RV32ID-NEXT: fmv.x.w a0, fa0
6970
; RV32ID-NEXT: lui a1, 815104
7071
; RV32ID-NEXT: fmv.w.x fa5, a1
71-
; RV32ID-NEXT: lui a1, %hi(.LCPI1_0)
72+
; RV32ID-NEXT: lui a1, 290816
7273
; RV32ID-NEXT: slli a0, a0, 16
73-
; RV32ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
74-
; RV32ID-NEXT: fmv.w.x fa3, a0
75-
; RV32ID-NEXT: feq.s a0, fa3, fa3
76-
; RV32ID-NEXT: fmax.s fa5, fa3, fa5
74+
; RV32ID-NEXT: addi a1, a1, -512
75+
; RV32ID-NEXT: fmv.w.x fa4, a0
76+
; RV32ID-NEXT: feq.s a0, fa4, fa4
77+
; RV32ID-NEXT: fmax.s fa5, fa4, fa5
78+
; RV32ID-NEXT: fmv.w.x fa4, a1
7779
; RV32ID-NEXT: neg a0, a0
7880
; RV32ID-NEXT: fmin.s fa5, fa5, fa4
7981
; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz
@@ -83,13 +85,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
8385
; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat:
8486
; CHECK64ZFBFMIN: # %bb.0: # %start
8587
; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
86-
; CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0)
87-
; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5
88-
; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0)
8988
; CHECK64ZFBFMIN-NEXT: lui a0, 815104
90-
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a0
91-
; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3
92-
; CHECK64ZFBFMIN-NEXT: neg a0, a1
89+
; CHECK64ZFBFMIN-NEXT: lui a1, 290816
90+
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0
91+
; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5
92+
; CHECK64ZFBFMIN-NEXT: addi a1, a1, -512
93+
; CHECK64ZFBFMIN-NEXT: neg a0, a0
94+
; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
95+
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a1
9396
; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
9497
; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz
9598
; CHECK64ZFBFMIN-NEXT: and a0, a0, a1
@@ -100,12 +103,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
100103
; RV64ID-NEXT: fmv.x.w a0, fa0
101104
; RV64ID-NEXT: lui a1, 815104
102105
; RV64ID-NEXT: fmv.w.x fa5, a1
103-
; RV64ID-NEXT: lui a1, %hi(.LCPI1_0)
106+
; RV64ID-NEXT: lui a1, 290816
104107
; RV64ID-NEXT: slli a0, a0, 16
105-
; RV64ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
106-
; RV64ID-NEXT: fmv.w.x fa3, a0
107-
; RV64ID-NEXT: feq.s a0, fa3, fa3
108-
; RV64ID-NEXT: fmax.s fa5, fa3, fa5
108+
; RV64ID-NEXT: addi a1, a1, -512
109+
; RV64ID-NEXT: fmv.w.x fa4, a0
110+
; RV64ID-NEXT: feq.s a0, fa4, fa4
111+
; RV64ID-NEXT: fmax.s fa5, fa4, fa5
112+
; RV64ID-NEXT: fmv.w.x fa4, a1
109113
; RV64ID-NEXT: neg a0, a0
110114
; RV64ID-NEXT: fmin.s fa5, fa5, fa4
111115
; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz
@@ -152,49 +156,53 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind {
152156
define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
153157
; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
154158
; CHECK32ZFBFMIN: # %bb.0: # %start
155-
; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0)
156-
; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
157-
; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
158-
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, zero
159-
; CHECK32ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3
160-
; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5
159+
; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
160+
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero
161+
; CHECK32ZFBFMIN-NEXT: lui a0, 292864
162+
; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
163+
; CHECK32ZFBFMIN-NEXT: addi a0, a0, -256
164+
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0
165+
; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
161166
; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz
162167
; CHECK32ZFBFMIN-NEXT: ret
163168
;
164169
; RV32ID-LABEL: fcvt_ui_bf16_sat:
165170
; RV32ID: # %bb.0: # %start
166-
; RV32ID-NEXT: lui a0, %hi(.LCPI3_0)
167-
; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
168171
; RV32ID-NEXT: fmv.x.w a0, fa0
172+
; RV32ID-NEXT: fmv.w.x fa5, zero
169173
; RV32ID-NEXT: slli a0, a0, 16
170174
; RV32ID-NEXT: fmv.w.x fa4, a0
171-
; RV32ID-NEXT: fmv.w.x fa3, zero
172-
; RV32ID-NEXT: fmax.s fa4, fa4, fa3
173-
; RV32ID-NEXT: fmin.s fa5, fa4, fa5
175+
; RV32ID-NEXT: lui a0, 292864
176+
; RV32ID-NEXT: addi a0, a0, -256
177+
; RV32ID-NEXT: fmax.s fa5, fa4, fa5
178+
; RV32ID-NEXT: fmv.w.x fa4, a0
179+
; RV32ID-NEXT: fmin.s fa5, fa5, fa4
174180
; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz
175181
; RV32ID-NEXT: ret
176182
;
177183
; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
178184
; CHECK64ZFBFMIN: # %bb.0: # %start
179-
; CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0)
180-
; CHECK64ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
181-
; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
182-
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, zero
183-
; CHECK64ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3
184-
; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5
185+
; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
186+
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, zero
187+
; CHECK64ZFBFMIN-NEXT: lui a0, 292864
188+
; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
189+
; CHECK64ZFBFMIN-NEXT: addi a0, a0, -256
190+
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0
191+
; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
185192
; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a0, fa5, rtz
186193
; CHECK64ZFBFMIN-NEXT: ret
187194
;
188195
; RV64ID-LABEL: fcvt_ui_bf16_sat:
189196
; RV64ID: # %bb.0: # %start
190-
; RV64ID-NEXT: lui a0, %hi(.LCPI3_0)
191-
; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
192197
; RV64ID-NEXT: fmv.x.w a0, fa0
198+
; RV64ID-NEXT: fmv.w.x fa5, zero
193199
; RV64ID-NEXT: slli a0, a0, 16
194200
; RV64ID-NEXT: fmv.w.x fa4, a0
195-
; RV64ID-NEXT: fmv.w.x fa3, zero
196-
; RV64ID-NEXT: fmax.s fa4, fa4, fa3
197-
; RV64ID-NEXT: fmin.s fa5, fa4, fa5
201+
; RV64ID-NEXT: lui a0, 292864
202+
; RV64ID-NEXT: addi a0, a0, -256
203+
; RV64ID-NEXT: fmax.s fa5, fa4, fa5
204+
; RV64ID-NEXT: fmv.w.x fa4, a0
205+
; RV64ID-NEXT: fmin.s fa5, fa5, fa4
198206
; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz
199207
; RV64ID-NEXT: ret
200208
start:
@@ -472,20 +480,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
472480
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
473481
; RV32IZFBFMIN-NEXT: mv a2, a1
474482
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
475-
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
476-
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
483+
; RV32IZFBFMIN-NEXT: lui a1, 389120
484+
; RV32IZFBFMIN-NEXT: addi a1, a1, -1
485+
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a1
477486
; RV32IZFBFMIN-NEXT: flt.s a1, fa5, fs0
478487
; RV32IZFBFMIN-NEXT: beqz a1, .LBB10_4
479488
; RV32IZFBFMIN-NEXT: # %bb.3:
480489
; RV32IZFBFMIN-NEXT: addi a2, a3, -1
481490
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
482491
; RV32IZFBFMIN-NEXT: feq.s a3, fs0, fs0
483-
; RV32IZFBFMIN-NEXT: neg a4, a1
484-
; RV32IZFBFMIN-NEXT: neg a1, s0
492+
; RV32IZFBFMIN-NEXT: neg a4, s0
493+
; RV32IZFBFMIN-NEXT: neg a5, a1
485494
; RV32IZFBFMIN-NEXT: neg a3, a3
486-
; RV32IZFBFMIN-NEXT: and a0, a1, a0
495+
; RV32IZFBFMIN-NEXT: and a0, a4, a0
487496
; RV32IZFBFMIN-NEXT: and a1, a3, a2
488-
; RV32IZFBFMIN-NEXT: or a0, a4, a0
497+
; RV32IZFBFMIN-NEXT: or a0, a5, a0
489498
; RV32IZFBFMIN-NEXT: and a0, a3, a0
490499
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
491500
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -511,20 +520,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
511520
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
512521
; R32IDZFBFMIN-NEXT: mv a2, a1
513522
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
514-
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
515-
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
523+
; R32IDZFBFMIN-NEXT: lui a1, 389120
524+
; R32IDZFBFMIN-NEXT: addi a1, a1, -1
525+
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a1
516526
; R32IDZFBFMIN-NEXT: flt.s a1, fa5, fs0
517527
; R32IDZFBFMIN-NEXT: beqz a1, .LBB10_4
518528
; R32IDZFBFMIN-NEXT: # %bb.3:
519529
; R32IDZFBFMIN-NEXT: addi a2, a3, -1
520530
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
521531
; R32IDZFBFMIN-NEXT: feq.s a3, fs0, fs0
522-
; R32IDZFBFMIN-NEXT: neg a4, a1
523-
; R32IDZFBFMIN-NEXT: neg a1, s0
532+
; R32IDZFBFMIN-NEXT: neg a4, s0
533+
; R32IDZFBFMIN-NEXT: neg a5, a1
524534
; R32IDZFBFMIN-NEXT: neg a3, a3
525-
; R32IDZFBFMIN-NEXT: and a0, a1, a0
535+
; R32IDZFBFMIN-NEXT: and a0, a4, a0
526536
; R32IDZFBFMIN-NEXT: and a1, a3, a2
527-
; R32IDZFBFMIN-NEXT: or a0, a4, a0
537+
; R32IDZFBFMIN-NEXT: or a0, a5, a0
528538
; R32IDZFBFMIN-NEXT: and a0, a3, a0
529539
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
530540
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -552,8 +562,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
552562
; RV32ID-NEXT: # %bb.1: # %start
553563
; RV32ID-NEXT: mv a2, a1
554564
; RV32ID-NEXT: .LBB10_2: # %start
555-
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
556-
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
565+
; RV32ID-NEXT: lui a1, 389120
566+
; RV32ID-NEXT: addi a1, a1, -1
567+
; RV32ID-NEXT: fmv.w.x fa5, a1
557568
; RV32ID-NEXT: flt.s a1, fa5, fs0
558569
; RV32ID-NEXT: beqz a1, .LBB10_4
559570
; RV32ID-NEXT: # %bb.3:
@@ -641,30 +652,59 @@ define i64 @fcvt_lu_bf16(bfloat %a) nounwind {
641652
}
642653

643654
define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
644-
; CHECK32ZFBFMIN-LABEL: fcvt_lu_bf16_sat:
645-
; CHECK32ZFBFMIN: # %bb.0: # %start
646-
; CHECK32ZFBFMIN-NEXT: addi sp, sp, -16
647-
; CHECK32ZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
648-
; CHECK32ZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
649-
; CHECK32ZFBFMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
650-
; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI12_0)
651-
; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI12_0)(a0)
652-
; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0
653-
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero
654-
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa4, fa0
655-
; CHECK32ZFBFMIN-NEXT: flt.s a1, fa5, fa0
656-
; CHECK32ZFBFMIN-NEXT: neg s0, a1
657-
; CHECK32ZFBFMIN-NEXT: neg s1, a0
658-
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
659-
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
660-
; CHECK32ZFBFMIN-NEXT: and a1, s1, a1
661-
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
662-
; CHECK32ZFBFMIN-NEXT: or a1, s0, a1
663-
; CHECK32ZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
664-
; CHECK32ZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
665-
; CHECK32ZFBFMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
666-
; CHECK32ZFBFMIN-NEXT: addi sp, sp, 16
667-
; CHECK32ZFBFMIN-NEXT: ret
655+
; RV32IZFBFMIN-LABEL: fcvt_lu_bf16_sat:
656+
; RV32IZFBFMIN: # %bb.0: # %start
657+
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
658+
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
659+
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
660+
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
661+
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
662+
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, zero
663+
; RV32IZFBFMIN-NEXT: fle.s a0, fa5, fs0
664+
; RV32IZFBFMIN-NEXT: neg s0, a0
665+
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
666+
; RV32IZFBFMIN-NEXT: call __fixunssfdi
667+
; RV32IZFBFMIN-NEXT: and a0, s0, a0
668+
; RV32IZFBFMIN-NEXT: lui a2, 391168
669+
; RV32IZFBFMIN-NEXT: and a1, s0, a1
670+
; RV32IZFBFMIN-NEXT: addi a2, a2, -1
671+
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a2
672+
; RV32IZFBFMIN-NEXT: flt.s a2, fa5, fs0
673+
; RV32IZFBFMIN-NEXT: neg a2, a2
674+
; RV32IZFBFMIN-NEXT: or a0, a2, a0
675+
; RV32IZFBFMIN-NEXT: or a1, a2, a1
676+
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
677+
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
678+
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
679+
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
680+
; RV32IZFBFMIN-NEXT: ret
681+
;
682+
; R32IDZFBFMIN-LABEL: fcvt_lu_bf16_sat:
683+
; R32IDZFBFMIN: # %bb.0: # %start
684+
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
685+
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
686+
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
687+
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
688+
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
689+
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, zero
690+
; R32IDZFBFMIN-NEXT: fle.s a0, fa5, fs0
691+
; R32IDZFBFMIN-NEXT: neg s0, a0
692+
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
693+
; R32IDZFBFMIN-NEXT: call __fixunssfdi
694+
; R32IDZFBFMIN-NEXT: and a0, s0, a0
695+
; R32IDZFBFMIN-NEXT: lui a2, 391168
696+
; R32IDZFBFMIN-NEXT: and a1, s0, a1
697+
; R32IDZFBFMIN-NEXT: addi a2, a2, -1
698+
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a2
699+
; R32IDZFBFMIN-NEXT: flt.s a2, fa5, fs0
700+
; R32IDZFBFMIN-NEXT: neg a2, a2
701+
; R32IDZFBFMIN-NEXT: or a0, a2, a0
702+
; R32IDZFBFMIN-NEXT: or a1, a2, a1
703+
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
704+
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
705+
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
706+
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
707+
; R32IDZFBFMIN-NEXT: ret
668708
;
669709
; RV32ID-LABEL: fcvt_lu_bf16_sat:
670710
; RV32ID: # %bb.0: # %start
@@ -673,15 +713,16 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
673713
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
674714
; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
675715
; RV32ID-NEXT: fmv.x.w a0, fa0
676-
; RV32ID-NEXT: lui a1, %hi(.LCPI12_0)
677-
; RV32ID-NEXT: fmv.w.x fa5, zero
678-
; RV32ID-NEXT: flw fa4, %lo(.LCPI12_0)(a1)
716+
; RV32ID-NEXT: lui a1, 391168
679717
; RV32ID-NEXT: slli a0, a0, 16
718+
; RV32ID-NEXT: addi a1, a1, -1
680719
; RV32ID-NEXT: fmv.w.x fa0, a0
681-
; RV32ID-NEXT: fle.s a0, fa5, fa0
682-
; RV32ID-NEXT: flt.s a1, fa4, fa0
683-
; RV32ID-NEXT: neg s0, a1
684-
; RV32ID-NEXT: neg s1, a0
720+
; RV32ID-NEXT: fmv.w.x fa5, a1
721+
; RV32ID-NEXT: flt.s a0, fa5, fa0
722+
; RV32ID-NEXT: fmv.w.x fa5, zero
723+
; RV32ID-NEXT: fle.s a1, fa5, fa0
724+
; RV32ID-NEXT: neg s0, a0
725+
; RV32ID-NEXT: neg s1, a1
685726
; RV32ID-NEXT: call __fixunssfdi
686727
; RV32ID-NEXT: and a0, s1, a0
687728
; RV32ID-NEXT: and a1, s1, a1

llvm/test/CodeGen/RISCV/bfloat-imm.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
define bfloat @bfloat_imm() nounwind {
88
; CHECK-LABEL: bfloat_imm:
99
; CHECK: # %bb.0:
10-
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
11-
; CHECK-NEXT: flh fa0, %lo(.LCPI0_0)(a0)
10+
; CHECK-NEXT: lui a0, 4
11+
; CHECK-NEXT: addi a0, a0, 64
12+
; CHECK-NEXT: fmv.h.x fa0, a0
1213
; CHECK-NEXT: ret
1314
ret bfloat 3.0
1415
}

0 commit comments

Comments
 (0)