Skip to content

Commit 6515fdf

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen for FPMinMax pat (#125107)
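Add real-true16 codegen for the f16 FPMinMax patterns: new FPMinMaxPat/FPMinCanonMaxPat entries select the `_t16_e64` opcodes under `UseRealTrue16Insts`, alongside the existing fake16 patterns, and the tests gain `+real-true16` / `-real-true16` RUN lines.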
1 parent d814824 commit 6515fdf

File tree

3 files changed

+487
-165
lines changed

3 files changed

+487
-165
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3790,6 +3790,13 @@ def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>
37903790
def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
37913791
}
37923792

3793+
let True16Predicate = UseRealTrue16Insts in {
3794+
def : FPMinMaxPat<V_MINMAX_F16_t16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
3795+
def : FPMinMaxPat<V_MAXMIN_F16_t16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
3796+
def : FPMinCanonMaxPat<V_MINMAX_F16_t16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
3797+
def : FPMinCanonMaxPat<V_MAXMIN_F16_t16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
3798+
}
3799+
37933800
let True16Predicate = UseFakeTrue16Insts in {
37943801
def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
37953802
def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
@@ -3819,6 +3826,13 @@ def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>
38193826
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
38203827
}
38213828

3829+
let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = isGFX12Plus in {
3830+
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3831+
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3832+
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3833+
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3834+
}
3835+
38223836
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = isGFX12Plus in {
38233837
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
38243838
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;

llvm/test/CodeGen/AMDGPU/minimummaximum.ll

Lines changed: 70 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG %s
3-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-TRUE16 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-FAKE16 %s
4+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-TRUE16 %s
5+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-FAKE16 %s
46

57
define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) {
68
; GFX12-LABEL: test_minmax_f32:
@@ -72,30 +74,84 @@ define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) {
7274
}
7375

7476
define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) {
75-
; GFX12-LABEL: test_minmax_f16:
76-
; GFX12: ; %bb.0:
77-
; GFX12-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
78-
; GFX12-NEXT: ; return to shader part epilog
77+
; SDAG-TRUE16-LABEL: test_minmax_f16:
78+
; SDAG-TRUE16: ; %bb.0:
79+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
80+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
81+
; SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
82+
; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v0.h, v1.l
83+
; SDAG-TRUE16-NEXT: ; return to shader part epilog
84+
;
85+
; SDAG-FAKE16-LABEL: test_minmax_f16:
86+
; SDAG-FAKE16: ; %bb.0:
87+
; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
88+
; SDAG-FAKE16-NEXT: ; return to shader part epilog
89+
;
90+
; GISEL-TRUE16-LABEL: test_minmax_f16:
91+
; GISEL-TRUE16: ; %bb.0:
92+
; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
93+
; GISEL-TRUE16-NEXT: ; return to shader part epilog
94+
;
95+
; GISEL-FAKE16-LABEL: test_minmax_f16:
96+
; GISEL-FAKE16: ; %bb.0:
97+
; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
98+
; GISEL-FAKE16-NEXT: ; return to shader part epilog
7999
%max = call half @llvm.maximum.f16(half %a, half %b)
80100
%minmax = call half @llvm.minimum.f16(half %max, half %c)
81101
ret half %minmax
82102
}
83103

84104
define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) {
85-
; GFX12-LABEL: test_minmax_commuted_f16:
86-
; GFX12: ; %bb.0:
87-
; GFX12-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
88-
; GFX12-NEXT: ; return to shader part epilog
105+
; SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
106+
; SDAG-TRUE16: ; %bb.0:
107+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
108+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
109+
; SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
110+
; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v0.h, v1.l
111+
; SDAG-TRUE16-NEXT: ; return to shader part epilog
112+
;
113+
; SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
114+
; SDAG-FAKE16: ; %bb.0:
115+
; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
116+
; SDAG-FAKE16-NEXT: ; return to shader part epilog
117+
;
118+
; GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
119+
; GISEL-TRUE16: ; %bb.0:
120+
; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
121+
; GISEL-TRUE16-NEXT: ; return to shader part epilog
122+
;
123+
; GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
124+
; GISEL-FAKE16: ; %bb.0:
125+
; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
126+
; GISEL-FAKE16-NEXT: ; return to shader part epilog
89127
%max = call half @llvm.maximum.f16(half %a, half %b)
90128
%minmax = call half @llvm.minimum.f16(half %c, half %max)
91129
ret half %minmax
92130
}
93131

94132
define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) {
95-
; GFX12-LABEL: test_maxmin_commuted_f16:
96-
; GFX12: ; %bb.0:
97-
; GFX12-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
98-
; GFX12-NEXT: ; return to shader part epilog
133+
; SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
134+
; SDAG-TRUE16: ; %bb.0:
135+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
136+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
137+
; SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
138+
; SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v0.h, v1.l
139+
; SDAG-TRUE16-NEXT: ; return to shader part epilog
140+
;
141+
; SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
142+
; SDAG-FAKE16: ; %bb.0:
143+
; SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
144+
; SDAG-FAKE16-NEXT: ; return to shader part epilog
145+
;
146+
; GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
147+
; GISEL-TRUE16: ; %bb.0:
148+
; GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
149+
; GISEL-TRUE16-NEXT: ; return to shader part epilog
150+
;
151+
; GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
152+
; GISEL-FAKE16: ; %bb.0:
153+
; GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
154+
; GISEL-FAKE16-NEXT: ; return to shader part epilog
99155
%min = call half @llvm.minimum.f16(half %a, half %b)
100156
%maxmin = call half @llvm.maximum.f16(half %c, half %min)
101157
ret half %maxmin

0 commit comments

Comments (0)