Skip to content

Commit f078330

Browse files
committed
R600/SI: Use v_madmk_f32
llvm-svn: 230149
1 parent 0325d3d commit f078330

File tree

3 files changed

+233
-5
lines changed

3 files changed

+233
-5
lines changed

llvm/lib/Target/R600/SIInstrInfo.cpp

+51-4
Original file line numberDiff line numberDiff line change
@@ -892,10 +892,57 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
892892
MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
893893
MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
894894

895-
// The VOP2 src0 can't be an SGPR since the constant bus use will be the
896-
// literal constant.
897-
if (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))
898-
return false;
895+
// Multiplied part is the constant: Use v_madmk_f32
896+
// We should only expect these to be on src0 due to canonicalizations.
897+
if (Src0->isReg() && Src0->getReg() == Reg) {
898+
if (!Src1->isReg() ||
899+
(Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
900+
return false;
901+
902+
if (!Src2->isReg() ||
903+
(Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
904+
return false;
905+
906+
// We need to do some weird looking operand shuffling since the madmk
907+
// operands are out of the normal expected order with the multiplied
908+
// constant as the last operand.
909+
//
910+
// v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
911+
// src0 -> src2 K
912+
// src1 -> src0
913+
// src2 -> src1
914+
915+
const int64_t Imm = DefMI->getOperand(1).getImm();
916+
917+
// FIXME: This would be a lot easier if we could return a new instruction
918+
// instead of having to modify in place.
919+
920+
// Remove these first since they are at the end.
921+
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
922+
AMDGPU::OpName::omod));
923+
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
924+
AMDGPU::OpName::clamp));
925+
926+
unsigned Src1Reg = Src1->getReg();
927+
unsigned Src1SubReg = Src1->getSubReg();
928+
unsigned Src2Reg = Src2->getReg();
929+
unsigned Src2SubReg = Src2->getSubReg();
930+
Src0->setReg(Src1Reg);
931+
Src0->setSubReg(Src1SubReg);
932+
Src1->setReg(Src2Reg);
933+
Src1->setSubReg(Src2SubReg);
934+
935+
Src2->ChangeToImmediate(Imm);
936+
937+
removeModOperands(*UseMI);
938+
UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
939+
940+
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
941+
if (DeleteDef)
942+
DefMI->eraseFromParent();
943+
944+
return true;
945+
}
899946

900947
// Added part is the constant: Use v_madak_f32
901948
if (Src2->isReg() && Src2->getReg() == Reg) {

llvm/test/CodeGen/R600/madmk.ll

+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2+
; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3+
4+
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
5+
declare float @llvm.fabs.f32(float) nounwind readnone
6+
7+
; GCN-LABEL: {{^}}madmk_f32:
8+
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
9+
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
10+
; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
11+
define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
12+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
13+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
14+
%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
15+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
16+
17+
%a = load float addrspace(1)* %gep.0, align 4
18+
%b = load float addrspace(1)* %gep.1, align 4
19+
20+
%mul = fmul float %a, 10.0
21+
%madmk = fadd float %mul, %b
22+
store float %madmk, float addrspace(1)* %out.gep, align 4
23+
ret void
24+
}
25+
26+
; The constant 10.0 has two uses here. The checks expect it to be materialized
; once with v_mov_b32 and both multiply-adds to remain plain v_mad_f32.
; GCN-LABEL: {{^}}madmk_2_use_f32:
27+
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
28+
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
29+
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
30+
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
31+
; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VB]]
32+
; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VC]]
33+
; GCN: s_endpgm
34+
define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
35+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
36+
37+
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
38+
%in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
39+
%in.gep.2 = getelementptr float addrspace(1)* %in.gep.0, i32 2
40+
41+
%out.gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
42+
; The second result goes to the element after %out.gep.0 in the output buffer.
%out.gep.1 = getelementptr float addrspace(1)* %out.gep.0, i32 1
43+
44+
%a = load float addrspace(1)* %in.gep.0, align 4
45+
%b = load float addrspace(1)* %in.gep.1, align 4
46+
%c = load float addrspace(1)* %in.gep.2, align 4
47+
48+
%mul0 = fmul float %a, 10.0
49+
%mul1 = fmul float %a, 10.0
50+
%madmk0 = fadd float %mul0, %b
51+
%madmk1 = fadd float %mul1, %c
52+
53+
store float %madmk0, float addrspace(1)* %out.gep.0, align 4
54+
store float %madmk1, float addrspace(1)* %out.gep.1, align 4
55+
ret void
56+
}
57+
58+
; We don't get any benefit if the constant is an inline immediate.
59+
; GCN-LABEL: {{^}}madmk_inline_imm_f32:
60+
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
61+
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
62+
; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]]
63+
define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
64+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
65+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
66+
%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
67+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
68+
69+
%a = load float addrspace(1)* %gep.0, align 4
70+
%b = load float addrspace(1)* %gep.1, align 4
71+
72+
%mul = fmul float %a, 4.0
73+
%madmk = fadd float %mul, %b
74+
store float %madmk, float addrspace(1)* %out.gep, align 4
75+
ret void
76+
}
77+
78+
; Both the multiplied value %a and the added value %b are passed directly as
; kernel arguments rather than loaded (presumably scalar registers, per the
; s_s_ prefix -- confirm against the calling convention). The checks require
; that no v_madmk_f32 is formed and a plain v_mad_f32 is emitted instead.
; GCN-LABEL: {{^}}s_s_madmk_f32:
79+
; GCN-NOT: v_madmk_f32
80+
; GCN: v_mad_f32
81+
; GCN: s_endpgm
82+
define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
83+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
84+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
85+
86+
%mul = fmul float %a, 10.0
87+
%madmk = fadd float %mul, %b
88+
store float %madmk, float addrspace(1)* %out.gep, align 4
89+
ret void
90+
}
91+
92+
; The multiplied value %a is loaded from memory, while the added value %b is
; passed directly as a kernel argument (presumably a scalar register, per the
; v_s_ prefix -- confirm). The checks require that no v_madmk_f32 is formed and
; a plain v_mad_f32 is emitted instead.
; GCN-LABEL: {{^}}v_s_madmk_f32:
93+
; GCN-NOT: v_madmk_f32
94+
; GCN: v_mad_f32
95+
; GCN: s_endpgm
96+
define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
97+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
98+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
99+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
100+
%a = load float addrspace(1)* %gep.0, align 4
101+
102+
%mul = fmul float %a, 10.0
103+
%madmk = fadd float %mul, %b
104+
store float %madmk, float addrspace(1)* %out.gep, align 4
105+
ret void
106+
}
107+
108+
; The multiplied value %a is passed directly as a kernel argument (presumably a
; scalar register, per the test name -- confirm), while the added value %b is
; loaded from memory. The checks require that no v_madmk_f32 is formed and a
; plain v_mad_f32 is emitted instead.
; GCN-LABEL: {{^}}scalar_vector_madmk_f32:
109+
; GCN-NOT: v_madmk_f32
110+
; GCN: v_mad_f32
111+
; GCN: s_endpgm
112+
define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
113+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
114+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
115+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
116+
%b = load float addrspace(1)* %gep.0, align 4
117+
118+
%mul = fmul float %a, 10.0
119+
%madmk = fadd float %mul, %b
120+
store float %madmk, float addrspace(1)* %out.gep, align 4
121+
ret void
122+
}
123+
124+
; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32:
125+
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
126+
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
127+
; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
128+
define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
129+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
130+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
131+
%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
132+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
133+
134+
%a = load float addrspace(1)* %gep.0, align 4
135+
%b = load float addrspace(1)* %gep.1, align 4
136+
137+
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
138+
139+
%mul = fmul float %a.fabs, 10.0
140+
%madmk = fadd float %mul, %b
141+
store float %madmk, float addrspace(1)* %out.gep, align 4
142+
ret void
143+
}
144+
145+
; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32:
146+
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
147+
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
148+
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
149+
define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
150+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
151+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
152+
%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
153+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
154+
155+
%a = load float addrspace(1)* %gep.0, align 4
156+
%b = load float addrspace(1)* %gep.1, align 4
157+
158+
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
159+
160+
%mul = fmul float %a, 10.0
161+
%madmk = fadd float %mul, %b.fabs
162+
store float %madmk, float addrspace(1)* %out.gep, align 4
163+
ret void
164+
}
165+
166+
; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
167+
; GCN: buffer_load_dword [[A:v[0-9]+]]
168+
; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
169+
; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
170+
define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
171+
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
172+
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
173+
%out.gep = getelementptr float addrspace(1)* %out, i32 %tid
174+
175+
%a = load float addrspace(1)* %gep.0, align 4
176+
177+
%mul = fmul float %a, 10.0
178+
%madmk = fadd float %mul, 2.0
179+
store float %madmk, float addrspace(1)* %out.gep, align 4
180+
ret void
181+
}

llvm/test/CodeGen/R600/uint_to_fp.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32>
5050
; R600: MULADD_IEEE
5151
; SI: v_cvt_f32_u32_e32
5252
; SI: v_cvt_f32_u32_e32
53-
; SI: v_mad_f32
53+
; SI: v_madmk_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000
5454
; SI: s_endpgm
5555
define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
5656
entry:

0 commit comments

Comments
 (0)