Skip to content

Commit 70feafd

Browse files
authored
IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute (#101698)
Delete the attribute and annotate any atomicrmw instructions in the function with new metadata.
1 parent 8949290 commit 70feafd

File tree

2 files changed

+114
-0
lines changed

2 files changed

+114
-0
lines changed

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5272,6 +5272,22 @@ struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
52725272
Call.addFnAttr(Attribute::NoBuiltin);
52735273
}
52745274
};
5275+
5276+
/// Instruction visitor used to auto-upgrade the "amdgpu-unsafe-fp-atomics"
/// function attribute into per-instruction atomicrmw metadata.
///
/// Each visited floating-point atomicrmw is tagged with empty
/// !amdgpu.no.fine.grained.host.memory, !amdgpu.no.remote.memory.access and
/// !amdgpu.ignore.denormal.mode nodes. No other visit hook is overridden
/// here. Removing the attribute itself is the caller's job.
5277+
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5278+
: public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5279+
AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5280+
5281+
/// Attach the three AMDGPU metadata kinds to \p RMW if it performs a
/// floating-point operation; otherwise leave it unchanged.
void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5282+
// Only floating-point operations are annotated. The accompanying test
// expects integer operations and xchg (even on a float value) to stay
// free of metadata.
if (!RMW.isFloatingPointOperation())
5283+
return;
5284+
5285+
// One empty node is shared by all three metadata kinds; only the presence
// of each kind is recorded, there is no payload.
MDNode *Empty = MDNode::get(RMW.getContext(), {});
5286+
RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5287+
RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5288+
RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5289+
}
5290+
};
52755291
} // namespace
52765292

52775293
void llvm::UpgradeFunctionAttributes(Function &F) {
@@ -5294,6 +5310,24 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
52945310
F.setSection(A.getValueAsString());
52955311
F.removeFnAttr("implicit-section-name");
52965312
}
5313+
5314+
if (!F.empty()) {
5315+
// For some reason this is called twice, and the first time is before any
5316+
// instructions are loaded into the body.
5317+
5318+
if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5319+
A.isValid()) {
5320+
5321+
if (A.getValueAsBool()) {
5322+
AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5323+
Visitor.visit(F);
5324+
}
5325+
5326+
// We will leave behind dead attribute uses on external declarations, but
5327+
// clang never added these to declarations anyway.
5328+
F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5329+
}
5330+
}
52975331
}
52985332

52995333
static bool isOldLoopArgument(Metadata *MD) {
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
2+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
3+
4+
; amdgpu-unsafe-fp-atomics attribute should be removed and replaced
5+
; with metadata attached to any atomicrmw with floating-point
6+
; operations.
7+
8+
; Maybe the attribute should be dropped from declarations, but it
9+
; had no effect there and clang never added it.
10+
declare void @unsafe_fp_atomics_true_decl() "amdgpu-unsafe-fp-atomics"="true"
11+
declare void @unsafe_fp_atomics_false_decl() "amdgpu-unsafe-fp-atomics"="false"
12+
13+
; Delete the attribute and replace with the most aggressive metadata possible
14+
define void @unsafe_fp_atomics_true(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="true" {
15+
; CHECK-LABEL: define void @unsafe_fp_atomics_true(
16+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) {
17+
; CHECK-NEXT: [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
18+
; CHECK-NEXT: [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
19+
; CHECK-NEXT: [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
20+
; CHECK-NEXT: [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
21+
; CHECK-NEXT: [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4
22+
; CHECK-NEXT: [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
23+
; CHECK-NEXT: [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
24+
; CHECK-NEXT: [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
25+
; CHECK-NEXT: [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
26+
; CHECK-NEXT: [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
27+
; CHECK-NEXT: ret void
28+
;
29+
%rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
30+
%rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
31+
%rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
32+
%rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
33+
%rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst
34+
%rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
35+
%rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
36+
%rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
37+
%rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
38+
39+
; xchg doesn't need any metadata
40+
%rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
41+
ret void
42+
}
43+
44+
; Should just delete the effectless attribute if it exists
45+
define void @unsafe_fp_atomics_false(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="false" {
46+
; CHECK-LABEL: define void @unsafe_fp_atomics_false(
47+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) {
48+
; CHECK-NEXT: [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
49+
; CHECK-NEXT: [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
50+
; CHECK-NEXT: [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
51+
; CHECK-NEXT: [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
52+
; CHECK-NEXT: [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4
53+
; CHECK-NEXT: [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
54+
; CHECK-NEXT: [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
55+
; CHECK-NEXT: [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
56+
; CHECK-NEXT: [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
57+
; CHECK-NEXT: [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
58+
; CHECK-NEXT: ret void
59+
;
60+
%rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
61+
%rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
62+
%rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
63+
%rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
64+
%rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst
65+
%rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
66+
%rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
67+
%rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
68+
%rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
69+
70+
; xchg doesn't need any metadata
71+
%rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
72+
ret void
73+
}
74+
75+
;.
76+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="true" }
77+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="false" }
78+
;.
79+
; CHECK: [[META0]] = !{}
80+
;.

0 commit comments

Comments
 (0)