IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute #101698

arsenm · 2024-08-02T15:54:42Z

Delete the attribute and annotate any atomicrmw instructions in the
function with new metadata.

arsenm · 2024-08-02T15:55:11Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

Join @arsenm and the rest of your teammates on Graphite

llvmbot · 2024-08-02T15:55:15Z

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)

Changes

Delete the attribute and annotate any atomicrmw instructions in the
function with new metadata.

Full diff: https://github.com/llvm/llvm-project/pull/101698.diff

2 Files Affected:

(modified) llvm/lib/IR/AutoUpgrade.cpp (+34)
(added) llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll (+80)

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 0e95c6df670dc..eda5c0f66349f 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5260,6 +5260,22 @@ struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
     Call.addFnAttr(Attribute::NoBuiltin);
   }
 };
+
+/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
+struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
+    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
+  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
+
+  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
+    if (!RMW.isFloatingPointOperation())
+      return;
+
+    MDNode *Empty = MDNode::get(RMW.getContext(), {});
+    RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
+    RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
+    RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
+  }
+};
 } // namespace
 
 void llvm::UpgradeFunctionAttributes(Function &F) {
@@ -5282,6 +5298,24 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
     F.setSection(A.getValueAsString());
     F.removeFnAttr("implicit-section-name");
   }
+
+  if (!F.empty()) {
+    // For some reason this is called twice, and the first time is before any
+    // instructions are loaded into the body.
+
+    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
+        A.isValid()) {
+
+      if (A.getValueAsBool()) {
+        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
+        Visitor.visit(F);
+      }
+
+      // We will leave behind dead attribute uses on external declarations, but
+      // clang never added these to declarations anyway.
+      F.removeFnAttr("amdgpu-unsafe-fp-atomics");
+    }
+  }
 }
 
 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll
new file mode 100644
index 0000000000000..ceac496d1e8dd
--- /dev/null
+++ b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; amdgpu-unsafe-fp-atomics attribute should be removed and replaced
+; with metadata attached to any atomicrmw with floating-point
+; operations.
+
+; Maybe the attribute should be dropped from declarations, but it
+; didn't do anything on one and clang never added it.
+declare void @unsafe_fp_atomics_true_decl() "amdgpu-unsafe-fp-atomics"="true"
+declare void @unsafe_fp_atomics_false_decl() "amdgpu-unsafe-fp-atomics"="false"
+
+; Delete the attribute and replace with the most aggressive metadata possible
+define void @unsafe_fp_atomics_true(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="true" {
+; CHECK-LABEL: define void @unsafe_fp_atomics_true(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) {
+; CHECK-NEXT:    [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
+  %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst
+  %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+
+  ; xchg doesn't need any metadata
+  %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  ret void
+}
+
+; Should just delete the effectless attribute if it exists
+define void @unsafe_fp_atomics_false(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="false" {
+; CHECK-LABEL: define void @unsafe_fp_atomics_false(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) {
+; CHECK-NEXT:    [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
+  %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst
+  %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+
+  ; xchg doesn't need any metadata
+  %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="true" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="false" }
+;.
+; CHECK: [[META0]] = !{}
+;.

Delete the attribute and annotate any atomicrmw instructions in the function with new metadata.

arsenm · 2024-08-12T10:55:12Z

Merge activity

Aug 12, 6:55 AM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
Aug 12, 6:56 AM EDT: @arsenm merged this pull request with Graphite.

arsenm added clang Clang issues not falling into any other category backend:AMDGPU clang:codegen IR generation bugs: mangling, exceptions, etc. llvm:globalisel llvm:ir llvm:transforms labels Aug 2, 2024 — with Graphite App

arsenm mentioned this pull request Aug 2, 2024

AMDGPU: Legalize v2f16 atomicrmw fadd for buffer fat pointers #95929

Merged

arsenm requested review from AlexVlx, jayfoad and Sisyph August 2, 2024 15:55

arsenm mentioned this pull request Aug 2, 2024

AMDGPU: Handle legal v2bf16 atomicrmw fadd for gfx12 #95930

Merged

arsenm requested review from yashssh and yxsamliu August 2, 2024 15:55

This was referenced Aug 2, 2024

AMDGPU: Add a subtarget feature for fine-grained remote memory support #96442

Merged

clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins #96876

Merged

AMDGPU: Add subtarget feature for global atomic fadd denormal support #96443

Merged

arsenm marked this pull request as ready for review August 2, 2024 15:55

arsenm mentioned this pull request Aug 2, 2024

AMDGPU: Remove global/flat atomic fadd intrinics #97051

Merged

arsenm mentioned this pull request Aug 2, 2024

AMDGPU: Stop handling legacy amdgpu-unsafe-fp-atomics attribute #101699

Merged

yxsamliu approved these changes Aug 7, 2024

View reviewed changes

IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute

5ad5d43

Delete the attribute and annotate any atomicrmw instructions in the function with new metadata.

arsenm force-pushed the users/arsenm/amdgpu-autoupgrade-unsafe-fp-atomics-attribute branch from f19c1c2 to 5ad5d43 Compare August 12, 2024 09:02

arsenm requested review from JDevlieghere, cyndyishida, aaupov, maksfb, rafaelauler, ayermolo, dcci, tbaederr, Endilll, nikic and a team as code owners August 12, 2024 09:02

arsenm changed the base branch from users/arsenm/amdgpu-remove-global-flat-atomic-fadd-intrinsics to main August 12, 2024 09:02

Endilll removed request for a team and Endilll August 12, 2024 09:10

arsenm merged commit 70feafd into main Aug 12, 2024
23 of 26 checks passed

arsenm deleted the users/arsenm/amdgpu-autoupgrade-unsafe-fp-atomics-attribute branch August 12, 2024 10:56

ldionne removed request for a team August 14, 2024 18:53

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute #101698

IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute #101698

Uh oh!

arsenm commented Aug 2, 2024

Uh oh!

arsenm commented Aug 2, 2024 •

edited

Loading

Uh oh!

llvmbot commented Aug 2, 2024 •

edited

Loading

Uh oh!

arsenm commented Aug 12, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute #101698

IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute #101698

Uh oh!

Conversation

arsenm commented Aug 2, 2024

Uh oh!

arsenm commented Aug 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Aug 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm commented Aug 12, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Uh oh!

arsenm commented Aug 2, 2024 •

edited

Loading

llvmbot commented Aug 2, 2024 •

edited

Loading

arsenm commented Aug 12, 2024 •

edited

Loading