Skip to content

IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute #101698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Aug 2, 2024

Delete the attribute and annotate any atomicrmw instructions in the
function with new metadata.

@arsenm arsenm added clang Clang issues not falling into any other category backend:AMDGPU clang:codegen IR generation bugs: mangling, exceptions, etc. llvm:globalisel llvm:ir llvm:transforms labels Aug 2, 2024 — with Graphite App
@arsenm arsenm requested review from AlexVlx, jayfoad and Sisyph August 2, 2024 15:55
@arsenm arsenm requested review from yashssh and yxsamliu August 2, 2024 15:55
@arsenm arsenm marked this pull request as ready for review August 2, 2024 15:55
Copy link
Contributor Author

arsenm commented Aug 2, 2024

This stack of pull requests is managed by Graphite. Learn more about stacking.

Join @arsenm and the rest of your teammates on Graphite Graphite

@llvmbot
Copy link
Member

llvmbot commented Aug 2, 2024

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)

Changes

Delete the attribute and annotate any atomicrmw instructions in the
function with new metadata.


Full diff: https://github.com/llvm/llvm-project/pull/101698.diff

2 Files Affected:

  • (modified) llvm/lib/IR/AutoUpgrade.cpp (+34)
  • (added) llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll (+80)
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 0e95c6df670dc..eda5c0f66349f 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5260,6 +5260,22 @@ struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
     Call.addFnAttr(Attribute::NoBuiltin);
   }
 };
+
+/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
+struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
+    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
+  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
+
+  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
+    if (!RMW.isFloatingPointOperation())
+      return;
+
+    MDNode *Empty = MDNode::get(RMW.getContext(), {});
+    RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
+    RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
+    RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
+  }
+};
 } // namespace
 
 void llvm::UpgradeFunctionAttributes(Function &F) {
@@ -5282,6 +5298,24 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
     F.setSection(A.getValueAsString());
     F.removeFnAttr("implicit-section-name");
   }
+
+  if (!F.empty()) {
+    // For some reason this is called twice, and the first time is before any
+    // instructions are loaded into the body.
+
+    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
+        A.isValid()) {
+
+      if (A.getValueAsBool()) {
+        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
+        Visitor.visit(F);
+      }
+
+      // We will leave behind dead attribute uses on external declarations, but
+      // clang never added these to declarations anyway.
+      F.removeFnAttr("amdgpu-unsafe-fp-atomics");
+    }
+  }
 }
 
 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll
new file mode 100644
index 0000000000000..ceac496d1e8dd
--- /dev/null
+++ b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; amdgpu-unsafe-fp-atomics attribute should be removed and replaced
+; with metadata attached to any atomicrmw with floating-point
+; operations.
+
+; Maybe the attribute should be dropped from declarations, but it
+; didn't do anything on one and clang never added it.
+declare void @unsafe_fp_atomics_true_decl() "amdgpu-unsafe-fp-atomics"="true"
+declare void @unsafe_fp_atomics_false_decl() "amdgpu-unsafe-fp-atomics"="false"
+
+; Delete the attribute and replace with the most aggressive metadata possible
+define void @unsafe_fp_atomics_true(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="true" {
+; CHECK-LABEL: define void @unsafe_fp_atomics_true(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) {
+; CHECK-NEXT:    [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; CHECK-NEXT:    [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
+  %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst
+  %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+
+  ; xchg doesn't need any metadata
+  %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  ret void
+}
+
+; Should just delete the effectless attribute if it exists
+define void @unsafe_fp_atomics_false(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="false" {
+; CHECK-LABEL: define void @unsafe_fp_atomics_false(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) {
+; CHECK-NEXT:    [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
+  %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst
+  %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+  %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst
+
+  ; xchg doesn't need any metadata
+  %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst
+  ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="true" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="false" }
+;.
+; CHECK: [[META0]] = !{}
+;.

Delete the attribute and annotate any atomicrmw instructions in the
function with new metadata.
@arsenm arsenm force-pushed the users/arsenm/amdgpu-autoupgrade-unsafe-fp-atomics-attribute branch from f19c1c2 to 5ad5d43 Compare August 12, 2024 09:02
@arsenm arsenm changed the base branch from users/arsenm/amdgpu-remove-global-flat-atomic-fadd-intrinsics to main August 12, 2024 09:02
@Endilll Endilll removed request for a team and Endilll August 12, 2024 09:10
Copy link
Contributor Author

arsenm commented Aug 12, 2024

Merge activity

  • Aug 12, 6:55 AM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
  • Aug 12, 6:56 AM EDT: @arsenm merged this pull request with Graphite.

@arsenm arsenm merged commit 70feafd into main Aug 12, 2024
23 of 26 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu-autoupgrade-unsafe-fp-atomics-attribute branch August 12, 2024 10:56
@ldionne ldionne removed request for a team August 14, 2024 18:53
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU clang:codegen IR generation bugs: mangling, exceptions, etc. clang Clang issues not falling into any other category llvm:globalisel llvm:ir llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants