diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5..e24d119b78162 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5272,6 +5272,22 @@ struct StrictFPUpgradeVisitor : public InstVisitor { Call.addFnAttr(Attribute::NoBuiltin); } }; + +/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata +struct AMDGPUUnsafeFPAtomicsUpgradeVisitor + : public InstVisitor { + AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default; + + void visitAtomicRMWInst(AtomicRMWInst &RMW) { + if (!RMW.isFloatingPointOperation()) + return; + + MDNode *Empty = MDNode::get(RMW.getContext(), {}); + RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty); + RMW.setMetadata("amdgpu.no.remote.memory.access", Empty); + RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty); + } +}; } // namespace void llvm::UpgradeFunctionAttributes(Function &F) { @@ -5294,6 +5310,24 @@ void llvm::UpgradeFunctionAttributes(Function &F) { F.setSection(A.getValueAsString()); F.removeFnAttr("implicit-section-name"); } + + if (!F.empty()) { + // For some reason this is called twice, and the first time is before any + // instructions are loaded into the body. + + if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics"); + A.isValid()) { + + if (A.getValueAsBool()) { + AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor; + Visitor.visit(F); + } + + // We will leave behind dead attribute uses on external declarations, but + // clang never added these to declarations anyway. + F.removeFnAttr("amdgpu-unsafe-fp-atomics"); + } + } } static bool isOldLoopArgument(Metadata *MD) { diff --git a/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll new file mode 100644 index 0000000000000..ceac496d1e8dd --- /dev/null +++ b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; amdgpu-unsafe-fp-atomics attribute should be removed and replaced +; with metadata attached to any atomicrmw with floating-point +; operations. + +; Maybe the attribute should be dropped from declarations, but it +; didn't do anything on one and clang never added it. +declare void @unsafe_fp_atomics_true_decl() "amdgpu-unsafe-fp-atomics"="true" +declare void @unsafe_fp_atomics_false_decl() "amdgpu-unsafe-fp-atomics"="false" + +; Delete the attribute and replace with the most aggressive metadata possible +define void @unsafe_fp_atomics_true(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="true" { +; CHECK-LABEL: define void @unsafe_fp_atomics_true( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) { +; CHECK-NEXT: [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: ret void +; + %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst + %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + + ; xchg doesn't need any metadata + %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + ret void +} + +; Should just delete the effectless attribute if it exists +define void @unsafe_fp_atomics_false(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="false" { +; CHECK-LABEL: define void @unsafe_fp_atomics_false( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) { +; CHECK-NEXT: [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: ret void +; + %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst + %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + + ; xchg doesn't need any metadata + %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + ret void +} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="true" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="false" } +;. +; CHECK: [[META0]] = !{} +;.