From 75abcefcb44cff7d675f93a30ce4800672f181a4 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 11 Jan 2023 11:20:14 +0000 Subject: [PATCH 01/10] First commit adding some HIP atomics --- clang/lib/Frontend/InitPreprocessor.cpp | 12 ++-- libclc/amdgcn-amdhsa/libspirv/SOURCES | 6 +- .../libspirv/atomic/atomic_add.cl | 40 ++++++++++++ .../libspirv/atomic/atomic_helpers.h | 62 +++++++++++++++++++ .../libspirv/atomic/atomic_load.cl | 41 ++++++++++++ .../libspirv/atomic/atomic_max.cl | 45 ++++++++++++++ .../libspirv/atomic/atomic_min.cl | 40 ++++++++++++ .../libspirv/atomic/atomic_store.cl | 41 ++++++++++++ 8 files changed, 281 insertions(+), 6 deletions(-) create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index ef8f2557e6f26..44925c4278ac4 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -588,17 +588,19 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, if (LangOpts.HIP) { Builder.defineMacro("__HIP__"); Builder.defineMacro("__HIPCC__"); - Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1"); - Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2"); - Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3"); - Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4"); - Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5"); if (LangOpts.CUDAIsDevice) Builder.defineMacro("__HIP_DEVICE_COMPILE__"); if (LangOpts.GPUDefaultStream == LangOptions::GPUDefaultStreamKind::PerThread) Builder.defineMacro("HIP_API_PER_THREAD_DEFAULT_STREAM"); } + if (LangOpts.HIP || (LangOpts.OpenCL && TI.getTriple().isAMDGPU())) { + Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5"); + } } /// Initialize the predefined C++ language feature test macros defined in diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 43a10d58dffe4..b97a979baf499 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -3,7 +3,11 @@ workitem/get_global_offset.ll group/group_ballot.cl group/collectives.cl group/collectives_helpers.ll -atomic/loadstore_helpers.ll +atomic/atomic_add.cl +atomic/atomic_load.cl +atomic/atomic_store.cl +atomic/atomic_max.cl +atomic/atomic_min.cl cl_khr_int64_extended_atomics/minmax_helpers.ll synchronization/barrier.cl math/acos.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl new file mode 100644 index 0000000000000..4549165282561 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, global, AS1, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, global, AS1, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, local, AS3, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, local, AS3, + __hip_atomic_fetch_add) + +#ifdef cl_khr_int64_base_atomics +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, global, AS1, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, global, AS1, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, local, AS3, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, local, AS3, + __hip_atomic_fetch_add) +#endif + +AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, global, AS1, + __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, local, AS3, + __hip_atomic_fetch_add) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h new file mode 100644 index 0000000000000..5b49187ada81c --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#define GET_ATOMIC_SCOPE_AND_ORDER(IN_SCOPE, OUT_SCOPE, IN_SEMANTICS, \ + OUT_ORDER) \ + switch (IN_SCOPE) { \ + case Subgroup: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \ + break; \ + case Workgroup: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_WORKGROUP; \ + break; \ + case Device: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_AGENT; \ + break; \ + case CrossDevice: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_SYSTEM; \ + break; \ + default: \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } \ + unsigned order = IN_SEMANTICS & 0x1F; \ + switch (order) { \ + case None: \ + OUT_ORDER = __ATOMIC_RELAXED; \ + break; \ + case Acquire: \ + OUT_ORDER = __ATOMIC_ACQUIRE; \ + break; \ + case Release: \ + OUT_ORDER = __ATOMIC_RELEASE; \ + break; \ + case AcquireRelease: \ + OUT_ORDER = __ATOMIC_ACQ_REL; \ + break; \ + case SequentiallyConsistent: \ + OUT_ORDER = __ATOMIC_SEQ_CST; \ + break; \ + default: \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } + +#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, BUILTIN) \ + _CLC_DEF TYPE \ + FUNC_NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return BUILTIN(p, val, memory_order, atomic_scope); \ + } + diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl new file mode 100644 index 0000000000000..5314117a77f98 --- /dev/null +++ 
b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +#define AMDGPU_ATOMIC_LOAD(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ + _CLC_DEF TYPE \ + FUNC_NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + const volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + TYPE res = __hip_atomic_load(p, memory_order, atomic_scope); \ + return *(TYPE *)&res; \ + } + +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, int, Ki, global, AS1) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned int, Kj, global, AS1) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, int, Ki, local, AS3) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned int, Kj, local, AS3) + +#ifdef cl_khr_int64_base_atomics +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, long, Kl, global, AS1) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned long, Km, global, AS1) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, long, Kl, local, AS3) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned long, Km, local, AS3) +#endif + +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, float, Kf, global, AS1) +AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, float, Kf, local, AS3) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl new file mode 100644 index 0000000000000..288067d229b64 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, global, AS1, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, global, AS1, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, local, AS3, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, local, AS3, + __hip_atomic_fetch_max) + +#ifdef cl_khr_int64_base_atomics +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, global, AS1, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, global, AS1, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, local, AS3, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, local, AS3, + __hip_atomic_fetch_max) +#endif + +/* + + TODO atomic_fetch_max is broken when ptr[0] < 0 and val > 0; + +AMDGPU_ATOMIC(_Z21__spirv_AtomicFMaxEXT, float, f, global, AS1, + __hip_atomic_fetch_max) +AMDGPU_ATOMIC(_Z21__spirv_AtomicFMaxEXT, float, f, local, AS3, + __hip_atomic_fetch_max) +*/ + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl new file mode 100644 index 0000000000000..b57e6e37aada3 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, global, AS1, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, global, AS1, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, local, AS3, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, local, AS3, + __hip_atomic_fetch_min) + +#ifdef cl_khr_int64_base_atomics +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, global, AS1, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, global, AS1, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, local, AS3, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, local, AS3, + __hip_atomic_fetch_min) +#endif + +AMDGPU_ATOMIC(_Z21__spirv_AtomicFMinEXT, float, f, global, AS1, + __hip_atomic_fetch_min) +AMDGPU_ATOMIC(_Z21__spirv_AtomicFMinEXT, float, f, local, AS3, + __hip_atomic_fetch_min) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl new file mode 100644 index 0000000000000..aeade2086b34c --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +#define AMDGPU_ATOMIC_STORE(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ + _CLC_DEF void \ + FUNC_NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + __hip_atomic_store(p, val, memory_order, atomic_scope); \ + return; \ + } + +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, int, i, global, AS1) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned int, j, global, AS1) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, int, i, local, AS3) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned int, j, local, AS3) + +#ifdef cl_khr_int64_base_atomics +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, long, l, global, AS1) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned long, m, global, AS1) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, long, l, local, AS3) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned long, m, local, AS3) +#endif + +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, float, f, global, AS1) +AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, float, f, local, AS3) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC_STORE From 8953f41637ad7c2c8a613bfc966584c24fb8e1df Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 13 Jan 2023 11:09:33 +0000 Subject: [PATCH 02/10] Adding initial support for HIP atomics --- clang/lib/Frontend/InitPreprocessor.cpp | 2 +- libclc/amdgcn-amdhsa/libspirv/SOURCES | 3 +- .../libspirv/atomic/atomic_add.cl | 2 - .../libspirv/atomic/atomic_cmpxchg.cl | 38 ++++++++++++++++ .../libspirv/atomic/atomic_load.cl | 2 - .../libspirv/atomic/atomic_max.cl | 45 ------------------- .../libspirv/atomic/atomic_min.cl | 40 ----------------- .../libspirv/atomic/atomic_store.cl | 2 - .../libspirv/atomic/atomic_xchg.cl | 38 ++++++++++++++++ sycl/include/sycl/atomic_ref.hpp | 6 +++ sycl/plugins/hip/pi_hip.cpp | 9 +++- 11 files changed, 91 insertions(+), 96 deletions(-) create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl delete mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl delete mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 44925c4278ac4..1f49f976ddff3 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1301,7 +1301,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, const llvm::Triple &DeviceTriple = TI.getTriple(); const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch(); - if (DeviceTriple.isNVPTX() || + if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() || (DeviceTriple.isSPIR() && DeviceSubArch != llvm::Triple::SPIRSubArch_fpga)) Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS"); diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index b97a979baf499..33ac359ec2223 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -4,10 +4,9 @@ group/group_ballot.cl group/collectives.cl group/collectives_helpers.ll 
atomic/atomic_add.cl +atomic/atomic_xchg.cl atomic/atomic_load.cl atomic/atomic_store.cl -atomic/atomic_max.cl -atomic/atomic_min.cl cl_khr_int64_extended_atomics/minmax_helpers.ll synchronization/barrier.cl math/acos.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl index 4549165282561..881eb1f2facf5 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl @@ -19,7 +19,6 @@ AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, local, AS3, AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, local, AS3, __hip_atomic_fetch_add) -#ifdef cl_khr_int64_base_atomics AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, global, AS1, __hip_atomic_fetch_add) AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, global, AS1, @@ -28,7 +27,6 @@ AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, local, AS3, __hip_atomic_fetch_add) AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, local, AS3, __hip_atomic_fetch_add) -#endif AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, global, AS1, __hip_atomic_fetch_add) diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl new file mode 100644 index 0000000000000..a23991ae59a58 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// CompareExceptions. See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, int, i, global, AS1, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned int, j, global, AS1, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, int, i, local, AS3, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned int, j, local, AS3, + __atomic_compare_exchange) + +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, long, l, global, AS1, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned long, m, global, AS1, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, long, l, local, AS3, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned long, m, local, AS3, + __atomic_compare_exchange) + +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, float, f, global, AS1, + __atomic_compare_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, float, f, local, AS3, + __atomic_compare_exchange) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl index 5314117a77f98..6cbb1f9664f59 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl @@ -26,12 +26,10 @@ AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned int, Kj, global, AS1) AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, int, Ki, local, AS3) AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned int, Kj, local, AS3) -#ifdef cl_khr_int64_base_atomics AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, long, Kl, global, AS1) 
AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned long, Km, global, AS1) AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, long, Kl, local, AS3) AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned long, Km, local, AS3) -#endif AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, float, Kf, global, AS1) AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, float, Kf, local, AS3) diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl deleted file mode 100644 index 288067d229b64..0000000000000 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl +++ /dev/null @@ -1,45 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "atomic_helpers.h" -#include -#include - -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, global, AS1, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, global, AS1, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, local, AS3, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, local, AS3, - __hip_atomic_fetch_max) - -#ifdef cl_khr_int64_base_atomics -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, global, AS1, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, global, AS1, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, local, AS3, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, local, AS3, - __hip_atomic_fetch_max) -#endif - -/* - - TODO atomic_fetch_max is broken when ptr[0] < 0 and val > 0; - -AMDGPU_ATOMIC(_Z21__spirv_AtomicFMaxEXT, float, f, global, AS1, - __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z21__spirv_AtomicFMaxEXT, float, f, local, AS3, - __hip_atomic_fetch_max) -*/ - -// TODO implement for fp64 - -#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl deleted file mode 100644 index b57e6e37aada3..0000000000000 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "atomic_helpers.h" -#include -#include - -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, global, AS1, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, global, AS1, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, local, AS3, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, local, AS3, - __hip_atomic_fetch_min) - -#ifdef cl_khr_int64_base_atomics -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, global, AS1, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, global, AS1, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, local, AS3, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, local, AS3, - __hip_atomic_fetch_min) -#endif - -AMDGPU_ATOMIC(_Z21__spirv_AtomicFMinEXT, float, f, global, AS1, - __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z21__spirv_AtomicFMinEXT, float, f, local, AS3, - __hip_atomic_fetch_min) - -// TODO implement for fp64 - -#undef AMDGPU_ATOMIC diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl index aeade2086b34c..50fd8528ee9b0 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl @@ -26,12 +26,10 @@ AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned int, j, global, AS1) AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, int, i, local, AS3) AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned int, j, local, AS3) -#ifdef cl_khr_int64_base_atomics AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, long, l, global, AS1) AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned long, m, global, AS1) AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, long, l, local, AS3) AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned long, m, local, AS3) -#endif AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, float, f, global, AS1) AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, float, f, local, AS3) diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl new file mode 100644 index 0000000000000..8cb223416e03e --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, global, AS1, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, global, AS1, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, local, AS3, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, local, AS3, + __hip_atomic_exchange) + +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, global, AS1, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, global, AS1, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, local, AS3, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, local, AS3, + __hip_atomic_exchange) + +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, global, AS1, + __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, local, AS3, + __hip_atomic_exchange) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC diff --git a/sycl/include/sycl/atomic_ref.hpp b/sycl/include/sycl/atomic_ref.hpp index cbae52de9d7c7..dba3290f68c8c 100644 --- a/sycl/include/sycl/atomic_ref.hpp +++ b/sycl/include/sycl/atomic_ref.hpp @@ -126,6 +126,12 @@ class atomic_ref_base { detail::IsValidDefaultOrder::value, "Invalid default memory_order for atomics. Valid defaults are: " "relaxed, acq_rel, seq_cst"); +#ifdef __AMDGPU__ + // FIXME should this query device's memory capabilities at runtime? + static_assert("seq_cst memory order is not supported on AMDGPU" && + DefaultOrder != sycl::memory_order::seq_cst); +#endif + public: using value_type = T; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 0f31986096d48..b9eba9906c638 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1852,8 +1852,13 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, pi_int32{1}); } - // TODO: Implement. - case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + pi_memory_order_capabilities capabilities = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; + return getInfo(param_value_size, param_value, param_value_size_ret, + capabilities); + } // TODO: Investigate if this information is available on HIP. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: case PI_DEVICE_INFO_DEVICE_ID: From 41af655407ef66a47c678368ff1cf50c2e2137e4 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 13 Jan 2023 11:59:32 +0000 Subject: [PATCH 03/10] Removing cmpxchg --- .../libspirv/atomic/atomic_cmpxchg.cl | 38 ------------------- 1 file changed, 38 deletions(-) delete mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl deleted file mode 100644 index a23991ae59a58..0000000000000 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM -// CompareExceptions. See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "atomic_helpers.h" -#include -#include - -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, int, i, global, AS1, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned int, j, global, AS1, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, int, i, local, AS3, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned int, j, local, AS3, - __atomic_compare_exchange) - -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, long, l, global, AS1, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned long, m, global, AS1, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, long, l, local, AS3, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, unsigned long, m, local, AS3, - __atomic_compare_exchange) - -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, float, f, global, AS1, - __atomic_compare_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicCompareExchange, float, f, local, AS3, - __atomic_compare_exchange) - -// TODO implement for fp64 - -#undef AMDGPU_ATOMIC From 263c7a76b0df3af57528ff5ab3851c6c2148ae9a Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 13 Jan 2023 18:58:27 +0000 Subject: [PATCH 04/10] Adding compare exchange as well as generic AS --- libclc/CMakeLists.txt | 8 +- libclc/amdgcn-amdhsa/libspirv/SOURCES | 2 + .../libspirv/atomic/atomic_add.cl | 29 +- .../libspirv/atomic/atomic_cmpxchg.cl | 51 +++ .../libspirv/atomic/atomic_helpers.h | 99 +++--- .../libspirv/atomic/atomic_load.cl | 42 +-- .../libspirv/atomic/atomic_store.cl | 42 +-- .../libspirv/atomic/atomic_sub.cl | 44 +++ .../libspirv/atomic/atomic_xchg.cl | 27 +- .../libspirv/atomic/loadstore_helpers.ll | 296 ------------------ sycl/plugins/hip/pi_hip.cpp | 6 +- 11 files changed, 218 insertions(+), 428 deletions(-) create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl delete mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 052a7abd3808a..f747c72d3e546 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -259,6 +259,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) endif() if( ${ARCH} STREQUAL r600 OR ${ARCH} STREQUAL amdgcn ) + list( APPEND dirs amdgpu ) endif() @@ -336,8 +337,11 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Disables NVVM reflection to defer to after linking set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72" "SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false") - else() - set ( flags ) + elseif( ${ARCH} STREQUAL amdgcn ) + # AMDGCN needs generic address space for atomics + set( flags "SHELL:-Xclang -cl-std=CL2.0") + else() + set ( flags ) endif() set( arch_suffix "${t}" ) else() diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 33ac359ec2223..cb365952f3480 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -4,8 +4,10 @@ group/group_ballot.cl group/collectives.cl group/collectives_helpers.ll atomic/atomic_add.cl +atomic/atomic_cmpxchg.cl atomic/atomic_xchg.cl atomic/atomic_load.cl +atomic/atomic_sub.cl atomic/atomic_store.cl cl_khr_int64_extended_atomics/minmax_helpers.ll 
synchronization/barrier.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl index 881eb1f2facf5..d537fcdf0ff94 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl @@ -10,29 +10,14 @@ #include #include -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, global, AS1, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, global, AS1, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, local, AS3, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, local, AS3, - __hip_atomic_fetch_add) - -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, global, AS1, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, global, AS1, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, local, AS3, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, local, AS3, - __hip_atomic_fetch_add) - -AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, global, AS1, - __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, local, AS3, - __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add) // TODO implement for fp64 #undef AMDGPU_ATOMIC +#undef AMDGPU_ATOMIC_IMPL +#undef GET_ATOMIC_SCOPE_AND_ORDER diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl new file mode 100644 index 0000000000000..6911f16679457 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1, \ + SUB2) \ + _CLC_DEF TYPE \ + _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_##TYPE_MANGLED##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask success_semantics, \ + enum MemorySemanticsMask failure_semantics, TYPE desired, \ + TYPE expected) { \ + int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ + memory_order_success) \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ + memory_order_failure) \ + TYPE original_val = *p; \ + bool success = __hip_atomic_compare_exchange_strong( \ + p, &expected, desired, memory_order_success, memory_order_failure, \ + atomic_scope); \ + \ + return success ? 
original_val : *p; \ + } + +#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, 5) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, 5) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4) + +AMDGPU_ATOMIC_CMPXCHG(int, i) +AMDGPU_ATOMIC_CMPXCHG(unsigned int, j) +AMDGPU_ATOMIC_CMPXCHG(long, l) +AMDGPU_ATOMIC_CMPXCHG(unsigned long, m) +AMDGPU_ATOMIC_CMPXCHG(float, f) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC +#undef AMDGPU_ATOMIC_IMPL +#undef AMDGPU_ATOMIC_CPMXCHG +#undef AMDGPU_ATOMIC_CPMXCHG_IMPL +#undef GET_ATOMIC_SCOPE_AND_ORDER diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h index 5b49187ada81c..1ccca95c8bd77 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h @@ -11,52 +11,61 @@ #define GET_ATOMIC_SCOPE_AND_ORDER(IN_SCOPE, OUT_SCOPE, IN_SEMANTICS, \ OUT_ORDER) \ - switch (IN_SCOPE) { \ - case Subgroup: \ - OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \ - break; \ - case Workgroup: \ - OUT_SCOPE = __HIP_MEMORY_SCOPE_WORKGROUP; \ - break; \ - case Device: \ - OUT_SCOPE = __HIP_MEMORY_SCOPE_AGENT; \ - break; \ - case CrossDevice: \ - OUT_SCOPE = __HIP_MEMORY_SCOPE_SYSTEM; \ - break; \ - default: \ - __builtin_trap(); \ - __builtin_unreachable(); \ - } \ - unsigned order = IN_SEMANTICS & 0x1F; \ - switch (order) { \ - case None: \ - OUT_ORDER = __ATOMIC_RELAXED; \ - break; \ - case Acquire: \ - OUT_ORDER = __ATOMIC_ACQUIRE; \ - break; \ - case Release: \ - OUT_ORDER = __ATOMIC_RELEASE; \ - break; \ - case AcquireRelease: \ - OUT_ORDER = __ATOMIC_ACQ_REL; \ - break; \ - case SequentiallyConsistent: \ - OUT_ORDER = __ATOMIC_SEQ_CST; \ - break; \ - default: \ - __builtin_trap(); \ - __builtin_unreachable(); \ + { \ + switch (IN_SCOPE) { \ + case Subgroup: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \ + break; \ + case Workgroup: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_WORKGROUP; \ + break; \ + case Device: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_AGENT; \ + break; \ + case CrossDevice: \ + OUT_SCOPE = __HIP_MEMORY_SCOPE_SYSTEM; \ + break; \ + default: \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } \ + unsigned order = IN_SEMANTICS & 0x1F; \ + switch (order) { \ + case None: \ + OUT_ORDER = __ATOMIC_RELAXED; \ + break; \ + case Acquire: \ + OUT_ORDER = __ATOMIC_ACQUIRE; \ + break; \ + case Release: \ + OUT_ORDER = __ATOMIC_RELEASE; \ + break; \ + case AcquireRelease: \ + OUT_ORDER = __ATOMIC_ACQ_REL; \ + break; \ + case SequentiallyConsistent: \ + OUT_ORDER = __ATOMIC_SEQ_CST; \ + break; \ + default: \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } \ } -#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return BUILTIN(p, val, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ + NOT_GENERIC, BUILTIN) \ + _CLC_DEF TYPE \ + FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##NOT_GENERIC##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask 
semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + TYPE ret = BUILTIN(p, val, memory_order, atomic_scope); \ + return *(TYPE *)&ret; \ } +#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) + diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl index 6cbb1f9664f59..a96c2b233b386 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl @@ -10,30 +10,32 @@ #include #include -#define AMDGPU_ATOMIC_LOAD(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DEF TYPE \ - FUNC_NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - TYPE res = __hip_atomic_load(p, memory_order, atomic_scope); \ - return *(TYPE *)&res; \ +#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ + _CLC_DEF TYPE \ + _Z18__spirv_AtomicLoadP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + const volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + TYPE res = __hip_atomic_load(p, memory_order, atomic_scope); \ + return *(TYPE *)&res; \ } -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, int, Ki, global, AS1) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned int, Kj, global, AS1) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, int, Ki, local, AS3) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned int, Kj, local, AS3) +#define AMDGPU_ATOMIC_LOAD(TYPE, TYPE_MANGLED) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, global, U3AS1) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, local, U3AS3) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, , ) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, long, Kl, global, AS1) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned long, Km, global, AS1) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, long, Kl, local, AS3) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, unsigned long, Km, local, AS3) - -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, float, Kf, global, AS1) -AMDGPU_ATOMIC_LOAD(_Z18__spirv_AtomicLoad, float, Kf, local, AS3) +AMDGPU_ATOMIC_LOAD(int, Ki) +AMDGPU_ATOMIC_LOAD(unsigned int, Kj) +AMDGPU_ATOMIC_LOAD(long, Kl) +AMDGPU_ATOMIC_LOAD(unsigned long, Km) +AMDGPU_ATOMIC_LOAD(float, Kf) // TODO implement for fp64 #undef AMDGPU_ATOMIC +#undef AMDGPU_ATOMIC_IMPL +#undef AMDGPU_ATOMIC_LOAD +#undef AMDGPU_ATOMIC_LOAD_IMPL +#undef GET_ATOMIC_SCOPE_AND_ORDER diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl index 50fd8528ee9b0..e138910d9b19d 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl @@ -10,30 +10,32 @@ #include #include -#define AMDGPU_ATOMIC_STORE(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DEF void \ - 
FUNC_NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - __hip_atomic_store(p, val, memory_order, atomic_scope); \ - return; \ +#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1) \ + _CLC_DEF void \ + _Z19__spirv_AtomicStore##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + __hip_atomic_store(p, val, memory_order, atomic_scope); \ + return; \ } -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, int, i, global, AS1) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned int, j, global, AS1) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, int, i, local, AS3) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned int, j, local, AS3) +#define AMDGPU_ATOMIC_STORE(TYPE, TYPE_MANGLED) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, , , 0) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, long, l, global, AS1) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned long, m, global, AS1) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, long, l, local, AS3) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, unsigned long, m, local, AS3) - -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, float, f, global, AS1) -AMDGPU_ATOMIC_STORE(_Z19__spirv_AtomicStore, float, f, local, AS3) +AMDGPU_ATOMIC_STORE(int, i) +AMDGPU_ATOMIC_STORE(unsigned int, j) +AMDGPU_ATOMIC_STORE(long, l) +AMDGPU_ATOMIC_STORE(unsigned long, m) +AMDGPU_ATOMIC_STORE(float, f) // TODO implement for fp64 +#undef AMDGPU_ATOMIC +#undef AMDGPU_ATOMIC_IMPL #undef AMDGPU_ATOMIC_STORE +#undef AMDGPU_ATOMIC_STORE_IMPL +#undef GET_ATOMIC_SCOPE_AND_ORDER diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl new file mode 100644 index 0000000000000..bab2a8582f959 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "atomic_helpers.h" +#include +#include + +#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ + NOT_GENERIC, BUILTIN) \ + _CLC_DEF TYPE \ + FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##NOT_GENERIC##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + TYPE ret = BUILTIN(p, val, memory_order); \ + return *(TYPE *)&ret; \ + } + +#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, \ + BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, \ + BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) + +AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, int, i, __atomic_fetch_sub) +AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned int, j, __atomic_fetch_sub) +AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_sub) +AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_sub) +AMDGPU_ATOMIC_SUB(_Z21__spirv_AtomicFSubEXT, float, f, __atomic_fetch_sub) + +// TODO implement for fp64 + +#undef AMDGPU_ATOMIC +#undef AMDGPU_ATOMIC_IMPL +#undef AMDGPU_ATOMIC_SUB +#undef AMDGPU_ATOMIC_SUB_IMPL +#undef GET_ATOMIC_SCOPE_AND_ORDER diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl index 8cb223416e03e..131510956d545 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl @@ -10,29 +10,16 @@ #include #include -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, global, AS1, +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, global, AS1, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, local, AS3, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, local, AS3, - __hip_atomic_exchange) - -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, global, AS1, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, global, AS1, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, local, AS3, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, local, AS3, - __hip_atomic_exchange) - -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, global, AS1, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, local, AS3, +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, __hip_atomic_exchange) +AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, __hip_atomic_exchange) // TODO implement for fp64 #undef AMDGPU_ATOMIC +#undef AMDGPU_ATOMIC_IMPL +#undef GET_ATOMIC_SCOPE_AND_ORDER diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll b/libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll deleted file mode 100644 index 3c27e43c295fd..0000000000000 --- 
a/libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll +++ /dev/null @@ -1,296 +0,0 @@ -#if __clang_major__ >= 7 -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -#else -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -#endif - -declare void @llvm.trap() - -define i32 @__clc__atomic_load_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4 - ret i32 %0 -} - -define i32 @__clc__atomic_load_local_4_unordered(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i32, i32 addrspace(3)* %ptr unordered, align 4 - ret i32 %0 -} - -define i64 @__clc__atomic_load_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8 - ret i64 %0 -} - -define i64 @__clc__atomic_load_local_8_unordered(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i64, i64 addrspace(3)* %ptr unordered, align 8 - ret i64 %0 -} - -define i32 @__clc__atomic_uload_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4 - ret i32 %0 -} - -define i32 @__clc__atomic_uload_local_4_unordered(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i32, i32 addrspace(3)* %ptr unordered, align 4 - ret i32 %0 -} - -define i64 @__clc__atomic_uload_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8 - ret i64 %0 -} - -define i64 @__clc__atomic_uload_local_8_unordered(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - %0 = load atomic volatile i64, i64 addrspace(3)* %ptr unordered, align 8 - ret i64 %0 -} - -define i32 @__clc__atomic_load_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i32 @__clc__atomic_load_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_load_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i32 @__clc__atomic_uload_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_uload_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable 
-} - - -define i32 @__clc__atomic_load_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i32 @__clc__atomic_load_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_load_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_load_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i32 @__clc__atomic_uload_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i32 @__clc__atomic_uload_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_uload_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define i64 @__clc__atomic_uload_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4 - ret void -} - -define void @__clc__atomic_store_local_4_unordered(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - store atomic volatile i32 %value, i32 addrspace(3)* %ptr unordered, align 4 - ret void -} - -define void @__clc__atomic_store_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8 - ret void -} - -define void @__clc__atomic_store_local_8_unordered(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - store atomic volatile i64 %value, i64 addrspace(3)* %ptr unordered, align 8 - ret void -} - -define void @__clc__atomic_ustore_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4 - ret void -} - -define void @__clc__atomic_ustore_local_4_unordered(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - store atomic volatile i32 %value, i32 addrspace(3)* %ptr unordered, align 4 - ret void -} - -define void @__clc__atomic_ustore_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8 - ret void -} - -define void @__clc__atomic_ustore_local_8_unordered(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - store atomic volatile i64 %value, i64 addrspace(3)* %ptr unordered, align 8 - ret void -} - -define void @__clc__atomic_store_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture 
%ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_store_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} - -define void @__clc__atomic_ustore_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { -entry: - tail call void @llvm.trap() - unreachable -} diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index b9eba9906c638..17583c47e148b 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1853,9 +1853,9 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - pi_memory_order_capabilities capabilities = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; + pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED | + PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE; return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } From 7289005ba164c4f43563cbe633e8b8971c74be28 Mon Sep 17 00:00:00 2001 From: Hugh Delaney <46290137+hdelan@users.noreply.github.com> Date: Wed, 18 Jan 2023 10:35:13 +0000 Subject: [PATCH 05/10] Update sycl/include/sycl/atomic_ref.hpp Co-authored-by: Steffen Larsen --- sycl/include/sycl/atomic_ref.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/atomic_ref.hpp b/sycl/include/sycl/atomic_ref.hpp 

From 7289005ba164c4f43563cbe633e8b8971c74be28 Mon Sep 17 00:00:00 2001
From: Hugh Delaney <46290137+hdelan@users.noreply.github.com>
Date: Wed, 18 Jan 2023 10:35:13 +0000
Subject: [PATCH 05/10] Update sycl/include/sycl/atomic_ref.hpp

Co-authored-by: Steffen Larsen
---
 sycl/include/sycl/atomic_ref.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/include/sycl/atomic_ref.hpp b/sycl/include/sycl/atomic_ref.hpp
index dba3290f68c8c..db4051a0849ab 100644
--- a/sycl/include/sycl/atomic_ref.hpp
+++ b/sycl/include/sycl/atomic_ref.hpp
@@ -128,8 +128,8 @@ class atomic_ref_base {
                 "relaxed, acq_rel, seq_cst");
 #ifdef __AMDGPU__
   // FIXME should this query device's memory capabilities at runtime?
-  static_assert("seq_cst memory order is not supported on AMDGPU" &&
-                DefaultOrder != sycl::memory_order::seq_cst);
+  static_assert(DefaultOrder != sycl::memory_order::seq_cst,
+                "seq_cst memory order is not supported on AMDGPU");
 #endif

From ad575d49f1b9b7fcbee076af7d14d98dc4b1bc37 Mon Sep 17 00:00:00 2001
From: Hugh Delaney
Date: Wed, 18 Jan 2023 12:30:19 +0000
Subject: [PATCH 06/10] Add AtomicAdd for FP64

---
 .../libspirv/atomic/atomic_add.cl    | 36 ++++++++++++++++++-
 .../libspirv/atomic/atomic_helpers.h |  4 +--
 .../libspirv/atomic/atomic_sub.cl    |  2 --
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
index d537fcdf0ff94..5252afbacf2af 100644
--- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
@@ -16,8 +16,42 @@ AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
 AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
 AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
 
-// TODO implement for fp64
+#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2) \
+  _CLC_DEF long \
+  _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
+      volatile AS long *, enum Scope, enum MemorySemanticsMask, \
+      enum MemorySemanticsMask, long desired, long expected); \
+  _CLC_DEF long \
+  _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      const volatile AS long *, enum Scope, enum MemorySemanticsMask); \
+  _CLC_DEF double \
+  _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \
+      volatile AS double *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, double val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    volatile AS long *int_pointer = (volatile AS long *)p; \
+    long old_int_val = 0, new_int_val = 0; \
+    do { \
+      old_int_val = \
+          _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+              int_pointer, scope, semantics); \
+      double new_double_val = *(double *)&old_int_val + val; \
+      new_int_val = *(long *)&new_double_val; \
+    } while ( \
+        _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
+            int_pointer, scope, semantics, semantics, new_int_val, \
+            old_int_val) != old_int_val); \
+ \
+    return *(double *)&old_int_val; \
+  }
+
+#ifdef cl_khr_int64_base_atomics
+AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5)
+AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5)
+AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4)
+#endif
 
 #undef AMDGPU_ATOMIC
 #undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_FP64_ADD_IMPL
 #undef GET_ATOMIC_SCOPE_AND_ORDER
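The FP64 path above emulates a double fetch-add with an AtomicLoad / AtomicCompareExchange retry loop over the 64-bit integer image of the value. Stripped of the mangled SPIR-V names, the same scheme looks roughly like the following illustrative C++ sketch, with std::atomic standing in for the SPIR-V bindings (this is not the code the macro emits):

#include <atomic>
#include <cstdint>
#include <cstring>

// Sketch of the CAS-based double fetch-add used by AMDGPU_ATOMIC_FP64_ADD_IMPL.
double fetch_add_double(std::atomic<uint64_t> &word, double val) {
  uint64_t old_bits = word.load(std::memory_order_relaxed);
  for (;;) {
    double old_val;
    std::memcpy(&old_val, &old_bits, sizeof(double)); // reinterpret bits
    double new_val = old_val + val;
    uint64_t new_bits;
    std::memcpy(&new_bits, &new_val, sizeof(double));
    // On failure, compare_exchange_weak refreshes old_bits and we retry.
    if (word.compare_exchange_weak(old_bits, new_bits,
                                   std::memory_order_relaxed))
      return old_val; // previous value, as AtomicFAddEXT requires
  }
}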
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
index 1ccca95c8bd77..6b053a38930fb 100644
--- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
@@ -53,9 +53,9 @@
 }
 
 #define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
-                           NOT_GENERIC, BUILTIN) \
+                           SUB1, BUILTIN) \
   _CLC_DEF TYPE \
-  FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##NOT_GENERIC##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+  FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
       volatile AS TYPE *p, enum Scope scope, \
       enum MemorySemanticsMask semantics, TYPE val) { \
     int atomic_scope = 0, memory_order = 0; \
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl
index bab2a8582f959..e6581385d7720 100644
--- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl
@@ -35,8 +35,6 @@ AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_sub)
 AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_sub)
 AMDGPU_ATOMIC_SUB(_Z21__spirv_AtomicFSubEXT, float, f, __atomic_fetch_sub)
 
-// TODO implement for fp64
-
 #undef AMDGPU_ATOMIC
 #undef AMDGPU_ATOMIC_IMPL
 #undef AMDGPU_ATOMIC_SUB

From 89fb7e66f8e0d3b770ebbe6a58848a95be426fe8 Mon Sep 17 00:00:00 2001
From: Hugh Delaney
Date: Wed, 18 Jan 2023 15:02:29 +0000
Subject: [PATCH 07/10] Updating preprocessor tests

---
 .../opencl-macro-target-specific.cl |  7 +++++++
 .../sycl-macro-target-specific.cpp  | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 clang/test/Preprocessor/opencl-macro-target-specific.cl

diff --git a/clang/test/Preprocessor/opencl-macro-target-specific.cl b/clang/test/Preprocessor/opencl-macro-target-specific.cl
new file mode 100644
index 0000000000000..732e5c59e79e9
--- /dev/null
+++ b/clang/test/Preprocessor/opencl-macro-target-specific.cl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -E -dM -triple amdgcn-amdhsa-amdhsa \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_AGENT
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SINGLETHREAD
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SYSTEM
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WAVEFRONT
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WORKGROUP
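The __HIP_MEMORY_SCOPE_* values checked here are what GET_ATOMIC_SCOPE_AND_ORDER produces before the libclc helpers hand off to Clang's __hip_atomic_* builtins. A minimal device-side sketch of that hand-off (HIP C++; assumes the (pointer, value, order, scope) argument order used throughout this series):

// Illustrative only: a relaxed, agent-scope float add expressed directly
// with the builtin that the AMDGPU helpers route __spirv_AtomicFAddEXT to.
__device__ float add_agent_relaxed(float *p, float v) {
  return __hip_atomic_fetch_add(p, v, __ATOMIC_RELAXED,
                                __HIP_MEMORY_SCOPE_AGENT);
}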
diff --git a/clang/test/Preprocessor/sycl-macro-target-specific.cpp b/clang/test/Preprocessor/sycl-macro-target-specific.cpp
index c9f227def1178..b0c95c9b0670b 100644
--- a/clang/test/Preprocessor/sycl-macro-target-specific.cpp
+++ b/clang/test/Preprocessor/sycl-macro-target-specific.cpp
@@ -8,9 +8,26 @@
 // RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
 // CHECK-NVPTX: #define __NVPTX__
 // CHECK-NVPTX-NEG-NOT: #define __NVPTX__
 
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_x86_64-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// CHECK-AMDGPU: #define __AMDGPU__
+// CHECK-AMDGPU-NEG-NOT: #define __AMDGPU__
+
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
@@ -21,6 +38,8 @@
 // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
 // CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
 // CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS

From 4dfac50c6a795e0803169282d517a7db974de9d8 Mon Sep 17 00:00:00 2001
From: Hugh Delaney
Date: Thu, 19 Jan 2023 10:05:47 +0000
Subject: [PATCH 08/10] Add comments to test and remove empty line in
 CMakeLists

---
 clang/test/Preprocessor/opencl-macro-target-specific.cl | 2 ++
 clang/test/Preprocessor/sycl-macro-target-specific.cpp  | 2 ++
 libclc/CMakeLists.txt                                   | 1 -
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/clang/test/Preprocessor/opencl-macro-target-specific.cl b/clang/test/Preprocessor/opencl-macro-target-specific.cl
index 732e5c59e79e9..a37a5740fa8f0 100644
--- a/clang/test/Preprocessor/opencl-macro-target-specific.cl
+++ b/clang/test/Preprocessor/opencl-macro-target-specific.cl
@@ -1,3 +1,5 @@
+// This test checks for the presence of target-specific macros for OpenCL
+//
 // RUN: %clang_cc1 %s -E -dM -triple amdgcn-amdhsa-amdhsa \
 // RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
 // CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_AGENT
diff --git a/clang/test/Preprocessor/sycl-macro-target-specific.cpp b/clang/test/Preprocessor/sycl-macro-target-specific.cpp
index b0c95c9b0670b..001df46104560 100644
--- a/clang/test/Preprocessor/sycl-macro-target-specific.cpp
+++ b/clang/test/Preprocessor/sycl-macro-target-specific.cpp
@@ -1,3 +1,5 @@
+// This test checks for the presence of target-specific macros for SYCL
+//
 // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-NVPTX %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index f747c72d3e546..ac4b69711dcb2 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -259,7 +259,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   endif()
 
   if( ${ARCH} STREQUAL r600 OR ${ARCH} STREQUAL amdgcn )
-
     list( APPEND dirs amdgpu )
   endif()

From 85e96b9b43d84a4471ccf6dcdcebbf30eb80346d Mon Sep 17 00:00:00 2001
From: Hugh Delaney
Date: Thu, 19 Jan 2023 11:10:00 +0000
Subject: [PATCH 09/10] Making test cl-std=2.0

---
 libclc/test/binding/core/GroupWaitEvents.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libclc/test/binding/core/GroupWaitEvents.cl b/libclc/test/binding/core/GroupWaitEvents.cl
index 6787c6a28e75e..fd8a179151379 100644
--- a/libclc/test/binding/core/GroupWaitEvents.cl
+++ b/libclc/test/binding/core/GroupWaitEvents.cl
@@ -9,7 +9,7 @@
 
 // Autogenerated by gen-libclc-test.py
 
-// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
+// RUN: %clang -emit-llvm -cl-std=CL2.0 -S -o - %s | FileCheck %s
 
 #include

From 33ba67064f6e2d999dc56b5cc35f77dcafc0fad3 Mon Sep 17 00:00:00 2001
From: Hugh Delaney
Date: Thu, 26 Jan 2023 13:06:14 +0000
Subject: [PATCH 10/10] Add remaining atomic ops

---
 libclc/amdgcn-amdhsa/libspirv/SOURCES |  6 ++-
 .../libspirv/atomic/atomic_and.cl     | 20 ++++++++
 .../libspirv/atomic/atomic_max.cl     | 32 +++++++++++++
 .../libspirv/atomic/atomic_min.cl     | 32 +++++++++++++
 .../libspirv/atomic/atomic_minmax.h   | 48 +++++++++++++++++++
 .../libspirv/atomic/atomic_or.cl      | 20 ++++++++
 .../libspirv/atomic/atomic_xor.cl     | 20 ++++++++
 7 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
 create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl
 create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl
 create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h
 create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl
 create mode 100644 libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl

diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES
index cb365952f3480..15f27e951ca31 100644
--- a/libclc/amdgcn-amdhsa/libspirv/SOURCES
+++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES
@@ -3,13 +3,17 @@ workitem/get_global_offset.ll
 group/group_ballot.cl
 group/collectives.cl
 group/collectives_helpers.ll
+atomic/atomic_and.cl
 atomic/atomic_add.cl
 atomic/atomic_cmpxchg.cl
 atomic/atomic_xchg.cl
 atomic/atomic_load.cl
+atomic/atomic_or.cl
+atomic/atomic_xor.cl
+atomic/atomic_min.cl
+atomic/atomic_max.cl
 atomic/atomic_sub.cl
 atomic/atomic_store.cl
-cl_khr_int64_extended_atomics/minmax_helpers.ll
 synchronization/barrier.cl
 math/acos.cl
 math/acosh.cl
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
new file mode 100644
index 0000000000000..4966de3405312
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include
+#include
+
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, int, i, __hip_atomic_fetch_and)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned int, j, __hip_atomic_fetch_and)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, long, l, __hip_atomic_fetch_and)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned long, m, __hip_atomic_fetch_and)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl
new file mode 100644
index 0000000000000..1a7e5e3943448
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include "atomic_minmax.h"
+#include
+#include
+
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, __hip_atomic_fetch_max)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, __hip_atomic_fetch_max)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, __hip_atomic_fetch_max)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, __hip_atomic_fetch_max)
+
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii)
+
+#ifdef cl_khr_int64_base_atomics
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll)
+#endif
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_FP_MINMAX_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl
new file mode 100644
index 0000000000000..d924874b964b5
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include "atomic_minmax.h"
+#include
+#include
+
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, __hip_atomic_fetch_min)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, __hip_atomic_fetch_min)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, __hip_atomic_fetch_min)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, __hip_atomic_fetch_min)
+
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii)
+
+#ifdef cl_khr_int64_base_atomics
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll)
+#endif
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_FP_MINMAX_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h
new file mode 100644
index 0000000000000..2e35b240c342e
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include
+#include
+
+#define AMDGPU_ATOMIC_FP_MINMAX_IMPL(OPNAME, OP, TYPE, TYPE_MANGLED, \
+                                     STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \
+                                     AS_MANGLED, SUB1, SUB2) \
+  _CLC_DEF STORAGE_TYPE \
+  _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \
+      volatile AS STORAGE_TYPE *, enum Scope, enum MemorySemanticsMask, \
+      enum MemorySemanticsMask, STORAGE_TYPE desired, \
+      STORAGE_TYPE expected); \
+  _CLC_DEF STORAGE_TYPE \
+  _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      const volatile AS STORAGE_TYPE *, enum Scope, \
+      enum MemorySemanticsMask); \
+  _CLC_DEF TYPE \
+  _Z21__spirv_AtomicF##OPNAME##EXTP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    volatile AS STORAGE_TYPE *int_pointer = (volatile AS STORAGE_TYPE *)p; \
+    STORAGE_TYPE old_int_val = 0, new_int_val = 0; \
+    TYPE old_val = 0; \
+    do { \
+      old_int_val = \
+          _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+              int_pointer, scope, semantics); \
+      old_val = *(TYPE *)&old_int_val; \
+      if (old_val OP val) \
+        return old_val; \
+      new_int_val = *(STORAGE_TYPE *)&val; \
+    } while ( \
+        _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \
+            int_pointer, scope, semantics, semantics, new_int_val, \
+            old_int_val) != old_int_val); \
+ \
+    return old_val; \
+  }
+
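There is no native fetch-min/fetch-max for floating point, so the helper above uses the same load / compare-exchange pattern, returning early once the stored value already satisfies the ordering. Roughly, for the float max case (illustrative C++ sketch; std::atomic stands in for the SPIR-V bindings and is not the code the macro emits):

#include <atomic>
#include <cstdint>
#include <cstring>

// Sketch of the scheme generated by AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, ...).
float fetch_max_float(std::atomic<uint32_t> &word, float val) {
  uint32_t old_bits = word.load(std::memory_order_relaxed);
  for (;;) {
    float old_val;
    std::memcpy(&old_val, &old_bits, sizeof(float));
    if (old_val > val) // stored value already larger: nothing to store
      return old_val;
    uint32_t new_bits;
    std::memcpy(&new_bits, &val, sizeof(float));
    // On failure, old_bits is refreshed with the current value and we retry.
    if (word.compare_exchange_weak(old_bits, new_bits,
                                   std::memory_order_relaxed))
      return old_val;
  }
}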
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl
new file mode 100644
index 0000000000000..48339fed8f617
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include
+#include
+
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, int, i, __hip_atomic_fetch_or)
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, unsigned int, j, __hip_atomic_fetch_or)
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, long, l, __hip_atomic_fetch_or)
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, unsigned long, m, __hip_atomic_fetch_or)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl
new file mode 100644
index 0000000000000..3de8f61842fbb
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include
+#include
+
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, int, i, __hip_atomic_fetch_xor)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __hip_atomic_fetch_xor)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, long, l, __hip_atomic_fetch_xor)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
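With the remaining operations in place, a SYCL kernel running on the HIP backend can use an ordinary atomic_ref for these updates. A minimal usage sketch (assumes an existing queue `q` and a USM allocation `data` of `n` floats; any order the device reports may be used in place of relaxed):

#include <sycl/sycl.hpp>

// Each relaxed, device-scope fetch_add below lowers to __spirv_AtomicFAddEXT,
// which this series maps onto __hip_atomic_fetch_add.
void accumulate(sycl::queue &q, float *data, size_t n, float inc) {
  q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) {
     sycl::atomic_ref<float, sycl::memory_order::relaxed,
                      sycl::memory_scope::device,
                      sycl::access::address_space::global_space>
         ref(data[i]);
     ref.fetch_add(inc);
   }).wait();
}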