diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index ef8f2557e6f26..1f49f976ddff3 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -588,17 +588,19 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   if (LangOpts.HIP) {
     Builder.defineMacro("__HIP__");
     Builder.defineMacro("__HIPCC__");
-    Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
-    Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
-    Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
-    Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
-    Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
     if (LangOpts.CUDAIsDevice)
       Builder.defineMacro("__HIP_DEVICE_COMPILE__");
     if (LangOpts.GPUDefaultStream ==
         LangOptions::GPUDefaultStreamKind::PerThread)
       Builder.defineMacro("HIP_API_PER_THREAD_DEFAULT_STREAM");
   }
+  if (LangOpts.HIP || (LangOpts.OpenCL && TI.getTriple().isAMDGPU())) {
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
+  }
 }
 
 /// Initialize the predefined C++ language feature test macros defined in
@@ -1299,7 +1301,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
     const llvm::Triple &DeviceTriple = TI.getTriple();
     const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
-    if (DeviceTriple.isNVPTX() ||
+    if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
         (DeviceTriple.isSPIR() && DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
       Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
diff --git a/clang/test/Preprocessor/opencl-macro-target-specific.cl b/clang/test/Preprocessor/opencl-macro-target-specific.cl
new file mode 100644
index 0000000000000..a37a5740fa8f0
--- /dev/null
+++ b/clang/test/Preprocessor/opencl-macro-target-specific.cl
@@ -0,0 +1,9 @@
+// This test checks for the presence of target-specific macros for OpenCL
+//
+// RUN: %clang_cc1 %s -E -dM -triple amdgcn-amdhsa-amdhsa \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_AGENT
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SINGLETHREAD
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SYSTEM
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WAVEFRONT
+// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WORKGROUP
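Hoisting the __HIP_MEMORY_SCOPE_* defines into a block shared by HIP and by OpenCL-on-AMDGPU is what lets libclc's OpenCL sources (added later in this patch) talk to the HIP atomic builtins. As a sketch of what this enables, in illustrative OpenCL C that is not part of the patch and assumes a clang recent enough to provide the __hip_atomic_* builtins:

    // With this change, compiling OpenCL C for an amdgcn triple predefines
    // the HIP scope macros, so an explicit scope can be passed through:
    int fetch_add_agent(volatile __global int *p, int v) {
      return __hip_atomic_fetch_add(p, v, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    }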
diff --git a/clang/test/Preprocessor/sycl-macro-target-specific.cpp b/clang/test/Preprocessor/sycl-macro-target-specific.cpp
index c9f227def1178..001df46104560 100644
--- a/clang/test/Preprocessor/sycl-macro-target-specific.cpp
+++ b/clang/test/Preprocessor/sycl-macro-target-specific.cpp
@@ -1,3 +1,5 @@
+// This test checks for the presence of target-specific macros for SYCL
+//
 // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-NVPTX %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
@@ -8,9 +10,26 @@
 // RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
 // CHECK-NVPTX: #define __NVPTX__
 // CHECK-NVPTX-NEG-NOT: #define __NVPTX__
 
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_x86_64-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
+// CHECK-AMDGPU: #define __AMDGPU__
+// CHECK-AMDGPU-NEG-NOT: #define __AMDGPU__
+
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
@@ -21,6 +40,8 @@
 // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
 // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
+// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
 // CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
 // CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 052a7abd3808a..ac4b69711dcb2 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -336,8 +336,11 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
       # Disables NVVM reflection to defer to after linking
       set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72"
         "SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false")
-    else()
-      set ( flags )
+    elseif( ${ARCH} STREQUAL amdgcn )
+      # AMDGCN needs generic address space for atomics
+      set( flags "SHELL:-Xclang -cl-std=CL2.0")
+    else()
+      set ( flags )
     endif()
     set( arch_suffix "${t}" )
   else()
diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES
index 43a10d58dffe4..15f27e951ca31 100644
--- a/libclc/amdgcn-amdhsa/libspirv/SOURCES
+++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES
@@ -3,8 +3,17 @@ workitem/get_global_offset.ll
 group/group_ballot.cl
 group/collectives.cl
 group/collectives_helpers.ll
-atomic/loadstore_helpers.ll
-cl_khr_int64_extended_atomics/minmax_helpers.ll
+atomic/atomic_and.cl
+atomic/atomic_add.cl
+atomic/atomic_cmpxchg.cl
+atomic/atomic_xchg.cl
+atomic/atomic_load.cl
+atomic/atomic_or.cl
+atomic/atomic_xor.cl
+atomic/atomic_min.cl
+atomic/atomic_max.cl
+atomic/atomic_sub.cl
+atomic/atomic_store.cl
 synchronization/barrier.cl
 math/acos.cl
 math/acosh.cl
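The -cl-std=CL2.0 flag added for amdgcn is load-bearing for the files listed in SOURCES: each helper macro below is instantiated once for the global and once for the local address space, and a third time with no qualifier at all, i.e. for generic pointers, and the generic address space only exists from OpenCL C 2.0 onwards. A minimal illustration (not from the patch):

    // Accepted under -cl-std=CL2.0: 'p' is a generic pointer that may
    // point to global or local memory.
    int load_generic(volatile int *p) { return *p; }
    // Under CL1.2 the address space must be spelled out instead:
    int load_global(volatile __global int *p) { return *p; }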
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
new file mode 100644
index 0000000000000..5252afbacf2af
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
+AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
+
+#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2) \
+  _CLC_DEF long \
+  _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
+      volatile AS long *, enum Scope, enum MemorySemanticsMask, \
+      enum MemorySemanticsMask, long desired, long expected); \
+  _CLC_DEF long \
+  _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      const volatile AS long *, enum Scope, enum MemorySemanticsMask); \
+  _CLC_DEF double \
+  _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \
+      volatile AS double *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, double val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    volatile AS long *int_pointer = (volatile AS long *)p; \
+    long old_int_val = 0, new_int_val = 0; \
+    do { \
+      old_int_val = \
+          _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+              int_pointer, scope, semantics); \
+      double new_double_val = *(double *)&old_int_val + val; \
+      new_int_val = *(long *)&new_double_val; \
+    } while ( \
+        _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
+            int_pointer, scope, semantics, semantics, new_int_val, \
+            old_int_val) != old_int_val); \
+ \
+    return *(double *)&old_int_val; \
+  }
+
+#ifdef cl_khr_int64_base_atomics
+AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5)
+AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5)
+AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4)
+#endif
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_FP64_ADD_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
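The FP64 path above cannot use a native fetch-add, so it emulates one with a 64-bit integer compare-exchange loop. The same technique, stripped of the mangling macros and written with generic clang builtins (an illustrative sketch with made-up names, not the patch's code):

    double fetch_add_double(volatile long *bits, double val) {
      long old_bits, new_bits;
      do {
        old_bits = __atomic_load_n(bits, __ATOMIC_RELAXED);
        double sum = *(double *)&old_bits + val; // reinterpret bits, then add
        new_bits = *(long *)&sum;
      } while (!__atomic_compare_exchange_n(bits, &old_bits, new_bits,
                                            /*weak=*/0, __ATOMIC_RELAXED,
                                            __ATOMIC_RELAXED));
      return *(double *)&old_bits; // value before the add, per SPIR-V semantics
    }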
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
new file mode 100644
index 0000000000000..4966de3405312
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, int, i, __hip_atomic_fetch_and)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned int, j, __hip_atomic_fetch_and)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, long, l, __hip_atomic_fetch_and)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned long, m, __hip_atomic_fetch_and)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl
new file mode 100644
index 0000000000000..6911f16679457
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1, \
+                                   SUB2) \
+  _CLC_DEF TYPE \
+  _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_##TYPE_MANGLED##TYPE_MANGLED( \
+      volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask success_semantics, \
+      enum MemorySemanticsMask failure_semantics, TYPE desired, \
+      TYPE expected) { \
+    int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \
+                               memory_order_success) \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \
+                               memory_order_failure) \
+    TYPE original_val = *p; \
+    bool success = __hip_atomic_compare_exchange_strong( \
+        p, &expected, desired, memory_order_success, memory_order_failure, \
+        atomic_scope); \
+ \
+    return success ? original_val : *p; \
+  }
+
+#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \
+  AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, 5) \
+  AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, 5) \
+  AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4)
+
+AMDGPU_ATOMIC_CMPXCHG(int, i)
+AMDGPU_ATOMIC_CMPXCHG(unsigned int, j)
+AMDGPU_ATOMIC_CMPXCHG(long, l)
+AMDGPU_ATOMIC_CMPXCHG(unsigned long, m)
+AMDGPU_ATOMIC_CMPXCHG(float, f)
+
+// TODO implement for fp64
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_CMPXCHG
+#undef AMDGPU_ATOMIC_CMPXCHG_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
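One observation on the wrapper above: __hip_atomic_compare_exchange_strong already writes the value it observed into 'expected' on failure, whereas 'original_val' comes from a separate plain read, so the success/failure re-read of *p can race with other writers. A tighter formulation of the same contract (an untested sketch, not what the patch does) would return the observed value directly:

    // 'expected' is a by-value parameter of the generated function, so it
    // can double as the out-parameter and the return value:
    __hip_atomic_compare_exchange_strong(p, &expected, desired,
                                         memory_order_success,
                                         memory_order_failure, atomic_scope);
    return expected; // value observed by the CAS, on success or failure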
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
new file mode 100644
index 0000000000000..6b053a38930fb
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+#define GET_ATOMIC_SCOPE_AND_ORDER(IN_SCOPE, OUT_SCOPE, IN_SEMANTICS, \
+                                   OUT_ORDER) \
+  { \
+    switch (IN_SCOPE) { \
+    case Subgroup: \
+      OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \
+      break; \
+    case Workgroup: \
+      OUT_SCOPE = __HIP_MEMORY_SCOPE_WORKGROUP; \
+      break; \
+    case Device: \
+      OUT_SCOPE = __HIP_MEMORY_SCOPE_AGENT; \
+      break; \
+    case CrossDevice: \
+      OUT_SCOPE = __HIP_MEMORY_SCOPE_SYSTEM; \
+      break; \
+    default: \
+      __builtin_trap(); \
+      __builtin_unreachable(); \
+    } \
+    unsigned order = IN_SEMANTICS & 0x1F; \
+    switch (order) { \
+    case None: \
+      OUT_ORDER = __ATOMIC_RELAXED; \
+      break; \
+    case Acquire: \
+      OUT_ORDER = __ATOMIC_ACQUIRE; \
+      break; \
+    case Release: \
+      OUT_ORDER = __ATOMIC_RELEASE; \
+      break; \
+    case AcquireRelease: \
+      OUT_ORDER = __ATOMIC_ACQ_REL; \
+      break; \
+    case SequentiallyConsistent: \
+      OUT_ORDER = __ATOMIC_SEQ_CST; \
+      break; \
+    default: \
+      __builtin_trap(); \
+      __builtin_unreachable(); \
+    } \
+  }
+
+#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
+                           SUB1, BUILTIN) \
+  _CLC_DEF TYPE \
+  FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
+    TYPE ret = BUILTIN(p, val, memory_order, atomic_scope); \
+    return *(TYPE *)&ret; \
+  }
+
+#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \
+  AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \
+  AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \
+  AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)
+
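To make the token pasting concrete: for int in the global address space, AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add) expands to roughly the following (hand-expanded from the macros above, whitespace added):

    _CLC_DEF int
    _Z18__spirv_AtomicIAddPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(
        volatile global int *p, enum Scope scope,
        enum MemorySemanticsMask semantics, int val) {
      int atomic_scope = 0, memory_order = 0;
      GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order)
      int ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope);
      return *(int *)&ret;
    }

That symbol is the Itanium mangling of __spirv_AtomicIAdd(int volatile AS1 *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, int), which is what SPIR-V-style SYCL device code calls.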
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl
new file mode 100644
index 0000000000000..a96c2b233b386
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \
+  _CLC_DEF TYPE \
+  _Z18__spirv_AtomicLoadP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      const volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics) { \
+    int atomic_scope = 0, memory_order = 0; \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
+    TYPE res = __hip_atomic_load(p, memory_order, atomic_scope); \
+    return *(TYPE *)&res; \
+  }
+
+#define AMDGPU_ATOMIC_LOAD(TYPE, TYPE_MANGLED) \
+  AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, global, U3AS1) \
+  AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, local, U3AS3) \
+  AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, , )
+
+AMDGPU_ATOMIC_LOAD(int, Ki)
+AMDGPU_ATOMIC_LOAD(unsigned int, Kj)
+AMDGPU_ATOMIC_LOAD(long, Kl)
+AMDGPU_ATOMIC_LOAD(unsigned long, Km)
+AMDGPU_ATOMIC_LOAD(float, Kf)
+
+// TODO implement for fp64
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_LOAD
+#undef AMDGPU_ATOMIC_LOAD_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl
new file mode 100644
index 0000000000000..1a7e5e3943448
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include "atomic_minmax.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, __hip_atomic_fetch_max)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, __hip_atomic_fetch_max)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, __hip_atomic_fetch_max)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, __hip_atomic_fetch_max)
+
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii)
+
+#ifdef cl_khr_int64_base_atomics
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll)
+#endif
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_FP_MINMAX_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl
new file mode 100644
index 0000000000000..d924874b964b5
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include "atomic_minmax.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, __hip_atomic_fetch_min)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, __hip_atomic_fetch_min)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, __hip_atomic_fetch_min)
+AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, __hip_atomic_fetch_min)
+
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii)
+
+#ifdef cl_khr_int64_base_atomics
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll)
+AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll)
+#endif
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_FP_MINMAX_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h
new file mode 100644
index 0000000000000..2e35b240c342e
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+#define AMDGPU_ATOMIC_FP_MINMAX_IMPL(OPNAME, OP, TYPE, TYPE_MANGLED, \
+                                     STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \
+                                     AS_MANGLED, SUB1, SUB2) \
+  _CLC_DEF STORAGE_TYPE \
+  _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \
+      volatile AS STORAGE_TYPE *, enum Scope, enum MemorySemanticsMask, \
+      enum MemorySemanticsMask, STORAGE_TYPE desired, \
+      STORAGE_TYPE expected); \
+  _CLC_DEF STORAGE_TYPE \
+  _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      const volatile AS STORAGE_TYPE *, enum Scope, \
+      enum MemorySemanticsMask); \
+  _CLC_DEF TYPE \
+  _Z21__spirv_AtomicF##OPNAME##EXTP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    volatile AS STORAGE_TYPE *int_pointer = (volatile AS STORAGE_TYPE *)p; \
+    STORAGE_TYPE old_int_val = 0, new_int_val = 0; \
+    TYPE old_val = 0; \
+    do { \
+      old_int_val = \
+          _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+              int_pointer, scope, semantics); \
+      old_val = *(TYPE *)&old_int_val; \
+      if (old_val OP val) \
+        return old_val; \
+      new_int_val = *(STORAGE_TYPE *)&val; \
+    } while ( \
+        _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \
+            int_pointer, scope, semantics, semantics, new_int_val, \
+            old_int_val) != old_int_val); \
+ \
+    return old_val; \
+  }
+
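The helper above compares in the floating-point domain but performs the exchange on the raw bits, and it returns early when the stored value already wins the comparison, skipping the CAS entirely. Stripped of the mangling, the float-max case looks like this (an illustrative sketch with made-up names):

    float fetch_max_float(volatile int *bits, float val) {
      int old_bits;
      float old_val;
      do {
        old_bits = __atomic_load_n(bits, __ATOMIC_RELAXED);
        old_val = *(float *)&old_bits;
        if (old_val > val)
          return old_val; // already the max: no store needed
      } while (!__atomic_compare_exchange_n(bits, &old_bits, *(int *)&val,
                                            /*weak=*/0, __ATOMIC_RELAXED,
                                            __ATOMIC_RELAXED));
      return old_val; // value displaced by the exchange
    }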
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl
new file mode 100644
index 0000000000000..48339fed8f617
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, int, i, __hip_atomic_fetch_or)
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, unsigned int, j, __hip_atomic_fetch_or)
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, long, l, __hip_atomic_fetch_or)
+AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, unsigned long, m, __hip_atomic_fetch_or)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl
new file mode 100644
index 0000000000000..e138910d9b19d
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1) \
+  _CLC_DEF void \
+  _Z19__spirv_AtomicStore##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
+    __hip_atomic_store(p, val, memory_order, atomic_scope); \
+    return; \
+  }
+
+#define AMDGPU_ATOMIC_STORE(TYPE, TYPE_MANGLED) \
+  AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1) \
+  AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1) \
+  AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, , , 0)
+
+AMDGPU_ATOMIC_STORE(int, i)
+AMDGPU_ATOMIC_STORE(unsigned int, j)
+AMDGPU_ATOMIC_STORE(long, l)
+AMDGPU_ATOMIC_STORE(unsigned long, m)
+AMDGPU_ATOMIC_STORE(float, f)
+
+// TODO implement for fp64
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_STORE
+#undef AMDGPU_ATOMIC_STORE_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl
new file mode 100644
index 0000000000000..e6581385d7720
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl
@@ -0,0 +1,42 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
+                               NOT_GENERIC, BUILTIN) \
+  _CLC_DEF TYPE \
+  FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##NOT_GENERIC##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile AS TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
+    TYPE ret = BUILTIN(p, val, memory_order); \
+    return *(TYPE *)&ret; \
+  }
+
+#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \
+  AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, \
+                         BUILTIN) \
+  AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, \
+                         BUILTIN) \
+  AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)
+
+AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, int, i, __atomic_fetch_sub)
+AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned int, j, __atomic_fetch_sub)
+AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_sub)
+AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_sub)
+AMDGPU_ATOMIC_SUB(_Z21__spirv_AtomicFSubEXT, float, f, __atomic_fetch_sub)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef AMDGPU_ATOMIC_SUB
+#undef AMDGPU_ATOMIC_SUB_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
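Note that ISub and FSubEXT fall back to the generic __atomic_fetch_sub builtin, since clang provides no __hip_atomic_fetch_sub; the scope computed by GET_ATOMIC_SCOPE_AND_ORDER is therefore discarded and the operation runs at the builtin's default system scope. For the integer cases a scoped equivalent could negate the operand and reuse the add builtin, e.g. (a sketch, not what the patch does):

    // Two's-complement wraparound makes this exact for signed and unsigned
    // integers alike; it does not carry over to the float case.
    int ret = __hip_atomic_fetch_add(p, -val, memory_order, atomic_scope);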
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl
new file mode 100644
index 0000000000000..131510956d545
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, __hip_atomic_exchange)
+AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j,
+              __hip_atomic_exchange)
+AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, __hip_atomic_exchange)
+AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m,
+              __hip_atomic_exchange)
+AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, __hip_atomic_exchange)
+
+// TODO implement for fp64
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl
new file mode 100644
index 0000000000000..3de8f61842fbb
--- /dev/null
+++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "atomic_helpers.h"
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, int, i, __hip_atomic_fetch_xor)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __hip_atomic_fetch_xor)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, long, l, __hip_atomic_fetch_xor)
+AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor)
+
+#undef AMDGPU_ATOMIC
+#undef AMDGPU_ATOMIC_IMPL
+#undef GET_ATOMIC_SCOPE_AND_ORDER
diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll b/libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll
deleted file mode 100644
index 3c27e43c295fd..0000000000000
--- a/libclc/amdgcn-amdhsa/libspirv/atomic/loadstore_helpers.ll
+++ /dev/null
@@ -1,296 +0,0 @@
-#if __clang_major__ >= 7
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
-#else
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
-#endif
-
-declare void @llvm.trap()
-
-define i32 @__clc__atomic_load_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
-  ret i32 %0
-}
-
-define i32 @__clc__atomic_load_local_4_unordered(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr unordered, align 4
-  ret i32 %0
-}
-
-define i64 @__clc__atomic_load_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8
-  ret i64 %0
-}
-
-define i64 @__clc__atomic_load_local_8_unordered(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr unordered, align 8
-  ret i64 %0
-}
-
-define i32 @__clc__atomic_uload_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
-  ret i32 %0
-}
-
-define i32 @__clc__atomic_uload_local_4_unordered(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr unordered, align 4
-  ret i32 %0
-}
-
-define i64 @__clc__atomic_uload_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8
-  ret i64 %0
-}
-
-define i64 @__clc__atomic_uload_local_8_unordered(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr unordered, align 8
-  ret i64 %0
-}
-
-define i32 @__clc__atomic_load_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i32 @__clc__atomic_load_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_load_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i32 @__clc__atomic_uload_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_uload_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-
-define i32 @__clc__atomic_load_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i32 @__clc__atomic_load_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_load_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_load_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i32 @__clc__atomic_uload_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i32 @__clc__atomic_uload_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_uload_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define i64 @__clc__atomic_uload_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4
-  ret void
-}
-
-define void @__clc__atomic_store_local_4_unordered(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr unordered, align 4
-  ret void
-}
-
-define void @__clc__atomic_store_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8
-  ret void
-}
-
-define void @__clc__atomic_store_local_8_unordered(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr unordered, align 8
-  ret void
-}
-
-define void @__clc__atomic_ustore_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4
-  ret void
-}
-
-define void @__clc__atomic_ustore_local_4_unordered(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr unordered, align 4
-  ret void
-}
-
-define void @__clc__atomic_ustore_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8
-  ret void
-}
-
-define void @__clc__atomic_ustore_local_8_unordered(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr unordered, align 8
-  ret void
-}
-
-define void @__clc__atomic_store_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_store_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
-
-define void @__clc__atomic_ustore_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
-entry:
-  tail call void @llvm.trap()
-  unreachable
-}
diff --git a/libclc/test/binding/core/GroupWaitEvents.cl b/libclc/test/binding/core/GroupWaitEvents.cl
index 6787c6a28e75e..fd8a179151379 100644
--- a/libclc/test/binding/core/GroupWaitEvents.cl
+++ b/libclc/test/binding/core/GroupWaitEvents.cl
@@ -9,7 +9,7 @@
 
 // Autogenerated by gen-libclc-test.py
 
-// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
+// RUN: %clang -emit-llvm -cl-std=CL2.0 -S -o - %s | FileCheck %s
 
 #include <spirv/spirv.h>
diff --git a/sycl/include/sycl/atomic_ref.hpp b/sycl/include/sycl/atomic_ref.hpp
index cbae52de9d7c7..db4051a0849ab 100644
--- a/sycl/include/sycl/atomic_ref.hpp
+++ b/sycl/include/sycl/atomic_ref.hpp
@@ -126,6 +126,12 @@ class atomic_ref_base {
       detail::IsValidDefaultOrder<DefaultOrder>::value,
       "Invalid default memory_order for atomics. Valid defaults are: "
       "relaxed, acq_rel, seq_cst");
+#ifdef __AMDGPU__
+  // FIXME: should this query the device's memory capabilities at runtime?
+  static_assert(DefaultOrder != sycl::memory_order::seq_cst,
+                "seq_cst memory order is not supported on AMDGPU");
+#endif
+
 public:
   using value_type = T;
diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp
index 0f31986096d48..17583c47e148b 100644
--- a/sycl/plugins/hip/pi_hip.cpp
+++ b/sycl/plugins/hip/pi_hip.cpp
@@ -1852,8 +1852,13 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
                    pi_int32{1});
   }
 
-  // TODO: Implement.
-  case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
+  case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
+    pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED |
+                                                PI_MEMORY_ORDER_ACQUIRE |
+                                                PI_MEMORY_ORDER_RELEASE;
+    return getInfo(param_value_size, param_value, param_value_size_ret,
+                   capabilities);
+  }
   // TODO: Investigate if this information is available on HIP.
   case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
   case PI_DEVICE_INFO_DEVICE_ID:
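End-to-end effect of the last two hunks: the HIP plugin now reports relaxed, acquire, and release (but not seq_cst) among its atomic memory order capabilities, and SYCL code that bakes seq_cst in as the default order of an atomic_ref is rejected at compile time when targeting AMDGPU. A hypothetical user-side reproducer (illustrative; 'ptr' is assumed to be a global-space int pointer inside a kernel):

    sycl::atomic_ref<int, sycl::memory_order::seq_cst,
                     sycl::memory_scope::device,
                     sycl::access::address_space::global_space>
        ref(*ptr); // on AMDGPU this now fails the new static_assert

Per-operation seq_cst arguments are unaffected; only the class-level default order is checked here.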