Skip to content

opal: enable load-linked, store-conditional atomics for AArch64 #8412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion opal/class/opal_fifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
* Copyright (c) 2021 Google, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -76,7 +78,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost;
}

#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR

/* Add one element to the FIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the
Expand Down
8 changes: 5 additions & 3 deletions opal/class/opal_lifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
* reserved.
* Copyright (c) 2016-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
* Copyright (c) 2021 Google, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -50,7 +52,7 @@ union opal_counted_pointer_t {
/** list item pointer */
volatile opal_atomic_intptr_t item;
} data;
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T && !OPAL_HAVE_ATOMIC_LLSC_PTR
/** used for atomics when there is a cmpset that can operate on
* two 64-bit values */
opal_atomic_int128_t atomic_value;
Expand All @@ -60,7 +62,7 @@ union opal_counted_pointer_t {
typedef union opal_counted_pointer_t opal_counted_pointer_t;


#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR

/* Add one element to the FIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the
Expand Down Expand Up @@ -136,7 +138,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
}


#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR

/* Add one element to the LIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the
Expand Down
3 changes: 3 additions & 0 deletions opal/include/opal/sys/arm64/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
# Copyright (c) 2021 Google, LLC. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand All @@ -20,5 +22,6 @@

headers += \
opal/sys/arm64/atomic.h \
opal/sys/arm64/atomic_llsc.h \
opal/sys/arm64/timer.h

58 changes: 4 additions & 54 deletions opal/include/opal/sys/arm64/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,29 @@
* Copyright (c) 2010 ARM ltd. All rights reserved.
* Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
* Copyright (c) 2021 Google, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#include "atomic_llsc.h"

#if !defined(OPAL_SYS_ARCH_ATOMIC_H)

#define OPAL_SYS_ARCH_ATOMIC_H 1

#if OPAL_GCC_INLINE_ASSEMBLY

#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
#define OPAL_HAVE_ATOMIC_AND_32 1
#define OPAL_HAVE_ATOMIC_OR_32 1
Expand Down Expand Up @@ -162,32 +164,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (opal_atomic_int32
return ret;
}

/**
 * Load-linked (exclusive load) of a 32-bit value.
 *
 * Executes an AArch64 `ldaxr` on addr and assigns the loaded value to ret,
 * cast to ret's own type via typeof.
 *
 * @param addr  pointer stored into an opal_atomic_int32_t * temporary
 * @param ret   lvalue that receives the loaded value
 *
 * The expansion declares temporaries named _addr and _ret; an argument that
 * is itself spelled _addr or _ret would be captured by them.
 * NOTE(review): the asm statement has no "memory" clobber, unlike the
 * matching store-conditional macro -- confirm that this is intentional.
 */
#define opal_atomic_ll_32(addr, ret) \
do { \
opal_atomic_int32_t *_addr = (addr); \
int32_t _ret; \
\
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)

/**
 * Store-conditional (exclusive store) of a 32-bit value.
 *
 * Executes an AArch64 `stlxr` that attempts to store newval to addr. The
 * instruction writes a status word into a scratch register; ret is set to 1
 * when that status is 0 (the store was performed) and to 0 otherwise.
 *
 * @param addr    pointer stored into an opal_atomic_int32_t * temporary
 * @param newval  value to store; converted to int32_t
 * @param ret     lvalue that receives the success flag
 *
 * newval and ret are parenthesized in the expansion so that expression
 * arguments are converted/assigned as a whole. The temporaries are named
 * _addr, _newval and _ret; arguments spelled that way would be captured.
 */
#define opal_atomic_sc_32(addr, newval, ret) \
    do { \
        opal_atomic_int32_t *_addr = (addr); \
        int32_t _newval = (int32_t) (newval); \
        int _ret; \
        \
        __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
                              : "=&r" (_ret) \
                              : "r" (_addr), "r" (_newval) \
                              : "cc", "memory"); \
        \
        (ret) = (_ret == 0); \
    } while (0)

static inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval)
{
int64_t prev;
Expand Down Expand Up @@ -272,32 +248,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (opal_atomic_int64
return ret;
}

/**
 * Load-linked (exclusive load) of a 64-bit value.
 *
 * Executes an AArch64 `ldaxr` on addr and assigns the loaded value to ret,
 * cast to ret's own type via typeof.
 *
 * @param addr  pointer stored into an opal_atomic_int64_t * temporary
 * @param ret   lvalue that receives the loaded value
 *
 * The expansion declares temporaries named _addr and _ret; an argument that
 * is itself spelled _addr or _ret would be captured by them.
 * NOTE(review): the asm statement has no "memory" clobber, unlike the
 * matching store-conditional macro -- confirm that this is intentional.
 */
#define opal_atomic_ll_64(addr, ret) \
do { \
opal_atomic_int64_t *_addr = (addr); \
int64_t _ret; \
\
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)

/**
 * Store-conditional (exclusive store) of a 64-bit value.
 *
 * Executes an AArch64 `stlxr` that attempts to store newval to addr. The
 * instruction writes a status word into a scratch register; ret is set to 1
 * when that status is 0 (the store was performed) and to 0 otherwise.
 *
 * @param addr    pointer stored into an opal_atomic_int64_t * temporary
 * @param newval  value to store; converted to int64_t
 * @param ret     lvalue that receives the success flag
 *
 * newval and ret are parenthesized in the expansion so that expression
 * arguments are converted/assigned as a whole. The temporaries are named
 * _addr, _newval and _ret; arguments spelled that way would be captured.
 */
#define opal_atomic_sc_64(addr, newval, ret) \
    do { \
        opal_atomic_int64_t *_addr = (addr); \
        int64_t _newval = (int64_t) (newval); \
        int _ret; \
        \
        __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
                              : "=&r" (_ret) \
                              : "r" (_addr), "r" (_newval) \
                              : "cc", "memory"); \
        \
        (ret) = (_ret == 0); \
    } while (0)

#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
static inline type opal_atomic_fetch_ ## name ## _ ## bits (opal_atomic_ ## type *addr, type value) \
{ \
Expand Down
92 changes: 92 additions & 0 deletions opal/include/opal/sys/arm64/atomic_llsc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010 ARM ltd. All rights reserved.
* Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
* Copyright (c) 2021 Google, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#if !defined(OPAL_SYS_ARCH_ATOMIC_LLSC_H)

#define OPAL_SYS_ARCH_ATOMIC_LLSC_H

#if OPAL_C_GCC_INLINE_ASSEMBLY

/* Advertise 32- and 64-bit LL/SC support. Any earlier definition is dropped
 * first so the values set here take effect without a macro-redefinition
 * diagnostic.
 * NOTE(review): presumably needed because another atomics header may already
 * have defined these (e.g. to 0) -- confirm against the include order. */
#undef OPAL_HAVE_ATOMIC_LLSC_32
#undef OPAL_HAVE_ATOMIC_LLSC_64

#define OPAL_HAVE_ATOMIC_LLSC_32 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1

/**
 * Load-linked (exclusive load) of a 32-bit value.
 *
 * Executes an AArch64 `ldaxr` on addr and assigns the loaded value to ret,
 * cast to ret's own type via typeof.
 *
 * @param addr  pointer stored into an opal_atomic_int32_t * temporary
 * @param ret   lvalue that receives the loaded value
 *
 * The expansion declares temporaries named _addr and _ret; an argument that
 * is itself spelled _addr or _ret would be captured by them.
 * NOTE(review): the asm statement has no "memory" clobber, unlike the
 * matching store-conditional macro -- confirm that this is intentional.
 */
#define opal_atomic_ll_32(addr, ret) \
do { \
opal_atomic_int32_t *_addr = (addr); \
int32_t _ret; \
\
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)

/**
 * Store-conditional (exclusive store) of a 32-bit value.
 *
 * Executes an AArch64 `stlxr` that attempts to store newval to addr. The
 * instruction writes a status word into a scratch register; ret is set to 1
 * when that status is 0 (the store was performed) and to 0 otherwise.
 *
 * @param addr    pointer stored into an opal_atomic_int32_t * temporary
 * @param newval  value to store; converted to int32_t
 * @param ret     lvalue that receives the success flag
 *
 * newval and ret are parenthesized in the expansion so that expression
 * arguments are converted/assigned as a whole. The temporaries are named
 * _addr, _newval and _ret; arguments spelled that way would be captured.
 */
#define opal_atomic_sc_32(addr, newval, ret) \
    do { \
        opal_atomic_int32_t *_addr = (addr); \
        int32_t _newval = (int32_t) (newval); \
        int _ret; \
        \
        __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
                              : "=&r" (_ret) \
                              : "r" (_addr), "r" (_newval) \
                              : "cc", "memory"); \
        \
        (ret) = (_ret == 0); \
    } while (0)

/**
 * Load-linked (exclusive load) of a 64-bit value.
 *
 * Executes an AArch64 `ldaxr` on addr and assigns the loaded value to ret,
 * cast to ret's own type via typeof.
 *
 * @param addr  pointer stored into an opal_atomic_int64_t * temporary
 * @param ret   lvalue that receives the loaded value
 *
 * The expansion declares temporaries named _addr and _ret; an argument that
 * is itself spelled _addr or _ret would be captured by them.
 * NOTE(review): the asm statement has no "memory" clobber, unlike the
 * matching store-conditional macro -- confirm that this is intentional.
 */
#define opal_atomic_ll_64(addr, ret) \
do { \
opal_atomic_int64_t *_addr = (addr); \
int64_t _ret; \
\
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)

/**
 * Store-conditional (exclusive store) of a 64-bit value.
 *
 * Executes an AArch64 `stlxr` that attempts to store newval to addr. The
 * instruction writes a status word into a scratch register; ret is set to 1
 * when that status is 0 (the store was performed) and to 0 otherwise.
 *
 * @param addr    pointer stored into an opal_atomic_int64_t * temporary
 * @param newval  value to store; converted to int64_t
 * @param ret     lvalue that receives the success flag
 *
 * newval and ret are parenthesized in the expansion so that expression
 * arguments are converted/assigned as a whole. The temporaries are named
 * _addr, _newval and _ret; arguments spelled that way would be captured.
 */
#define opal_atomic_sc_64(addr, newval, ret) \
    do { \
        opal_atomic_int64_t *_addr = (addr); \
        int64_t _newval = (int64_t) (newval); \
        int _ret; \
        \
        __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
                              : "=&r" (_ret) \
                              : "r" (_addr), "r" (_newval) \
                              : "cc", "memory"); \
        \
        (ret) = (_ret == 0); \
    } while (0)

#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */

#endif /* ! OPAL_SYS_ARCH_ATOMIC_LLSC_H */
54 changes: 48 additions & 6 deletions opal/include/opal/sys/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@
#include "opal/sys/architecture.h"
#include "opal_stdatomic.h"

#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)

#include "atomic_stdc.h"

#else /* !OPAL_C_HAVE__ATOMIC */

/* do some quick #define cleanup in cases where we are doing
testing... */
#ifdef OPAL_DISABLE_INLINE_ASM
#undef OPAL_C_GCC_INLINE_ASSEMBLY
#define OPAL_C_GCC_INLINE_ASSEMBLY 0
#endif

#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)

#include "atomic_stdc.h"

#else /* !OPAL_C_HAVE__ATOMIC */

/* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the
OPAL_C_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we
are in C or C++ */
Expand Down Expand Up @@ -642,6 +642,48 @@ static inline intptr_t opal_atomic_fetch_sub_ptr( opal_atomic_intptr_t* addr, vo

#endif /* !OPAL_C_HAVE__ATOMIC */

/****** load-linked, store-conditional atomic implementations ******/

/* C11 atomics do not expose the low-level load-linked, store-conditional
 * instructions. Open MPI can use these instructions to implement a more
 * efficient version of the lock-free lifo and fifo. On Apple Silicon the
 * LL/SC fifo and lifo are ~ 2-20x faster than the CAS128 implementation. */
/* This include sits after the C11 / non-C11 backend selection has been
 * closed, so the LL/SC header is pulled in whichever backend was chosen.
 * Only ARM64 provides an LL/SC header at this point. */
#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64
#include "opal/sys/arm64/atomic_llsc.h"
#endif

/* Default the per-width LL/SC feature macros to 0 when no architecture
 * header defined them, so every later #if can test them directly. */
#if !defined(OPAL_HAVE_ATOMIC_LLSC_32)
#define OPAL_HAVE_ATOMIC_LLSC_32 0
#endif

#if !defined(OPAL_HAVE_ATOMIC_LLSC_64)
#define OPAL_HAVE_ATOMIC_LLSC_64 0
#endif

/* Map the pointer-sized LL/SC operations onto the 32- or 64-bit variant that
 * matches SIZEOF_VOID_P, and publish OPAL_HAVE_ATOMIC_LLSC_PTR.
 *
 * OPAL_HAVE_ATOMIC_LLSC_PTR is always left defined when this section ends:
 *  - 1 when a matching-width LL/SC implementation exists;
 *  - 0 otherwise, including the case where some LL/SC width is available
 *    but none matches the pointer size.
 *
 * The macro is #undef'd before being set to 1 because an earlier header
 * (e.g. the fallback in atomic_impl.h) may already have defaulted it to 0;
 * redefining a macro with a different value is diagnosed by the compiler. */
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32

#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t) (value), ret)

#undef OPAL_HAVE_ATOMIC_LLSC_PTR
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64

#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t) (value), ret)

#undef OPAL_HAVE_ATOMIC_LLSC_PTR
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#endif /* pointer-width LL/SC selection */

/* No usable pointer-width LL/SC: make the feature macro an explicit 0. */
#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
#endif

END_C_DECLS

#endif /* OPAL_SYS_ATOMIC_H */
20 changes: 0 additions & 20 deletions opal/include/opal/sys/atomic_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,26 +304,6 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)

#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */

#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)

#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32

#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t) (value), ret)

#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64

#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t) (value), ret)

#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#endif

#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/

#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
#endif
Expand Down
4 changes: 0 additions & 4 deletions opal/include/opal/sys/atomic_stdc.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@
#define OPAL_HAVE_ATOMIC_XOR_64 1
#define OPAL_HAVE_ATOMIC_SUB_64 1

#define OPAL_HAVE_ATOMIC_LLSC_32 0
#define OPAL_HAVE_ATOMIC_LLSC_64 0
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0

#define OPAL_HAVE_ATOMIC_MIN_32 1
#define OPAL_HAVE_ATOMIC_MAX_32 1

Expand Down
4 changes: 2 additions & 2 deletions test/class/opal_fifo.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ static void *thread_test_exhaust (opal_object_t *arg) {

static bool check_fifo_consistency (opal_fifo_t *fifo, int expected_count)
{
volatile opal_list_item_t *volatile item;
opal_list_item_t * item;
int count;

for (count = 0, item = fifo->opal_fifo_head.data.item ; item != &fifo->opal_fifo_ghost ;
for (count = 0, item = (opal_list_item_t *) fifo->opal_fifo_head.data.item ; item != &fifo->opal_fifo_ghost ;
item = opal_list_get_next(item), count++);

return count == expected_count;
Expand Down
2 changes: 1 addition & 1 deletion test/class/opal_lifo.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ static bool check_lifo_consistency (opal_lifo_t *lifo, int expected_count)
opal_list_item_t *item;
int count;

for (count = 0, item = lifo->opal_lifo_head.data.item ; item != &lifo->opal_lifo_ghost ;
for (count = 0, item = (opal_list_item_t *) lifo->opal_lifo_head.data.item ; item != &lifo->opal_lifo_ghost ;
item = opal_list_get_next(item), count++);

return count == expected_count;
Expand Down