From 84f63d0aca48179f0728e8b010286e58329d53a8 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 8 Nov 2017 09:54:54 -0700 Subject: [PATCH 1/5] opal/asm: add opal_atomic_compare_exchange_strong functions This commit adds a new set of compare-and-exchange functions. These functions have a signature similar to the functions found in C11. The old cmpset functions are now deprecated and defined in terms of the new compare-and-exchange functions. All asm backends have been updated. Signed-off-by: Nathan Hjelm --- opal/class/opal_fifo.h | 2 +- opal/class/opal_lifo.h | 10 +- opal/include/opal/sys/arm/atomic.h | 110 +++----- opal/include/opal/sys/arm64/atomic.h | 88 +++--- opal/include/opal/sys/atomic.h | 177 ++++++++---- opal/include/opal/sys/atomic_impl.h | 285 ++++++++++++-------- opal/include/opal/sys/gcc_builtin/atomic.h | 59 ++-- opal/include/opal/sys/ia32/atomic.h | 12 +- opal/include/opal/sys/powerpc/atomic.h | 150 +++++------ opal/include/opal/sys/sparcv9/atomic.h | 109 ++++---- opal/include/opal/sys/sync_builtin/atomic.h | 59 ++-- opal/include/opal/sys/x86_64/atomic.h | 34 ++- opal/threads/thread_usage.h | 31 ++- test/asm/atomic_cmpset.c | 101 +++++-- 14 files changed, 697 insertions(+), 530 deletions(-) diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 67aa479deb5..a9da88f459a 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -76,7 +76,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost; } -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index 73caf32cb0c..0d8512fe0e8 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -36,8 +36,8 @@ BEGIN_C_DECLS /* NTH: temporarily suppress warnings about this not being defined */ -#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128) -#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 #endif /** @@ -50,7 +50,7 @@ union opal_counted_pointer_t { /** list item pointer */ volatile opal_list_item_t * volatile item; } data; -#if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T /** used for atomics when there is a cmpset that can operate on * two 64-bit values */ opal_int128_t value; @@ -59,7 +59,7 @@ union opal_counted_pointer_t { typedef union opal_counted_pointer_t opal_counted_pointer_t; -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the @@ -110,7 +110,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) } -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the LIFO. 
We will return the last head of the list * to allow the upper level to detect if this element is the first one in the diff --git a/opal/include/opal/sys/arm/atomic.h b/opal/include/opal/sys/arm/atomic.h index fa3b35d18b2..89e84c653c0 100644 --- a/opal/include/opal/sys/arm/atomic.h +++ b/opal/include/opal/sys/arm/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -104,12 +107,12 @@ void opal_atomic_isync(void) #if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6)) -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ( "1: ldrex %0, [%2] \n" @@ -120,11 +123,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, " bne 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } /* these two functions aren't inlined in the non-gcc case because then @@ -132,51 +137,50 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } #if (OPAL_ASM_SUPPORT_64BIT == 1) -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; - int tmp; - - - __asm__ __volatile__ ( - "1: ldrexd %0, %H0, [%2] \n" - " cmp %0, %3 \n" - " it eq \n" - " cmpeq %H0, %H3 \n" - " bne 2f \n" - " strexd %1, %4, %H4, [%2] \n" - " cmp %1, #0 \n" - " bne 1b \n" - "2: \n" - - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) - : "cc", "memory"); - - return (ret == oldval); + int64_t prev; + int tmp; + bool ret; + + __asm__ __volatile__ ( + "1: ldrexd %0, %H0, [%2] \n" + " cmp %0, %3 \n" + " it eq \n" + " cmpeq %H0, %H3 \n" + " bne 2f \n" + " strexd %1, %4, %H4, [%2] \n" + " cmp %1, #0 \n" + " bne 1b \n" + "2: \n" + + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) + : "cc", "memory"); + + ret = (prev == *oldval); + *oldval = prev; + return ret; } /* these two functions aren't inlined in the non-gcc case because then @@ -184,23 +188,21 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } #endif @@ -247,30 +249,6 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) return t; } -#else /* OPAL_ASM_ARM_VERSION <=5 or no GCC inline assembly */ - -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0))) -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return !(__kuser_cmpxchg(oldval, newval, addr)); -} - -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - /* kernel function includes all necessary memory barriers */ - return opal_atomic_bool_cmpset_32(addr, oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - /* kernel function includes all necessary memory barriers */ - return opal_atomic_bool_cmpset_32(addr, oldval, newval); -} - #endif #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index c95c3cdc6ad..6ef7776ea6f 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -29,10 +29,10 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -82,10 +82,10 @@ static inline void opal_atomic_isync (void) * *********************************************************************/ -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -93,11 +93,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) @@ -119,10 +121,10 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -130,18 +132,20 @@ static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -149,11 +153,13 @@ static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, " stlxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) @@ -179,11 +185,11 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) return 
ret == 0;
 }

-static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
-                                              int64_t oldval, int64_t newval)
+static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
 {
-    int64_t ret;
+    int64_t prev;
     int tmp;
+    bool ret;

     __asm__ __volatile__ ("1:  ldaxr   %0, [%2]        \n"
                           "    cmp     %0, %3          \n"
@@ -191,11 +197,13 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
                           "    stxr    %w1, %4, [%2]   \n"
                           "    cbnz    %w1, 1b         \n"
                           "2:                          \n"
-                          : "=&r" (ret), "=&r" (tmp)
-                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "=&r" (prev), "=&r" (tmp)
+                          : "r" (addr), "r" (*oldval), "r" (newval)
                           : "cc", "memory");

-    return (ret == oldval);
+    ret = (prev == *oldval);
+    *oldval = prev;
+    return ret;
 }

 static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
@@ -218,11 +226,11 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
    atomic_?mb can be inlined).  Instead, we "inline" them by hand in
    the assembly, meaning there is one function call overhead instead
    of two */
-static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
-                                                  int64_t oldval, int64_t newval)
+static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
 {
-    int64_t ret;
+    int64_t prev;
     int tmp;
+    bool ret;

     __asm__ __volatile__ ("1:  ldaxr   %0, [%2]        \n"
                           "    cmp     %0, %3          \n"
@@ -230,19 +238,21 @@ static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
                           "    stxr    %w1, %4, [%2]   \n"
                           "    cbnz    %w1, 1b         \n"
                           "2:                          \n"
-                          : "=&r" (ret), "=&r" (tmp)
-                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "=&r" (prev), "=&r" (tmp)
+                          : "r" (addr), "r" (*oldval), "r" (newval)
                           : "cc", "memory");

-    return (ret == oldval);
+    ret = (prev == *oldval);
+    *oldval = prev;
+    return ret;
 }

-static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
-                                                  int64_t oldval, int64_t newval)
+static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
 {
-    int64_t ret;
+    int64_t prev;
     int tmp;
+    bool ret;

     __asm__ __volatile__ ("1:  ldxr    %0, [%2]        \n"
                           "    cmp     %0, %3          \n"
@@ -250,11 +260,13 @@ static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
                           "    stlxr   %w1, %4, [%2]   \n"
                           "    cbnz    %w1, 1b         \n"
                           "2:                          \n"
-                          : "=&r" (ret), "=&r" (tmp)
-                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "=&r" (prev), "=&r" (tmp)
+                          : "r" (addr), "r" (*oldval), "r" (newval)
                           : "cc", "memory");

-    return (ret == oldval);
+    ret = (prev == *oldval);
+    *oldval = prev;
+    return ret;
 }

 static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h
index 961ebac0114..c9ac7c7b43c 100644
--- a/opal/include/opal/sys/atomic.h
+++ b/opal/include/opal/sys/atomic.h
@@ -40,11 +40,11 @@
 *
 * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
 * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
- * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly"
- * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly"
+ * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/compare-exchange can be done "atomically"
+ * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/compare-exchange can be done "atomically"
 *
 * Note that for the Atomic math, atomic add/sub may be implemented as
- * C code using opal_atomic_bool_cmpset. The appearance of atomic
+ * C code using opal_atomic_compare_exchange. The appearance of atomic
 * operation will be upheld in these cases.
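 *
 * As an informal illustration (pseudo-code only, not part of the patched
 * file), the new opal_atomic_compare_exchange_strong_<N> functions in every
 * backend follow this C11-style pattern, where T stands for the operand
 * type and the whole body executes atomically:
 *
 *   bool compare_exchange_strong (volatile T *addr, T *oldval, T newval)
 *   {
 *       T prev = *addr;
 *       bool matched = (prev == *oldval);
 *       if (matched) {
 *           *addr = newval;    // store only when the comparison succeeds
 *       }
 *       *oldval = prev;        // the observed value is always written back
 *       return matched;
 *   }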
*/ @@ -107,8 +107,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; *********************************************************************/ #if !OPAL_GCC_INLINE_ASSEMBLY #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 0 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 0 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 0 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 0 @@ -123,8 +123,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0 #else #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 @@ -187,14 +187,14 @@ enum { /* compare and set operations can't really be emulated from software, so if these defines aren't already set, they should be set to 0 now */ -#ifndef OPAL_HAVE_ATOMIC_CMPSET_32 -#define OPAL_HAVE_ATOMIC_CMPSET_32 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0 #endif -#ifndef OPAL_HAVE_ATOMIC_CMPSET_64 -#define OPAL_HAVE_ATOMIC_CMPSET_64 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 #endif -#ifndef OPAL_HAVE_ATOMIC_CMPSET_128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 #endif #ifndef OPAL_HAVE_ATOMIC_LLSC_32 #define OPAL_HAVE_ATOMIC_LLSC_32 0 @@ -270,7 +270,7 @@ void opal_atomic_wmb(void); /********************************************************************** * - * Atomic spinlocks - always inlined, if have atomic cmpset + * Atomic spinlocks - always inlined, if have atomic compare-and-swap * *********************************************************************/ @@ -280,7 +280,7 @@ void opal_atomic_wmb(void); #define OPAL_HAVE_ATOMIC_SPINLOCKS 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) /** * Initialize a lock to value @@ -330,7 +330,7 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 #undef OPAL_HAVE_ATOMIC_SPINLOCKS -#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) #define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1 #endif @@ -353,25 +353,40 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 static inline -#endif bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, int32_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 static inline -#endif bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, int32_t newval); #endif -#if !defined(OPAL_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN) -#define OPAL_HAVE_ATOMIC_CMPSET_64 0 +#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN) +#define 
OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +static inline +#endif +bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); + +#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +static inline +#endif +bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); + +#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +static inline +#endif +bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); + +/* XXX -- DEPRECATED -- XXX -- Legacy cmpset functions */ #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 static inline #endif @@ -397,35 +412,35 @@ bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, #define OPAL_HAVE_ATOMIC_MATH_32 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_CMPSET_32 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 /* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back on cmpset 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) + back on compare-exchange 32, that too will be inline. */ +#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_add_32(volatile int32_t *addr, int delta); -#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_CMPSET_32) +#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value); -#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_CMPSET_32) +#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value); -#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_CMPSET_32) +#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value); /* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back to cmpset 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) + back to compare-exchange 32, that too will be inline. */ +#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); @@ -435,7 +450,7 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); #if ! 
OPAL_HAVE_ATOMIC_MATH_32 /* fix up the value of opal_have_atomic_math_32 to allow for C versions */ #undef OPAL_HAVE_ATOMIC_MATH_32 -#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_CMPSET_32 +#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #endif #ifndef OPAL_HAVE_ATOMIC_MATH_64 @@ -443,35 +458,35 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); #define OPAL_HAVE_ATOMIC_MATH_64 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_CMPSET_64 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 /* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back to cmpset 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) + back to compare-exchange 64, that too will be inline */ +#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta); -#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_CMPSET_64) +#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value); -#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_CMPSET_64) +#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value); -#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_CMPSET_64) +#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value); /* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back to cmpset 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) + back to compare-exchange 64, that too will be inline */ +#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); @@ -481,7 +496,7 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); #if ! OPAL_HAVE_ATOMIC_MATH_64 /* fix up the value of opal_have_atomic_math_64 to allow for C versions */ #undef OPAL_HAVE_ATOMIC_MATH_64 -#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_CMPSET_64 +#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #endif /* provide a size_t add/subtract. 
When in debug mode, make it an
@@ -524,9 +539,26 @@ opal_atomic_sub_size_t(volatile size_t *addr, size_t delta)
 #endif
 #endif

-#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
+#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)

 /* these are always done with inline functions, so always mark as
    static inline */
+
+static inline bool opal_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval,
+                                                           int64_t newval, size_t length);
+static inline bool opal_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval,
+                                                               int64_t newval, size_t length);
+static inline bool opal_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval,
+                                                               int64_t newval, size_t length);
+
+
+static inline bool opal_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval,
+                                                            void *newval);
+static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval,
+                                                                void *newval);
+static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval,
+                                                                void *newval);
+
+/* XXX -- DEPRECATED -- XXX -- Define legacy cmpset functions */
 static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr,
                                               int64_t oldval,
                                               int64_t newval, size_t length);
 static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr,
@@ -546,6 +578,61 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
                                                    void* oldval,
                                                    void* newval);

+/**
+ * Atomic compare exchange of a generic integer type with relaxed semantics.
+ * This macro detects the type of the first argument at compile time and
+ * chooses the correct function to call.
+ *
+ * \note This macro should only be used for integer types.
+ *
+ * @param addr    Address of <TYPE>.
+ * @param oldval  Address of the comparison value <TYPE>.
+ * @param newval  New value to set if the comparison succeeds <TYPE>.
+ *
+ * See opal_atomic_compare_exchange_* for pseudo-code.
+ */
+#define opal_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \
+    opal_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
+                                            (intptr_t)(NEWVAL), sizeof(*(ADDR)) )
+
+/**
+ * Atomic compare exchange of a generic integer type with acquire semantics.
+ * This macro detects the type of the first argument at compile time and
+ * chooses the correct function to call.
+ *
+ * \note This macro should only be used for integer types.
+ *
+ * @param addr    Address of <TYPE>.
+ * @param oldval  Address of the comparison value <TYPE>.
+ * @param newval  New value to set if the comparison succeeds <TYPE>.
+ *
+ * See opal_atomic_compare_exchange_acq_* for pseudo-code.
+ */
+#define opal_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \
+    opal_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
+                                                (intptr_t)(NEWVAL), sizeof(*(ADDR)) )
+
+/**
+ * Atomic compare exchange of a generic integer type with release semantics.
+ * This macro detects the type of the first argument at compile time and
+ * chooses the correct function to call.
+ *
+ * \note This macro should only be used for integer types.
+ *
+ * @param addr    Address of <TYPE>.
+ * @param oldval  Address of the comparison value <TYPE>.
+ * @param newval  New value to set if the comparison succeeds <TYPE>.
+ *
+ * See opal_atomic_compare_exchange_rel_* for pseudo-code.
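 *
 * Illustrative use of this macro family (a sketch, not code from this patch;
 * `counter' is assumed to be a volatile int32_t): atomically increment,
 * retrying with the value observed on failure:
 *
 *   int32_t expected = counter;
 *   while (!opal_atomic_compare_exchange_strong (&counter, &expected, expected + 1)) {
 *       // on failure, `expected' now holds the value that was found in counter
 *   }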
+ */ +#define opal_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) + + + +/* XXX -- DEPRECATED -- XXX -- Define legacy cmpset functions */ + /** * Atomic compare and set of pointer with relaxed semantics. This * macro detect at compile time the type of the first argument and @@ -598,7 +685,7 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ (int64_t)(NEWVAL), sizeof(*(ADDR)) ) -#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */ +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) @@ -606,10 +693,10 @@ static inline void opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length); static inline void opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length); -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ); static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta ); static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); #else diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index c066d831cb9..4337fcb9f74 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -34,10 +34,22 @@ * * Some architectures do not provide support for the 64 bits * atomic operations. 
Until we find a better solution let's just - * undefine all those functions if there is no 64 bit cmpset + * undefine all those functions if there is no 64 bit compare-exchange * *********************************************************************/ -#if OPAL_HAVE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 + +#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operand, name) \ + static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ + { \ + type oldval, newval; \ + do { \ + oldval = *addr; \ + newval = oldval operand value; \ + } while (!opal_atomic_compare_exchange_strong_ ## bits (addr, &oldval, newval)); \ + \ + return newval; \ + } #if !defined(OPAL_HAVE_ATOMIC_SWAP_32) #define OPAL_HAVE_ATOMIC_SWAP_32 1 @@ -55,79 +67,44 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, #if !defined(OPAL_HAVE_ATOMIC_ADD_32) #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t -opal_atomic_add_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval + delta)); - return (oldval + delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add) + #endif /* OPAL_HAVE_ATOMIC_ADD_32 */ #if !defined(OPAL_HAVE_ATOMIC_AND_32) #define OPAL_HAVE_ATOMIC_AND_32 1 -static inline int32_t -opal_atomic_and_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval & value)); - return (oldval & value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and) + #endif /* OPAL_HAVE_ATOMIC_AND_32 */ #if !defined(OPAL_HAVE_ATOMIC_OR_32) #define OPAL_HAVE_ATOMIC_OR_32 1 -static inline int32_t -opal_atomic_or_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval | value)); - return (oldval | value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or) + #endif /* OPAL_HAVE_ATOMIC_OR_32 */ #if !defined(OPAL_HAVE_ATOMIC_XOR_32) #define OPAL_HAVE_ATOMIC_XOR_32 1 -static inline int32_t -opal_atomic_xor_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval ^ value)); - return (oldval ^ value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor) + #endif /* OPAL_HAVE_ATOMIC_XOR_32 */ #if !defined(OPAL_HAVE_ATOMIC_SUB_32) #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t -opal_atomic_sub_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval - delta)); - return (oldval - delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) + #endif /* OPAL_HAVE_ATOMIC_SUB_32 */ -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ -#if OPAL_HAVE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #if !defined(OPAL_HAVE_ATOMIC_SWAP_64) #define OPAL_HAVE_ATOMIC_SWAP_64 1 @@ -144,72 +121,37 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, #if !defined(OPAL_HAVE_ATOMIC_ADD_64) #define OPAL_HAVE_ATOMIC_ADD_64 1 -static inline int64_t -opal_atomic_add_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval + delta)); - return (oldval + delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add) + #endif /* OPAL_HAVE_ATOMIC_ADD_64 */ #if !defined(OPAL_HAVE_ATOMIC_AND_64) 
#define OPAL_HAVE_ATOMIC_AND_64 1
-static inline int64_t
-opal_atomic_and_64(volatile int64_t *addr, int64_t value)
-{
-    int64_t oldval;
-    do {
-        oldval = *addr;
-    } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval & value));
-    return (oldval & value);
-}
+OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and)
+
 #endif /* OPAL_HAVE_ATOMIC_AND_64 */

 #if !defined(OPAL_HAVE_ATOMIC_OR_64)
 #define OPAL_HAVE_ATOMIC_OR_64 1
-static inline int64_t
-opal_atomic_or_64(volatile int64_t *addr, int64_t value)
-{
-    int64_t oldval;
-    do {
-        oldval = *addr;
-    } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval | value));
-    return (oldval | value);
-}
+OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or)
+
 #endif /* OPAL_HAVE_ATOMIC_OR_64 */

 #if !defined(OPAL_HAVE_ATOMIC_XOR_64)
 #define OPAL_HAVE_ATOMIC_XOR_64 1
-static inline int64_t
-opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
-{
-    int64_t oldval;
-    do {
-        oldval = *addr;
-    } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval ^ value));
-    return (oldval ^ value);
-}
+OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor)
+
 #endif /* OPAL_HAVE_ATOMIC_XOR_64 */

 #if !defined(OPAL_HAVE_ATOMIC_SUB_64)
 #define OPAL_HAVE_ATOMIC_SUB_64 1
-static inline int64_t
-opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
-{
-    int64_t oldval;
-    do {
-        oldval = *addr;
-    } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval - delta));
-    return (oldval - delta);
-}
+OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub)
+
 #endif /* OPAL_HAVE_ATOMIC_SUB_64 */

 #else
@@ -222,27 +164,138 @@ opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
 #define OPAL_HAVE_ATOMIC_SUB_64 0
 #endif

-#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
+#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */
+
+#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
+
+#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
+#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics)                         \
+    static inline bool                                                  \
+    opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \
+                                                            int64_t newval, const size_t length) \
+    {                                                                   \
+        switch (length) {                                               \
+        case 4:                                                         \
+            return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \
+                                                           (int32_t *) oldval, (int32_t) newval); \
+        case 8:                                                         \
+            return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, \
+                                                           (int64_t *) oldval, (int64_t) newval); \
+        }                                                               \
+        abort();                                                        \
+    }
+#elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
+#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics)                         \
+    static inline bool                                                  \
+    opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \
+                                                            int64_t newval, const size_t length) \
+    {                                                                   \
+        switch (length) {                                               \
+        case 4:                                                         \
+            return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \
+                                                           (int32_t *) oldval, (int32_t) newval); \
+        }                                                               \
+        abort();                                                        \
+    }
+#else
+#error "Platform does not have required atomic compare-and-swap functionality"
+#endif
+
+OPAL_ATOMIC_DEFINE_CMPXCG_XX(_)
+OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_)
+OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_)
+
+#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
+#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics)                     \
+    static inline bool                                                  \
+    opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \
+    {                                                                   \
+        return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, (int32_t *) oldval, (int32_t) newval); \
+    }
+#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
+#define 
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \ + { \ + return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, (int64_t *) oldval, (int64_t) newval); \ + } +#else +#error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics" +#endif + +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_) +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_) +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) + +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ + +/* XXX -- DEPRECATED -- XXX -- Define legacy cmpset functions */ +#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) +static inline bool opal_atomic_bool_cmpset_32 (volatile int32_t *addr, int32_t oldval, + int32_t newval) +{ + return opal_atomic_compare_exchange_strong_32 (addr, &oldval, newval); +} + +static inline bool opal_atomic_bool_cmpset_acq_32 (volatile int32_t *addr, int32_t oldval, + int32_t newval) +{ + return opal_atomic_compare_exchange_strong_acq_32 (addr, &oldval, newval); +} + +static inline bool opal_atomic_bool_cmpset_rel_32 (volatile int32_t *addr, int32_t oldval, + int32_t newval) +{ + return opal_atomic_compare_exchange_strong_rel_32 (addr, &oldval, newval); +} +#endif + +#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) +static inline bool opal_atomic_bool_cmpset_64 (volatile int64_t *addr, int64_t oldval, + int64_t newval) +{ + return opal_atomic_compare_exchange_strong_64 (addr, &oldval, newval); +} + +static inline bool opal_atomic_bool_cmpset_acq_64 (volatile int64_t *addr, int64_t oldval, + int64_t newval) +{ + return opal_atomic_compare_exchange_strong_acq_64 (addr, &oldval, newval); +} + +static inline bool opal_atomic_bool_cmpset_rel_64 (volatile int64_t *addr, int64_t oldval, + int64_t newval) +{ + return opal_atomic_compare_exchange_strong_rel_64 (addr, &oldval, newval); +} +#endif + +#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128) +static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, + opal_int128_t newval) +{ + return opal_atomic_compare_exchange_strong_128 (addr, &oldval, newval); +} +#endif -#if (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, int64_t newval, size_t length) { switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 case 4: return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr, (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ -#if OPAL_HAVE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 case 8: return opal_atomic_bool_cmpset_64( (volatile int64_t*)addr, (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ } abort(); /* This should never happen, so deliberately abort (hopefully @@ -255,17 +308,17 @@ opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval, int64_t newval, size_t length) { switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 case 4: return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr, (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ -#if OPAL_HAVE_ATOMIC_CMPSET_64 
+#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 case 8: return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr, (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ } /* This should never happen, so deliberately abort (hopefully leaving a corefile for analysis) */ @@ -278,17 +331,17 @@ opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval, int64_t newval, size_t length) { switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 case 4: return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr, (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ -#if OPAL_HAVE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 case 8: return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr, (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ } /* This should never happen, so deliberately abort (hopefully leaving a corefile for analysis) */ @@ -301,10 +354,10 @@ opal_atomic_bool_cmpset_ptr(volatile void* addr, void* oldval, void* newval) { -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval, (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval, (unsigned long) newval); #else @@ -318,10 +371,10 @@ opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, void* oldval, void* newval) { -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 return opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, (unsigned long) newval); #else @@ -334,10 +387,10 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, void* oldval, void* newval) { -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, (unsigned long) newval); #else @@ -345,7 +398,7 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, #endif } -#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */ +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ #if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) @@ -392,7 +445,7 @@ opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) case 4: opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value ); break; -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ #if OPAL_HAVE_ATOMIC_ADD_64 case 8: diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 
2425bbf2509..6521a50ed98 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -33,7 +33,7 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1 @@ -41,7 +41,7 @@ #define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_64 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1 @@ -81,26 +81,20 @@ static inline void opal_atomic_wmb(void) #pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) @@ -135,26 +129,20 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return 
__atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) @@ -191,25 +179,28 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } #elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 /* __atomic version is not lock-free so use legacy __sync version */ -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __sync_bool_compare_and_swap (addr, oldval, newval); + opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } #endif diff --git a/opal/include/opal/sys/ia32/atomic.h b/opal/include/opal/sys/ia32/atomic.h index 35da400ef04..009256f3cce 100644 --- a/opal/include/opal/sys/ia32/atomic.h +++ b/opal/include/opal/sys/ia32/atomic.h @@ -40,7 +40,7 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -84,15 +84,13 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, - int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgl %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "=qm" (ret), "+a" (*oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); @@ -101,8 +99,8 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 -#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 #if OPAL_GCC_INLINE_ASSEMBLY diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 34c3a689fab..31cf96b1f7c 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -40,7 +40,7 @@ *********************************************************************/ 
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 @@ -53,7 +53,7 @@ #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_MATH_64 1 @@ -144,24 +144,25 @@ void opal_atomic_isync(void) #define OPAL_ASM_VALUE64(x) x #endif - -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret; - - __asm__ __volatile__ ( - "1: lwarx %0, 0, %2 \n\t" - " cmpw 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stwcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (ret), "=m" (*addr) - : "r" OPAL_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr) - : "cc", "memory"); + int32_t prev; + bool ret; + + __asm__ __volatile__ ( + "1: lwarx %0, 0, %2 \n\t" + " cmpw 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stwcx. %4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r" (prev), "=m" (*addr) + : "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr) + : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) @@ -195,23 +196,21 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) @@ -258,23 +257,25 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; - - __asm__ __volatile__ ( - "1: ldarx %0, 0, %2 \n\t" - " cmpd 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stdcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) - : "cc", "memory"); + int64_t prev; + bool ret; + + __asm__ __volatile__ ( + "1: ldarx %0, 0, %2 \n\t" + " cmpd 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stdcx. 
%4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r" (prev), "=m" (*addr) + : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) + : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) @@ -303,29 +304,6 @@ static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval) return ret; } -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - bool rc; - - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); -} static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) { @@ -352,9 +330,9 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval #if OPAL_GCC_INLINE_ASSEMBLY -static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { + int64_t prev; int ret; /* @@ -369,55 +347,53 @@ static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, * is very similar to the pure 64 bit version. */ __asm__ __volatile__ ( - "ld r4,%2 \n\t" - "ld r5,%3 \n\t" - "1: ldarx r9, 0, %1 \n\t" - " cmpd 0, r9, r4 \n\t" + "ld r4,%3 \n\t" + "ld r5,%4 \n\t" + "1: ldarx %1, 0, %2 \n\t" + " cmpd 0, %1, r4 \n\t" " bne- 2f \n\t" - " stdcx. r5, 0, %1 \n\t" + " stdcx. r5, 0, %2 \n\t" " bne- 1b \n\t" "2: \n\t" - "xor r5,r4,r9 \n\t" + "xor r5,r4,%1 \n\t" "subfic r9,r5,0 \n\t" "adde %0,r9,r5 \n\t" - : "=&r" (ret) + : "=&r" (ret), "+r" (prev) : "r"OPAL_ASM_ADDR(addr), - "m"(oldval), "m"(newval) + "m"(*oldval), "m"(newval) : "r4", "r5", "r9", "cc", "memory"); - - return ret; + *oldval = prev; + return (bool) ret; } +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#endif /* OPAL_ASM_SUPPORT_64BIT */ + +#if OPAL_GCC_INLINE_ASSEMBLY + /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int rc; + bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* OPAL_ASM_SUPPORT_64BIT */ - - -#if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \ diff --git a/opal/include/opal/sys/sparcv9/atomic.h b/opal/include/opal/sys/sparcv9/atomic.h index 098cf875ce9..c79e32b1ebb 100644 --- a/opal/include/opal/sys/sparcv9/atomic.h +++ b/opal/include/opal/sys/sparcv9/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,9 +41,9 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** @@ -82,50 +85,49 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) - * - * if (*(reg(rs1)) == reg(rs2) ) - * swap reg(rd), *(reg(rs1)) - * else - * reg(rd) = *(reg(rs1)) - */ - - int32_t ret = newval; - - __asm__ __volatile__("casa [%1] " ASI_P ", %2, %0" - : "+r" (ret) - : "r" (addr), "r" (oldval)); - return (ret == oldval); + /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) + * + * if (*(reg(rs1)) == reg(rs2) ) + * swap reg(rd), *(reg(rs1)) + * else + * reg(rd) = *(reg(rs1)) + */ + + int32_t prev = newval; + bool ret; + + __asm__ __volatile__("casa [%1] " ASI_P ", %2, %0" + : "+r" (prev) + : "r" (addr), "r" (*oldval)); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - bool rc; + bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); - opal_atomic_rmb(); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); + opal_atomic_rmb(); - return rc; + return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } #if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -134,18 +136,20 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, * else * reg(rd) = *(reg(rs1)) */ - int64_t ret = newval; - - __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" - : "+r" (ret) - : "r" (addr), "r" (oldval)); - return (ret == oldval); + int64_t prev = newval; + bool ret; + + __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" + : "+r" (prev) + : "r" (addr), "r" (*oldval)); + ret = (prev == *oldval); + *oldval = prev; + return ret; } #else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -155,40 +159,41 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, * reg(rd) = 
*(reg(rs1)) * */ - long long ret = newval; + int64_t prev = newval; + bool ret; __asm__ __volatile__( "ldx %0, %%g1 \n\t" /* g1 = ret */ "ldx %2, %%g2 \n\t" /* g2 = oldval */ "casxa [%1] " ASI_P ", %%g2, %%g1 \n\t" "stx %%g1, %0 \n" - : "+m"(ret) - : "r"(addr), "m"(oldval) + : "+m"(prev) + : "r"(addr), "m"(*oldval) : "%g1", "%g2" ); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } #endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - bool rc; + bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); + opal_atomic_rmb(); - return rc; + return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h index 0a95048079f..f8557a69451 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -53,25 +53,19 @@ static inline void opal_atomic_wmb(void) * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval);} - -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); + int32_t prev = __sync_val_compare_and_swap (add, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 + #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -106,25 +100,19 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) #if OPAL_ASM_SYNC_HAVE_64BIT -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval);} - +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline 
bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval); + int64_t prev = __sync_val_compare_and_swap (add, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } +#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 +#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 + #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) @@ -159,13 +147,16 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) #endif #if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval); + opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 #endif diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index b56dd939b49..a9d881f6552 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -40,9 +40,9 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** * @@ -82,14 +82,13 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgl %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "=qm" (ret), "+a" (*oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); @@ -98,19 +97,18 @@ static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 -#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgq %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) + : "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr)) : "q"(newval) : "memory", "cc" ); @@ -120,13 +118,12 @@ static inline bool opal_atomic_bool_cmpset_64( 
volatile int64_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_64 opal_atomic_bool_cmpset_64 -#define opal_atomic_bool_cmpset_rel_64 opal_atomic_bool_cmpset_64 +#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 +#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 #if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, - opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval) { unsigned char ret; @@ -135,15 +132,14 @@ static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, op * at the address is returned in eax:edx. */ __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" "sete %0 \n\t" - : "=qm" (ret) - : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), - "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) - : "memory", "cc"); + : "=qm" (ret), "+a" (((int64_t *)oldval)[0]), "+d" (((int64_t *)oldval)[1]) + : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]) + : "memory", "cc", "eax", "edx"); return (bool) ret; } -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 6976d0b555f..91260f4c5cd 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -158,6 +158,23 @@ static inline bool opal_thread_cmpset_bool_ ## suffix (volatile addr_type *addr, return false; \ } +#define OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \ +static inline bool opal_thread_compare_exchange_strong_ ## suffix (volatile addr_type *addr, type *compare, type value) \ +{ \ + if (OPAL_UNLIKELY(opal_using_threads())) { \ + return opal_atomic_compare_exchange_strong_ ## suffix ((volatile type *) addr, compare, value); \ + } \ + \ + if ((type) *addr == *compare) { \ + ((type *) addr)[0] = value; \ + return true; \ + } \ + \ + *compare = ((type *) addr)[0]; \ + \ + return false; \ +} + #define OPAL_THREAD_DEFINE_ATOMIC_SWAP(type, addr_type, suffix) \ static inline type opal_thread_swap_ ## suffix (volatile addr_type *ptr, type newvalue) \ { \ @@ -180,6 +197,8 @@ OPAL_THREAD_DEFINE_ATOMIC_SUB(int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_SUB(size_t, size_t) OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int32_t, int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_CMPSET(void *, intptr_t, ptr) +OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int32_t, int32_t, 32) +OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(void *, intptr_t, ptr) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) @@ -204,9 +223,15 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) #define OPAL_THREAD_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 #define OPAL_ATOMIC_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 +#define OPAL_THREAD_BOOL_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 +#define OPAL_ATOMIC_BOOL_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 + #define OPAL_THREAD_BOOL_CMPSET_PTR(x, y, z) opal_thread_cmpset_bool_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) #define OPAL_ATOMIC_BOOL_CMPSET_PTR OPAL_THREAD_BOOL_CMPSET_PTR +#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR(x, y, z) opal_thread_compare_exchange_strong_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) +#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR + #define OPAL_THREAD_SWAP_32 opal_thread_swap_32 #define OPAL_ATOMIC_SWAP_32 opal_thread_swap_32 @@ -221,6 +246,7 @@ OPAL_THREAD_DEFINE_ATOMIC_AND(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_OR(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_XOR(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int64_t, int64_t, 64) +OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int64_t, int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) #define OPAL_THREAD_ADD64 opal_thread_add_64 @@ -238,6 +264,9 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) #define OPAL_THREAD_BOOL_CMPSET_64 opal_thread_cmpset_bool_64 #define OPAL_ATOMIC_BOOL_CMPSET_64 opal_thread_cmpset_bool_64 +#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 +#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 + #define OPAL_THREAD_SWAP_64 opal_thread_swap_64 #define OPAL_ATOMIC_SWAP_64 opal_thread_swap_64 diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c index 3e467ff7412..75288feee7f 100644 --- a/test/asm/atomic_cmpset.c +++ b/test/asm/atomic_cmpset.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +57,13 @@ int64_t old64 = 0; int64_t new64 = 0; #endif +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +volatile opal_int128_t vol128; +opal_int128_t val128; +opal_int128_t old128; +opal_int128_t new128; +#endif + volatile int volint = 0; int valint = 0; int oldint = 0; @@ -99,124 +109,165 @@ int main(int argc, char *argv[]) /* -- cmpset 32-bit tests -- */ vol32 = 42, old32 = 42, new32 = 50; - assert(opal_atomic_bool_cmpset_32(&vol32, old32, new32) == 1); + assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == true); opal_atomic_rmb(); assert(vol32 == new32); + assert(old32 == 42); vol32 = 42, old32 = 420, new32 = 50; - assert(opal_atomic_bool_cmpset_32(&vol32, old32, new32) == 0); + assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == false); opal_atomic_rmb(); assert(vol32 == 42); + assert(old32 == 42); vol32 = 42, old32 = 42, new32 = 50; - assert(opal_atomic_bool_cmpset_acq_32(&vol32, old32, new32) == 1); + assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == true); assert(vol32 == new32); + assert(old32 == 42); vol32 = 42, old32 = 420, new32 = 50; - assert(opal_atomic_bool_cmpset_acq_32(&vol32, old32, new32) == 0); + assert(opal_atomic_compare_exchange_strong_acq_32 (&vol32, &old32, new32) == false); assert(vol32 == 42); + assert(old32 == 42); vol32 = 42, old32 = 42, new32 = 50; - assert(opal_atomic_bool_cmpset_rel_32(&vol32, old32, new32) == 1); + assert(opal_atomic_compare_exchange_strong_rel_32 (&vol32, &old32, new32) == true); opal_atomic_rmb(); assert(vol32 == new32); + assert(old32 == 42); vol32 = 42, old32 = 420, new32 = 50; - assert(opal_atomic_bool_cmpset_rel_32(&vol32, old32, new32) == 0); + assert(opal_atomic_compare_exchange_strong_rel_32 (&vol32, &old32, new32) == false); opal_atomic_rmb(); assert(vol32 == 42); + assert(old32 == 42); /* -- cmpset 64-bit tests -- */ #if OPAL_HAVE_ATOMIC_MATH_64 vol64 = 42, old64 = 42, new64 = 50; - assert(1 == opal_atomic_bool_cmpset_64(&vol64, old64, new64)); + assert(opal_atomic_compare_exchange_strong_64 (&vol64, &old64, new64) == true); opal_atomic_rmb(); assert(new64 == vol64); + assert(old64 == 42); vol64 = 42, old64 = 420, new64 = 50; - assert(opal_atomic_bool_cmpset_64(&vol64, old64, new64) == 0); + assert(opal_atomic_compare_exchange_strong_64 (&vol64, &old64, new64) == false); opal_atomic_rmb(); assert(vol64 == 42); + assert(old64 == 42); vol64 = 42, old64 = 42, new64 = 50; - assert(opal_atomic_bool_cmpset_acq_64(&vol64, old64, new64) == 1); + assert(opal_atomic_compare_exchange_strong_acq_64 (&vol64, &old64, new64) == true); assert(vol64 == new64); + assert(old64 == 42); vol64 = 42, old64 = 420, new64 = 50; - assert(opal_atomic_bool_cmpset_acq_64(&vol64, old64, new64) == 0); + assert(opal_atomic_compare_exchange_strong_acq_64 (&vol64, &old64, new64) == false); assert(vol64 == 42); + assert(old64 == 42); vol64 = 42, old64 = 42, new64 = 50; - assert(opal_atomic_bool_cmpset_rel_64(&vol64, old64, new64) == 1); + assert(opal_atomic_compare_exchange_strong_rel_64 (&vol64, &old64, new64) == true); opal_atomic_rmb(); assert(vol64 == new64); + assert(old64 == 42); vol64 = 42, old64 = 420, new64 = 50; - assert(opal_atomic_bool_cmpset_rel_64(&vol64, old64, new64) == 0); + assert(opal_atomic_compare_exchange_strong_rel_64 (&vol64, &old64, new64) == false); opal_atomic_rmb(); assert(vol64 == 42); + assert(old64 == 42); #endif + 
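The 32- and 64-bit assertions above exercise the behavioural difference between the old cmpset API and the new one: on a failed exchange the expected value is overwritten with the value actually found in memory. The following single-threaded sketch (illustrative only, not part of the patch, and deliberately non-atomic) spells out the contract those asserts rely on:

    #include <stdbool.h>
    #include <stdint.h>

    /* reference semantics of opal_atomic_compare_exchange_strong_32, minus the
     * atomicity: swap on match, report the observed value on mismatch so the
     * caller never has to re-read the location itself */
    static bool sketch_compare_exchange_32 (volatile int32_t *addr, int32_t *expected, int32_t desired)
    {
        int32_t observed = *addr;
        if (observed == *expected) {
            *addr = desired;      /* success: new value stored */
            return true;
        }
        *expected = observed;     /* failure: expected now holds what was read */
        return false;
    }

This is why the failure cases can assert both vol32 == 42 and old32 == 42: the stale expected value (420) is replaced by the value read from the target location.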
+ /* -- cmpset 128-bit tests -- */ + +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 + vol128 = 42, old128 = 42, new128 = 50; + assert(opal_atomic_compare_exchange_strong_128 (&vol128, &old128, new128) == true); + opal_atomic_rmb(); + assert(new128 == vol128); + assert(old128 == 42); + + vol128 = 42, old128 = 420, new128 = 50; + assert(opal_atomic_compare_exchange_strong_128 (&vol128, &old128, new128) == false); + opal_atomic_rmb(); + assert(vol128 == 42); + assert(old128 == 42); +#endif + /* -- cmpset int tests -- */ volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_bool_cmpset(&volint, oldint, newint) == 1); + assert(opal_atomic_compare_exchange_strong (&volint, &oldint, newint) == true); opal_atomic_rmb(); - assert(volint ==newint); + assert(volint == newint); + assert(oldint == 42); volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_bool_cmpset(&volint, oldint, newint) == 0); + assert(opal_atomic_compare_exchange_strong (&volint, &oldint, newint) == false); opal_atomic_rmb(); assert(volint == 42); + assert(oldint == 42); volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_bool_cmpset_acq(&volint, oldint, newint) == 1); + assert(opal_atomic_compare_exchange_strong_acq (&volint, &oldint, newint) == true); assert(volint == newint); + assert(oldint == 42); volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_bool_cmpset_acq(&volint, oldint, newint) == 0); + assert(opal_atomic_compare_exchange_strong_acq (&volint, &oldint, newint) == false); assert(volint == 42); + assert(oldint == 42); volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_bool_cmpset_rel(&volint, oldint, newint) == 1); + assert(opal_atomic_compare_exchange_strong_rel (&volint, &oldint, newint) == true); opal_atomic_rmb(); assert(volint == newint); + assert(oldint == 42); volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_bool_cmpset_rel(&volint, oldint, newint) == 0); + assert(opal_atomic_compare_exchange_strong_rel (&volint, &oldint, newint) == false); opal_atomic_rmb(); assert(volint == 42); + assert(oldint == 42); /* -- cmpset ptr tests -- */ volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_ptr(&volptr, oldptr, newptr) == 1); + assert(opal_atomic_compare_exchange_strong_ptr (&volptr, &oldptr, newptr) == true); opal_atomic_rmb(); assert(volptr == newptr); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_ptr(&volptr, oldptr, newptr) == 0); + assert(opal_atomic_compare_exchange_strong_ptr (&volptr, &oldptr, newptr) == false); opal_atomic_rmb(); assert(volptr == (void *) 42); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_acq_ptr(&volptr, oldptr, newptr) == 1); + assert(opal_atomic_compare_exchange_strong_acq_ptr (&volptr, &oldptr, newptr) == true); assert(volptr == newptr); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_acq_ptr(&volptr, oldptr, newptr) == 0); + assert(opal_atomic_compare_exchange_strong_acq_ptr (&volptr, &oldptr, newptr) == false); assert(volptr == (void *) 42); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_rel_ptr(&volptr, oldptr, newptr) == 1); + assert(opal_atomic_compare_exchange_strong_rel_ptr (&volptr, &oldptr, newptr) == true); opal_atomic_rmb(); assert(volptr == 
newptr); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_rel_ptr(&volptr, oldptr, newptr) == 0); + assert(opal_atomic_compare_exchange_strong_rel_ptr (&volptr, &oldptr, newptr) == false); opal_atomic_rmb(); assert(volptr == (void *) 42); + assert(oldptr == (void *) 42); /* -- add_32 tests -- */ From 11bb8b09a03aecf147598cea06edee12c556aa75 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 8 Nov 2017 10:23:42 -0700 Subject: [PATCH 2/5] opal/class: use new compare-and-swap functions Signed-off-by: Nathan Hjelm --- opal/class/opal_fifo.h | 49 ++++++++++++++++++------------------- opal/class/opal_lifo.h | 45 ++++++++++++++++++++-------------- opal/threads/thread_usage.h | 4 +-- 3 files changed, 52 insertions(+), 46 deletions(-) diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index a9da88f459a..ad67c77a6ff 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reseved. * $COPYRIGHT$ * @@ -85,14 +85,12 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_list_item_t *item) { - opal_counted_pointer_t tail; + opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value}; item->opal_list_next = &fifo->opal_fifo_ghost; do { - tail.value = fifo->opal_fifo_tail.value; - - if (opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, item)) { + if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) { break; } } while (1); @@ -102,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, if (&fifo->opal_fifo_ghost == tail.data.item) { /* update the head */ opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; - opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); + opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item); } else { /* update previous item */ tail.data.item->opal_list_next = item; @@ -116,29 +114,28 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; - opal_counted_pointer_t head, tail; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; + opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail; do { - head.value = fifo->opal_fifo_head.value; tail.value = fifo->opal_fifo_tail.value; opal_atomic_rmb (); item = (opal_list_item_t *) head.data.item; next = (opal_list_item_t *) item->opal_list_next; - if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) { + if (ghost == tail.data.item && ghost == item) { return NULL; } /* the head or next pointer are in an inconsistent state. keep looping. 
*/ - if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item && - &fifo->opal_fifo_ghost == next) { + if (tail.data.item != item && ghost != tail.data.item && ghost == next) { + head.value = fifo->opal_fifo_head.value; continue; } /* try popping the head */ - if (opal_update_counted_pointer (&fifo->opal_fifo_head, head, next)) { + if (opal_update_counted_pointer (&fifo->opal_fifo_head, &head, next)) { break; } } while (1); @@ -146,14 +143,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb (); /* check for tail and head consistency */ - if (&fifo->opal_fifo_ghost == next) { + if (ghost == next) { /* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */ - if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) { + if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, ghost)) { /* tail was changed by a push operation. wait for the item's next pointer to be se then * update the head */ /* wait for next pointer to be updated by push */ - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } @@ -166,7 +163,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) head.value = fifo->opal_fifo_head.value; next = (opal_list_item_t *) item->opal_list_next; - assert (&fifo->opal_fifo_ghost == head.data.item); + assert (ghost == head.data.item); fifo->opal_fifo_head.data.item = next; opal_atomic_wmb (); @@ -215,14 +212,14 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; #if OPAL_HAVE_ATOMIC_LLSC_PTR /* use load-linked store-conditional to avoid ABA issues */ do { item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); - if (&fifo->opal_fifo_ghost == item) { - if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) { + if (ghost == item) { + if (ghost == fifo->opal_fifo_tail.data.item) { return NULL; } @@ -239,7 +236,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) #else /* protect against ABA issues by "locking" the head */ do { - if (opal_atomic_bool_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) { + if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) { break; } @@ -249,7 +246,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb(); item = opal_fifo_head (fifo); - if (&fifo->opal_fifo_ghost == item) { + if (ghost == item) { fifo->opal_fifo_head.data.counter = 0; return NULL; } @@ -258,9 +255,11 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) fifo->opal_fifo_head.data.item = next; #endif - if (&fifo->opal_fifo_ghost == next) { - if (!opal_atomic_bool_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) { - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + if (ghost == next) { + void *tmp = item; + + if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index 0d8512fe0e8..e5a3f9110cb 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -12,7 +12,7 @@ * All rights reserved. 
* Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reseved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -65,13 +65,13 @@ typedef union opal_counted_pointer_t opal_counted_pointer_t; * to allow the upper level to detect if this element is the first one in the * list (if the list was empty before this operation). */ -static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t old, +static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *old, opal_list_item_t *item) { opal_counted_pointer_t new_p; new_p.data.item = item; - new_p.data.counter = old.data.counter + 1; - return opal_atomic_bool_cmpset_128 (&addr->value, old.value, new_p.value); + new_p.data.counter = old->data.counter + 1; + return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value); } #endif @@ -119,14 +119,14 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, opal_list_item_t *item) { - do { - opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + do { item->opal_list_next = next; opal_atomic_wmb (); /* to protect against ABA issues it is sufficient to only update the counter in pop */ - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) { return next; } /* DO some kind of pause to release the bus */ @@ -141,17 +141,17 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_counted_pointer_t old_head; opal_list_item_t *item; - do { - - old_head.data.counter = lifo->opal_lifo_head.data.counter; - opal_atomic_rmb (); - old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item; + old_head.data.counter = lifo->opal_lifo_head.data.counter; + opal_atomic_rmb (); + old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + do { + item = (opal_list_item_t *) old_head.data.item; if (item == &lifo->opal_lifo_ghost) { return NULL; } - if (opal_update_counted_pointer (&lifo->opal_lifo_head, old_head, + if (opal_update_counted_pointer (&lifo->opal_lifo_head, &old_head, (opal_list_item_t *) item->opal_list_next)) { opal_atomic_wmb (); item->opal_list_next = NULL; @@ -169,13 +169,15 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, opal_list_item_t *item) { + opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + /* item free acts as a mini lock to avoid ABA problems */ item->item_free = 1; + do { - opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; item->opal_list_next = next; opal_atomic_wmb(); - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) { opal_atomic_wmb (); /* now safe to pop this item */ item->item_free = 0; @@ -236,8 +238,11 @@ static inline opal_list_item_t *opal_lifo_pop_atomic 
(opal_lifo_t* lifo) */ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) { - opal_list_item_t *item; - while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) { + opal_list_item_t *item, *head, *ghost = &lifo->opal_lifo_ghost; + + item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + + while (item != ghost) { /* ensure it is safe to pop the head */ if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) { continue; @@ -245,14 +250,16 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_atomic_wmb (); + head = item; /* try to swap out the head pointer */ - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, item, - (void *) item->opal_list_next)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &head, + (void *) item->opal_list_next)) { break; } /* NTH: don't need another atomic here */ item->item_free = 0; + item = head; /* Do some kind of pause to release the bus */ } diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 91260f4c5cd..4a41a1dba66 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -223,8 +223,8 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) #define OPAL_THREAD_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 #define OPAL_ATOMIC_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 -#define OPAL_THREAD_BOOL_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 -#define OPAL_ATOMIC_BOOL_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 +#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 +#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 #define OPAL_THREAD_BOOL_CMPSET_PTR(x, y, z) opal_thread_cmpset_bool_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) #define OPAL_ATOMIC_BOOL_CMPSET_PTR OPAL_THREAD_BOOL_CMPSET_PTR From 9d0b3fe9f43c8ac98b361901298f5085ab9f0252 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 8 Nov 2017 14:22:48 -0700 Subject: [PATCH 3/5] opal/asm: remove opal_atomic_bool_cmpset functions This commit eliminates the old opal_atomic_bool_cmpset functions. They have been replaced by the opal_atomic_compare_exchange_strong functions. 
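Because the new functions write the observed value back through the expected-value pointer on failure, converted retry loops no longer re-read the location by hand. A minimal sketch of the pattern used throughout this series (the helper name is illustrative; the include path assumes the usual "opal/sys/atomic.h" header):

    #include <stdint.h>
    #include "opal/sys/atomic.h"

    /* illustrative helper: atomically OR `flag` into *addr with the new API.
     * A failed exchange refreshes `expected`, so the loop body stays empty. */
    static inline void example_atomic_set_flag (volatile int32_t *addr, int32_t flag)
    {
        int32_t expected = *addr;
        while (!opal_atomic_compare_exchange_strong_32 (addr, &expected, expected | flag)) {
            /* `expected` now holds the value observed in *addr; just retry */
        }
    }

This is the shape the osc/rdma flag updates and the request-completion conversions below take.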
Signed-off-by: Nathan Hjelm --- ompi/datatype/ompi_datatype_args.c | 5 +- ompi/group/group.h | 4 +- ompi/mca/mtl/portals4/mtl_portals4_flowctl.c | 5 +- ompi/mca/osc/pt2pt/osc_pt2pt.h | 16 +- ompi/mca/osc/pt2pt/osc_pt2pt_frag.c | 4 +- ompi/mca/osc/pt2pt/osc_pt2pt_frag.h | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c | 7 +- ompi/mca/osc/rdma/osc_rdma_active_target.c | 6 +- ompi/mca/osc/rdma/osc_rdma_lock.h | 3 +- ompi/mca/osc/rdma/osc_rdma_peer.h | 5 +- ompi/mca/osc/rdma/osc_rdma_types.h | 10 +- ompi/mca/osc/sm/osc_sm_active_target.c | 15 +- ompi/request/req_wait.c | 30 ++- ompi/request/request.h | 10 +- opal/include/opal/sys/atomic.h | 118 ++--------- opal/include/opal/sys/atomic_impl.h | 199 ++---------------- opal/mca/btl/openib/btl_openib_endpoint.c | 19 +- opal/mca/btl/openib/btl_openib_endpoint.h | 20 +- opal/mca/btl/ugni/btl_ugni_smsg.c | 3 +- opal/mca/btl/vader/btl_vader_fifo.h | 8 +- opal/runtime/opal_cr.c | 21 +- opal/threads/thread_usage.h | 27 --- oshmem/runtime/oshmem_shmem_finalize.c | 3 +- 23 files changed, 139 insertions(+), 401 deletions(-) diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index add69f9bf70..53aaa00b7ee 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -487,7 +487,8 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, void* recursive_buffer; if (NULL == packed_description) { - if (opal_atomic_bool_cmpset (&datatype->packed_description, NULL, (void *) 1)) { + void *_tmp_ptr = NULL; + if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) { if( ompi_datatype_is_predefined(datatype) ) { packed_description = malloc(2 * sizeof(int)); } else if( NULL == args ) { diff --git a/ompi/group/group.h b/ompi/group/group.h index 4f303c34186..30664f8a4e0 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -14,7 +14,7 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
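The ompi_datatype_get_pack_description hunk above shows the other recurring idiom in this series: a one-shot swap now needs a writable expected value, so a stack temporary is introduced instead of passing a constant. A hedged sketch of that idiom (function and parameter names are illustrative, not taken from the patch):

    #include <stdbool.h>
    #include <stddef.h>
    #include "opal/sys/atomic.h"

    /* illustrative: claim an empty pointer slot exactly once. The temporary is
     * required because the expected value is passed by address and is
     * overwritten with the observed contents when the claim fails. */
    static inline bool example_try_claim (volatile void **slot, void *token)
    {
        void *expected = NULL;   /* only succeed if the slot is still empty */
        return opal_atomic_compare_exchange_strong_ptr (slot, (void *) &expected, token);
    }

On failure the caller can inspect `expected` to see which value won the race, something the old boolean cmpset could not report.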
@@ -356,7 +356,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, ompi_proc_t *real_proc = (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc)); - if (opal_atomic_bool_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) { + if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) { OBJ_RETAIN(real_proc); } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index 50cf3c79b72..c68e30f6700 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -296,9 +296,10 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me, int ompi_mtl_portals4_flowctl_trigger(void) { + int32_t _tmp_value = 0; int ret; - if (true == OPAL_ATOMIC_BOOL_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) { + if (true == OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ompi_mtl_portals4.flowctl.flowctl_active, &_tmp_value, 1)) { /* send trigger to root */ ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 660b7c3246a..77eabcc5922 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -145,15 +145,11 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value) { - int32_t peer_flags, new_flags; - do { - peer_flags = peer->flags; - if (value) { - new_flags = peer_flags | flag; - } else { - new_flags = peer_flags & ~flag; - } - } while (!OPAL_ATOMIC_BOOL_CMPSET_32 (&peer->flags, peer_flags, new_flags)); + if (value) { + OPAL_ATOMIC_OR32 (&peer->flags, flag); + } else { + OPAL_ATOMIC_AND32 (&peer->flags, ~flag); + } } static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c index 51a31181a88..632495eb234 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -105,7 +105,7 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om "osc pt2pt: flushing active fragment to target %d. 
pending: %d", active_frag->target, active_frag->pending)); - if (opal_atomic_bool_cmpset (&peer->active_frag, active_frag, NULL)) { + if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) { if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) { /* communication going on while synchronizing; this is an rma usage bug */ return OMPI_ERR_RMA_SYNC; diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index cddc3c3f07f..0c16cfe690c 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -67,7 +67,7 @@ static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (omp /* to ensure ordering flush the buffer on the peer */ curr = peer->active_frag; - if (NULL != curr && opal_atomic_bool_cmpset (&peer->active_frag, curr, NULL)) { + if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &curr, NULL)) { /* If there's something pending, the pending finish will start the buffer. Otherwise, we need to start it now. */ int ret = ompi_osc_pt2pt_frag_finish (module, curr); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 34059a0851c..9d0210c2f8a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -744,14 +744,13 @@ static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, in break; } - if (opal_atomic_bool_cmpset_32 (&module->lock_status, lock_status, lock_status + 1)) { + if (opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) { break; } - - lock_status = module->lock_status; } while (1); } else { - queue = !opal_atomic_bool_cmpset_32 (&module->lock_status, 0, -1); + int32_t _tmp_value = 0; + queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1); } if (queue) { diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index 30e160e93f9..b045ebf3ec6 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. 
@@ -285,7 +285,9 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result); assert (OMPI_SUCCESS == ret); } else { - result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); + ompi_osc_rdma_lock_t _tmp_value = 0; + + result = !ompi_osc_rdma_lock_compare_exchange ((osc_rdma_counter_t *) target, &_tmp_value, 1 + (osc_rdma_counter_t) my_rank); } if (OPAL_LIKELY(0 == result)) { diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 4352c5cbf1c..8c35018badf 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -17,7 +17,8 @@ static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock) { - return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + ompi_osc_rdma_lock_t _tmp_value = 0; + return !ompi_osc_rdma_lock_compare_exchange (lock, &_tmp_value, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); } static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock) diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index c31f27a62cd..21aeecb4078 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -201,14 +201,13 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer, int32_t flags; opal_atomic_mb (); + flags = peer->flags; do { - flags = peer->flags; if (flags & flag) { return false; } - - } while (!OPAL_THREAD_BOOL_CMPSET_32 (&peer->flags, flags, flags | flag)); + } while (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&peer->flags, &flags, flags | flag)); return true; } diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index d6dfb0d0188..4fed013cbf4 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -54,12 +54,12 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value return new; } -static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value) +static inline int ompi_osc_rdma_lock_compare_exchange (volatile int64_t *p, int64_t *comp, int64_t value) { int ret; opal_atomic_mb (); - ret = opal_atomic_bool_cmpset_64 (p, comp, value); + ret = opal_atomic_compare_exchange_strong_64 (p, comp, value); opal_atomic_mb (); return ret; @@ -83,12 +83,12 @@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value return new; } -static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value) +static inline int ompi_osc_rdma_lock_compare_exchange (volatile int32_t *p, int32_t *comp, int32_t value) { int ret; opal_atomic_mb (); - ret = opal_atomic_bool_cmpset_32 (p, comp, value); + ret = opal_atomic_compare_exchange_strong_32 (p, comp, value); opal_atomic_mb (); return ret; diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 083992d8331..6c1e00263f2 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -130,10 +130,11 @@ ompi_osc_sm_start(struct ompi_group_t *group, ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; int my_rank = ompi_comm_rank (module->comm); + void *_tmp_ptr = NULL; OBJ_RETAIN(group); - if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, NULL, group)) { + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) { OBJ_RELEASE(group); return OMPI_ERR_RMA_SYNC; } @@ -160,9 +161,11 @@ ompi_osc_sm_start(struct ompi_group_t *group, opal_atomic_rmb (); - do { - old = module->posts[my_rank][rank_byte]; - } while (!opal_atomic_bool_cmpset ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit)); +#if OPAL_HAVE_ATOMIC_MATH_64 + opal_atomic_xor_64 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); +#else + opal_atomic_xor_32 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); +#endif } free (ranks); @@ -185,7 +188,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win) opal_atomic_mb(); group = module->start_group; - if (NULL == group || !OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, group, NULL)) { + if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) { return OMPI_ERR_RMA_SYNC; } diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index 233d1dd30d0..e4d4d5e68a6 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -100,6 +100,8 @@ int ompi_request_default_wait_any(size_t count, num_requests_null_inactive = 0; for (i = 0; i < count; i++) { + void *_tmp_ptr = REQUEST_PENDING; + request = requests[i]; /* Check for null or completed persistent request. For @@ -110,7 +112,7 @@ int ompi_request_default_wait_any(size_t count, continue; } - if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { + if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync) ) { assert(REQUEST_COMPLETE(request)); completed = i; *index = i; @@ -136,6 +138,8 @@ int ompi_request_default_wait_any(size_t count, * user. */ for(i = completed-1; (i+1) > 0; i--) { + void *tmp_ptr = &sync; + request = requests[i]; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -146,7 +150,7 @@ int ompi_request_default_wait_any(size_t count, * Otherwise, the request has been completed meanwhile, and it * has been atomically marked as REQUEST_COMPLETE. */ - if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &tmp_ptr, REQUEST_PENDING) ) { *index = i; } } @@ -211,6 +215,8 @@ int ompi_request_default_wait_all( size_t count, WAIT_SYNC_INIT(&sync, count); rptr = requests; for (i = 0; i < count; i++) { + void *_tmp_ptr = REQUEST_PENDING; + request = *rptr++; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -218,7 +224,7 @@ int ompi_request_default_wait_all( size_t count, continue; } - if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { failed++; } @@ -246,6 +252,8 @@ int ompi_request_default_wait_all( size_t count, if (MPI_STATUSES_IGNORE != statuses) { /* fill out status and free request if required */ for( i = 0; i < count; i++, rptr++ ) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -260,7 +268,7 @@ int ompi_request_default_wait_all( size_t count, * mark the request as pending then it is neither failed nor complete, and * we must stop altering it. */ - if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) { /* * Per MPI 2.2 p 60: * Allows requests to be marked as MPI_ERR_PENDING if they are @@ -306,6 +314,8 @@ int ompi_request_default_wait_all( size_t count, int rc; /* free request if required */ for( i = 0; i < count; i++, rptr++ ) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -320,7 +330,7 @@ int ompi_request_default_wait_all( size_t count, /* If the request is still pending due to a failed request * then skip it in this loop. 
*/ - if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) { /* * Per MPI 2.2 p 60: * Allows requests to be marked as MPI_ERR_PENDING if they are @@ -398,6 +408,8 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive = 0; num_requests_done = 0; for (size_t i = 0; i < count; i++, rptr++) { + void *_tmp_ptr = REQUEST_PENDING; + request = *rptr; /* * Check for null or completed persistent request. @@ -407,7 +419,7 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive++; continue; } - indices[i] = OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync); + indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync); if( !indices[i] ) { /* If the request is completed go ahead and mark it as such */ assert( REQUEST_COMPLETE(request) ); @@ -434,6 +446,8 @@ int ompi_request_default_wait_some(size_t count, rptr = requests; num_requests_done = 0; for (size_t i = 0; i < count; i++, rptr++) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -454,7 +468,7 @@ int ompi_request_default_wait_some(size_t count, */ if( !indices[i] ){ indices[num_requests_done++] = i; - } else if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + } else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) { indices[num_requests_done++] = i; } } diff --git a/ompi/request/request.h b/ompi/request/request.h index 8f472c1f5cd..5a1c02c4b65 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -13,7 +13,7 @@ * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -396,10 +396,12 @@ static inline int ompi_request_free(ompi_request_t** request) static inline void ompi_request_wait_completion(ompi_request_t *req) { if (opal_using_threads () && !REQUEST_COMPLETE(req)) { + void *_tmp_ptr = REQUEST_PENDING; ompi_wait_sync_t sync; + WAIT_SYNC_INIT(&sync, 1); - if (OPAL_ATOMIC_BOOL_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) { + if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { SYNC_WAIT(&sync); } else { /* completed before we had a chance to swap in the sync object */ @@ -439,7 +441,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa if (0 == rc) { if( OPAL_LIKELY(with_signal) ) { - if(!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) { + void *_tmp_ptr = REQUEST_PENDING; + + if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) { ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, REQUEST_COMPLETED); /* In the case where another thread concurrently changed the request to REQUEST_PENDING */ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index c9ac7c7b43c..3a2a05a2277 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -347,19 +347,23 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #endif #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline -bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +#endif +bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline -bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +#endif +bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); #endif @@ -368,43 +372,24 @@ bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, #endif #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval); -/* XXX -- DEPRECATED -- XXX -- Legacy cmpset functions */ -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 -static inline -#endif -bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); - -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 -static inline -#endif -bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); - -#if 
OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 -static inline -#endif -bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); - #endif #if !defined(OPAL_HAVE_ATOMIC_MATH_32) && !defined(DOXYGEN) @@ -558,26 +543,6 @@ static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* a static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval, void *newval); -/* XXX -- DEPRECATED -- XXX -- Define legacy cmpset functions */ -static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length); -static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr, - int64_t oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_bool_cmpset_rel_xx(volatile void* addr, - int64_t oldval, int64_t newval, - size_t length); - -static inline bool opal_atomic_bool_cmpset_ptr(volatile void* addr, - void* oldval, - void* newval); -static inline bool opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, - void* oldval, - void* newval); -static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, - void* oldval, - void* newval); - /** * Atomic compare and set of generic type with relaxed semantics. This * macro detect at compile time the type of the first argument and @@ -630,61 +595,6 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) - -/* XXX -- DEPRECATED -- XXX -- Define legacy cmpset functions */ - -/** - * Atomic compare and set of pointer with relaxed semantics. This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_bool_cmpset_* for pseudo-code. - */ -#define opal_atomic_bool_cmpset( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \ - (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) - -/** - * Atomic compare and set of pointer with acquire semantics. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_bool_cmpset_acq_* for pseudo-code. - */ -#define opal_atomic_bool_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ - (int64_t)(NEWVAL), sizeof(*(ADDR)) ) - - -/** - * Atomic compare and set of pointer with release semantics. This - * macro detect at compile time the type of the first argument - * and choose the correct function to b - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_bool_cmpsetrel_* for pseudo-code. 
- */ -#define opal_atomic_bool_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ - (int64_t)(NEWVAL), sizeof(*(ADDR)) ) - #endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 4337fcb9f74..576fc8ed2e5 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -56,10 +56,9 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) { - int32_t old; + int32_t old = *addr; do { - old = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, old, newval)); + } while (!opal_atomic_compare_exchange_strong_32 (addr, &old, newval)); return old; } @@ -111,10 +110,10 @@ OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) { - int64_t old; + int64_t old = *addr; do { - old = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, old, newval)); + } while (!opal_atomic_compare_exchange_strong_64 (addr, &old, newval)); + return old; } #endif /* OPAL_HAVE_ATOMIC_SWAP_32 */ @@ -229,177 +228,6 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) #endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ -/* XXX -- DEPRECATED -- XXX -- Define legacy cmpset functions */ -#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) -static inline bool opal_atomic_bool_cmpset_32 (volatile int32_t *addr, int32_t oldval, - int32_t newval) -{ - return opal_atomic_compare_exchange_strong_32 (addr, &oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_acq_32 (volatile int32_t *addr, int32_t oldval, - int32_t newval) -{ - return opal_atomic_compare_exchange_strong_acq_32 (addr, &oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_32 (volatile int32_t *addr, int32_t oldval, - int32_t newval) -{ - return opal_atomic_compare_exchange_strong_rel_32 (addr, &oldval, newval); -} -#endif - -#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -static inline bool opal_atomic_bool_cmpset_64 (volatile int64_t *addr, int64_t oldval, - int64_t newval) -{ - return opal_atomic_compare_exchange_strong_64 (addr, &oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_acq_64 (volatile int64_t *addr, int64_t oldval, - int64_t newval) -{ - return opal_atomic_compare_exchange_strong_acq_64 (addr, &oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_64 (volatile int64_t *addr, int64_t oldval, - int64_t newval) -{ - return opal_atomic_compare_exchange_strong_rel_64 (addr, &oldval, newval); -} -#endif - -#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128) -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, - opal_int128_t newval) -{ - return opal_atomic_compare_exchange_strong_128 (addr, &oldval, newval); -} -#endif - -#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) - -static inline bool -opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - case 4: - return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - case 8: - return opal_atomic_bool_cmpset_64( (volatile 
int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ - } - abort(); - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ -} - - -static inline bool -opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - case 4: - return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - case 8: - return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ - } - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); -} - - -static inline bool -opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - case 4: - return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - case 8: - return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ - } - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); -} - - -static inline bool -opal_atomic_bool_cmpset_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#else - abort(); -#endif -} - - -static inline bool -opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - return opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#else - abort(); -#endif -} - - -static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 - return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#else - abort(); -#endif -} - -#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ - #if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SWAP_32 @@ -546,21 +374,20 @@ opal_atomic_lock_init( opal_atomic_lock_t* lock, int32_t value ) static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) { - bool ret = opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), - OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED); - return (ret == 0) ? 
1 : 0; + int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; + bool ret = opal_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED); + return (ret == false) ? 1 : 0; } static inline void opal_atomic_lock(opal_atomic_lock_t *lock) { - while( !opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), - OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED) ) { - while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { - /* spin */ ; - } - } + while (opal_atomic_trylock (lock)) { + while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { + /* spin */ ; + } + } } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index eaefb7e66b4..8700a204ebb 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -373,11 +373,12 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) /* Release memory resources */ do { + void *_tmp_ptr = NULL; /* Make sure that mca_btl_openib_endpoint_connect_eager_rdma () * was not in "connect" or "bad" flow (failed to allocate memory) * and changed the pointer back to NULL */ - if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) { + if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) { if (NULL != endpoint->eager_rdma_local.reg) { endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache, &endpoint->eager_rdma_local.reg->base); @@ -894,12 +895,14 @@ void mca_btl_openib_endpoint_connect_eager_rdma( mca_btl_openib_recv_frag_t *headers_buf; int i, rc; uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS; + void *_tmp_ptr = NULL; /* Set local rdma pointer to 1 temporarily so other threads will not try * to enter the function */ - if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, - (void*)1)) + if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, + (void *) 1)) { return; + } headers_buf = (mca_btl_openib_recv_frag_t*) malloc(sizeof(mca_btl_openib_recv_frag_t) * @@ -975,18 +978,19 @@ void mca_btl_openib_endpoint_connect_eager_rdma( endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1; /* set local rdma pointer to real value */ - (void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, - (void*)1, buf); + endpoint->eager_rdma_local.base.pval = buf; endpoint->eager_rdma_local.alloc_base = alloc_base; if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) { mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; mca_btl_openib_endpoint_t **p; + void *_tmp_ptr; OBJ_RETAIN(endpoint); assert(((opal_object_t*)endpoint)->obj_reference_count == 2); do { + _tmp_ptr = NULL; p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count]; - } while(!opal_atomic_bool_cmpset_ptr(p, NULL, endpoint)); + } while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint)); OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1); /* from this point progress function starts to poll new buffer */ @@ -1001,8 +1005,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma( free(headers_buf); unlock_rdma_local: /* set local rdma pointer back to zero. 
Will retry later */ - (void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, - endpoint->eager_rdma_local.base.pval, NULL); + endpoint->eager_rdma_local.base.pval = NULL; endpoint->eager_rdma_local.frags = NULL; } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index f580476abdb..8ca62f65be1 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -446,14 +446,16 @@ static inline int mca_btl_openib_endpoint_post_rr( return ret; } -#define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \ - OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1) -#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ - OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0) -#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ - do { \ - TO = FROM; \ - } while(0 == OPAL_ATOMIC_BOOL_CMPSET_32(&FROM, TO, 0)) +static inline __opal_attribute_always_inline__ bool btl_openib_credits_send_trylock (mca_btl_openib_endpoint_t *ep, int qp) +{ + int32_t _tmp_value = 0; + return OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ep->qps[qp].rd_credit_send_lock, &_tmp_value, 1); +} + +#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ + OPAL_ATOMIC_SWAP_32 (&(E)->qps[(Q)].rd_credit_send_lock, 0) +#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ + TO = OPAL_ATOMIC_SWAP_32(&FROM, 0) static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep) @@ -486,7 +488,7 @@ static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp) return; try_send: - if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp)) + if(btl_openib_credits_send_trylock(ep, qp)) mca_btl_openib_endpoint_send_credits(ep, qp); } diff --git a/opal/mca/btl/ugni/btl_ugni_smsg.c b/opal/mca/btl/ugni/btl_ugni_smsg.c index bc8858baec5..b90c95a6a9e 100644 --- a/opal/mca/btl/ugni/btl_ugni_smsg.c +++ b/opal/mca/btl/ugni/btl_ugni_smsg.c @@ -59,12 +59,13 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep) mca_btl_ugni_base_frag_t frag; mca_btl_base_segment_t seg; bool disconnect = false; + int32_t _tmp_value = 0; uintptr_t data_ptr; gni_return_t rc; uint32_t len; int count = 0; - if (!opal_atomic_bool_cmpset_32 (&ep->smsg_progressing, 0, 1)) { + if (!opal_atomic_compare_exchange_strong_32 (&ep->smsg_progressing, &_tmp_value, 1)) { /* already progressing (we can't support reentry here) */ return 0; } diff --git a/opal/mca/btl/vader/btl_vader_fifo.h b/opal/mca/btl/vader/btl_vader_fifo.h index 8304841cf84..0dc70bc8a13 100644 --- a/opal/mca/btl/vader/btl_vader_fifo.h +++ b/opal/mca/btl/vader/btl_vader_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2014 Los Alamos National Security, LLC. + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. * All rights reserved. 
* $COPYRIGHT$ * @@ -30,8 +30,9 @@ #include "btl_vader_endpoint.h" #include "btl_vader_frag.h" +#define vader_item_compare_exchange(x, y, z) opal_atomic_compare_exchange_strong_ptr ((volatile void **) (x), (void **) (y), (void *) (z)) + #if SIZEOF_VOID_P == 8 - #define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z)) #define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y)) #define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll @@ -40,7 +41,6 @@ typedef int64_t fifo_value_t; #else - #define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z)) #define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y)) #define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl @@ -138,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) { opal_atomic_rmb(); - if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) { + if (!vader_item_compare_exchange (&fifo->fifo_tail, &value, VADER_FIFO_FREE)) { while (VADER_FIFO_FREE == hdr->next) { opal_atomic_rmb (); } diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index ef9a73d3ceb..96a37156786 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. 
@@ -171,15 +171,16 @@ static const uint32_t ProcInc = 0x2; opal_cr_thread_in_library = false; \ } \ } -#define OPAL_CR_THREAD_LOCK() \ - { \ - while(!OPAL_ATOMIC_BOOL_CMPSET_32(&opal_cr_thread_num_in_library, 0, ThreadFlag)) { \ - if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ - break; \ - } \ - sched_yield(); \ - usleep(opal_cr_thread_sleep_check); \ - } \ +#define OPAL_CR_THREAD_LOCK() \ + { \ + int32_t _tmp_value = 0; \ + while(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&opal_cr_thread_num_in_library, &_tmp_value, ThreadFlag)) { \ + if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ + break; \ + } \ + sched_yield(); \ + usleep(opal_cr_thread_sleep_check); \ + } \ } #define OPAL_CR_THREAD_UNLOCK() \ { \ diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 4a41a1dba66..248735ae3aa 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -143,21 +143,6 @@ static inline type opal_thread_sub_ ## suffix (volatile type *addr, type delta) return (*addr -= delta); \ } -#define OPAL_THREAD_DEFINE_ATOMIC_CMPSET(type, addr_type, suffix) \ -static inline bool opal_thread_cmpset_bool_ ## suffix (volatile addr_type *addr, type compare, type value) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_bool_cmpset_ ## suffix ((volatile type *) addr, compare, value); \ - } \ - \ - if ((type) *addr == compare) { \ - ((type *) addr)[0] = value; \ - return true; \ - } \ - \ - return false; \ -} - #define OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \ static inline bool opal_thread_compare_exchange_strong_ ## suffix (volatile addr_type *addr, type *compare, type value) \ { \ @@ -195,8 +180,6 @@ OPAL_THREAD_DEFINE_ATOMIC_OR(int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_XOR(int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_SUB(int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_SUB(size_t, size_t) -OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int32_t, int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_CMPSET(void *, intptr_t, ptr) OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int32_t, int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(void *, intptr_t, ptr) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32) @@ -220,15 +203,9 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) #define OPAL_THREAD_SUB_SIZE_T opal_thread_sub_size_t #define OPAL_ATOMIC_SUB_SIZE_T opal_thread_sub_size_t -#define OPAL_THREAD_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 -#define OPAL_ATOMIC_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 - #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 -#define OPAL_THREAD_BOOL_CMPSET_PTR(x, y, z) opal_thread_cmpset_bool_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) -#define OPAL_ATOMIC_BOOL_CMPSET_PTR OPAL_THREAD_BOOL_CMPSET_PTR - #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR(x, y, z) opal_thread_compare_exchange_strong_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR @@ -245,7 +222,6 @@ OPAL_THREAD_DEFINE_ATOMIC_ADD(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_AND(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_OR(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_XOR(int64_t, 64) -OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int64_t, int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int64_t, int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) @@ -261,9 +237,6 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) #define 
OPAL_THREAD_XOR64 opal_thread_xor_64 #define OPAL_ATOMIC_XOR64 opal_thread_xor_64 -#define OPAL_THREAD_BOOL_CMPSET_64 opal_thread_cmpset_bool_64 -#define OPAL_ATOMIC_BOOL_CMPSET_64 opal_thread_cmpset_bool_64 - #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c index 6f8601da753..b05774278ab 100644 --- a/oshmem/runtime/oshmem_shmem_finalize.c +++ b/oshmem/runtime/oshmem_shmem_finalize.c @@ -64,8 +64,9 @@ int oshmem_shmem_finalize(void) { int ret = OSHMEM_SUCCESS; static int32_t finalize_has_already_started = 0; + int32_t _tmp = 0; - if (opal_atomic_bool_cmpset_32(&finalize_has_already_started, 0, 1) + if (opal_atomic_compare_exchange_strong_32 (&finalize_has_already_started, &_tmp, 1) && oshmem_shmem_initialized && !oshmem_shmem_aborted) { /* Should be called first because ompi_mpi_finalize makes orte and opal finalization */ ret = _shmem_finalize(); From 1282e98a017744c2475dc6bb2e898aff45531a13 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 29 Nov 2017 14:55:46 -0700 Subject: [PATCH 4/5] opal/asm: rename existing arithmetic atomic functions This commit renames the arithmetic atomic operations in opal to indicate that they return the new value not the old value. This naming differentiates these routines from new functions that return the old value. Signed-off-by: Nathan Hjelm --- ompi/datatype/ompi_datatype_args.c | 4 +- ompi/mca/coll/libnbc/coll_libnbc_component.c | 2 +- ompi/mca/coll/libnbc/nbc.c | 2 +- .../monitoring/coll_monitoring_component.c | 4 +- .../coll/portals4/coll_portals4_allreduce.c | 2 +- .../mca/coll/portals4/coll_portals4_barrier.c | 2 +- ompi/mca/coll/portals4/coll_portals4_bcast.c | 4 +- ompi/mca/coll/portals4/coll_portals4_gather.c | 4 +- ompi/mca/coll/portals4/coll_portals4_reduce.c | 2 +- .../mca/coll/portals4/coll_portals4_scatter.c | 2 +- ompi/mca/coll/sm/coll_sm.h | 2 +- ompi/mca/coll/sm/coll_sm_barrier.c | 2 +- ompi/mca/coll/sm/coll_sm_module.c | 2 +- .../mca/common/monitoring/common_monitoring.c | 28 ++++---- .../monitoring/common_monitoring_coll.c | 12 ++-- ompi/mca/mtl/portals4/mtl_portals4_flowctl.c | 2 +- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 14 ++-- ompi/mca/mtl/portals4/mtl_portals4_send.c | 28 ++++---- .../osc/monitoring/osc_monitoring_template.h | 2 +- .../osc/portals4/osc_portals4_active_target.c | 4 +- ompi/mca/osc/portals4/osc_portals4_comm.c | 72 +++++++++---------- .../mca/osc/portals4/osc_portals4_component.c | 4 +- .../portals4/osc_portals4_passive_target.c | 6 +- ompi/mca/osc/pt2pt/osc_pt2pt.h | 16 ++--- ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c | 8 +-- ompi/mca/osc/pt2pt/osc_pt2pt_comm.c | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c | 14 ++-- ompi/mca/osc/pt2pt/osc_pt2pt_frag.c | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt_frag.h | 6 +- ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c | 20 +++--- ompi/mca/osc/pt2pt/osc_pt2pt_sync.h | 2 +- ompi/mca/osc/rdma/osc_rdma_accumulate.c | 4 +- ompi/mca/osc/rdma/osc_rdma_comm.c | 12 ++-- ompi/mca/osc/rdma/osc_rdma_comm.h | 2 +- ompi/mca/osc/rdma/osc_rdma_frag.h | 4 +- ompi/mca/osc/rdma/osc_rdma_peer.h | 2 +- ompi/mca/osc/rdma/osc_rdma_request.c | 2 +- ompi/mca/osc/rdma/osc_rdma_types.h | 10 +-- ompi/mca/osc/sm/osc_sm_active_target.c | 8 +-- ompi/mca/osc/sm/osc_sm_passive_target.c | 6 +- ompi/mca/pml/base/pml_base_bsend.c | 4 +- ompi/mca/pml/bfo/pml_bfo_failover.h | 2 +- 
ompi/mca/pml/bfo/pml_bfo_recvfrag.c | 2 +- ompi/mca/pml/bfo/pml_bfo_recvreq.c | 12 ++-- ompi/mca/pml/bfo/pml_bfo_recvreq.h | 4 +- ompi/mca/pml/bfo/pml_bfo_sendreq.c | 16 ++--- ompi/mca/pml/bfo/pml_bfo_sendreq.h | 6 +- ompi/mca/pml/ob1/pml_ob1_isend.c | 4 +- ompi/mca/pml/ob1/pml_ob1_progress.c | 4 +- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 2 +- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 14 ++-- ompi/mca/pml/ob1/pml_ob1_recvreq.h | 4 +- ompi/mca/pml/ob1/pml_ob1_sendreq.c | 18 ++--- ompi/mca/pml/ob1/pml_ob1_sendreq.h | 6 +- opal/class/opal_list.c | 2 +- opal/class/opal_list.h | 12 ++-- opal/class/opal_object.h | 2 +- opal/class/opal_tree.c | 2 +- opal/include/opal/sys/arm/atomic.h | 4 +- opal/include/opal/sys/arm64/atomic.h | 2 +- opal/include/opal/sys/atomic.h | 60 ++++++++-------- opal/include/opal/sys/atomic_impl.h | 34 ++++----- opal/include/opal/sys/gcc_builtin/atomic.h | 20 +++--- opal/include/opal/sys/ia32/atomic.h | 4 +- opal/include/opal/sys/powerpc/atomic.h | 4 +- opal/include/opal/sys/sync_builtin/atomic.h | 20 +++--- opal/include/opal/sys/x86_64/atomic.h | 8 +-- opal/mca/btl/openib/btl_openib.c | 4 +- opal/mca/btl/openib/btl_openib_async.c | 8 +-- opal/mca/btl/openib/btl_openib_component.c | 32 ++++----- opal/mca/btl/openib/btl_openib_eager_rdma.h | 4 +- opal/mca/btl/openib/btl_openib_endpoint.c | 22 +++--- opal/mca/btl/openib/btl_openib_endpoint.h | 40 +++++------ opal/mca/btl/openib/btl_openib_get.c | 6 +- opal/mca/btl/portals4/btl_portals4.c | 10 +-- .../mca/btl/portals4/btl_portals4_component.c | 6 +- opal/mca/btl/portals4/btl_portals4_rdma.c | 6 +- opal/mca/btl/portals4/btl_portals4_send.c | 4 +- opal/mca/btl/smcuda/btl_smcuda.c | 8 +-- opal/mca/btl/smcuda/btl_smcuda_component.c | 6 +- opal/mca/btl/smcuda/btl_smcuda_fifo.h | 2 +- opal/mca/btl/ugni/btl_ugni_add_procs.c | 4 +- opal/mca/btl/ugni/btl_ugni_component.c | 2 +- opal/mca/btl/ugni/btl_ugni_endpoint.c | 6 +- opal/mca/btl/ugni/btl_ugni_frag.h | 2 +- opal/mca/btl/vader/btl_vader_fbox.h | 6 +- opal/mca/btl/vader/btl_vader_xpmem.c | 6 +- opal/mca/common/sm/common_sm.c | 2 +- .../mpool/hugepage/mpool_hugepage_module.c | 6 +- opal/mca/rcache/grdma/rcache_grdma_module.c | 8 +-- opal/mca/rcache/udreg/rcache_udreg_module.c | 4 +- opal/runtime/opal_cr.c | 6 +- opal/runtime/opal_progress.c | 12 ++-- opal/threads/thread_usage.h | 60 ++++++++-------- opal/threads/wait_sync.h | 2 +- oshmem/mca/spml/ikrit/spml_ikrit.c | 12 ++-- test/asm/atomic_cmpset.c | 12 ++-- test/asm/atomic_math.c | 12 ++-- test/threads/opal_thread.c | 4 +- 99 files changed, 461 insertions(+), 461 deletions(-) diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 53aaa00b7ee..d301aa44e78 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -378,7 +378,7 @@ int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data, * a read only memory). */ if( NULL != pArgs ) { - OPAL_THREAD_ADD32(&pArgs->ref_count, 1); + OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, 1); dest_data->args = pArgs; } return OMPI_SUCCESS; @@ -396,7 +396,7 @@ int32_t ompi_datatype_release_args( ompi_datatype_t* pData ) ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; assert( 0 < pArgs->ref_count ); - OPAL_THREAD_ADD32(&pArgs->ref_count, -1); + OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, -1); if( 0 == pArgs->ref_count ) { /* There are some duplicated datatypes around that have a pointer to this * args. We will release them only when the last datatype will dissapear. 
diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index cf55d0dbd0e..1a611eaee5f 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -314,7 +314,7 @@ libnbc_module_destruct(ompi_coll_libnbc_module_t *module) /* if we ever were used for a collective op, do the progress cleanup. */ if (true == module->comm_registered) { int32_t tmp = - OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, -1); + OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, -1); if (0 == tmp) { opal_progress_unregister(ompi_coll_libnbc_progress); } diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index dff6362bee7..28f022e5c99 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -618,7 +618,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t /* register progress */ if (need_register) { int32_t tmp = - OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1); + OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, 1); if (tmp == 1) { opal_progress_register(ompi_coll_libnbc_progress); } diff --git a/ompi/mca/coll/monitoring/coll_monitoring_component.c b/ompi/mca/coll/monitoring/coll_monitoring_component.c index 995757ddedc..47d14375e10 100644 --- a/ompi/mca/coll/monitoring/coll_monitoring_component.c +++ b/ompi/mca/coll/monitoring/coll_monitoring_component.c @@ -120,7 +120,7 @@ static int mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) { mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; - if( 1 == opal_atomic_add_32(&monitoring_module->is_initialized, 1) ) { + if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) { MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm); monitoring_module->data = mca_common_monitoring_coll_new(comm); OPAL_MONITORING_PRINT_INFO("coll_module_enabled"); @@ -132,7 +132,7 @@ static int mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) { mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; - if( 0 == opal_atomic_sub_32(&monitoring_module->is_initialized, 1) ) { + if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) { MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm); mca_common_monitoring_coll_release(monitoring_module->data); monitoring_module->data = NULL; diff --git a/ompi/mca/coll/portals4/coll_portals4_allreduce.c b/ompi/mca/coll/portals4/coll_portals4_allreduce.c index 935ce6cd9d3..56f1ea30621 100644 --- a/ompi/mca/coll/portals4/coll_portals4_allreduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_allreduce.c @@ -68,7 +68,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1); /* ** DATATYPE and SIZES diff --git a/ompi/mca/coll/portals4/coll_portals4_barrier.c b/ompi/mca/coll/portals4/coll_portals4_barrier.c index 9d5c4f3c164..f2544ce0cd1 100644 --- a/ompi/mca/coll/portals4/coll_portals4_barrier.c +++ b/ompi/mca/coll/portals4/coll_portals4_barrier.c @@ -44,7 +44,7 @@ barrier_hypercube_top(struct ompi_communicator_t *comm, request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER; - count = 
opal_atomic_add_size_t(&portals4_module->coll_count, 1); + count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.barrier.rtr_ct_h); diff --git a/ompi/mca/coll/portals4/coll_portals4_bcast.c b/ompi/mca/coll/portals4/coll_portals4_bcast.c index 11132f6ce4c..8432d5823cd 100644 --- a/ompi/mca/coll/portals4/coll_portals4_bcast.c +++ b/ompi/mca/coll/portals4/coll_portals4_bcast.c @@ -176,7 +176,7 @@ bcast_kary_tree_top(void *buff, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); /* @@ -513,7 +513,7 @@ bcast_pipeline_top(void *buff, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); /* ** DATATYPE and SIZES diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c index 45ff4c07728..7e38e27c009 100644 --- a/ompi/mca/coll/portals4/coll_portals4_gather.c +++ b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -582,7 +582,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc /* Setup Common Parameters */ /**********************************/ - request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank ); bmtree = portals4_module->cached_in_order_bmtree; @@ -879,7 +879,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank); - request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); ret = setup_gather_buffers_linear(comm, request, portals4_module); if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } diff --git a/ompi/mca/coll/portals4/coll_portals4_reduce.c b/ompi/mca/coll/portals4/coll_portals4_reduce.c index 1a55a5c3f70..2fdb36b739c 100644 --- a/ompi/mca/coll/portals4/coll_portals4_reduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_reduce.c @@ -69,7 +69,7 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1); /* ** DATATYPE and SIZES diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c b/ompi/mca/coll/portals4/coll_portals4_scatter.c index d1cfbbaa0d2..4f3351ac784 100644 --- a/ompi/mca/coll/portals4/coll_portals4_scatter.c +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -399,7 +399,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); - request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + request->u.scatter.coll_count = 
opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); ret = setup_scatter_buffers_linear(comm, request, portals4_module); if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index baaa510ed19..eaff4518b10 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one; * Macro to release an in-use flag from this process */ #define FLAG_RELEASE(flag) \ - (void)opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1) + (void)opal_atomic_add_fetch(&(flag)->mcsiuf_num_procs_using, -1) /** * Macro to copy a single segment in from a user buffer to a shared diff --git a/ompi/mca/coll/sm/coll_sm_barrier.c b/ompi/mca/coll/sm/coll_sm_barrier.c index a3000b7d847..b29199271d0 100644 --- a/ompi/mca/coll/sm/coll_sm_barrier.c +++ b/ompi/mca/coll/sm/coll_sm_barrier.c @@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm, if (0 != rank) { /* Get parent *in* buffer */ parent = &data->mcb_barrier_control_parent[buffer_set]; - (void)opal_atomic_add(parent, 1); + (void)opal_atomic_add_fetch(parent, 1); SPIN_CONDITION(0 != *me_out, exit_label2); *me_out = 0; diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 6c34851ee46..88393bebf09 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, OBJ_RETAIN(sm_module->previous_reduce_module); /* Indicate that we have successfully attached and setup */ - (void)opal_atomic_add(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); + (void)opal_atomic_add_fetch(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); /* Wait for everyone in this communicator to attach and setup */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c index 519da37318e..e521ca56417 100644 --- a/ompi/mca/common/monitoring/common_monitoring.c +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -209,7 +209,7 @@ static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar, int mca_common_monitoring_init( void ) { if( !mca_common_monitoring_enabled ) return OMPI_ERROR; - if( 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */ + if( 1 < opal_atomic_add_fetch_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */ char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; /* Initialize constant */ @@ -229,7 +229,7 @@ int mca_common_monitoring_init( void ) void mca_common_monitoring_finalize( void ) { if( ! 
mca_common_monitoring_enabled || /* Don't release if not last */ - 0 < opal_atomic_sub_32(&mca_common_monitoring_hold, 1) ) return; + 0 < opal_atomic_sub_fetch_32(&mca_common_monitoring_hold, 1) ) return; OPAL_MONITORING_PRINT_INFO("common_component_finish"); /* Dump monitoring informations */ @@ -503,21 +503,21 @@ void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag) /* Keep tracks of the data_size distribution */ if( 0 == data_size ) { - opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram], 1); + opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram], 1); } else { int log2_size = log10(data_size)/log10_2; if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */ log2_size = max_size_histogram - 2; - opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1); + opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1); } /* distinguishses positive and negative tags if requested */ if( (tag < 0) && (mca_common_monitoring_filter()) ) { - opal_atomic_add_size_t(&filtered_pml_data[world_rank], data_size); - opal_atomic_add_size_t(&filtered_pml_count[world_rank], 1); + opal_atomic_add_fetch_size_t(&filtered_pml_data[world_rank], data_size); + opal_atomic_add_fetch_size_t(&filtered_pml_count[world_rank], 1); } else { /* if filtered monitoring is not activated data is aggregated indifferently */ - opal_atomic_add_size_t(&pml_data[world_rank], data_size); - opal_atomic_add_size_t(&pml_count[world_rank], 1); + opal_atomic_add_fetch_size_t(&pml_data[world_rank], data_size); + opal_atomic_add_fetch_size_t(&pml_count[world_rank], 1); } } @@ -564,11 +564,11 @@ void mca_common_monitoring_record_osc(int world_rank, size_t data_size, if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ if( SEND == dir ) { - opal_atomic_add_size_t(&osc_data_s[world_rank], data_size); - opal_atomic_add_size_t(&osc_count_s[world_rank], 1); + opal_atomic_add_fetch_size_t(&osc_data_s[world_rank], data_size); + opal_atomic_add_fetch_size_t(&osc_count_s[world_rank], 1); } else { - opal_atomic_add_size_t(&osc_data_r[world_rank], data_size); - opal_atomic_add_size_t(&osc_count_r[world_rank], 1); + opal_atomic_add_fetch_size_t(&osc_data_r[world_rank], data_size); + opal_atomic_add_fetch_size_t(&osc_count_r[world_rank], 1); } } @@ -650,8 +650,8 @@ void mca_common_monitoring_record_coll(int world_rank, size_t data_size) { if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ - opal_atomic_add_size_t(&coll_data[world_rank], data_size); - opal_atomic_add_size_t(&coll_count[world_rank], 1); + opal_atomic_add_fetch_size_t(&coll_data[world_rank], data_size); + opal_atomic_add_fetch_size_t(&coll_count[world_rank], 1); } static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar, diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.c b/ompi/mca/common/monitoring/common_monitoring_coll.c index e37d5bb6c34..5af3059320b 100644 --- a/ompi/mca/common/monitoring/common_monitoring_coll.c +++ b/ompi/mca/common/monitoring/common_monitoring_coll.c @@ -236,8 +236,8 @@ void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data return; } #endif /* OPAL_ENABLE_DEBUG */ - opal_atomic_add_size_t(&data->o2a_size, size); - opal_atomic_add_size_t(&data->o2a_count, 1); + opal_atomic_add_fetch_size_t(&data->o2a_size, size); + 
opal_atomic_add_fetch_size_t(&data->o2a_count, 1); } int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar, @@ -277,8 +277,8 @@ void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data return; } #endif /* OPAL_ENABLE_DEBUG */ - opal_atomic_add_size_t(&data->a2o_size, size); - opal_atomic_add_size_t(&data->a2o_count, 1); + opal_atomic_add_fetch_size_t(&data->a2o_size, size); + opal_atomic_add_fetch_size_t(&data->a2o_count, 1); } int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar, @@ -318,8 +318,8 @@ void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data return; } #endif /* OPAL_ENABLE_DEBUG */ - opal_atomic_add_size_t(&data->a2a_size, size); - opal_atomic_add_size_t(&data->a2a_count, 1); + opal_atomic_add_fetch_size_t(&data->a2a_size, size); + opal_atomic_add_fetch_size_t(&data->a2a_count, 1); } int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index c68e30f6700..19d3b600b36 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -347,7 +347,7 @@ start_recover(void) int64_t epoch_counter; ompi_mtl_portals4.flowctl.flowctl_active = true; - epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); + epoch_counter = opal_atomic_add_fetch_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Entering flowctl_start_recover %ld", diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 230b3785532..f2737428e26 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -53,14 +53,14 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, int32_t frag_count; #if OMPI_MTL_PORTALS4_FLOW_CONTROL - while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + while (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); ompi_mtl_portals4_progress(); } #endif frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl; - ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count); + ret = OPAL_THREAD_ADD_FETCH32(&(request->pending_reply), frag_count); for (i = 0 ; i < frag_count ; i++) { opal_free_list_item_t *tmp; @@ -385,14 +385,14 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag, &rndv_get_frag->super); - ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1); + ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_reply), -1); if (ret > 0) { return OMPI_SUCCESS; } assert(ptl_request->pending_reply == 0); #if OMPI_MTL_PORTALS4_FLOW_CONTROL - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); #endif /* make sure the data is in the right place. 
Use _ucount for @@ -468,7 +468,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_request->super.type = portals4_req_recv; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; #if OPAL_ENABLE_DEBUG - ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->buffer_ptr = (free_after) ? start : NULL; @@ -549,7 +549,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl, } #if OPAL_ENABLE_DEBUG - ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->super.type = portals4_req_recv; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 6393b9a465b..27291eed559 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -45,7 +45,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, (ompi_mtl_portals4_isend_request_t*) ptl_base_request; if (PTL_EVENT_GET == ev->type) { - ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); + ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1); if (ret > 0) { /* wait for other gets */ OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret)); @@ -94,7 +94,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); ompi_mtl_portals4_flowctl_trigger(); return OMPI_SUCCESS; @@ -124,7 +124,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, if ((eager == ompi_mtl_portals4.protocol) || (ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) { - val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); + val = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1); } if (0 == val) { add = 2; /* We haven't to wait for any get, so we have to add an extra count to cause the message to complete */ @@ -161,7 +161,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, ptl_request->me_h = PTL_INVALID_HANDLE; add++; } - val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add); + val = OPAL_THREAD_ADD_FETCH32((int32_t*)&ptl_request->event_count, add); assert(val <= 3); if (val == 3) { @@ -174,7 +174,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, *complete = true; #if OMPI_MTL_PORTALS4_FLOW_CONTROL - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl, &ptl_request->pending->super); @@ -422,15 +422,15 @@ ompi_mtl_portals4_pending_list_progress() while ((!ompi_mtl_portals4.flowctl.flowctl_active) && (0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { - val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1); + val = OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1); if (val < 0) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); return; } item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends); if 
(OPAL_UNLIKELY(NULL == item)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); return; } @@ -456,7 +456,7 @@ ompi_mtl_portals4_pending_list_progress() if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); } } } @@ -492,7 +492,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ret) return ret; - ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*)&ompi_mtl_portals4.opcount, 1); ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->length = length; ptl_request->event_count = 0; @@ -520,15 +520,15 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, pending->ptl_proc = ptl_proc; pending->ptl_request = ptl_request; - if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + if (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); return OMPI_SUCCESS; } if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); ompi_mtl_portals4_pending_list_progress(); @@ -536,7 +536,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, } if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); return OMPI_SUCCESS; diff --git a/ompi/mca/osc/monitoring/osc_monitoring_template.h b/ompi/mca/osc/monitoring/osc_monitoring_template.h index 7d56421b3d8..6724a6b10c3 100644 --- a/ompi/mca/osc/monitoring/osc_monitoring_template.h +++ b/ompi/mca/osc/monitoring/osc_monitoring_template.h @@ -61,7 +61,7 @@ static inline void* \ ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \ { \ - if( 1 == opal_atomic_add_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \ + if( 1 == opal_atomic_add_fetch_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \ /* Saves the original module functions in \ * ompi_osc_monitoring_module_## template ##_template \ */ \ diff --git a/ompi/mca/osc/portals4/osc_portals4_active_target.c b/ompi/mca/osc/portals4/osc_portals4_active_target.c index e2bd9a9da20..23a763efe8e 100644 --- a/ompi/mca/osc/portals4/osc_portals4_active_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_active_target.c @@ -99,7 +99,7 @@ ompi_osc_portals4_complete(struct ompi_win_t *win) PTL_SUM, PTL_INT32_T); if (ret != OMPI_SUCCESS) return ret; - OPAL_THREAD_ADD64(&module->opcount, 1); + OPAL_THREAD_ADD_FETCH64(&module->opcount, 1); } ret = ompi_osc_portals4_complete_all(module); @@ -144,7 +144,7 @@ ompi_osc_portals4_post(struct 
ompi_group_t *group, PTL_SUM, PTL_INT32_T); if (ret != OMPI_SUCCESS) return ret; - OPAL_THREAD_ADD64(&module->opcount, 1); + OPAL_THREAD_ADD_FETCH64(&module->opcount, 1); } } else { module->post_group = NULL; diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index b792d20f8b7..b125f2aee50 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -206,7 +206,7 @@ segmentedPut(int64_t *opcount, ptl_size_t bytes_put = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(put_length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -222,7 +222,7 @@ segmentedPut(int64_t *opcount, user_ptr, hdr_data); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlPut failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -251,7 +251,7 @@ segmentedGet(int64_t *opcount, ptl_size_t bytes_gotten = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(get_length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -266,7 +266,7 @@ segmentedGet(int64_t *opcount, target_offset + bytes_gotten, user_ptr); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlGet failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -297,7 +297,7 @@ segmentedAtomic(int64_t *opcount, ptl_size_t sent = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -315,7 +315,7 @@ segmentedAtomic(int64_t *opcount, ptl_op, ptl_dt); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlAtomic failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -348,7 +348,7 @@ segmentedFetchAtomic(int64_t *opcount, ptl_size_t sent = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -367,7 +367,7 @@ segmentedFetchAtomic(int64_t *opcount, ptl_op, ptl_dt); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlFetchAtomic failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -399,7 +399,7 @@ segmentedSwap(int64_t *opcount, ptl_size_t sent = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -419,7 +419,7 @@ segmentedSwap(int64_t *opcount, PTL_SWAP, ptl_dt); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlSwap failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -547,7 +547,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, return ret; } - opal_atomic_add_64(&module->opcount, 1); + 
opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", @@ -564,7 +564,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d PtlGet() failed: ret = %d", __FUNCTION__, __LINE__, ret)); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -716,7 +716,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, return ret; } - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", @@ -735,7 +735,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d PtlPut() failed: ret = %d", __FUNCTION__, __LINE__, ret)); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1252,7 +1252,7 @@ put_to_noncontig(int64_t *opcount, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1270,7 +1270,7 @@ put_to_noncontig(int64_t *opcount, user_ptr, 0); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); return ret; } @@ -1361,7 +1361,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1379,7 +1379,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module, user_ptr, 0); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1479,7 +1479,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. 
local: %p, remote: %p, len: %lu", @@ -1501,7 +1501,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module, ptl_op, ptl_dt); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1586,7 +1586,7 @@ get_from_noncontig(int64_t *opcount, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1602,7 +1602,7 @@ get_from_noncontig(int64_t *opcount, offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, user_ptr); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); return ret; } @@ -1687,7 +1687,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1703,7 +1703,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module, offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, user_ptr); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1817,7 +1817,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu", @@ -1844,7 +1844,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module, opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlSwap failed with return value %d", __FUNCTION__, __LINE__, ret); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1969,7 +1969,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing swap on contiguous region. 
result: %p origin: %p, target: %p, len: %lu", @@ -1995,7 +1995,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module, opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlFetchAtomic failed with return value %d", __FUNCTION__, __LINE__, ret); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -2411,7 +2411,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Atomic", __FUNCTION__, __LINE__)); @@ -2428,7 +2428,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, ptl_op, ptl_dt); if (OMPI_SUCCESS != ret) { - (void)opal_atomic_add_64(&module->opcount, -1); + (void)opal_atomic_add_fetch_64(&module->opcount, -1); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -3149,7 +3149,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Atomic", __FUNCTION__, __LINE__)); @@ -3166,7 +3166,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, ptl_op, ptl_dt); if (OMPI_SUCCESS != ret) { - (void)opal_atomic_add_64(&module->opcount, -1); + (void)opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } sent += msg_length; @@ -3541,7 +3541,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "%s,%d Swap", __FUNCTION__, __LINE__)); @@ -3613,7 +3613,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, @@ -3635,7 +3635,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, md_offset = (ptl_size_t) result_addr; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get", __FUNCTION__, __LINE__)); ret = PtlGet(module->md_h, @@ -3648,7 +3648,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, NULL); } else { ptl_size_t result_md_offset, origin_md_offset; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) { diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 38c36fec6d9..8a4781e3af6 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -230,8 +230,8 @@ progress_callback(void) } req = (ompi_osc_portals4_request_t*) ev.user_ptr; - opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); - ops = 
opal_atomic_add_32(&req->ops_committed, 1); + opal_atomic_add_fetch_size_t(&req->super.req_status._ucount, ev.mlength); + ops = opal_atomic_add_fetch_32(&req->ops_committed, 1); if (ops == req->ops_expected) { ompi_request_complete(&req->super, true); } diff --git a/ompi/mca/osc/portals4/osc_portals4_passive_target.c b/ompi/mca/osc/portals4/osc_portals4_passive_target.c index b39d4d904fe..b9baeea6f1c 100644 --- a/ompi/mca/osc/portals4/osc_portals4_passive_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_passive_target.c @@ -43,7 +43,7 @@ lk_cas64(ompi_osc_portals4_module_t *module, int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = PtlSwap(module->md_h, (ptl_size_t) result_val, @@ -76,7 +76,7 @@ lk_write64(ompi_osc_portals4_module_t *module, int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = PtlPut(module->md_h, (ptl_size_t) &write_val, @@ -106,7 +106,7 @@ lk_add64(ompi_osc_portals4_module_t *module, int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = PtlFetchAtomic(module->md_h, (ptl_size_t) result_val, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 77eabcc5922..4b1a423ded1 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -146,9 +146,9 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value) { if (value) { - OPAL_ATOMIC_OR32 (&peer->flags, flag); + OPAL_ATOMIC_OR_FETCH32 (&peer->flags, flag); } else { - OPAL_ATOMIC_AND32 (&peer->flags, ~flag); + OPAL_ATOMIC_AND_FETCH32 (&peer->flags, ~flag); } } @@ -514,7 +514,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_incoming_completion marking active incoming complete. module %p, count = %d", (void *) module, (int) module->active_incoming_frag_count + 1)); - new_value = OPAL_THREAD_ADD32(&module->active_incoming_frag_count, 1); + new_value = OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, 1); if (new_value >= 0) { OPAL_THREAD_LOCK(&module->lock); opal_condition_broadcast(&module->cond); @@ -526,7 +526,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_incoming_completion marking passive incoming complete. 
module %p, source = %d, count = %d", (void *) module, source, (int) peer->passive_incoming_frag_count + 1)); - new_value = OPAL_THREAD_ADD32((int32_t *) &peer->passive_incoming_frag_count, 1); + new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &peer->passive_incoming_frag_count, 1); if (0 == new_value) { OPAL_THREAD_LOCK(&module->lock); opal_condition_broadcast(&module->cond); @@ -550,7 +550,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in */ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module) { - int32_t new_value = OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, 1); + int32_t new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, 1); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_outgoing_completion: outgoing_frag_count = %d", new_value)); if (new_value >= 0) { @@ -574,12 +574,12 @@ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module) */ static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count) { - OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, -count); + OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, -count); if (MPI_PROC_NULL != target) { OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target, count, module->epoch_outgoing_frag_count[target] + count)); - OPAL_THREAD_ADD32((int32_t *) (module->epoch_outgoing_frag_count + target), count); + OPAL_THREAD_ADD_FETCH32((int32_t *) (module->epoch_outgoing_frag_count + target), count); } } @@ -717,7 +717,7 @@ static inline int get_tag(ompi_osc_pt2pt_module_t *module) /* the LSB of the tag is used be the receiver to determine if the message is a passive or active target (ie, where to mark completion). 
*/ - int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4); + int32_t tmp = OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &module->tag_counter, 4); return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c index 501c126fd14..33df9440a62 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c @@ -183,7 +183,7 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) incoming_reqs)); /* set our complete condition for incoming requests */ - OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -incoming_reqs); + OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -incoming_reqs); /* wait for completion */ while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) { @@ -272,7 +272,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "found unexpected post from %d", peer->rank)); - OPAL_THREAD_ADD32 (&sync->sync_expected, -1); + OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1); ompi_osc_pt2pt_peer_set_unex (peer, false); } } @@ -574,12 +574,12 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i frag_count, module->active_incoming_frag_count, module->num_complete_msgs)); /* the current fragment is not part of the frag_count so we need to add it here */ - OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -frag_count); + OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -frag_count); /* make sure the signal count is written before changing the complete message count */ opal_atomic_wmb (); - if (0 == OPAL_THREAD_ADD32(&module->num_complete_msgs, 1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&module->num_complete_msgs, 1)) { OPAL_THREAD_LOCK(&module->lock); opal_condition_broadcast (&module->cond); OPAL_THREAD_UNLOCK(&module->lock); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index a8c218c4cf0..bfe67ea3d8f 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -62,7 +62,7 @@ static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request) /* update the cbdata for ompi_osc_pt2pt_comm_complete */ request->req_complete_cb_data = pt2pt_request->module; - if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&pt2pt_request->outstanding_requests, -1)) { ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 8aef87566f9..6a4205499bd 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -667,7 +667,7 @@ static int accumulate_cb (ompi_request_t *request) rank = acc_data->peer; } - if (0 == OPAL_THREAD_ADD32(&acc_data->request_count, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&acc_data->request_count, -1)) { /* no more requests needed before the buffer can be accumulated */ if (acc_data->source) { @@ -716,9 +716,9 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os /* NTH: ensure we don't leave wait/process_flush/etc until this * accumulate operation is complete. 
*/ if (active_target) { - OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -1); } else { - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1); } pending_acc->active_target = active_target; @@ -1353,7 +1353,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source, "process_flush header = {.frag_count = %d}", flush_header->frag_count)); /* increase signal count by incoming frags */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "%d: process_flush: received message from %d. passive_incoming_frag_count = %d", @@ -1372,7 +1372,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source, } /* signal incomming will increment this counter */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1); return sizeof (*flush_header); } @@ -1387,7 +1387,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source, "process_unlock header = {.frag_count = %d}", unlock_header->frag_count)); /* increase signal count by incoming frags */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count); OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc pt2pt: processing unlock request from %d. frag count = %d, processed_count = %d", @@ -1406,7 +1406,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source, } /* signal incoming will increment this counter */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1); return sizeof (*unlock_header); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c index 632495eb234..4db4259a6fd 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c @@ -106,7 +106,7 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om active_frag->target, active_frag->pending)); if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) { - if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) { + if (0 != OPAL_THREAD_ADD_FETCH32(&active_frag->pending, -1)) { /* communication going on while synchronizing; this is an rma usage bug */ return OMPI_ERR_RMA_SYNC; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index 0c16cfe690c..4ed38930d5a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -51,7 +51,7 @@ static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t* buffer) { opal_atomic_wmb (); - if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&buffer->pending, -1)) { opal_atomic_mb (); return ompi_osc_pt2pt_frag_start(module, buffer); } @@ -142,11 +142,11 @@ static inline int _ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, i curr->pending_long_sends = long_send; peer->active_frag = curr; } else { - OPAL_THREAD_ADD32(&curr->header->num_ops, 1); + 
OPAL_THREAD_ADD_FETCH32(&curr->header->num_ops, 1); curr->pending_long_sends += long_send; } - OPAL_THREAD_ADD32(&curr->pending, 1); + OPAL_THREAD_ADD_FETCH32(&curr->pending, 1); } else { curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); if (OPAL_UNLIKELY(NULL == curr)) { diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 9d0210c2f8a..091757511f3 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -64,7 +64,7 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock); if (!acquired) { @@ -91,7 +91,7 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank); int lock_type = lock->sync.lock.type; - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -99,9 +99,9 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, "ompi_osc_pt2pt_unlock_self: unlocking myself. lock state = %d", module->lock_status)); if (MPI_LOCK_EXCLUSIVE == lock_type) { - OPAL_THREAD_ADD32(&module->lock_status, 1); + OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1); ompi_osc_pt2pt_activate_next_lock (module); - } else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) { + } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) { ompi_osc_pt2pt_activate_next_lock (module); } @@ -128,7 +128,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp return OMPI_SUCCESS; } - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -145,7 +145,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req)); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - OPAL_THREAD_ADD32(&lock->sync_expected, -1); + OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, -1); } else { ompi_osc_pt2pt_peer_set_locked (peer, true); } @@ -163,7 +163,7 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_header_unlock_t unlock_req; int ret; - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -207,7 +207,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module, int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1); int ret; - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -908,9 +908,9 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source, } if (-1 == module->lock_status) { - OPAL_THREAD_ADD32(&module->lock_status, 1); + OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1); ompi_osc_pt2pt_activate_next_lock (module); - } else if (0 == 
OPAL_THREAD_ADD32(&module->lock_status, -1)) { + } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) { ompi_osc_pt2pt_activate_next_lock (module); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h index 10398926e84..fe359bf6cf9 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h @@ -166,7 +166,7 @@ static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *syn static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync) { - int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1); + int32_t new_value = OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1); if (0 == new_value) { OPAL_THREAD_LOCK(&sync->lock); if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) { diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 9d22bde8ab9..dc49668d164 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -516,7 +516,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v subreq->internal = true; subreq->parent_request = request; if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); } if (result_datatype) { @@ -557,7 +557,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v if (request) { /* release our reference so the request can complete */ - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } if (source_datatype) { diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 1a0450bd288..3357d1049c9 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -217,7 +217,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc subreq->parent_request = request; if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); } } else if (!alloc_reqs) { subreq = request; @@ -232,7 +232,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) { if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } if (alloc_reqs) { @@ -266,7 +266,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc ompi_osc_rdma_request_complete (request, OMPI_SUCCESS); } - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)"); @@ -551,7 +551,7 @@ static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p /* increment the outstanding request counter in the request object */ if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); cbcontext = (void *) ((intptr_t) request | 1); request->sync = sync; } else { @@ -643,12 +643,12 @@ static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_ subreq->internal = true; subreq->type = 
OMPI_OSC_RDMA_TYPE_RDMA; subreq->parent_request = request; - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OMPI_OSC_RDMA_REQUEST_RETURN(subreq); - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } return ret; diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.h b/ompi/mca/osc/rdma/osc_rdma_comm.h index 0e2daf2b4e8..e6d69505753 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.h +++ b/ompi/mca/osc/rdma/osc_rdma_comm.h @@ -35,7 +35,7 @@ static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_ } if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } ompi_osc_rdma_sync_rdma_dec (sync); diff --git a/ompi/mca/osc/rdma/osc_rdma_frag.h b/ompi/mca/osc/rdma/osc_rdma_frag.h index e9636a24d25..610ce447006 100644 --- a/ompi/mca/osc/rdma/osc_rdma_frag.h +++ b/ompi/mca/osc/rdma/osc_rdma_frag.h @@ -37,7 +37,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t); static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag) { - if (0 == OPAL_THREAD_ADD32(&frag->pending, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&frag->pending, -1)) { opal_atomic_rmb (); ompi_osc_rdma_deregister (frag->module, frag->handle); @@ -113,7 +113,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size curr->top += request_len; curr->remain_len -= request_len; - OPAL_THREAD_ADD32(&curr->pending, 1); + OPAL_THREAD_ADD_FETCH32(&curr->pending, 1); OPAL_THREAD_UNLOCK(&module->lock); diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index 21aeecb4078..ad661238154 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -220,7 +220,7 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer, */ static inline void ompi_osc_rdma_peer_clear_flag (ompi_osc_rdma_peer_t *peer, int flag) { - OPAL_ATOMIC_AND32(&peer->flags, ~flag); + OPAL_ATOMIC_AND_FETCH32(&peer->flags, ~flag); opal_atomic_mb (); } diff --git a/ompi/mca/osc/rdma/osc_rdma_request.c b/ompi/mca/osc/rdma/osc_rdma_request.c index 625b4d380ed..9c032ca4028 100644 --- a/ompi/mca/osc/rdma/osc_rdma_request.c +++ b/ompi/mca/osc/rdma/osc_rdma_request.c @@ -48,7 +48,7 @@ static int request_complete (struct ompi_request_t *request) { ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request; - if (parent_request && 0 == OPAL_THREAD_ADD32 (&parent_request->outstanding_requests, -1)) { + if (parent_request && 0 == OPAL_THREAD_ADD_FETCH32 (&parent_request->outstanding_requests, -1)) { ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS); } diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 4fed013cbf4..fc23f0f343f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -25,7 +25,7 @@ typedef int64_t osc_rdma_base_t; typedef int64_t osc_rdma_size_t; typedef int64_t osc_rdma_counter_t; -#define ompi_osc_rdma_counter_add opal_atomic_add_64 +#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_64 #else @@ -33,7 +33,7 @@ typedef int32_t osc_rdma_base_t; typedef int32_t osc_rdma_size_t; typedef int32_t osc_rdma_counter_t; 
-#define ompi_osc_rdma_counter_add opal_atomic_add_32 +#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_32 #endif @@ -48,7 +48,7 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value int64_t new; opal_atomic_mb (); - new = opal_atomic_add_64 (p, value) - value; + new = opal_atomic_add_fetch_64 (p, value) - value; opal_atomic_mb (); return new; @@ -76,8 +76,8 @@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value int32_t new; opal_atomic_mb (); - /* opal_atomic_add_32 differs from normal atomics in that is returns the new value */ - new = opal_atomic_add_32 (p, value) - value; + /* opal_atomic_add_fetch_32 differs from normal atomics in that is returns the new value */ + new = opal_atomic_add_fetch_32 (p, value) - value; opal_atomic_mb (); return new; diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 6c1e00263f2..31a6ee645e9 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -162,9 +162,9 @@ ompi_osc_sm_start(struct ompi_group_t *group, opal_atomic_rmb (); #if OPAL_HAVE_ATOMIC_MATH_64 - opal_atomic_xor_64 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); + opal_atomic_xor_fetch_64 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); #else - opal_atomic_xor_32 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); + opal_atomic_xor_fetch_32 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); #endif } @@ -201,7 +201,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win) gsize = ompi_group_size(group); for (int i = 0 ; i < gsize ; ++i) { - (void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1); + (void) opal_atomic_add_fetch_32(&module->node_states[ranks[i]].complete_count, 1); } free (ranks); @@ -247,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group, gsize = ompi_group_size(module->post_group); for (int i = 0 ; i < gsize ; ++i) { - (void) opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); + (void) opal_atomic_add_fetch ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); } opal_atomic_wmb (); diff --git a/ompi/mca/osc/sm/osc_sm_passive_target.c b/ompi/mca/osc/sm/osc_sm_passive_target.c index 889ac829dd1..a3388b776a4 100644 --- a/ompi/mca/osc/sm/osc_sm_passive_target.c +++ b/ompi/mca/osc/sm/osc_sm_passive_target.c @@ -26,9 +26,9 @@ lk_fetch_add32(ompi_osc_sm_module_t *module, size_t offset, uint32_t delta) { - /* opal_atomic_add_32 is an add then fetch so delta needs to be subtracted out to get the + /* opal_atomic_add_fetch_32 is an add then fetch so delta needs to be subtracted out to get the * old value */ - return opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset), + return opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset), delta) - delta; } @@ -39,7 +39,7 @@ lk_add32(ompi_osc_sm_module_t *module, size_t offset, uint32_t delta) { - opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset), + opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset), delta); } diff --git a/ompi/mca/pml/base/pml_base_bsend.c b/ompi/mca/pml/base/pml_base_bsend.c index f683570f708..ef6be82599a 100644 --- a/ompi/mca/pml/base/pml_base_bsend.c +++ b/ompi/mca/pml/base/pml_base_bsend.c @@ -81,7 +81,7 @@ int mca_pml_base_bsend_init(bool 
thread_safe) { size_t tmp; - if(OPAL_THREAD_ADD32(&mca_pml_bsend_init, 1) > 1) + if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init, 1) > 1) return OMPI_SUCCESS; /* initialize static objects */ @@ -109,7 +109,7 @@ int mca_pml_base_bsend_init(bool thread_safe) */ int mca_pml_base_bsend_fini(void) { - if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0) + if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0) return OMPI_SUCCESS; if(NULL != mca_pml_bsend_allocator) diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.h b/ompi/mca/pml/bfo/pml_bfo_failover.h index d1b97807adb..ea4f70fdc48 100644 --- a/ompi/mca/pml/bfo/pml_bfo_failover.h +++ b/ompi/mca/pml/bfo/pml_bfo_failover.h @@ -261,7 +261,7 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t */ #define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \ if (sendreq->req_state == -1) { \ - OPAL_THREAD_ADD32(&sendreq->req_state, 1); \ + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \ } /* Now check the error state. This request can be in error if the diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c index ce6827d5385..c7216c0d538 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c @@ -328,7 +328,7 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl, * protocol has req_state == 0 and as such should not be * decremented. */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } if(send_request_pml_complete_check(sendreq) == false) diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.c b/ompi/mca/pml/bfo/pml_bfo_recvreq.c index 969420efc0b..c0658f10ef3 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.c @@ -206,7 +206,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, (void *) des->des_remote, des->des_remote_count, 0); } - OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1); + OPAL_THREAD_SUB_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1); #if PML_BFO btl->btl_free(btl, des); @@ -217,7 +217,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, #endif /* PML_BFO */ /* check completion status */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { /* schedule additional rdma operations */ @@ -388,7 +388,7 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, #endif /* PML_BFO */ /* is receive request complete */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); recv_request_pml_complete_check(recvreq); MCA_PML_BFO_RDMA_FRAG_RETURN(frag); @@ -506,7 +506,7 @@ void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -668,7 +668,7 @@ void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + 
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -903,7 +903,7 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq, #endif /* PML_BFO */ /* update request state */ recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1); recvreq->req_rdma[rdma_idx].length -= size; bytes_remaining -= size; } else { diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.h b/ompi/mca/pml/bfo/pml_bfo_recvreq.h index 9c3f53989a4..7b3a6db6271 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.h +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.h @@ -70,12 +70,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t); static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1; } static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0; } /** diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.c b/ompi/mca/pml/bfo/pml_bfo_sendreq.c index 67208a9fe4a..176eadf4f6e 100644 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.c @@ -207,10 +207,10 @@ mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl, &(sendreq->req_send.req_base), PERUSE_SEND ); } - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); /* advance the request */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); send_request_pml_complete_check(sendreq); @@ -287,7 +287,7 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, (void *) des->des_local, des->des_local_count, 0); if (OPAL_LIKELY(0 < req_bytes_delivered)) { - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); } send_request_pml_complete_check(sendreq); @@ -360,8 +360,8 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl, des->des_local_count, sizeof(mca_pml_bfo_frag_hdr_t)); - OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_SUB_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); #if PML_BFO MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, @@ -1164,7 +1164,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq) range->range_btls[btl_idx].length -= size; range->range_send_length -= size; range->range_send_offset += size; - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1); if(range->range_send_length == 0) { range = get_next_send_range(sendreq, range); prev_bytes_remaining = 0; @@ -1226,7 +1226,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, #endif /* PML_BFO */ /* check for request completion */ - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, 
frag->rdma_length); send_request_pml_complete_check(sendreq); @@ -1335,7 +1335,7 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq, size_t i, size = 0; if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) { - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } #if PML_BFO MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq); diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.h b/ompi/mca/pml/bfo/pml_bfo_sendreq.h index 37f15af4578..170512ffe3e 100644 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.h +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.h @@ -78,12 +78,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t); static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1; } static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0; } static inline void @@ -445,7 +445,7 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq ) sendreq->req_pipeline_depth = 0; sendreq->req_bytes_delivered = 0; sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( + sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32( &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); #if PML_BFO sendreq->req_restartseq = 0; /* counts up restarts */ diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 3a5b0c2d7a0..be673382761 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -151,7 +151,7 @@ int mca_pml_ob1_isend(const void *buf, } if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1); } if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { @@ -220,7 +220,7 @@ int mca_pml_ob1_send(const void *buf, } if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1); } /** diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c index 276f089938f..e1f84e796b4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_progress.c +++ b/ompi/mca/pml/ob1/pml_ob1_progress.c @@ -56,7 +56,7 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void) static int mca_pml_ob1_progress_needed = 0; int mca_pml_ob1_enable_progress(int32_t count) { - int32_t progress_count = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count); + int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count); if( 1 < progress_count ) return 0; /* progress was already on */ @@ -119,7 +119,7 @@ int mca_pml_ob1_progress(void) } if( 0 != completed_requests ) { - j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests); + j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests); if( 0 == j ) { opal_progress_unregister(mca_pml_ob1_progress); } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index f85964f653b..83b7a44902e 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -445,7 +445,7 @@ void 
mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, * protocol has req_state == 0 and as such should not be * decremented. */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } #if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */ diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 6616e8eacca..9ccb27e1af4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; - OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1); + OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, -1); assert ((uint64_t) rdma_size == frag->rdma_length); MCA_PML_OB1_RDMA_FRAG_RETURN(frag); @@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r if (OPAL_LIKELY(0 < rdma_size)) { /* check completion status */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, rdma_size); if (recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { /* schedule additional rdma operations */ @@ -373,7 +373,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_ } } else { /* is receive request complete */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); /* TODO: re-add order */ mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc, bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag, @@ -524,7 +524,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -601,7 +601,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl, * known that the data has been copied out of the descriptor. 
*/ des->des_cbfunc(NULL, NULL, des, 0); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -815,7 +815,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq recvreq->req_recv.req_base.req_count, recvreq->req_recv.req_base.req_datatype); ); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); } /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -1024,7 +1024,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq, if (OPAL_LIKELY(OMPI_SUCCESS == rc)) { /* update request state */ recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, 1); recvreq->req_rdma[rdma_idx].length -= size; bytes_remaining -= size; } else { diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index 82c4767d834..0ced47e2915 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -64,12 +64,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t); static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1; } static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0; } /** diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index f358d733dab..a2aecae09ac 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -205,10 +205,10 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl, &(sendreq->req_send.req_base), PERUSE_SEND ); } - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); /* advance the request */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); send_request_pml_complete_check(sendreq); @@ -261,7 +261,7 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length) /* count bytes of user data actually delivered and check for request completion */ if (OPAL_LIKELY(0 < rdma_length)) { - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); } send_request_pml_complete_check(sendreq); @@ -313,8 +313,8 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl, des->des_segment_count, sizeof(mca_pml_ob1_frag_hdr_t)); - OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, -1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); if(send_request_pml_complete_check(sendreq) == false) { mca_pml_ob1_send_request_schedule(sendreq); @@ -1044,7 +1044,7 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) range->range_btls[btl_idx].length -= size; range->range_send_length -= size; 
range->range_send_offset += size; - OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1); if(range->range_send_length == 0) { range = get_next_send_range(sendreq, range); prev_bytes_remaining = 0; @@ -1060,7 +1060,7 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) range->range_btls[btl_idx].length -= size; range->range_send_length -= size; range->range_send_offset += size; - OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1); if(range->range_send_length == 0) { range = get_next_send_range(sendreq, range); prev_bytes_remaining = 0; @@ -1126,7 +1126,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b 0, 0); /* check for request completion */ - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); send_request_pml_complete_check(sendreq); } else { @@ -1200,7 +1200,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, mca_pml_ob1_rdma_frag_t* frag; if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) { - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } sendreq->req_recv.pval = hdr->hdr_recv_req.pval; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 5cb21f6aba6..be36c3f2ac4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -76,12 +76,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_range_t); static inline bool lock_send_request(mca_pml_ob1_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1; } static inline bool unlock_send_request(mca_pml_ob1_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0; } static inline void @@ -485,7 +485,7 @@ mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq ) return OMPI_ERR_UNREACH; } - seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + seqn = OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1); return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn); } diff --git a/opal/class/opal_list.c b/opal/class/opal_list.c index e0a5112c38a..dd0f654fd82 100644 --- a/opal/class/opal_list.c +++ b/opal/class/opal_list.c @@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx) /* Spot check: ensure this item is only on the list that we just insertted it into */ - (void)opal_atomic_add( &(item->opal_list_item_refcount), 1 ); + (void)opal_atomic_add_fetch( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif diff --git a/opal/class/opal_list.h b/opal/class/opal_list.h index cafc96dfb78..5edd6730d54 100644 --- a/opal/class/opal_list.h +++ b/opal/class/opal_list.h @@ -509,7 +509,7 @@ static inline opal_list_item_t *opal_list_remove_item #if OPAL_ENABLE_DEBUG /* Spot check: ensure that this item is still only on one list */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 ); assert(0 == item->opal_list_item_refcount); item->opal_list_item_belong_to = NULL; #endif @@ -575,7 +575,7 @@ static inline void _opal_list_append(opal_list_t *list, 
opal_list_item_t *item /* Spot check: ensure this item is only on the list that we just appended it to */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif @@ -625,7 +625,7 @@ static inline void opal_list_prepend(opal_list_t *list, /* Spot check: ensure this item is only on the list that we just prepended it to */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif @@ -686,7 +686,7 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list) /* Spot check: ensure that the item we're returning is now on no lists */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 ); assert(0 == item->opal_list_item_refcount); #endif @@ -746,7 +746,7 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list) /* Spot check: ensure that the item we're returning is now on no lists */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 ); assert(0 == item->opal_list_item_refcount); item->opal_list_item_belong_to = NULL; #endif @@ -789,7 +789,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos /* Spot check: double check that this item is only on the list that we just added it to */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 8539f2bf872..4e2da95c204 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -510,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__; static inline int opal_obj_update(opal_object_t *object, int inc) { - return OPAL_THREAD_ADD32(&object->obj_reference_count, inc); + return OPAL_THREAD_ADD_FETCH32(&object->obj_reference_count, inc); } END_C_DECLS diff --git a/opal/class/opal_tree.c b/opal/class/opal_tree.c index fdd41ea20a1..d56813f1dd3 100644 --- a/opal/class/opal_tree.c +++ b/opal/class/opal_tree.c @@ -210,7 +210,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item, /* Spot check: ensure this item is only on the list that we just appended it to */ - OPAL_THREAD_ADD32( &(new_item->opal_tree_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(new_item->opal_tree_item_refcount), 1 ); assert(1 == new_item->opal_tree_item_refcount); new_item->opal_tree_item_belong_to = new_item->opal_tree_container; #endif diff --git a/opal/include/opal/sys/arm/atomic.h b/opal/include/opal/sys/arm/atomic.h index 89e84c653c0..94576b6ddcc 100644 --- a/opal/include/opal/sys/arm/atomic.h +++ b/opal/include/opal/sys/arm/atomic.h @@ -209,7 +209,7 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc) +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int inc) { int32_t t; int tmp; @@ -230,7 +230,7 @@ static inline int32_t opal_atomic_add_32(volatile 
int32_t* v, int inc) } #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int dec) { int32_t t; int tmp; diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 6ef7776ea6f..8cc9313e14f 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -293,7 +293,7 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval) } #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ - static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ + static inline type opal_atomic_ ## name ## _fetch_ ## bits (volatile type *addr, type value) \ { \ type newval; \ int32_t tmp; \ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 3a2a05a2277..2a64308f819 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -405,22 +405,22 @@ bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t #if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif -int32_t opal_atomic_add_32(volatile int32_t *addr, int delta); +int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta); #if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif -int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value); +int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value); #if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif -int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value); +int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value); #if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif -int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value); +int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value); /* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides a static inline version of it (in assembly). 
If we have to fall @@ -428,7 +428,7 @@ int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value); #if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif -int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); +int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta); #endif /* OPAL_HAVE_ATOMIC_MATH_32 */ @@ -451,22 +451,22 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); #if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif -int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta); +int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta); #if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif -int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value); +int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value); #if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif -int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value); +int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value); #if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif -int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value); +int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value); /* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall @@ -474,7 +474,7 @@ int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value); #if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif -int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); +int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta); #endif /* OPAL_HAVE_ATOMIC_MATH_32 */ @@ -491,34 +491,34 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); */ #if defined(DOXYGEN) || OPAL_ENABLE_DEBUG static inline size_t -opal_atomic_add_size_t(volatile size_t *addr, size_t delta) +opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_add_32((int32_t*) addr, delta); + return (size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta); #elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_add_64((int64_t*) addr, delta); + return (size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta); #else #error "Unknown size_t size" #endif } static inline size_t -opal_atomic_sub_size_t(volatile size_t *addr, size_t delta) +opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_sub_32((int32_t*) addr, delta); + return (size_t) opal_atomic_sub_fetch_32((int32_t*) addr, delta); #elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_sub_64((int64_t*) addr, delta); + return (size_t) opal_atomic_sub_fetch_64((int64_t*) addr, delta); #else #error "Unknown size_t size" #endif } #else #if SIZEOF_SIZE_T == 4 -#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_32((int32_t*) addr, delta)) -#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_32((int32_t*) addr, delta)) +#define opal_atomic_add_fetch_size_t(addr, delta) 
((size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta)) +#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((int32_t*) addr, delta)) #elif SIZEOF_SIZE_T ==8 -#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_64((int64_t*) addr, delta)) -#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_64((int64_t*) addr, delta)) +#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta)) +#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((int64_t*) addr, delta)) #else #error "Unknown size_t size" #endif @@ -599,16 +599,16 @@ static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* a #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) -static inline void opal_atomic_add_xx(volatile void* addr, +static inline void opal_atomic_add_fetch_xx(volatile void* addr, int32_t value, size_t length); -static inline void opal_atomic_sub_xx(volatile void* addr, +static inline void opal_atomic_sub_fetch_xx(volatile void* addr, int32_t value, size_t length); #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ); -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); +static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ); +static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta ); -static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); +static inline int64_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ); +static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); #else #error Atomic arithmetic on pointers not supported #endif @@ -623,8 +623,8 @@ static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); * @param addr Address of * @param delta Value to add (converted to ). */ -#define opal_atomic_add( ADDR, VALUE ) \ - opal_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ +#define opal_atomic_add_fetch( ADDR, VALUE ) \ + opal_atomic_add_fetch_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ sizeof(*(ADDR)) ) /** @@ -637,8 +637,8 @@ static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); * @param addr Address of * @param delta Value to substract (converted to ). 
*/ -#define opal_atomic_sub( ADDR, VALUE ) \ - opal_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ +#define opal_atomic_sub_fetch( ADDR, VALUE ) \ + opal_atomic_sub_fetch_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ sizeof(*(ADDR)) ) #endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */ diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 576fc8ed2e5..8c54a62b627 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -40,7 +40,7 @@ #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operand, name) \ - static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ + static inline type opal_atomic_ ## name ## _fetch_ ## bits (volatile type *addr, type value) \ { \ type oldval, newval; \ do { \ @@ -266,18 +266,18 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) static inline void -opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) +opal_atomic_add_fetch_xx(volatile void* addr, int32_t value, size_t length) { switch( length ) { #if OPAL_HAVE_ATOMIC_ADD_32 case 4: - opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value ); + opal_atomic_add_fetch_32( (volatile int32_t*)addr, (int32_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ #if OPAL_HAVE_ATOMIC_ADD_64 case 8: - opal_atomic_add_64( (volatile int64_t*)addr, (int64_t)value ); + opal_atomic_add_fetch_64( (volatile int64_t*)addr, (int64_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_ADD_64 */ default: @@ -289,18 +289,18 @@ opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) static inline void -opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) +opal_atomic_sub_fetch_xx(volatile void* addr, int32_t value, size_t length) { switch( length ) { #if OPAL_HAVE_ATOMIC_SUB_32 case 4: - opal_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value ); + opal_atomic_sub_fetch_32( (volatile int32_t*)addr, (int32_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_SUB_32 */ #if OPAL_HAVE_ATOMIC_SUB_64 case 8: - opal_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value ); + opal_atomic_sub_fetch_64( (volatile int64_t*)addr, (int64_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_SUB_64 */ default: @@ -311,19 +311,19 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) } #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 -static inline int32_t opal_atomic_add_ptr( volatile void* addr, +static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ) { - return opal_atomic_add_32((int32_t*) addr, (unsigned long) delta); + return opal_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta); } #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 -static inline int64_t opal_atomic_add_ptr( volatile void* addr, +static inline int64_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ) { - return opal_atomic_add_64((int64_t*) addr, (unsigned long) delta); + return opal_atomic_add_fetch_64((int64_t*) addr, (unsigned long) delta); } #else -static inline int32_t opal_atomic_add_ptr( volatile void* addr, +static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ) { abort(); @@ -332,19 +332,19 @@ static inline int32_t opal_atomic_add_ptr( volatile void* addr, #endif #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, +static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ) { - 
return opal_atomic_sub_32((int32_t*) addr, (unsigned long) delta); + return opal_atomic_sub_fetch_32((int32_t*) addr, (unsigned long) delta); } #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int64_t opal_atomic_sub_ptr( volatile void* addr, +static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ) { - return opal_atomic_sub_64((int64_t*) addr, (unsigned long) delta); + return opal_atomic_sub_fetch_64((int64_t*) addr, (unsigned long) delta); } #else -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, +static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ) { abort(); diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 6521a50ed98..ecef65f3d07 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -104,27 +104,27 @@ static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newva return oldval; } -static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int32_t delta) { return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value) { return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value) { return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value) { return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int32_t delta) { return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); } @@ -152,27 +152,27 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva return oldval; } -static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta) { return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value) { return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value) { return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value) { return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta) { return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); } diff --git a/opal/include/opal/sys/ia32/atomic.h b/opal/include/opal/sys/ia32/atomic.h index 
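/*
 * [Editorial illustration, not part of the patch] In the gcc_builtin backend
 * above, the renamed functions map one-to-one onto the __atomic_*_fetch
 * builtins, which return the value *after* the operation; the separate
 * __atomic_fetch_* family returns the value *before* it.  A small
 * self-contained check of that difference (illustrative only, not OPAL code):
 */
#include <assert.h>
#include <stdint.h>

static void demo_add_fetch_vs_fetch_add (void)
{
    int32_t counter = 5;

    /* add-then-fetch: result is the updated value */
    int32_t after = __atomic_add_fetch (&counter, 3, __ATOMIC_RELAXED);
    assert (8 == after && 8 == counter);

    /* fetch-then-add: result is the value prior to the update */
    int32_t before = __atomic_fetch_add (&counter, 3, __ATOMIC_RELAXED);
    assert (8 == before && 11 == counter);
}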
009256f3cce..6b71dd4f2e3 100644 --- a/opal/include/opal/sys/ia32/atomic.h +++ b/opal/include/opal/sys/ia32/atomic.h @@ -130,7 +130,7 @@ static inline int32_t opal_atomic_swap_32( volatile int32_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -150,7 +150,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. */ -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) { int ret = -i; __asm__ __volatile__( diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 31cf96b1f7c..4aebb43fdd9 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -235,7 +235,7 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval #if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ -static inline int64_t opal_atomic_ ## type ## _64(volatile int64_t* v, int64_t val) \ +static inline int64_t opal_atomic_ ## type ## _fetch_64(volatile int64_t* v, int64_t val) \ { \ int64_t t; \ \ @@ -396,7 +396,7 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ -static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \ +static inline int32_t opal_atomic_ ## type ## _fetch_32(volatile int32_t* v, int val) \ { \ int32_t t; \ \ diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h index f8557a69451..f80a29684c4 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -69,31 +69,31 @@ static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *add #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int32_t delta) { return __sync_add_and_fetch(addr, delta); } #define OPAL_HAVE_ATOMIC_AND_32 1 -static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value) { return __sync_and_and_fetch(addr, value); } #define OPAL_HAVE_ATOMIC_OR_32 1 -static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value) { return __sync_or_and_fetch(addr, value); } #define OPAL_HAVE_ATOMIC_XOR_32 1 -static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value) { return __sync_xor_and_fetch(addr, value); } #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int32_t delta) { return __sync_sub_and_fetch(addr, delta); } @@ -115,31 +115,31 @@ static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *add #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 -static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t 
delta) +static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta) { return __sync_add_and_fetch(addr, delta); } #define OPAL_HAVE_ATOMIC_AND_64 1 -static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value) { return __sync_and_and_fetch(addr, value); } #define OPAL_HAVE_ATOMIC_OR_64 1 -static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value) { return __sync_or_and_fetch(addr, value); } #define OPAL_HAVE_ATOMIC_XOR_64 1 -static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value) { return __sync_xor_and_fetch(addr, value); } #define OPAL_HAVE_ATOMIC_SUB_64 1 -static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta) { return __sync_sub_and_fetch(addr, delta); } diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index a9d881f6552..046b4ad7d25 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -196,7 +196,7 @@ static inline int64_t opal_atomic_swap_64( volatile int64_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -217,7 +217,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) * * Atomically adds @i to @v. */ -static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) +static inline int64_t opal_atomic_add_fetch_64(volatile int64_t* v, int64_t i) { int64_t ret = i; __asm__ __volatile__( @@ -238,7 +238,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) * * Atomically subtracts @i from @v. */ -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) { int ret = -i; __asm__ __volatile__( @@ -259,7 +259,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. 
*/ -static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) +static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t* v, int64_t i) { int64_t ret = -i; __asm__ __volatile__( diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 8ace1e9ad13..dc279df8347 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -1119,7 +1119,7 @@ int mca_btl_openib_add_procs( } if (nprocs_new) { - opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new); + opal_atomic_add_fetch_32 (&openib_btl->num_peers, nprocs_new); /* adjust cq sizes given the new procs */ rc = openib_btl_size_queues (openib_btl); @@ -1229,7 +1229,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul /* this is a new process to this openib btl * account this procs if need */ - opal_atomic_add_32 (&openib_btl->num_peers, 1); + opal_atomic_add_fetch_32 (&openib_btl->num_peers, 1); rc = openib_btl_size_queues(openib_btl); if (OPAL_SUCCESS != rc) { BTL_ERROR(("error creating cqs")); diff --git a/opal/mca/btl/openib/btl_openib_async.c b/opal/mca/btl/openib/btl_openib_async.c index 3662624292e..5c52f9566b1 100644 --- a/opal/mca/btl/openib/btl_openib_async.c +++ b/opal/mca/btl/openib/btl_openib_async.c @@ -237,7 +237,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg) /* Set the flag to fatal */ device->got_fatal_event = true; /* It is not critical to protect the counter */ - OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1); /* fall through */ case IBV_EVENT_CQ_ERR: case IBV_EVENT_QP_FATAL: @@ -280,7 +280,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg) openib_event_to_str((enum ibv_event_type)event_type)); /* Set the flag to indicate port error */ device->got_port_event = true; - OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1); break; case IBV_EVENT_COMM_EST: case IBV_EVENT_PORT_ACTIVE: @@ -470,7 +470,7 @@ void mca_btl_openib_async_fini (void) void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device) { if (mca_btl_openib_component.async_evbase) { - if (1 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, 1)) { + if (1 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, 1)) { mca_btl_openib_async_init (); } opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event, @@ -484,7 +484,7 @@ void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device) { if (mca_btl_openib_component.async_evbase) { opal_event_del (&device->async_event); - if (0 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, -1)) { mca_btl_openib_async_fini (); } } diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index f6d3643306e..a847ac7d18f 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -3203,7 +3203,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, credits = hdr->credits; if(hdr->cm_seen) - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); /* Now return fragment. Don't touch hdr after this point! 
*/ if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) { @@ -3215,7 +3215,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail); if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf)) break; - OPAL_THREAD_ADD32(&erl->credits, 1); + OPAL_THREAD_ADD_FETCH32(&erl->credits, 1); MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail); } OPAL_THREAD_UNLOCK(&erl->lock); @@ -3233,14 +3233,14 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, MCA_BTL_IB_FRAG_RETURN(frag); if (BTL_OPENIB_QP_TYPE_PP(rqp)) { if (OPAL_UNLIKELY(is_credit_msg)) { - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_received, 1); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_received, 1); } else { - OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); } mca_btl_openib_endpoint_post_rr(ep, cqp); } else { mca_btl_openib_module_t *btl = ep->endpoint_btl; - OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); mca_btl_openib_post_srr(btl, rqp); } } @@ -3251,10 +3251,10 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, /* If we got any credits (RDMA or send), then try to progress all the no_credits_pending_frags lists */ if (rcredits > 0) { - OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits); + OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits); } if (credits > 0) { - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); } if (rcredits + credits > 0) { int rc; @@ -3303,7 +3303,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, credits = hdr->credits; if(hdr->cm_seen) - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); /* We should not be here with eager, control, or credit messages */ assert(openib_frag_type(frag) != MCA_BTL_OPENIB_FRAG_EAGER_RDMA); @@ -3314,11 +3314,11 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, /* Otherwise, FRAG_RETURN it and repost if necessary */ MCA_BTL_IB_FRAG_RETURN(frag); if (BTL_OPENIB_QP_TYPE_PP(rqp)) { - OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); mca_btl_openib_endpoint_post_rr(ep, cqp); } else { mca_btl_openib_module_t *btl = ep->endpoint_btl; - OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); mca_btl_openib_post_srr(btl, rqp); } @@ -3327,10 +3327,10 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, /* If we got any credits (RDMA or send), then try to progress all the no_credits_pending_frags lists */ if (rcredits > 0) { - OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits); + OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits); } if (credits > 0) { - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); } if (rcredits + credits > 0) { int rc; @@ -3523,7 +3523,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, case IBV_WC_FETCH_ADD: OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE")); - OPAL_THREAD_ADD32(&endpoint->get_tokens, 1); + OPAL_THREAD_ADD_FETCH32(&endpoint->get_tokens, 1); mca_btl_openib_get_frag_t *get_frag = 
to_get_frag(des); @@ -3575,7 +3575,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des)); if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) { - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n); + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n); /* new SRQ credit available. Try to progress pending frags*/ progress_pending_frags_srq(openib_btl, qp); @@ -3601,7 +3601,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, wc->byte_len < mca_btl_openib_component.eager_limit && openib_btl->eager_rdma_channels < mca_btl_openib_component.max_eager_rdma && - OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) == + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_recv_count, 1) == mca_btl_openib_component.eager_rdma_threshold) { mca_btl_openib_endpoint_connect_eager_rdma(endpoint); } @@ -3934,7 +3934,7 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp) if(OPAL_LIKELY(0 == rc)) { struct ibv_srq_attr srq_attr; - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post); + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post); if(true == openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag) { srq_attr.max_wr = openib_btl->qps[qp].u.srq_qp.rd_curr_num; diff --git a/opal/mca/btl/openib/btl_openib_eager_rdma.h b/opal/mca/btl/openib/btl_openib_eager_rdma.h index 0ba5a030d4c..5acb038177f 100644 --- a/opal/mca/btl/openib/btl_openib_eager_rdma.h +++ b/opal/mca/btl/openib/btl_openib_eager_rdma.h @@ -96,7 +96,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo #define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \ do { \ - (SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \ + (SEQ) = OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1; \ (OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \ } while(0) @@ -108,7 +108,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo #define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ do { \ - (OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ + (OLD_HEAD) = (OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ } while(0) #endif diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index 8700a204ebb..be01664b1c3 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -212,7 +212,7 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp) qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */ rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP); if (0 == rc) { - opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr); + opal_atomic_add_fetch_32 (&ep_qp->qp->sd_wqe, incr); } } else { ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe; @@ -767,9 +767,9 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint, if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) { do_rdma = true; } else { - if(OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) > + if(OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) > (mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) { - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); return; } @@ -782,7 +782,7 @@ void 
mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint, if(cm_return > 255) { frag->hdr->cm_seen = 255; cm_return -= 255; - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); } else { frag->hdr->cm_seen = cm_return; } @@ -803,14 +803,14 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint, BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr); } BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits); - OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, credits_hdr->rdma_credits); if(do_rdma) - OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1); else - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); BTL_ERROR(("error posting send request errno %d says %s", rc, strerror(errno))); @@ -824,7 +824,7 @@ static void mca_btl_openib_endpoint_eager_rdma_connect_cb( int status) { mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; - OPAL_THREAD_ADD32(&device->non_eager_rdma_endpoints, -1); + OPAL_THREAD_ADD_FETCH32(&device->non_eager_rdma_endpoints, -1); assert(device->non_eager_rdma_endpoints >= 0); MCA_BTL_IB_FRAG_RETURN(descriptor); } @@ -992,9 +992,9 @@ void mca_btl_openib_endpoint_connect_eager_rdma( p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count]; } while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint)); - OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1); + OPAL_THREAD_ADD_FETCH32(&openib_btl->eager_rdma_channels, 1); /* from this point progress function starts to poll new buffer */ - OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1); + OPAL_THREAD_ADD_FETCH32(&device->eager_rdma_buffers_count, 1); return; } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index 8ca62f65be1..89c42c595e5 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -277,19 +277,19 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t); static inline int32_t qp_get_wqe(mca_btl_openib_endpoint_t *ep, const int qp) { - return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, -1); + return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, -1); } static inline int32_t qp_put_wqe(mca_btl_openib_endpoint_t *ep, const int qp) { - return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, 1); + return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, 1); } static inline int32_t qp_inc_inflight_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag) { frag->n_wqes_inflight = 0; - return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe_inflight, 1); + return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe_inflight, 1); } static inline void qp_inflight_wqe_to_frag(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag) @@ -303,7 +303,7 @@ static inline int qp_frag_to_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mc { int n; n = frag->n_wqes_inflight; - OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, n); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, n); frag->n_wqes_inflight = 0; return n; @@ -420,15 +420,15 @@ static inline int 
mca_btl_openib_endpoint_post_rr_nolock( if((rc = post_recvs(ep, qp, num_post)) != OPAL_SUCCESS) { return rc; } - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_posted, num_post); - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_credits, num_post); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_posted, num_post); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_credits, num_post); /* post buffers for credit management on credit management qp */ if((rc = post_recvs(ep, cqp, cm_received)) != OPAL_SUCCESS) { return rc; } - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_return, cm_received); - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_return, cm_received); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received); assert(ep->qps[qp].u.pp_qp.rd_credits <= rd_num && ep->qps[qp].u.pp_qp.rd_credits >= 0); @@ -532,8 +532,8 @@ ib_send_flags(uint32_t size, mca_btl_openib_endpoint_qp_t *qp, int do_signal) static inline int acquire_eager_rdma_send_credit(mca_btl_openib_endpoint_t *endpoint) { - if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { - OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); + if(OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1); return OPAL_ERR_OUT_OF_RESOURCE; } @@ -638,8 +638,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en prio = !prio; if (BTL_OPENIB_QP_TYPE_PP(qp)) { - if (OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) { - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); + if (OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) { + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); if (queue_frag) { opal_list_append(&endpoint->qps[qp].no_credits_pending_frags[prio], (opal_list_item_t *)frag); @@ -648,8 +648,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en return OPAL_ERR_OUT_OF_RESOURCE; } } else { - if(OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) { - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); + if(OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) { + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); if (queue_frag) { OPAL_THREAD_LOCK(&openib_btl->ib_lock); opal_list_append(&openib_btl->qps[qp].u.srq_qp.pending_frags[prio], @@ -684,7 +684,7 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en if(cm_return > 255) { hdr->cm_seen = 255; cm_return -= 255; - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); } else { hdr->cm_seen = cm_return; } @@ -699,18 +699,18 @@ static inline void mca_btl_openib_endpoint_credit_release (struct mca_btl_base_e mca_btl_openib_header_t *hdr = frag->hdr; if (BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) { - OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits)); + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits)); } if (do_rdma) { - OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1); } else { if(BTL_OPENIB_QP_TYPE_PP(qp)) { - OPAL_THREAD_ADD32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits); - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); + 
OPAL_THREAD_ADD_FETCH32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); } else if BTL_OPENIB_QP_TYPE_SRQ(qp){ mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl; - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); } } } diff --git a/opal/mca/btl/openib/btl_openib_get.c b/opal/mca/btl/openib/btl_openib_get.c index c8bc78105db..6dc73bc6e4c 100644 --- a/opal/mca/btl/openib/btl_openib_get.c +++ b/opal/mca/btl/openib/btl_openib_get.c @@ -148,9 +148,9 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base } /* check for a get token */ - if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) { + if (OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,-1) < 0) { qp_put_wqe(ep, qp); - OPAL_THREAD_ADD32(&ep->get_tokens,1); + OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1); return OPAL_ERR_OUT_OF_RESOURCE; } @@ -159,7 +159,7 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) { qp_put_wqe(ep, qp); - OPAL_THREAD_ADD32(&ep->get_tokens,1); + OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1); return OPAL_ERROR; } diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index b4504d502ce..74cc3af7c41 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -423,7 +423,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, curr_proc, &btl_peer_data[i]); - OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, 1); /* and here we can reach */ opal_bitmap_set_bit(reachable, i); @@ -476,7 +476,7 @@ mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl, portals4 entry in proc_endpoints instead of the peer_data */ for (i = 0 ; i < nprocs ; ++i) { free(btl_peer_data[i]); - OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, -1); } if (0 == portals4_btl->portals_num_procs) @@ -537,7 +537,7 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base, if (frag->me_h != PTL_INVALID_HANDLE) { frag->me_h = PTL_INVALID_HANDLE; } - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag); @@ -622,7 +622,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, return NULL; } - handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1); + handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1); handle->remote_offset = 0; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, @@ -662,7 +662,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); return NULL; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, diff --git a/opal/mca/btl/portals4/btl_portals4_component.c 
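/*
 * [Editorial illustration, not part of the patch] The openib and portals4
 * hunks around this point share one optimistic token/credit pattern: take a
 * token with an atomic add-and-fetch of -1, and if the returned (new) value
 * shows the pool was already exhausted, put the token back and report a
 * resource error.  A generic sketch of that pattern; the names toy_take_token
 * and toy_return_token are hypothetical, for illustration only:
 */
#include <stdbool.h>
#include <stdint.h>

static inline bool toy_take_token (volatile int32_t *tokens)
{
    if (__atomic_add_fetch (tokens, -1, __ATOMIC_RELAXED) < 0) {
        /* over-subscribed: undo the decrement and fail, as in
         * acquire_eager_rdma_send_credit() and mca_btl_openib_get_internal() */
        (void) __atomic_add_fetch (tokens, 1, __ATOMIC_RELAXED);
        return false;
    }
    return true;
}

static inline void toy_return_token (volatile int32_t *tokens)
{
    (void) __atomic_add_fetch (tokens, 1, __ATOMIC_RELAXED);
}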
b/opal/mca/btl/portals4/btl_portals4_component.c index eda9cd81f70..a56236d3e9f 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -609,7 +609,7 @@ mca_btl_portals4_component_progress(void) mca_btl_portals4_free(&portals4_btl->super, &frag->base); } if (0 != frag->size) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n", portals4_btl->portals_outstanding_ops)); @@ -646,7 +646,7 @@ mca_btl_portals4_component_progress(void) } if (0 != frag->size) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops)); } @@ -749,7 +749,7 @@ mca_btl_portals4_component_progress(void) OPAL_SUCCESS); OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); goto done; diff --git a/opal/mca/btl/portals4/btl_portals4_rdma.c b/opal/mca/btl/portals4/btl_portals4_rdma.c index 33fb9ab326e..9237b30fce2 100644 --- a/opal/mca/btl/portals4/btl_portals4_rdma.c +++ b/opal/mca/btl/portals4/btl_portals4_rdma.c @@ -53,16 +53,16 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, int ret; /* reserve space in the event queue for rdma operations immediately */ - while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > + while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) > portals4_btl->portals_max_outstanding_ops) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n")); mca_btl_portals4_component_progress(); } OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag); if (NULL == frag){ - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); return OPAL_ERROR; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, diff --git a/opal/mca/btl/portals4/btl_portals4_send.c b/opal/mca/btl/portals4/btl_portals4_send.c index 1f50fb2ef58..218ed877803 100644 --- a/opal/mca/btl/portals4/btl_portals4_send.c +++ b/opal/mca/btl/portals4/btl_portals4_send.c @@ -49,9 +49,9 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type); /* reserve space in the event queue for rdma operations immediately */ - while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > + while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) > portals4_btl->portals_max_outstanding_ops) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to 
mca_btl_portals4_component_progress (4)\n")); mca_btl_portals4_component_progress(); diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index 03d3a6a116a..561585ea4bf 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -636,7 +636,7 @@ int mca_btl_smcuda_add_procs( /* Sync with other local procs. Force the FIFO initialization to always * happens before the readers access it. */ - (void)opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1); + (void)opal_atomic_add_fetch_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1); while( n_local_procs > mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) { opal_progress(); @@ -976,7 +976,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl, * the return code indicates failure, the write has still "completed" from * our point of view: it has been posted to a "pending send" queue. */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); (void)rc; /* this is safe to ignore as the message is requeued till success */ @@ -1026,7 +1026,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl, * post the descriptor in the queue - post with the relative * address */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); if( OPAL_LIKELY(0 == rc) ) { @@ -1241,7 +1241,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b * the return code indicates failure, the write has still "completed" from * our point of view: it has been posted to a "pending send" queue. */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, "Sending CUDA IPC REQ (try=%d): myrank=%d, mydev=%d, peerrank=%d", endpoint->ipctries, diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index 8aedf9f1d7a..d77398a9965 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -658,7 +658,7 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl, * the return code indicates failure, the write has still "completed" from * our point of view: it has been posted to a "pending send" queue. */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); @@ -980,7 +980,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep) if(NULL == si) return; /* Another thread got in before us. Thats ok. 
*/ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, -1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, -1); MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data, true, false, rc); @@ -1093,7 +1093,7 @@ int mca_btl_smcuda_component_progress(void) if( btl_ownership ) { MCA_BTL_SMCUDA_FRAG_RETURN(frag); } - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, -1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, -1); if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) { btl_smcuda_process_pending_sends(endpoint); } diff --git a/opal/mca/btl/smcuda/btl_smcuda_fifo.h b/opal/mca/btl/smcuda/btl_smcuda_fifo.h index 7fcf2c1c98c..c4db00d10a8 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_fifo.h +++ b/opal/mca/btl/smcuda/btl_smcuda_fifo.h @@ -40,7 +40,7 @@ add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend) si = (btl_smcuda_pending_send_item_t*)i; si->data = data; - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1); /* if data was on pending send list then prepend it to the list to * minimize reordering */ diff --git a/opal/mca/btl/ugni/btl_ugni_add_procs.c b/opal/mca/btl/ugni/btl_ugni_add_procs.c index e96e12e6ba9..6a2fa9b81e2 100644 --- a/opal/mca/btl/ugni/btl_ugni_add_procs.c +++ b/opal/mca/btl/ugni/btl_ugni_add_procs.c @@ -272,7 +272,7 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size, rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags); if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { - opal_atomic_add_32(&ugni_module->reg_count,1); + opal_atomic_add_fetch_32(&ugni_module->reg_count,1); } return rc; @@ -286,7 +286,7 @@ ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg) rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg); if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { - opal_atomic_add_32(&ugni_module->reg_count,-1); + opal_atomic_add_fetch_32(&ugni_module->reg_count,-1); } return rc; diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 86eb252973d..cafcdabfc37 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -543,7 +543,7 @@ int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device) BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep)); ep->dg_posted = false; - (void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1); + (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, -1); } (void) mca_btl_ugni_ep_connect_progress (ep); diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index 04d99349322..2f792839982 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -181,7 +181,7 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec } } while (device->dev_smsg_local_cq.active_operations); - (void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1); + (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, -1); } mca_btl_ugni_device_lock (device); @@ -278,7 +278,7 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) { ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl; ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED; - (void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 
1); + (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, 1); /* send all pending messages */ BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list))); @@ -312,7 +312,7 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) ep->remote_attr, sizeof (*ep->remote_attr), MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index); if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) { - (void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1); + (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1); } return mca_btl_rc_ugni_to_opal (rc); diff --git a/opal/mca/btl/ugni/btl_ugni_frag.h b/opal/mca/btl/ugni/btl_ugni_frag.h index bb8a58cbc8b..ac9c8bc6ec8 100644 --- a/opal/mca/btl/ugni/btl_ugni_frag.h +++ b/opal/mca/btl/ugni/btl_ugni_frag.h @@ -192,7 +192,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in opal_atomic_mb (); - ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1); + ref_cnt = OPAL_THREAD_ADD_FETCH32(&frag->ref_cnt, -1); if (ref_cnt) { assert (ref_cnt > 0); return false; diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index 6f09cb6c513..3ad53b0746c 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -261,14 +261,14 @@ static inline bool mca_btl_vader_check_fboxes (void) static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr) { - if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_SIZE_T (&ep->send_count, 1))) { + if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) { /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size && mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) { /* verify the remote side will accept another fbox */ - if (0 <= opal_atomic_add_32 (&ep->fifo->fbox_available, -1)) { + if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size; @@ -280,7 +280,7 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer); ++mca_btl_vader_component.fbox_count; } else { - opal_atomic_add_32 (&ep->fifo->fbox_available, 1); + opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1); } opal_atomic_wmb (); diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index f635b2c6cdf..7f8cd7da73c 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -54,7 +54,7 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx) vader_ctx->reg[0] = reg; if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) { - (void)opal_atomic_add (®->ref_count, 1); + (void)opal_atomic_add_fetch (®->ref_count, 1); return 1; } @@ -93,7 +93,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo /* start the new segment from the lower of the two bases */ base = (uintptr_t) reg->base < base ? 
(uintptr_t) reg->base : base; - if (OPAL_LIKELY(0 == opal_atomic_add_32 (®->ref_count, -1))) { + if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (®->ref_count, -1))) { /* this pointer is not in use */ (void) xpmem_detach (reg->rcache_context); OBJ_RELEASE(reg); @@ -143,7 +143,7 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_ mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; int32_t ref_count; - ref_count = opal_atomic_add_32 (®->ref_count, -1); + ref_count = opal_atomic_add_fetch_32 (®->ref_count, -1); if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { /* protect rcache access */ mca_rcache_base_vma_delete (vma_module, reg); diff --git a/opal/mca/common/sm/common_sm.c b/opal/mca/common/sm/common_sm.c index 52fc4092dea..c6e2a0fdaf8 100644 --- a/opal/mca/common/sm/common_sm.c +++ b/opal/mca/common/sm/common_sm.c @@ -131,7 +131,7 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp, } /* increment the number of processes that are attached to the segment. */ - (void)opal_atomic_add_size_t(&map->module_seg->seg_num_procs_inited, 1); + (void)opal_atomic_add_fetch_size_t(&map->module_seg->seg_num_procs_inited, 1); /* commit the changes before we return */ opal_atomic_wmb(); diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_module.c b/opal/mca/mpool/hugepage/mpool_hugepage_module.c index 5a1f1fa1fc1..6af7003563f 100644 --- a/opal/mca/mpool/hugepage/mpool_hugepage_module.c +++ b/opal/mca/mpool/hugepage/mpool_hugepage_module.c @@ -131,7 +131,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep) if (huge_page->path) { int32_t count; - count = opal_atomic_add_32 (&huge_page->count, 1); + count = opal_atomic_add_fetch_32 (&huge_page->count, 1); rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path, getpid (), count); @@ -183,7 +183,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep) opal_mutex_lock (&hugepage_module->lock); opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size); - opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size); + opal_atomic_add_fetch (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size); opal_mutex_unlock (&hugepage_module->lock); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose, @@ -207,7 +207,7 @@ void mca_mpool_hugepage_seg_free (void *ctx, void *addr) OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose, "freeing segment %p of size %lu bytes", addr, size)); munmap (addr, size); - opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size); + opal_atomic_add_fetch (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size); } opal_mutex_unlock (&hugepage_module->lock); diff --git a/opal/mca/rcache/grdma/rcache_grdma_module.c b/opal/mca/rcache/grdma/rcache_grdma_module.c index 327c2845a02..d2646c3c4a8 100644 --- a/opal/mca/rcache/grdma/rcache_grdma_module.c +++ b/opal/mca/rcache/grdma/rcache_grdma_module.c @@ -232,7 +232,7 @@ static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_ /* This segment fits fully within an existing segment. */ rcache_grdma->stat_cache_hit++; - int32_t ref_cnt = opal_atomic_add_32 (&grdma_reg->ref_count, 1); + int32_t ref_cnt = opal_atomic_add_fetch_32 (&grdma_reg->ref_count, 1); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output, "returning existing registration %p. 
references %d", (void *) grdma_reg, ref_cnt)); (void)ref_cnt; @@ -296,7 +296,7 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad /* get updated access flags */ access_flags = find_args.access_flags; - OPAL_THREAD_ADD32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1); + OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1); } item = opal_free_list_get_mt (&rcache_grdma->reg_list); @@ -380,7 +380,7 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr, (opal_list_item_t*)(*reg)); } rcache_grdma->stat_cache_found++; - opal_atomic_add_32 (&(*reg)->ref_count, 1); + opal_atomic_add_fetch_32 (&(*reg)->ref_count, 1); } else { rcache_grdma->stat_cache_notfound++; } @@ -398,7 +398,7 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache, int rc; opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock); - ref_count = opal_atomic_add_32 (®->ref_count, -1); + ref_count = opal_atomic_add_fetch_32 (®->ref_count, -1); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output, "returning registration %p, remaining references %d", (void *) reg, ref_count)); diff --git a/opal/mca/rcache/udreg/rcache_udreg_module.c b/opal/mca/rcache/udreg/rcache_udreg_module.c index 51c24676522..d3045247f17 100644 --- a/opal/mca/rcache/udreg/rcache_udreg_module.c +++ b/opal/mca/rcache/udreg/rcache_udreg_module.c @@ -301,7 +301,7 @@ static int mca_rcache_udreg_register(mca_rcache_base_module_t *rcache, void *add OPAL_THREAD_UNLOCK(&rcache_udreg->lock); *reg = udreg_reg; - (void) OPAL_THREAD_ADD32(&udreg_reg->ref_count, 1); + (void) OPAL_THREAD_ADD_FETCH32(&udreg_reg->ref_count, 1); udreg_reg->rcache_context = udreg_entry; return OPAL_SUCCESS; @@ -318,7 +318,7 @@ static int mca_rcache_udreg_deregister(mca_rcache_base_module_t *rcache, mca_rcache_base_registration_t *reg) { mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) rcache; - int32_t ref_count = OPAL_THREAD_ADD32 (®->ref_count, -1); + int32_t ref_count = OPAL_THREAD_ADD_FETCH32 (®->ref_count, -1); assert(ref_count >= 0); diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index 96a37156786..27e32c5fdd2 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -159,14 +159,14 @@ static const uint32_t ProcInc = 0x2; #define OPAL_CR_LOCK() \ { \ opal_cr_thread_in_library = true; \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, ProcInc); \ + OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, ProcInc); \ while( (opal_cr_thread_num_in_library & ThreadFlag ) != 0 ) { \ sched_yield(); \ } \ } #define OPAL_CR_UNLOCK() \ { \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ProcInc); \ + OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ProcInc); \ if( opal_cr_thread_num_in_library <= 0 ) { \ opal_cr_thread_in_library = false; \ } \ @@ -184,7 +184,7 @@ static const uint32_t ProcInc = 0x2; } #define OPAL_CR_THREAD_UNLOCK() \ { \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ThreadFlag); \ + OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ThreadFlag); \ } #endif diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index 30ddcc6ac9a..541b8b8ab9d 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -207,7 +207,7 @@ opal_progress(void) #else /* OPAL_PROGRESS_USE_TIMERS */ /* trip the event library if we've reached our tick rate and we are enabled */ - if (OPAL_THREAD_ADD32(&event_progress_counter, -1) <= 0 ) { + if 
(OPAL_THREAD_ADD_FETCH32(&event_progress_counter, -1) <= 0 ) { event_progress_counter = (num_event_users > 0) ? 0 : event_progress_delta; events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag); @@ -222,7 +222,7 @@ opal_progress(void) events += (callbacks[i])(); } - if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD32((volatile int32_t *) &num_calls, 1) & 0x7) == 0) { + if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &num_calls, 1) & 0x7) == 0) { /* run low priority callbacks once every 8 calls to opal_progress() */ for (i = 0 ; i < callbacks_lp_len ; ++i) { events += (callbacks_lp[i])(); @@ -259,11 +259,11 @@ opal_progress_event_users_increment(void) { #if OPAL_ENABLE_DEBUG int32_t val; - val = opal_atomic_add_32(&num_event_users, 1); + val = opal_atomic_add_fetch_32(&num_event_users, 1); OPAL_OUTPUT((debug_output, "progress: event_users_increment setting count to %d", val)); #else - (void)opal_atomic_add_32(&num_event_users, 1); + (void)opal_atomic_add_fetch_32(&num_event_users, 1); #endif #if OPAL_PROGRESS_USE_TIMERS @@ -281,11 +281,11 @@ opal_progress_event_users_decrement(void) { #if OPAL_ENABLE_DEBUG || ! OPAL_PROGRESS_USE_TIMERS int32_t val; - val = opal_atomic_sub_32(&num_event_users, 1); + val = opal_atomic_sub_fetch_32(&num_event_users, 1); OPAL_OUTPUT((debug_output, "progress: event_users_decrement setting count to %d", val)); #else - (void)opal_atomic_sub_32(&num_event_users, 1); + (void)opal_atomic_sub_fetch_32(&num_event_users, 1); #endif #if !OPAL_PROGRESS_USE_TIMERS diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 248735ae3aa..00752f0605e 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -94,50 +94,50 @@ static inline bool opal_set_using_threads(bool have) */ #define OPAL_THREAD_DEFINE_ATOMIC_ADD(type, suffix) \ -static inline type opal_thread_add_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_add_fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_add_ ## suffix (addr, delta); \ + return opal_atomic_add_fetch_ ## suffix (addr, delta); \ } \ \ return (*addr += delta); \ } #define OPAL_THREAD_DEFINE_ATOMIC_AND(type, suffix) \ -static inline type opal_thread_and_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_and_fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_and_ ## suffix (addr, delta); \ + return opal_atomic_and_fetch_ ## suffix (addr, delta); \ } \ \ return (*addr &= delta); \ } #define OPAL_THREAD_DEFINE_ATOMIC_OR(type, suffix) \ -static inline type opal_thread_or_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_or_fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_or_ ## suffix (addr, delta); \ + return opal_atomic_or_fetch_ ## suffix (addr, delta); \ } \ \ return (*addr |= delta); \ } #define OPAL_THREAD_DEFINE_ATOMIC_XOR(type, suffix) \ -static inline type opal_thread_xor_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_xor_fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_xor_ ## suffix (addr, delta); \ + return opal_atomic_xor_fetch_ ## suffix (addr, delta); \ } \ \ return (*addr ^= delta); \ } #define OPAL_THREAD_DEFINE_ATOMIC_SUB(type, suffix) \ -static inline type 
opal_thread_sub_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_sub_fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_sub_ ## suffix (addr, delta); \ + return opal_atomic_sub_fetch_ ## suffix (addr, delta); \ } \ \ return (*addr -= delta); \ @@ -185,23 +185,23 @@ OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(void *, intptr_t, ptr) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) -#define OPAL_THREAD_ADD32 opal_thread_add_32 -#define OPAL_ATOMIC_ADD32 opal_thread_add_32 +#define OPAL_THREAD_ADD_FETCH32 opal_thread_add_fetch_32 +#define OPAL_ATOMIC_ADD_FETCH32 opal_thread_add_fetch_32 -#define OPAL_THREAD_AND32 opal_thread_and_32 -#define OPAL_ATOMIC_AND32 opal_thread_and_32 +#define OPAL_THREAD_AND_FETCH32 opal_thread_and_fetch_32 +#define OPAL_ATOMIC_AND_FETCH32 opal_thread_and_fetch_32 -#define OPAL_THREAD_OR32 opal_thread_or_32 -#define OPAL_ATOMIC_OR32 opal_thread_or_32 +#define OPAL_THREAD_OR_FETCH32 opal_thread_or_fetch_32 +#define OPAL_ATOMIC_OR_FETCH32 opal_thread_or_fetch_32 -#define OPAL_THREAD_XOR32 opal_thread_xor_32 -#define OPAL_ATOMIC_XOR32 opal_thread_xor_32 +#define OPAL_THREAD_XOR_FETCH32 opal_thread_xor_fetch_32 +#define OPAL_ATOMIC_XOR_FETCH32 opal_thread_xor_fetch_32 -#define OPAL_THREAD_ADD_SIZE_T opal_thread_add_size_t -#define OPAL_ATOMIC_ADD_SIZE_T opal_thread_add_size_t +#define OPAL_THREAD_ADD_FETCH_SIZE_T opal_thread_add_fetch_size_t +#define OPAL_ATOMIC_ADD_FETCH_SIZE_T opal_thread_add_fetch_size_t -#define OPAL_THREAD_SUB_SIZE_T opal_thread_sub_size_t -#define OPAL_ATOMIC_SUB_SIZE_T opal_thread_sub_size_t +#define OPAL_THREAD_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t +#define OPAL_ATOMIC_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 @@ -225,17 +225,17 @@ OPAL_THREAD_DEFINE_ATOMIC_XOR(int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int64_t, int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) -#define OPAL_THREAD_ADD64 opal_thread_add_64 -#define OPAL_ATOMIC_ADD64 opal_thread_add_64 +#define OPAL_THREAD_ADD_FETCH64 opal_thread_add_fetch_64 +#define OPAL_ATOMIC_ADD_FETCH64 opal_thread_add_fetch_64 -#define OPAL_THREAD_AND64 opal_thread_and_64 -#define OPAL_ATOMIC_AND64 opal_thread_and_64 +#define OPAL_THREAD_AND_FETCH64 opal_thread_and_fetch_64 +#define OPAL_ATOMIC_AND_FETCH64 opal_thread_and_fetch_64 -#define OPAL_THREAD_OR64 opal_thread_or_64 -#define OPAL_ATOMIC_OR64 opal_thread_or_64 +#define OPAL_THREAD_OR_FETCH64 opal_thread_or_fetch_64 +#define OPAL_ATOMIC_OR_FETCH64 opal_thread_or_fetch_64 -#define OPAL_THREAD_XOR64 opal_thread_xor_64 -#define OPAL_ATOMIC_XOR64 opal_thread_xor_64 +#define OPAL_THREAD_XOR_FETCH64 opal_thread_xor_fetch_64 +#define OPAL_ATOMIC_XOR_FETCH64 opal_thread_xor_fetch_64 #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 diff --git a/opal/threads/wait_sync.h b/opal/threads/wait_sync.h index 9a582884373..1e594670354 100644 --- a/opal/threads/wait_sync.h +++ b/opal/threads/wait_sync.h @@ -109,7 +109,7 @@ static inline int sync_wait_st (ompi_wait_sync_t *sync) static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int status) { if( 
OPAL_LIKELY(OPAL_SUCCESS == status) ) { - if( 0 != (OPAL_THREAD_ADD32(&sync->count, -updates)) ) { + if( 0 != (OPAL_THREAD_ADD_FETCH32(&sync->count, -updates)) ) { return; } } else { diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index d90099caf46..c1500edeebb 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -673,7 +673,7 @@ static inline void get_completion_cb(void *ctx) { mca_spml_ikrit_get_request_t *get_req = (mca_spml_ikrit_get_request_t *) ctx; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_gets, -1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_gets, -1); free_get_req(get_req); } @@ -701,7 +701,7 @@ static inline int mca_spml_ikrit_get_async(void *src_addr, get_req->mxm_req.flags = 0; get_req->mxm_req.base.completed_cb = get_completion_cb; get_req->mxm_req.base.context = get_req; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_gets, 1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_gets, 1); SPML_IKRIT_MXM_POST_SEND(get_req->mxm_req); @@ -713,7 +713,7 @@ static inline void fence_completion_cb(void *ctx) mca_spml_ikrit_get_request_t *fence_req = (mca_spml_ikrit_get_request_t *) ctx; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_mxm_fences, -1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_mxm_fences, -1); free_get_req(fence_req); } @@ -735,7 +735,7 @@ static int mca_spml_ikrit_mxm_fence(int dst) fence_req->mxm_req.base.state = MXM_REQ_NEW; fence_req->mxm_req.base.completed_cb = fence_completion_cb; fence_req->mxm_req.base.context = fence_req; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_mxm_fences, 1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_mxm_fences, 1); SPML_IKRIT_MXM_POST_SEND(fence_req->mxm_req); return OSHMEM_SUCCESS; @@ -746,7 +746,7 @@ static inline void put_completion_cb(void *ctx) mca_spml_ikrit_put_request_t *put_req = (mca_spml_ikrit_put_request_t *) ctx; mxm_peer_t *peer; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, -1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_puts, -1); /* TODO: keep pointer to peer in the request */ peer = &mca_spml_ikrit.mxm_peers[put_req->pe]; @@ -848,7 +848,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, put_req->mxm_req.op.mem.remote_mkey = mkey; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, 1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_puts, 1); if (mca_spml_ikrit.mxm_peers[dst].need_fence == 0) { opal_list_append(&mca_spml_ikrit.active_peers, &mca_spml_ikrit.mxm_peers[dst].link); diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c index 75288feee7f..d19847cb198 100644 --- a/test/asm/atomic_cmpset.c +++ b/test/asm/atomic_cmpset.c @@ -82,11 +82,11 @@ static void *thread_main(void *arg) /* thread tests */ for (i = 0; i < nreps; i++) { - opal_atomic_add_32(&val32, 5); + opal_atomic_add_fetch_32(&val32, 5); #if OPAL_HAVE_ATOMIC_MATH_64 - opal_atomic_add_64(&val64, 5); + opal_atomic_add_fetch_64(&val64, 5); #endif - opal_atomic_add(&valint, 5); + opal_atomic_add_fetch(&valint, 5); } return (void *) (unsigned long) (rank + 1000); @@ -272,21 +272,21 @@ int main(int argc, char *argv[]) /* -- add_32 tests -- */ val32 = 42; - assert(opal_atomic_add_32(&val32, 5) == (42 + 5)); + assert(opal_atomic_add_fetch_32(&val32, 5) == (42 + 5)); opal_atomic_rmb(); assert((42 + 5) == val32); /* -- add_64 tests -- */ #if OPAL_HAVE_ATOMIC_MATH_64 val64 = 42; - assert(opal_atomic_add_64(&val64, 5) == (42 + 5)); + assert(opal_atomic_add_fetch_64(&val64, 5) == (42 + 5)); opal_atomic_rmb(); assert((42 + 5) == val64); #endif /* -- add_int 
tests -- */ valint = 42; - opal_atomic_add(&valint, 5); + opal_atomic_add_fetch(&valint, 5); opal_atomic_rmb(); assert((42 + 5) == valint); diff --git a/test/asm/atomic_math.c b/test/asm/atomic_math.c index f94299e8185..e489553d4a4 100644 --- a/test/asm/atomic_math.c +++ b/test/asm/atomic_math.c @@ -44,11 +44,11 @@ static void* atomic_math_test(void* arg) int i; for (i = 0 ; i < count ; ++i) { - (void)opal_atomic_add_32(&val32, 5); + (void)opal_atomic_add_fetch_32(&val32, 5); #if OPAL_HAVE_ATOMIC_MATH_64 - (void)opal_atomic_add_64(&val64, 6); + (void)opal_atomic_add_fetch_64(&val64, 6); #endif - (void)opal_atomic_add(&valint, 4); + (void)opal_atomic_add_fetch(&valint, 4); } return NULL; @@ -113,7 +113,7 @@ main(int argc, char *argv[]) if (ret == 77) return ret; opal_atomic_mb(); if (val32 != TEST_REPS * num_threads * 5) { - printf("opal_atomic_add32 failed. Expected %d, got %d.\n", + printf("opal_atomic_add_fetch32 failed. Expected %d, got %d.\n", TEST_REPS * num_threads * 5, val32); ret = 1; } @@ -121,7 +121,7 @@ main(int argc, char *argv[]) if (val64 != TEST_REPS * num_threads * 6) { /* Safe to case to (int) here because we know it's going to be a small value */ - printf("opal_atomic_add32 failed. Expected %d, got %d.\n", + printf("opal_atomic_add_fetch32 failed. Expected %d, got %d.\n", TEST_REPS * num_threads * 6, (int) val64); ret = 1; } @@ -129,7 +129,7 @@ main(int argc, char *argv[]) printf(" * skipping 64 bit tests\n"); #endif if (valint != TEST_REPS * num_threads * 4) { - printf("opal_atomic_add32 failed. Expected %d, got %d.\n", + printf("opal_atomic_add_fetch32 failed. Expected %d, got %d.\n", TEST_REPS * num_threads * 4, valint); ret = 1; } diff --git a/test/threads/opal_thread.c b/test/threads/opal_thread.c index 7fb11c6f880..f8a743a5352 100644 --- a/test/threads/opal_thread.c +++ b/test/threads/opal_thread.c @@ -36,13 +36,13 @@ static volatile int count = 0; static void* thr1_run(opal_object_t* obj) { - (void)opal_atomic_add(&count, 1); + (void)opal_atomic_add_fetch(&count, 1); return NULL; } static void* thr2_run(opal_object_t* obj) { - (void)opal_atomic_add(&count, 2); + (void)opal_atomic_add_fetch(&count, 2); return NULL; } From 7893248c5ab102f70f4dedd1d18e2531cc029e40 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 30 Nov 2017 09:25:21 -0700 Subject: [PATCH 5/5] opal/asm: add fetch-and-op atomics This commit adds support for fetch-and-op atomics. These are needed because bitwise and and or are irreversible operations, so the only way to obtain the pre-operation value is to fetch it atomically as part of the operation. Fetch-and-op is also the only semantic supported by C11 (there is no atomic_op_fetch, only atomic_fetch_op). The old op-and-fetch atomics are now defined in terms of fetch-and-op.
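For illustration only (not part of the changes below), the relationship between the two forms can be sketched with C11 stdatomic.h standing in for the opal_atomic_* wrappers; the names and values here are hypothetical:

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdint.h>

    int main (void)
    {
        _Atomic int32_t x = 0xff;

        /* C11 only provides the fetch-and-op form: the value held before
         * the operation is returned. */
        int32_t old = atomic_fetch_and (&x, 0x0f);
        assert (0xff == old && 0x0f == atomic_load (&x));

        /* op-and-fetch is recovered by re-applying the operand to the
         * returned value, which is the same pattern used to express the
         * op_fetch wrappers in terms of fetch_op. */
        atomic_store (&x, 0xff);
        int32_t cur = atomic_fetch_and (&x, 0x0f) & 0x0f;
        assert (0x0f == cur);

        /* The reverse derivation is impossible: the bits cleared by the
         * and cannot be reconstructed from (old & 0x0f) alone, hence the
         * need for native fetch-and-and / fetch-and-or primitives. */
        return 0;
    }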
Signed-off-by: Nathan Hjelm --- ompi/mca/coll/sm/coll_sm.h | 2 +- ompi/mca/coll/sm/coll_sm_barrier.c | 2 +- ompi/mca/coll/sm/coll_sm_module.c | 2 +- ompi/mca/osc/sm/osc_sm_active_target.c | 8 +- opal/class/opal_list.c | 2 +- opal/include/opal/sys/arm/atomic.h | 40 ++--- opal/include/opal/sys/arm64/atomic.h | 16 +- opal/include/opal/sys/atomic.h | 145 ++++++++---------- opal/include/opal/sys/atomic_impl.h | 92 +++++++---- opal/include/opal/sys/gcc_builtin/atomic.h | 40 ++--- opal/include/opal/sys/ia32/atomic.h | 8 +- opal/include/opal/sys/powerpc/atomic.h | 26 ++-- opal/include/opal/sys/sync_builtin/atomic.h | 40 ++--- opal/include/opal/sys/x86_64/atomic.h | 16 +- opal/mca/btl/vader/btl_vader_xpmem.c | 2 +- .../mpool/hugepage/mpool_hugepage_module.c | 4 +- opal/threads/thread_usage.h | 104 +++++++------ test/asm/atomic_cmpset.c | 4 +- test/asm/atomic_math.c | 142 ++++++++++++++++- test/threads/opal_thread.c | 4 +- 20 files changed, 428 insertions(+), 271 deletions(-) diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index eaff4518b10..b2da6ede425 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one; * Macro to release an in-use flag from this process */ #define FLAG_RELEASE(flag) \ - (void)opal_atomic_add_fetch(&(flag)->mcsiuf_num_procs_using, -1) + opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1) /** * Macro to copy a single segment in from a user buffer to a shared diff --git a/ompi/mca/coll/sm/coll_sm_barrier.c b/ompi/mca/coll/sm/coll_sm_barrier.c index b29199271d0..2722bbf09f5 100644 --- a/ompi/mca/coll/sm/coll_sm_barrier.c +++ b/ompi/mca/coll/sm/coll_sm_barrier.c @@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm, if (0 != rank) { /* Get parent *in* buffer */ parent = &data->mcb_barrier_control_parent[buffer_set]; - (void)opal_atomic_add_fetch(parent, 1); + opal_atomic_add (parent, 1); SPIN_CONDITION(0 != *me_out, exit_label2); *me_out = 0; diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 88393bebf09..8922a70eafe 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, OBJ_RETAIN(sm_module->previous_reduce_module); /* Indicate that we have successfully attached and setup */ - (void)opal_atomic_add_fetch(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); + opal_atomic_add (&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); /* Wait for everyone in this communicator to attach and setup */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 31a6ee645e9..ab0f73f87c6 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -151,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group, for (int i = 0 ; i < size ; ++i) { int rank_byte = ranks[i] >> OSC_SM_POST_BITS; - osc_sm_post_type_t old, rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); + osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); /* wait for rank to post */ while (!(module->posts[my_rank][rank_byte] & rank_bit)) { @@ -162,9 +162,9 @@ ompi_osc_sm_start(struct ompi_group_t *group, opal_atomic_rmb (); #if OPAL_HAVE_ATOMIC_MATH_64 - opal_atomic_xor_fetch_64 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); + (void) 
opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit); #else - opal_atomic_xor_fetch_32 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); + (void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit); #endif } @@ -247,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group, gsize = ompi_group_size(module->post_group); for (int i = 0 ; i < gsize ; ++i) { - (void) opal_atomic_add_fetch ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); + opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); } opal_atomic_wmb (); diff --git a/opal/class/opal_list.c b/opal/class/opal_list.c index dd0f654fd82..87cb1192b1b 100644 --- a/opal/class/opal_list.c +++ b/opal/class/opal_list.c @@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx) /* Spot check: ensure this item is only on the list that we just insertted it into */ - (void)opal_atomic_add_fetch( &(item->opal_list_item_refcount), 1 ); + opal_atomic_add ( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif diff --git a/opal/include/opal/sys/arm/atomic.h b/opal/include/opal/sys/arm/atomic.h index 94576b6ddcc..6d4db3ad7a4 100644 --- a/opal/include/opal/sys/arm/atomic.h +++ b/opal/include/opal/sys/arm/atomic.h @@ -209,44 +209,44 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int inc) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int inc) { - int32_t t; - int tmp; + int32_t t, old; + int tmp; - __asm__ __volatile__( - "1: ldrex %0, [%2] \n" - " add %0, %0, %3 \n" - " strex %1, %0, [%2] \n" - " cmp %1, #0 \n" + __asm__ __volatile__( + "1: ldrex %1, [%3] \n" + " add %0, %1, %4 \n" + " strex %2, %0, [%3] \n" + " cmp %2, #0 \n" " bne 1b \n" - : "=&r" (t), "=&r" (tmp) + : "=&r" (t), "=&r" (old), "=&r" (tmp) : "r" (v), "r" (inc) : "cc", "memory"); - return t; + return old; } #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int dec) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int dec) { - int32_t t; - int tmp; + int32_t t, old; + int tmp; - __asm__ __volatile__( - "1: ldrex %0, [%2] \n" - " sub %0, %0, %3 \n" - " strex %1, %0, [%2] \n" - " cmp %1, #0 \n" + __asm__ __volatile__( + "1: ldrex %1, [%3] \n" + " sub %0, %1, %4 \n" + " strex %2, %0, [%3] \n" + " cmp %2, #0 \n" " bne 1b \n" - : "=&r" (t), "=&r" (tmp) + : "=&r" (t), "=&r" (old), "=&r" (tmp) : "r" (v), "r" (dec) : "cc", "memory"); - return t; + return t; } #endif diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 8cc9313e14f..fd5a773a4f4 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -293,20 +293,20 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval) } #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ - static inline type opal_atomic_ ## name ## _fetch_ ## bits (volatile type *addr, type value) \ + static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \ { \ - type newval; \ + type newval, old; \ int32_t tmp; \ \ - __asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \ - " " inst " %" reg "0, %" reg "0, %" reg "3 \n" \ 
- " stxr %w1, %" reg "0, [%2] \n" \ - " cbnz %w1, 1b \n" \ - : "=&r" (newval), "=&r" (tmp) \ + __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ + " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ + " stxr %w2, %" reg "0, [%3] \n" \ + " cbnz %w2, 1b \n" \ + : "=&r" (newval), "=&r" (old), "=&r" (tmp) \ : "r" (addr), "r" (value) \ : "cc", "memory"); \ \ - return newval; \ + return old; \ } OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 2a64308f819..53e34333d8c 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -399,36 +399,16 @@ bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides - a static inline version of it (in assembly). If we have to fall - back on compare-exchange 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) -static inline -#endif -int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta); - -#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) -static inline -#endif -int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) -static inline -#endif -int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) -static inline -#endif -int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value); - -/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides - a static inline version of it (in assembly). If we have to fall - back to compare-exchange 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) -static inline -#endif -int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int delta); #endif /* OPAL_HAVE_ATOMIC_MATH_32 */ @@ -445,36 +425,15 @@ int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta); #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides - a static inline version of it (in assembly). 
If we have to fall - back to compare-exchange 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -static inline -#endif -int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta); - -#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -static inline -#endif -int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -static inline -#endif -int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -static inline -#endif -int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value); - -/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides - a static inline version of it (in assembly). If we have to fall - back to compare-exchange 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -static inline -#endif -int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta); #endif /* OPAL_HAVE_ATOMIC_MATH_32 */ @@ -501,6 +460,19 @@ opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta) #error "Unknown size_t size" #endif } + +static inline size_t +opal_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t) opal_atomic_fetch_add_32((int32_t*) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t) opal_atomic_fetch_add_64((int64_t*) addr, delta); +#else +#error "Unknown size_t size" +#endif +} + static inline size_t opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta) { @@ -512,13 +484,30 @@ opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta) #error "Unknown size_t size" #endif } + +static inline size_t +opal_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t) opal_atomic_fetch_sub_32((int32_t*) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t) opal_atomic_fetch_sub_64((int64_t*) addr, delta); +#else +#error "Unknown size_t size" +#endif +} + #else #if SIZEOF_SIZE_T == 4 -#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta)) -#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((int32_t*) addr, delta)) -#elif SIZEOF_SIZE_T ==8 -#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta)) -#define 
opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((int64_t*) addr, delta)) +#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((volatile int32_t *) addr, delta)) +#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_32((volatile int32_t *) addr, delta)) +#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta)) +#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta)) +#elif SIZEOF_SIZE_T == 8 +#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((volatile int64_t *) addr, delta)) +#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_64((volatile int64_t *) addr, delta)) +#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta)) +#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta)) #else #error "Unknown size_t size" #endif @@ -599,19 +588,15 @@ static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* a #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) -static inline void opal_atomic_add_fetch_xx(volatile void* addr, +static inline void opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length); -static inline void opal_atomic_sub_fetch_xx(volatile void* addr, +static inline void opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length); -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ); -static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -static inline int64_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ); -static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); -#else -#error Atomic arithmetic on pointers not supported -#endif + +static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ); +static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta ); +static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); +static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta ); /** * Atomically increment the content depending on the type. This @@ -623,8 +608,8 @@ static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delt * @param addr Address of * @param delta Value to add (converted to ). */ -#define opal_atomic_add_fetch( ADDR, VALUE ) \ - opal_atomic_add_fetch_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ +#define opal_atomic_add( ADDR, VALUE ) \ + opal_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ sizeof(*(ADDR)) ) /** @@ -637,8 +622,8 @@ static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delt * @param addr Address of * @param delta Value to substract (converted to ). 
*/ -#define opal_atomic_sub_fetch( ADDR, VALUE ) \ - opal_atomic_sub_fetch_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ +#define opal_atomic_sub( ADDR, VALUE ) \ + opal_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ sizeof(*(ADDR)) ) #endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */ diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 8c54a62b627..b3aba9af66b 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -39,16 +39,15 @@ *********************************************************************/ #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operand, name) \ - static inline type opal_atomic_ ## name ## _fetch_ ## bits (volatile type *addr, type value) \ +#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \ + static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \ { \ - type oldval, newval; \ + type oldval; \ do { \ oldval = *addr; \ - newval = oldval operand value; \ - } while (!opal_atomic_compare_exchange_strong_ ## bits (addr, &oldval, newval)); \ + } while (!opal_atomic_compare_exchange_strong_ ## bits (addr, &oldval, oldval operation value)); \ \ - return newval; \ + return oldval; \ } #if !defined(OPAL_HAVE_ATOMIC_SWAP_32) @@ -264,20 +263,19 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) #if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 - static inline void -opal_atomic_add_fetch_xx(volatile void* addr, int32_t value, size_t length) + opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) { switch( length ) { #if OPAL_HAVE_ATOMIC_ADD_32 case 4: - opal_atomic_add_fetch_32( (volatile int32_t*)addr, (int32_t)value ); + (void) opal_atomic_fetch_add_32( (volatile int32_t*)addr, (int32_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ #if OPAL_HAVE_ATOMIC_ADD_64 case 8: - opal_atomic_add_fetch_64( (volatile int64_t*)addr, (int64_t)value ); + (void) opal_atomic_fetch_add_64( (volatile int64_t*)addr, (int64_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_ADD_64 */ default: @@ -289,18 +287,18 @@ opal_atomic_add_fetch_xx(volatile void* addr, int32_t value, size_t length) static inline void -opal_atomic_sub_fetch_xx(volatile void* addr, int32_t value, size_t length) +opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) { switch( length ) { #if OPAL_HAVE_ATOMIC_SUB_32 case 4: - opal_atomic_sub_fetch_32( (volatile int32_t*)addr, (int32_t)value ); + (void) opal_atomic_fetch_sub_32( (volatile int32_t*)addr, (int32_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_SUB_32 */ #if OPAL_HAVE_ATOMIC_SUB_64 case 8: - opal_atomic_sub_fetch_64( (volatile int64_t*)addr, (int64_t)value ); + (void) opal_atomic_fetch_sub_64( (volatile int64_t*)addr, (int64_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_SUB_64 */ default: @@ -310,47 +308,77 @@ opal_atomic_sub_fetch_xx(volatile void* addr, int32_t value, size_t length) } } -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 -static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, +#define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \ + static inline type opal_atomic_ ## op ## _fetch_ ## suffix (volatile ptr_type *addr, type value) \ + { \ + return opal_atomic_fetch_ ## op ## _ ## suffix (addr, value) operation value; \ + } + +OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, 
int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32) + +#if OPAL_HAVE_ATOMIC_MATH_64 +OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64) +#endif + +static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta ) { - return opal_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta); -} +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 + return opal_atomic_fetch_add_32((int32_t*) addr, (unsigned long) delta); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 -static inline int64_t opal_atomic_add_fetch_ptr( volatile void* addr, + return opal_atomic_fetch_add_64((int64_t*) addr, (unsigned long) delta); +#else + abort (); + return 0; +#endif +} + +static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ) { +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 + return opal_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta); +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 return opal_atomic_add_fetch_64((int64_t*) addr, (unsigned long) delta); -} #else -static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, + abort (); + return 0; +#endif +} + +static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta ) { +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 + return opal_atomic_fetch_sub_32((int32_t*) addr, (unsigned long) delta); +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 + return opal_atomic_fetch_sub_64((int64_t*) addr, (unsigned long) delta); +#else abort(); return 0; -} #endif +} -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, +static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ) { +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 return opal_atomic_sub_fetch_32((int32_t*) addr, (unsigned long) delta); -} #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, - void* delta ) -{ return opal_atomic_sub_fetch_64((int64_t*) addr, (unsigned long) delta); -} #else -static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, - void* delta ) -{ abort(); return 0; -} #endif +} #endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */ diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index ecef65f3d07..c6ef6eb9c30 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -104,29 +104,29 @@ static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newva return oldval; } -static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta) { - return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) { - return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED); } 
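/* Illustrative aside, not part of the patch: the gcc __atomic builtins come in
 * both flavors with the same signature. __atomic_fetch_and (ptr, val, order)
 * returns the value the object held before the operation, while
 * __atomic_and_fetch (ptr, val, order) returns the value after it. For example:
 *
 *     int32_t v = 0xff;
 *     int32_t old = __atomic_fetch_and (&v, 0x0f, __ATOMIC_RELAXED);  (old == 0xff, v == 0x0f)
 *     int32_t cur = __atomic_and_fetch (&v, 0x0f, __ATOMIC_RELAXED);  (cur == 0x0f)
 *
 * This backend therefore only needs to expose the fetch-and-op form; the
 * op-and-fetch variants are derived from it generically in atomic_impl.h. */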
-static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) { - return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) { - return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta) { - return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED); } static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) @@ -152,29 +152,29 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva return oldval; } -static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta) { - return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) { - return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) { - return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) { - return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta) { - return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED); } #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 diff --git a/opal/include/opal/sys/ia32/atomic.h b/opal/include/opal/sys/ia32/atomic.h index 6b71dd4f2e3..bb863dec14a 100644 --- a/opal/include/opal/sys/ia32/atomic.h +++ b/opal/include/opal/sys/ia32/atomic.h @@ -130,7 +130,7 @@ static inline int32_t opal_atomic_swap_32( volatile int32_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -139,7 +139,7 @@ static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret+i); + return ret; } @@ -150,7 +150,7 @@ static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. 
*/ -static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i) { int ret = -i; __asm__ __volatile__( @@ -159,7 +159,7 @@ static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret-i); + return ret; } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 4aebb43fdd9..bf6978aa852 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -235,20 +235,20 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval #if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ -static inline int64_t opal_atomic_ ## type ## _fetch_64(volatile int64_t* v, int64_t val) \ +static inline int64_t opal_atomic_fetch_ ## type ## _64(volatile int64_t* v, int64_t val) \ { \ - int64_t t; \ + int64_t t, old; \ \ __asm__ __volatile__( \ - "1: ldarx %0, 0, %3 \n\t" \ - " " #instr " %0, %2, %0 \n\t" \ - " stdcx. %0, 0, %3 \n\t" \ + "1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. %0, 0, %4 \n\t" \ " bne- 1b \n\t" \ - : "=&r" (t), "=m" (*v) \ + : "=&r" (t), "=&r" (old), "=m" (*v) \ : "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \ : "cc"); \ \ - return t; \ + return old; \ } OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) @@ -396,16 +396,16 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ -static inline int32_t opal_atomic_ ## type ## _fetch_32(volatile int32_t* v, int val) \ +static inline int32_t opal_atomic_fetch_ ## type ## _32(volatile int32_t* v, int val) \ { \ - int32_t t; \ + int32_t t, old; \ \ __asm__ __volatile__( \ - "1: lwarx %0, 0, %3 \n\t" \ - " " #instr " %0, %2, %0 \n\t" \ - " stwcx. %0, 0, %3 \n\t" \ + "1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. 
%0, 0, %4 \n\t" \ " bne- 1b \n\t" \ - : "=&r" (t), "=m" (*v) \ + : "=&r" (t), "=&r" (old), "=m" (*v) \ : "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \ : "cc"); \ \ diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h index f80a29684c4..4a6cfbfbe06 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -69,33 +69,33 @@ static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *add #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta) { - return __sync_add_and_fetch(addr, delta); + return __sync_fetch_and_add(addr, delta); } #define OPAL_HAVE_ATOMIC_AND_32 1 -static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) { - return __sync_and_and_fetch(addr, value); + return __sync_fetch_and_and(addr, value); } #define OPAL_HAVE_ATOMIC_OR_32 1 -static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) { - return __sync_or_and_fetch(addr, value); + return __sync_fetch_and_or(addr, value); } #define OPAL_HAVE_ATOMIC_XOR_32 1 -static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) { - return __sync_xor_and_fetch(addr, value); + return __sync_fetch_and_xor(addr, value); } #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta) { - return __sync_sub_and_fetch(addr, delta); + return __sync_fetch_and_sub(addr, delta); } #if OPAL_ASM_SYNC_HAVE_64BIT @@ -115,33 +115,33 @@ static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *add #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 -static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta) { - return __sync_add_and_fetch(addr, delta); + return __sync_fetch_and_add(addr, delta); } #define OPAL_HAVE_ATOMIC_AND_64 1 -static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) { - return __sync_and_and_fetch(addr, value); + return __sync_fetch_and_and(addr, value); } #define OPAL_HAVE_ATOMIC_OR_64 1 -static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) { - return __sync_or_and_fetch(addr, value); + return __sync_fetch_and_or(addr, value); } #define OPAL_HAVE_ATOMIC_XOR_64 1 -static inline int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) { - return __sync_xor_and_fetch(addr, value); + return __sync_fetch_and_xor(addr, value); } #define OPAL_HAVE_ATOMIC_SUB_64 1 -static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta) +static inline int64_t 
opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta) { - return __sync_sub_and_fetch(addr, delta); + return __sync_fetch_and_sub(addr, delta); } #endif diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 046b4ad7d25..9590ada0816 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -196,7 +196,7 @@ static inline int64_t opal_atomic_swap_64( volatile int64_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -205,7 +205,7 @@ static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret+i); + return ret; } #define OPAL_HAVE_ATOMIC_ADD_64 1 @@ -217,7 +217,7 @@ static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int i) * * Atomically adds @i to @v. */ -static inline int64_t opal_atomic_add_fetch_64(volatile int64_t* v, int64_t i) +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t* v, int64_t i) { int64_t ret = i; __asm__ __volatile__( @@ -226,7 +226,7 @@ static inline int64_t opal_atomic_add_fetch_64(volatile int64_t* v, int64_t i) : :"memory", "cc" ); - return (ret+i); + return ret; } #define OPAL_HAVE_ATOMIC_SUB_32 1 @@ -238,7 +238,7 @@ static inline int64_t opal_atomic_add_fetch_64(volatile int64_t* v, int64_t i) * * Atomically subtracts @i from @v. */ -static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i) { int ret = -i; __asm__ __volatile__( @@ -247,7 +247,7 @@ static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret-i); + return ret; } #define OPAL_HAVE_ATOMIC_SUB_64 1 @@ -259,7 +259,7 @@ static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. 
*/ -static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t* v, int64_t i) +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t* v, int64_t i) { int64_t ret = -i; __asm__ __volatile__( @@ -268,7 +268,7 @@ static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t* v, int64_t i) : :"memory", "cc" ); - return (ret-i); + return ret; } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index 7f8cd7da73c..00275df48cb 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -54,7 +54,7 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx) vader_ctx->reg[0] = reg; if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) { - (void)opal_atomic_add_fetch (®->ref_count, 1); + opal_atomic_add (®->ref_count, 1); return 1; } diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_module.c b/opal/mca/mpool/hugepage/mpool_hugepage_module.c index 6af7003563f..89a8b7eb6d3 100644 --- a/opal/mca/mpool/hugepage/mpool_hugepage_module.c +++ b/opal/mca/mpool/hugepage/mpool_hugepage_module.c @@ -183,7 +183,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep) opal_mutex_lock (&hugepage_module->lock); opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size); - opal_atomic_add_fetch (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size); + opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size); opal_mutex_unlock (&hugepage_module->lock); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose, @@ -207,7 +207,7 @@ void mca_mpool_hugepage_seg_free (void *ctx, void *addr) OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose, "freeing segment %p of size %lu bytes", addr, size)); munmap (addr, size); - opal_atomic_add_fetch (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size); + opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size); } opal_mutex_unlock (&hugepage_module->lock); diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 00752f0605e..85492d5f891 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -93,54 +93,26 @@ static inline bool opal_set_using_threads(bool have) * indicates that threads are in use by the application or library. 
*/ -#define OPAL_THREAD_DEFINE_ATOMIC_ADD(type, suffix) \ -static inline type opal_thread_add_fetch_ ## suffix (volatile type *addr, type delta) \ +#define OPAL_THREAD_DEFINE_ATOMIC_OP(type, name, operator, suffix) \ +static inline type opal_thread_ ## name ## _fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_add_fetch_ ## suffix (addr, delta); \ + return opal_atomic_ ## name ## _fetch_ ## suffix (addr, delta); \ } \ \ - return (*addr += delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_AND(type, suffix) \ -static inline type opal_thread_and_fetch_ ## suffix (volatile type *addr, type delta) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_and_fetch_ ## suffix (addr, delta); \ - } \ - \ - return (*addr &= delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_OR(type, suffix) \ -static inline type opal_thread_or_fetch_ ## suffix (volatile type *addr, type delta) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_or_fetch_ ## suffix (addr, delta); \ - } \ + *addr = *addr operator delta; \ + return *addr; \ +} \ \ - return (*addr |= delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_XOR(type, suffix) \ -static inline type opal_thread_xor_fetch_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_fetch_ ## name ## _ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_xor_fetch_ ## suffix (addr, delta); \ + return opal_atomic_fetch_ ## name ## _ ## suffix (addr, delta); \ } \ \ - return (*addr ^= delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_SUB(type, suffix) \ -static inline type opal_thread_sub_fetch_ ## suffix (volatile type *addr, type delta) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_sub_fetch_ ## suffix (addr, delta); \ - } \ - \ - return (*addr -= delta); \ + type old = *addr; \ + *addr = old operator delta; \ + return old; \ } #define OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \ @@ -173,13 +145,14 @@ static inline type opal_thread_swap_ ## suffix (volatile addr_type *ptr, type ne return old; \ } -OPAL_THREAD_DEFINE_ATOMIC_ADD(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_ADD(size_t, size_t) -OPAL_THREAD_DEFINE_ATOMIC_AND(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_OR(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_XOR(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_SUB(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_SUB(size_t, size_t) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, add, +, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(size_t, add, +, size_t) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, and, &, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, or, |, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, xor, ^, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, sub, -, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(size_t, sub, -, size_t) + OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int32_t, int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(void *, intptr_t, ptr) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32) @@ -203,6 +176,24 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) #define OPAL_THREAD_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t #define OPAL_ATOMIC_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t +#define OPAL_THREAD_FETCH_ADD32 opal_thread_fetch_add_32 +#define OPAL_ATOMIC_FETCH_ADD32 opal_thread_fetch_add_32 + +#define OPAL_THREAD_FETCH_AND32 opal_thread_fetch_and_32 +#define OPAL_ATOMIC_FETCH_AND32 opal_thread_fetch_and_32 + +#define OPAL_THREAD_FETCH_OR32 
@@ -203,6 +176,24 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr)
 #define OPAL_THREAD_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t
 #define OPAL_ATOMIC_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t

+#define OPAL_THREAD_FETCH_ADD32 opal_thread_fetch_add_32
+#define OPAL_ATOMIC_FETCH_ADD32 opal_thread_fetch_add_32
+
+#define OPAL_THREAD_FETCH_AND32 opal_thread_fetch_and_32
+#define OPAL_ATOMIC_FETCH_AND32 opal_thread_fetch_and_32
+
+#define OPAL_THREAD_FETCH_OR32 opal_thread_fetch_or_32
+#define OPAL_ATOMIC_FETCH_OR32 opal_thread_fetch_or_32
+
+#define OPAL_THREAD_FETCH_XOR32 opal_thread_fetch_xor_32
+#define OPAL_ATOMIC_FETCH_XOR32 opal_thread_fetch_xor_32
+
+#define OPAL_THREAD_FETCH_ADD_SIZE_T opal_thread_fetch_add_size_t
+#define OPAL_ATOMIC_FETCH_ADD_SIZE_T opal_thread_fetch_add_size_t
+
+#define OPAL_THREAD_FETCH_SUB_SIZE_T opal_thread_fetch_sub_size_t
+#define OPAL_ATOMIC_FETCH_SUB_SIZE_T opal_thread_fetch_sub_size_t
+
 #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32
 #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32

@@ -218,10 +209,11 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr)

 /* define 64-bit macros is 64-bit atomic math is available */
 #if OPAL_HAVE_ATOMIC_MATH_64
-OPAL_THREAD_DEFINE_ATOMIC_ADD(int64_t, 64)
-OPAL_THREAD_DEFINE_ATOMIC_AND(int64_t, 64)
-OPAL_THREAD_DEFINE_ATOMIC_OR(int64_t, 64)
-OPAL_THREAD_DEFINE_ATOMIC_XOR(int64_t, 64)
+OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, add, +, 64)
+OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, and, &, 64)
+OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, or, |, 64)
+OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, xor, ^, 64)
+OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, sub, -, 64)
 OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int64_t, int64_t, 64)
 OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64)

@@ -237,6 +229,18 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64)
 #define OPAL_THREAD_XOR_FETCH64 opal_thread_xor_fetch_64
 #define OPAL_ATOMIC_XOR_FETCH64 opal_thread_xor_fetch_64

+#define OPAL_THREAD_FETCH_ADD64 opal_thread_fetch_add_64
+#define OPAL_ATOMIC_FETCH_ADD64 opal_thread_fetch_add_64
+
+#define OPAL_THREAD_FETCH_AND64 opal_thread_fetch_and_64
+#define OPAL_ATOMIC_FETCH_AND64 opal_thread_fetch_and_64
+
+#define OPAL_THREAD_FETCH_OR64 opal_thread_fetch_or_64
+#define OPAL_ATOMIC_FETCH_OR64 opal_thread_fetch_or_64
+
+#define OPAL_THREAD_FETCH_XOR64 opal_thread_fetch_xor_64
+#define OPAL_ATOMIC_FETCH_XOR64 opal_thread_fetch_xor_64
+
 #define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64
 #define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64

diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c
index d19847cb198..4a06847703f 100644
--- a/test/asm/atomic_cmpset.c
+++ b/test/asm/atomic_cmpset.c
@@ -86,7 +86,7 @@ static void *thread_main(void *arg)
 #if OPAL_HAVE_ATOMIC_MATH_64
         opal_atomic_add_fetch_64(&val64, 5);
 #endif
-        opal_atomic_add_fetch(&valint, 5);
+        opal_atomic_add (&valint, 5);
     }

     return (void *) (unsigned long) (rank + 1000);
@@ -286,7 +286,7 @@ int main(int argc, char *argv[])

     /* -- add_int tests -- */
     valint = 42;
-    opal_atomic_add_fetch(&valint, 5);
+    opal_atomic_add (&valint, 5);
     opal_atomic_rmb();
     assert((42 + 5) == valint);

diff --git a/test/asm/atomic_math.c b/test/asm/atomic_math.c
index e489553d4a4..54f771cc26b 100644
--- a/test/asm/atomic_math.c
+++ b/test/asm/atomic_math.c
@@ -48,7 +48,7 @@ static void* atomic_math_test(void* arg)
 #if OPAL_HAVE_ATOMIC_MATH_64
         (void)opal_atomic_add_fetch_64(&val64, 6);
 #endif
-        (void)opal_atomic_add_fetch(&valint, 4);
+        opal_atomic_add (&valint, 4);
     }

     return NULL;
@@ -100,6 +100,10 @@ atomic_math_test_th(int count, int thr_count)
 int
 main(int argc, char *argv[])
 {
+    int32_t test32;
+#if OPAL_HAVE_ATOMIC_MATH_64
+    int64_t test64;
+#endif
     int ret = 77;
     int num_threads = 1;

@@ -109,6 +113,142 @@ main(int argc, char *argv[])
     }
     num_threads = atoi(argv[1]);

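    /* Reader's reference, illustration only: the checks added below exercise each
     * new pair of primitives on a single running value.  The op_fetch flavour is
     * expected to hand back the updated value, the fetch_op flavour the prior
     * value, and both leave the same result in memory, so val32 should move
     * through 0 -> 17 -> 30 -> 24 -> 16 -> 19 -> 23 -> 20 -> 17 -> 3 -> 0 as the
     * add, and, or, xor and sub variants run in turn (val64 follows the same
     * sequence further down). */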
+    test32 = opal_atomic_add_fetch_32 (&val32, 17);
+    if (test32 != 17 || val32 != 17) {
+        fprintf (stderr, "error in opal_atomic_add_fetch_32. expected (17, 17), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+    test32 = opal_atomic_fetch_add_32 (&val32, 13);
+    if (test32 != 17 || val32 != 30) {
+        fprintf (stderr, "error in opal_atomic_fetch_add_32. expected (17, 30), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+
+
+    test32 = opal_atomic_and_fetch_32 (&val32, 0x18);
+    if (test32 != 24 || val32 != 24) {
+        fprintf (stderr, "error in opal_atomic_and_fetch_32. expected (24, 24), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+    test32 = opal_atomic_fetch_and_32 (&val32, 0x10);
+    if (test32 != 24 || val32 != 16) {
+        fprintf (stderr, "error in opal_atomic_fetch_and_32. expected (24, 16), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+
+
+    test32 = opal_atomic_or_fetch_32 (&val32, 0x03);
+    if (test32 != 19 || val32 != 19) {
+        fprintf (stderr, "error in opal_atomic_or_fetch_32. expected (19, 19), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+    test32 = opal_atomic_fetch_or_32 (&val32, 0x04);
+    if (test32 != 19 || val32 != 23) {
+        fprintf (stderr, "error in opal_atomic_fetch_or_32. expected (19, 23), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+
+    test32 = opal_atomic_xor_fetch_32 (&val32, 0x03);
+    if (test32 != 20 || val32 != 20) {
+        fprintf (stderr, "error in opal_atomic_xor_fetch_32. expected (20, 20), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+    test32 = opal_atomic_fetch_xor_32 (&val32, 0x05);
+    if (test32 != 20 || val32 != 17) {
+        fprintf (stderr, "error in opal_atomic_fetch_xor_32. expected (20, 17), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+
+
+    test32 = opal_atomic_sub_fetch_32 (&val32, 14);
+    if (test32 != 3 || val32 != 3) {
+        fprintf (stderr, "error in opal_atomic_sub_fetch_32. expected (3, 3), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
+    test32 = opal_atomic_fetch_sub_32 (&val32, 3);
+    if (test32 != 3 || val32 != 0) {
+        fprintf (stderr, "error in opal_atomic_fetch_sub_32. expected (3, 0), got (%d, %d)\n", test32, val32);
+        exit(EXIT_FAILURE);
+    }
+
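    /* Why the fetch_* (return-the-prior-value) flavour matters, sketched with the
     * 32-bit primitive exercised above; the flags variable is hypothetical and
     * shown for illustration only:
     *
     *     static volatile int32_t flags = 0;
     *
     *     int32_t prev = opal_atomic_fetch_or_32 (&flags, 0x1);
     *     if (0 == (prev & 0x1)) {
     *         // this caller is the one that flipped the bit from 0 to 1
     *     }
     */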
+#if OPAL_HAVE_ATOMIC_MATH_64
+    test64 = opal_atomic_add_fetch_64 (&val64, 17);
+    if (test64 != 17 || val64 != 17) {
+        fprintf (stderr, "error in opal_atomic_add_fetch_64. expected (17, 17), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+    test64 = opal_atomic_fetch_add_64 (&val64, 13);
+    if (test64 != 17 || val64 != 30) {
+        fprintf (stderr, "error in opal_atomic_fetch_add_64. expected (17, 30), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+
+
+    test64 = opal_atomic_and_fetch_64 (&val64, 0x18);
+    if (test64 != 24 || val64 != 24) {
+        fprintf (stderr, "error in opal_atomic_and_fetch_64. expected (24, 24), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+    test64 = opal_atomic_fetch_and_64 (&val64, 0x10);
+    if (test64 != 24 || val64 != 16) {
+        fprintf (stderr, "error in opal_atomic_fetch_and_64. expected (24, 16), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+
+
+    test64 = opal_atomic_or_fetch_64 (&val64, 0x03);
+    if (test64 != 19 || val64 != 19) {
+        fprintf (stderr, "error in opal_atomic_or_fetch_64. expected (19, 19), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+    test64 = opal_atomic_fetch_or_64 (&val64, 0x04);
+    if (test64 != 19 || val64 != 23) {
+        fprintf (stderr, "error in opal_atomic_fetch_or_64. expected (19, 23), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+
+    test64 = opal_atomic_xor_fetch_64 (&val64, 0x03);
+    if (test64 != 20 || val64 != 20) {
+        fprintf (stderr, "error in opal_atomic_xor_fetch_64. expected (20, 20), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+    test64 = opal_atomic_fetch_xor_64 (&val64, 0x05);
+    if (test64 != 20 || val64 != 17) {
+        fprintf (stderr, "error in opal_atomic_fetch_xor_64. expected (20, 17), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+
+
+    test64 = opal_atomic_sub_fetch_64 (&val64, 14);
+    if (test64 != 3 || val64 != 3) {
+        fprintf (stderr, "error in opal_atomic_sub_fetch_64. expected (3, 3), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+
+    test64 = opal_atomic_fetch_sub_64 (&val64, 3);
+    if (test64 != 3 || val64 != 0) {
+        fprintf (stderr, "error in opal_atomic_fetch_sub_64. expected (3, 0), got (%" PRId64 ", %" PRId64 ")\n", test64, val64);
+        exit(EXIT_FAILURE);
+    }
+#endif
+
     ret = atomic_math_test_th(TEST_REPS, num_threads);
     if (ret == 77) return ret;
     opal_atomic_mb();
diff --git a/test/threads/opal_thread.c b/test/threads/opal_thread.c
index f8a743a5352..169c8b5984c 100644
--- a/test/threads/opal_thread.c
+++ b/test/threads/opal_thread.c
@@ -36,13 +36,13 @@ static volatile int count = 0;

 static void* thr1_run(opal_object_t* obj)
 {
-    (void)opal_atomic_add_fetch(&count, 1);
+    opal_atomic_add (&count, 1);
     return NULL;
 }

 static void* thr2_run(opal_object_t* obj)
 {
-    (void)opal_atomic_add_fetch(&count, 2);
+    opal_atomic_add (&count, 2);
     return NULL;
 }
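A usage sketch of the new fetch-flavoured thread wrappers defined in opal/threads/thread_usage.h; the request_count counter is hypothetical and shown only to illustrate the calling convention:

    #include "opal/threads/thread_usage.h"

    static volatile int32_t request_count = 0;   /* hypothetical counter */

    static void count_request (void)
    {
        /* returns the value request_count held before the increment; when
         * opal_using_threads() is false this falls back to a plain update */
        int32_t previous = OPAL_THREAD_FETCH_ADD32(&request_count, 1);
        (void) previous;
    }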