opal/asm: change ll/sc atomics to macros #5209
@@ -162,28 +162,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t
     return ret;
 }
 
-static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
-{
-    int32_t ret;
-
-    __asm__ __volatile__ ("ldaxr %w0, [%1] \n"
-                          : "=&r" (ret)
-                          : "r" (addr));
-
-    return ret;
-}
-
-static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
-{
-    int ret;
-
-    __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n"
-                          : "=&r" (ret)
-                          : "r" (addr), "r" (newval)
-                          : "cc", "memory");
-
-    return ret == 0;
-}
+#define opal_atomic_ll_32(addr, ret) \
+    do { \
+        volatile int32_t *_addr = (addr); \
+        int32_t _ret; \
+        \
+        __asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
+                              : "=&r" (_ret) \
+                              : "r" (_addr)); \
+        \
+        ret = (typeof(ret)) _ret; \
Review comment: Does this conversion work for any signed type?
+    } while (0)
+
+#define opal_atomic_sc_32(addr, newval, ret) \
+    do { \
+        volatile int32_t *_addr = (addr); \
+        int32_t _newval = (int32_t) newval; \
+        int _ret; \
+        \
+        __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
+                              : "=&r" (_ret) \
+                              : "r" (_addr), "r" (_newval) \
+                              : "cc", "memory"); \
+        \
+        ret = (_ret == 0); \
+    } while (0)
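On the question above about the typeof cast: the intermediate _ret is always an int32_t, so assigning it through (typeof(ret)) is an ordinary C conversion; for a wider signed destination it sign-extends, and for narrower or unsigned destinations the usual conversion rules apply. As a rough illustration only (not part of this change; example_swap_32 is a made-up name), the new out-parameter macros would typically be used in a retry loop like this:

#include <stdint.h>

/* Hypothetical caller of the new macros -- illustration, not PR code. */
static inline int32_t example_swap_32 (volatile int32_t *addr, int32_t newval)
{
    int32_t old;
    int success;

    do {
        opal_atomic_ll_32 (addr, old);             /* ldaxr: load-linked, takes the reservation */
        opal_atomic_sc_32 (addr, newval, success); /* stlxr: store-conditional, success = (status == 0) */
    } while (!success);                            /* retry until the store-conditional succeeds */

    return old;
}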
 
 static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
 {

@@ -269,28 +272,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t
     return ret;
 }
 
-static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
-{
-    int64_t ret;
-
-    __asm__ __volatile__ ("ldaxr %0, [%1] \n"
-                          : "=&r" (ret)
-                          : "r" (addr));
-
-    return ret;
-}
-
-static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
-{
-    int ret;
-
-    __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n"
-                          : "=&r" (ret)
-                          : "r" (addr), "r" (newval)
-                          : "cc", "memory");
-
-    return ret == 0;
-}
+#define opal_atomic_ll_64(addr, ret) \
+    do { \
+        volatile int64_t *_addr = (addr); \
+        int64_t _ret; \
+        \
+        __asm__ __volatile__ ("ldaxr %0, [%1] \n" \
+                              : "=&r" (_ret) \
+                              : "r" (_addr)); \
+        \
+        ret = (typeof(ret)) _ret; \
+    } while (0)
+
+#define opal_atomic_sc_64(addr, newval, ret) \
+    do { \
+        volatile int64_t *_addr = (addr); \
+        int64_t _newval = (int64_t) newval; \
+        int _ret; \
+        \
+        __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
+                              : "=&r" (_ret) \
+                              : "r" (_addr), "r" (_newval) \
+                              : "cc", "memory"); \
+        \
+        ret = (_ret == 0); \
+    } while (0)
 
 #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
 static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
@@ -165,31 +165,35 @@ static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *add
     return ret;
 }
 
-static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
-{
-    int32_t ret;
-
-    __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
-                          : "=&r" (ret)
-                          : "r" (addr)
-                          );
-    return ret;
-}
-
-static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
-{
-    int32_t ret, foo;
-
-    __asm__ __volatile__ (" stwcx.  %4, 0, %3 \n\t"
-                          " li %0,0 \n\t"
-                          " bne- 1f \n\t"
-                          " ori %0,%0,1 \n\t"
-                          "1:"
-                          : "=r" (ret), "=m" (*addr), "=r" (foo)
-                          : "r" (addr), "r" (newval)
-                          : "cc", "memory");
-    return ret;
-}
+/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason
+ * is that even with an always_inline attribute the compiler may still emit instructions to store then
+ * load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. */
+#define opal_atomic_ll_32(addr, ret) \
Review comment: Makes sense. This is a useful comment that should be present in some upper layer. It is true for both Power and Arm.

Reply: Yeah. I plan to add some additional documentation detailing what live-lock is with LL/SC.
+    do { \
+        volatile int32_t *_addr = (addr); \
+        int32_t _ret; \
+        __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \
+                              : "=&r" (_ret) \
+                              : "r" (_addr) \
+                              ); \
+        ret = (typeof(ret)) _ret; \
+    } while (0)
+
+#define opal_atomic_sc_32(addr, value, ret) \
+    do { \
+        volatile int32_t *_addr = (addr); \
+        int32_t _ret, _foo, _newval = (int32_t) value; \
+        \
+        __asm__ __volatile__ (" stwcx.  %4, 0, %3 \n\t" \
+                              " li %0,0 \n\t" \
+                              " bne- 1f \n\t" \
+                              " ori %0,%0,1 \n\t" \
+                              "1:" \
+                              : "=r" (_ret), "=m" (*_addr), "=r" (_foo) \
+                              : "r" (_addr), "r" (_newval) \
+                              : "cc", "memory"); \
+        ret = _ret; \
+    } while (0)
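Restating the rationale from the NTH comment and the exchange above: the load-linked and store-conditional have to end up in the caller's instruction stream with nothing but register work between them. With the old inline functions, an unoptimized build could store and reload the arguments via the stack between the lwarx and the stwcx., which may cancel the reservation and leave the retry loop spinning forever (live-lock). A hedged sketch of the intended usage pattern (fetch_add_32_sketch is a made-up name, not code from this PR):

#include <stdint.h>

/* Illustration only: with the macro form both the lwarx and the stwcx. expand
 * directly into this function, so no call/return or argument spill has to land
 * between them. */
static inline int32_t fetch_add_32_sketch (volatile int32_t *addr, int32_t delta)
{
    int32_t old, newval;
    int success;

    do {
        opal_atomic_ll_32 (addr, old);             /* lwarx: reservation taken           */
        newval = old + delta;                      /* plain register arithmetic only     */
        opal_atomic_sc_32 (addr, newval, success); /* stwcx.: success is 0 if the
                                                    * reservation was lost in the interim */
    } while (!success);

    return old;
}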
 
 /* these two functions aren't inlined in the non-gcc case because then
    there would be two function calls (since neither cmpset_32 nor

@@ -278,32 +282,33 @@ static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *add
     return ret;
 }
 
-static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
-{
-    int64_t ret;
-
-    __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
-                          : "=&r" (ret)
-                          : "r" (addr)
-                          );
-    return ret;
-}
-
-static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
-{
-    int32_t ret;
-
-    __asm__ __volatile__ (" stdcx.  %2, 0, %1 \n\t"
-                          " li %0,0 \n\t"
-                          " bne- 1f \n\t"
-                          " ori %0,%0,1 \n\t"
-                          "1:"
-                          : "=r" (ret)
-                          : "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
-                          : "cc", "memory");
-    return ret;
-}
+#define opal_atomic_ll_64(addr, ret) \
+    do { \
+        volatile int64_t *_addr = (addr); \
+        int64_t _ret; \
+        __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \
+                              : "=&r" (_ret) \
+                              : "r" (_addr) \
+                              ); \
+        ret = (typeof(ret)) _ret; \
+    } while (0)
+
+#define opal_atomic_sc_64(addr, value, ret) \
+    do { \
+        volatile int64_t *_addr = (addr); \
+        int64_t _foo, _newval = (int64_t) value; \
+        int32_t _ret; \
+        \
+        __asm__ __volatile__ (" stdcx.  %2, 0, %1 \n\t" \
+                              " li %0,0 \n\t" \
+                              " bne- 1f \n\t" \
+                              " ori %0,%0,1 \n\t" \
+                              "1:" \
+                              : "=r" (_ret) \
+                              : "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \
+                              : "cc", "memory"); \
+        ret = _ret; \
+    } while (0)
 
 static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
 {
Review comment: Why do we need this wmb? There is nothing to flush except the atomic swap right above.

Review comment: In fact I am not sure I understand how this code can be correct in the case where two threads are doing conflicting operations on a FIFO that contains only one element. Imagine one thread that pops while the second one pushes. The operation at line 202 (tail_item->opal_list_next = item) conflicts with the opal_atomic_sc_ptr in the pop, and can lead to a case where the head is NULL (it contains what the previous element assumed was its next) but the tail is not (it contains the result of the previous push).

Reply: This scenario is tested by test/class/opal_fifo.c. The exhaustive test causes the threads to empty the fifo. The code that keeps things consistent is at line 276. If the pop detects that the fifo was empty but something was being pushed (next == ghost && tail != item), it waits until the next pointer has been updated and then sets the head to the new item. This is the same strategy used by vader and nemesis.

Reply: Yeah, looking at it there is no way it can get to one of the following lines without the read and write completing anyway. That means the wmb does nothing. It shouldn't hurt though.

Reply: If this is not the critical path, it is fine...
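To make the recovery step described above concrete, here is a simplified, hypothetical sketch; the types and field names are stand-ins rather than the actual opal_fifo_t layout, and the real code performs the head/tail updates with LL/SC or compare-and-swap rather than the plain stores shown here:

#include <stddef.h>

/* Stand-in types -- not the actual opal_list_item_t / opal_fifo_t. */
typedef struct item { struct item *volatile next; } item_t;
typedef struct fifo { item_t *volatile head; item_t *volatile tail; item_t ghost; } fifo_t;

/* Recovery path described above: the popped item still points at the ghost
 * (it looked like the last element), but the tail no longer points at it, so
 * a concurrent push has swapped the tail and has not yet written next.  Wait
 * for the pusher's "tail_item->opal_list_next = item" store, then repair the
 * head so the pushed element is not lost. */
static void fifo_pop_recover (fifo_t *fifo, item_t *item)
{
    if (item->next == &fifo->ghost && fifo->tail != item) {
        while (item->next == &fifo->ghost) {
            /* spin until the in-flight push publishes the link */
        }
        fifo->head = item->next;   /* done atomically in the real implementation */
    }
}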