Skip to content

Commit d2e5aab

Browse files
committed
opal/fifo: reduce change of live-lock in LL/SC implementation
This commit addresses a live-lock that can occur on LL/SC architectures when turning off optimizations (-O0). In this case opal_atomic_ll and opal_atomic_sc are not inlined. This adds additional loads and stores between the load-linked and store-conditional instructions in the LL/SC lifo and fifo implemenations. The problem is addressed in two ways: - Re-work the LL/SC fifo code to reduce the chance that a load or store can cancel the load-linked reservation before the store-conditional can be executed. This rework involves moving the SC closer to the LL and using the register keyword to avoid additional load instructions. - Convert the LL/SC atomics from inline function to macros. The functions were changed to macros because the same behavior is observed with -O0 and always_inline. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent a7a3042 commit d2e5aab

File tree

5 files changed

+94
-120
lines changed

5 files changed

+94
-120
lines changed

opal/class/opal_fifo.h

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007 Voltaire All rights reserved.
1414
* Copyright (c) 2010 IBM Corporation. All rights reserved.
15-
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
1616
* reseved.
1717
* $COPYRIGHT$
1818
*
@@ -215,28 +215,33 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
215215
*/
216216
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
217217
{
218-
opal_list_item_t *item, *next;
219-
220218
#if OPAL_HAVE_ATOMIC_LLSC_PTR
219+
register opal_list_item_t *ghost = &fifo->opal_fifo_ghost;
220+
register opal_list_item_t *item, *next;
221+
register opal_list_item_t volatile * volatile * head = &fifo->opal_fifo_head.data.item;
222+
int ret;
223+
221224
/* use load-linked store-conditional to avoid ABA issues */
222225
do {
223-
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
224-
if (&fifo->opal_fifo_ghost == item) {
225-
if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) {
226+
opal_atomic_ll_ptr(head, item);
227+
next = (opal_list_item_t *) item->opal_list_next;
228+
if (ghost != item) {
229+
opal_atomic_sc_ptr(head, next, ret);
230+
if (ret) {
231+
break;
232+
}
233+
} else {
234+
if (ghost == fifo->opal_fifo_tail.data.item) {
226235
return NULL;
227236
}
228237

229238
/* fifo does not appear empty. wait for the fifo to be made
230239
* consistent by conflicting thread. */
231-
continue;
232-
}
233-
234-
next = (opal_list_item_t *) item->opal_list_next;
235-
if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) {
236-
break;
237240
}
238241
} while (1);
239242
#else
243+
opal_list_item_t *item, *next;
244+
240245
/* protect against ABA issues by "locking" the head */
241246
do {
242247
if (opal_atomic_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) {

opal/class/opal_lifo.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007 Voltaire All rights reserved.
1414
* Copyright (c) 2010 IBM Corporation. All rights reserved.
15-
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
1616
* reseved.
1717
* Copyright (c) 2016 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
@@ -204,8 +204,10 @@ static inline void _opal_lifo_release_cpu (void)
204204
*/
205205
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
206206
{
207-
opal_list_item_t *item, *next;
208-
int attempt = 0;
207+
register opal_list_item_t volatile * volatile * head = &lifo->opal_lifo_head.data.item;
208+
register opal_list_item_t *ghost = &lifo->opal_lifo_ghost;
209+
register opal_list_item_t *item, *next;
210+
int attempt = 0, ret;
209211

210212
do {
211213
if (++attempt == 5) {
@@ -215,13 +217,14 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
215217
attempt = 0;
216218
}
217219

218-
item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item);
219-
if (&lifo->opal_lifo_ghost == item) {
220+
opal_atomic_ll_ptr(head, item);
221+
next = (opal_list_item_t *) item->opal_list_next;
222+
if (ghost == item) {
220223
return NULL;
221224
}
222225

223-
next = (opal_list_item_t *) item->opal_list_next;
224-
} while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next));
226+
opal_atomic_sc_ptr(head, next, ret);
227+
} while (!ret);
225228

226229
opal_atomic_wmb ();
227230

opal/include/opal/sys/arm64/atomic.h

Lines changed: 26 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2010 IBM Corporation. All rights reserved.
1414
* Copyright (c) 2010 ARM ltd. All rights reserved.
15-
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* $COPYRIGHT$
1818
*
@@ -150,28 +150,19 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
150150
return (ret == oldval);
151151
}
152152

153-
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
154-
{
155-
int32_t ret;
156-
157-
__asm__ __volatile__ ("ldaxr %w0, [%1] \n"
158-
: "=&r" (ret)
159-
: "r" (addr));
160-
161-
return ret;
162-
}
163-
164-
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
165-
{
166-
int ret;
167-
168-
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n"
169-
: "=&r" (ret)
170-
: "r" (addr), "r" (newval)
171-
: "cc", "memory");
153+
#define opal_atomic_ll_32(addr, value) \
154+
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
155+
: "=&r" ((int32_t) (value)) \
156+
: "r" ((volatile int32_t *) (addr)))
172157

173-
return ret == 0;
174-
}
158+
#define opal_atomic_sc_32(addr, newval, ret) \
159+
do { \
160+
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
161+
: "=&r" ((int) (ret)) \
162+
: "r" ((volatile int32_t *) (addr)), "r" ((int32_t) (newval)) \
163+
: "cc", "memory"); \
164+
(ret) = ((ret) == 0); \
165+
} while (0)
175166

176167
static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
177168
int64_t oldval, int64_t newval)
@@ -251,28 +242,19 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
251242
return (ret == oldval);
252243
}
253244

254-
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
255-
{
256-
int64_t ret;
257-
258-
__asm__ __volatile__ ("ldaxr %0, [%1] \n"
259-
: "=&r" (ret)
260-
: "r" (addr));
261-
262-
return ret;
263-
}
264-
265-
static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
266-
{
267-
int ret;
268-
269-
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n"
270-
: "=&r" (ret)
271-
: "r" (addr), "r" (newval)
272-
: "cc", "memory");
273-
274-
return ret == 0;
275-
}
245+
#define opal_atomic_ll_64(addr, value) \
246+
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
247+
: "=&r" ((int64_t) (value)) \
248+
: "r" ((volatile int64_t *) (addr)))
249+
250+
#define opal_atomic_sc_64(addr, newval, ret) \
251+
do { \
252+
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
253+
: "=&r" ((int) (ret)) \
254+
: "r" ((volatile int64_t *) (addr)), "r" ((int64_t) (newval)) \
255+
: "cc", "memory"); \
256+
(ret) = ((ret) == 0); \
257+
} while (0)
276258

277259
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
278260
static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \

opal/include/opal/sys/atomic_impl.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
14-
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
14+
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* $COPYRIGHT$
1717
*
@@ -278,15 +278,15 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr,
278278

279279
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
280280

281-
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr)
282-
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval)
281+
#define opal_atomic_ll_ptr opal_atomic_ll_32
282+
#define opal_atomic_sc_ptr opal_atomic_sc_32
283283

284284
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
285285

286286
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
287287

288-
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr)
289-
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval)
288+
#define opal_atomic_ll_ptr opal_atomic_ll_64
289+
#define opal_atomic_sc_ptr opal_atomic_sc_64
290290

291291
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
292292

opal/include/opal/sys/powerpc/atomic.h

Lines changed: 36 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2010-2017 IBM Corporation. All rights reserved.
14-
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
14+
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* $COPYRIGHT$
1717
*
@@ -135,7 +135,7 @@ void opal_atomic_isync(void)
135135
* it will emit lwz instead of ld to load the 64-bit operand. */
136136
#define OPAL_ASM_VALUE64(x) (void *)(intptr_t) (x)
137137
#else
138-
#define OPAL_ASM_VALUE64(x) x
138+
#define OPAL_ASM_VALUE64(x) (int64_t) (x)
139139
#endif
140140

141141

@@ -158,31 +158,21 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
158158
return (ret == oldval);
159159
}
160160

161-
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
162-
{
163-
int32_t ret;
164-
165-
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
166-
: "=&r" (ret)
167-
: "r" (addr)
168-
);
169-
return ret;
170-
}
171-
172-
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
173-
{
174-
int32_t ret, foo;
175-
176-
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
177-
" li %0,0 \n\t"
178-
" bne- 1f \n\t"
179-
" ori %0,%0,1 \n\t"
180-
"1:"
181-
: "=r" (ret), "=m" (*addr), "=r" (foo)
182-
: "r" (addr), "r" (newval)
183-
: "cc", "memory");
184-
return ret;
185-
}
161+
#define opal_atomic_ll_32(addr, value) \
162+
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \
163+
: "=&r" ((int32_t) (value)) \
164+
: "r" ((volatile int32_t *) (addr)))
165+
166+
#define opal_atomic_sc_32(addr, newval, ret) \
167+
__asm__ __volatile__ (" stwcx. %2, 0, %1 \n\t" \
168+
" li %0,0 \n\t" \
169+
" bne- 1f \n\t" \
170+
" ori %0,%0,1 \n\t" \
171+
"1:" \
172+
: "=r" ((int32_t) ret) \
173+
: "r" ((volatile int32_t *) (addr)), \
174+
"r" ((int32_t) newval) \
175+
: "cc", "memory")
186176

187177
/* these two functions aren't inlined in the non-gcc case because then
188178
there would be two function calls (since neither cmpset_32 nor
@@ -280,31 +270,25 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
280270
return (ret == oldval);
281271
}
282272

283-
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
284-
{
285-
int64_t ret;
286-
287-
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
288-
: "=&r" (ret)
289-
: "r" (addr)
290-
);
291-
return ret;
292-
}
293-
294-
static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
295-
{
296-
int32_t ret;
297-
298-
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t"
299-
" li %0,0 \n\t"
300-
" bne- 1f \n\t"
301-
" ori %0,%0,1 \n\t"
302-
"1:"
303-
: "=r" (ret)
304-
: "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
305-
: "cc", "memory");
306-
return ret;
307-
}
273+
#define opal_atomic_ll_64(addr, value) \
274+
do { \
275+
int64_t _ll_tmp; \
276+
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \
277+
: "=&r" (OPAL_ASM_VALUE64(_ll_tmp)) \
278+
: "r" ((volatile int64_t *) (addr))); \
279+
OPAL_ASM_VALUE64(value) = _ll_tmp; \
280+
} while (0)
281+
282+
#define opal_atomic_sc_64(addr, newval, ret) \
283+
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \
284+
" li %0,0 \n\t" \
285+
" bne- 1f \n\t" \
286+
" ori %0,%0,1 \n\t" \
287+
"1:" \
288+
: "=r" ((ret)) \
289+
: "r" ((volatile int64_t *) (addr)), \
290+
"r" (OPAL_ASM_VALUE64(newval)) \
291+
: "cc", "memory")
308292

309293
/* these two functions aren't inlined in the non-gcc case because then
310294
there would be two function calls (since neither cmpset_64 nor

0 commit comments

Comments
 (0)