Skip to content

Commit ee1e0d9

Browse files
committed
opal/asm: add fetch-and-op atomics
This commit adds support for fetch-and-op atomics. This is needed because AND and OR are irreversible operations, so there needs to be a way to get the old value atomically. These are also the only semantics supported by C11 (there is no atomic_op_fetch, just atomic_fetch_op). The old op-and-fetch atomics have been defined in terms of fetch-and-op. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent bc534a1 commit ee1e0d9

File tree

19 files changed

+427
-270
lines changed

19 files changed

+427
-270
lines changed

ompi/mca/coll/sm/coll_sm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one;
358358
* Macro to release an in-use flag from this process
359359
*/
360360
#define FLAG_RELEASE(flag) \
361-
(void)opal_atomic_add_fetch(&(flag)->mcsiuf_num_procs_using, -1)
361+
opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1)
362362

363363
/**
364364
* Macro to copy a single segment in from a user buffer to a shared

ompi/mca/coll/sm/coll_sm_barrier.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm,
101101
if (0 != rank) {
102102
/* Get parent *in* buffer */
103103
parent = &data->mcb_barrier_control_parent[buffer_set];
104-
(void)opal_atomic_add_fetch(parent, 1);
104+
opal_atomic_add (parent, 1);
105105

106106
SPIN_CONDITION(0 != *me_out, exit_label2);
107107
*me_out = 0;

ompi/mca/coll/sm/coll_sm_module.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
463463
OBJ_RETAIN(sm_module->previous_reduce_module);
464464

465465
/* Indicate that we have successfully attached and setup */
466-
(void)opal_atomic_add_fetch(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
466+
opal_atomic_add (&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
467467

468468
/* Wait for everyone in this communicator to attach and setup */
469469
opal_output_verbose(10, ompi_coll_base_framework.framework_output,

ompi/mca/osc/sm/osc_sm_active_target.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group,
151151

152152
for (int i = 0 ; i < size ; ++i) {
153153
int rank_byte = ranks[i] >> OSC_SM_POST_BITS;
154-
osc_sm_post_type_t old, rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
154+
osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
155155

156156
/* wait for rank to post */
157157
while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
@@ -162,9 +162,9 @@ ompi_osc_sm_start(struct ompi_group_t *group,
162162
opal_atomic_rmb ();
163163

164164
#if OPAL_HAVE_ATOMIC_MATH_64
165-
opal_atomic_xor_fetch_64 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit);
165+
(void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit);
166166
#else
167-
opal_atomic_xor_fetch_32 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit);
167+
(void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit);
168168
#endif
169169
}
170170

@@ -247,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group,
247247

248248
gsize = ompi_group_size(module->post_group);
249249
for (int i = 0 ; i < gsize ; ++i) {
250-
(void) opal_atomic_add_fetch ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
250+
opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
251251
}
252252

253253
opal_atomic_wmb ();

opal/class/opal_list.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx)
144144
/* Spot check: ensure this item is only on the list that we
145145
just inserted it into */
146146

147-
(void)opal_atomic_add_fetch( &(item->opal_list_item_refcount), 1 );
147+
opal_atomic_add ( &(item->opal_list_item_refcount), 1 );
148148
assert(1 == item->opal_list_item_refcount);
149149
item->opal_list_item_belong_to = list;
150150
#endif

opal/include/opal/sys/arm/atomic.h

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -209,44 +209,44 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t
209209

210210

211211
#define OPAL_HAVE_ATOMIC_ADD_32 1
212-
static inline int32_t opal_atomic_add_fetch_32(volatile int32_t* v, int inc)
212+
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int inc)
213213
{
214-
int32_t t;
215-
int tmp;
214+
int32_t t, old;
215+
int tmp;
216216

217-
__asm__ __volatile__(
218-
"1: ldrex %0, [%2] \n"
219-
" add %0, %0, %3 \n"
220-
" strex %1, %0, [%2] \n"
221-
" cmp %1, #0 \n"
217+
__asm__ __volatile__(
218+
"1: ldrex %1, [%3] \n"
219+
" add %0, %1, %4 \n"
220+
" strex %2, %0, [%3] \n"
221+
" cmp %2, #0 \n"
222222
" bne 1b \n"
223223

224-
: "=&r" (t), "=&r" (tmp)
224+
: "=&r" (t), "=&r" (old), "=&r" (tmp)
225225
: "r" (v), "r" (inc)
226226
: "cc", "memory");
227227

228228

229-
return t;
229+
return old;
230230
}
231231

232232
#define OPAL_HAVE_ATOMIC_SUB_32 1
233-
static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t* v, int dec)
233+
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int dec)
234234
{
235-
int32_t t;
236-
int tmp;
235+
int32_t t, old;
236+
int tmp;
237237

238-
__asm__ __volatile__(
239-
"1: ldrex %0, [%2] \n"
240-
" sub %0, %0, %3 \n"
241-
" strex %1, %0, [%2] \n"
242-
" cmp %1, #0 \n"
238+
__asm__ __volatile__(
239+
"1: ldrex %1, [%3] \n"
240+
" sub %0, %1, %4 \n"
241+
" strex %2, %0, [%3] \n"
242+
" cmp %2, #0 \n"
243243
" bne 1b \n"
244244

245-
: "=&r" (t), "=&r" (tmp)
245+
: "=&r" (t), "=&r" (old), "=&r" (tmp)
246246
: "r" (v), "r" (dec)
247247
: "cc", "memory");
248248

249-
return t;
249+
return old; /* fetch-and-op contract: return the value loaded by ldrex (%1), not the computed difference held in t (%0) */
250250
}
251251

252252
#endif

opal/include/opal/sys/arm64/atomic.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -293,20 +293,20 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
293293
}
294294

295295
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
296-
static inline type opal_atomic_ ## name ## _fetch_ ## bits (volatile type *addr, type value) \
296+
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
297297
{ \
298-
type newval; \
298+
type newval, old; \
299299
int32_t tmp; \
300300
\
301-
__asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \
302-
" " inst " %" reg "0, %" reg "0, %" reg "3 \n" \
303-
" stxr %w1, %" reg "0, [%2] \n" \
304-
" cbnz %w1, 1b \n" \
305-
: "=&r" (newval), "=&r" (tmp) \
301+
__asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \
302+
" " inst " %" reg "0, %" reg "1, %" reg "4 \n" \
303+
" stxr %w2, %" reg "0, [%3] \n" \
304+
" cbnz %w2, 1b \n" \
305+
: "=&r" (newval), "=&r" (old), "=&r" (tmp) \
306306
: "r" (addr), "r" (value) \
307307
: "cc", "memory"); \
308308
\
309-
return newval; \
309+
return old; \
310310
}
311311

312312
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w")

opal/include/opal/sys/atomic.h

Lines changed: 65 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -399,36 +399,16 @@ bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t
399399

400400
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
401401

402-
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
403-
a static inline version of it (in assembly). If we have to fall
404-
back on compare-exchange 32, that too will be inline. */
405-
#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32)
406-
static inline
407-
#endif
408-
int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta);
409-
410-
#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32)
411-
static inline
412-
#endif
413-
int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value);
414-
415-
#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32)
416-
static inline
417-
#endif
418-
int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value);
419-
420-
#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32)
421-
static inline
422-
#endif
423-
int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value);
424-
425-
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
426-
a static inline version of it (in assembly). If we have to fall
427-
back to compare-exchange 32, that too will be inline. */
428-
#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32)
429-
static inline
430-
#endif
431-
int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta);
402+
static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta);
403+
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int delta);
404+
static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value);
405+
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value);
406+
static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value);
407+
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value);
408+
static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value);
409+
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value);
410+
static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta);
411+
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int delta);
432412

433413
#endif /* OPAL_HAVE_ATOMIC_MATH_32 */
434414

@@ -445,36 +425,15 @@ int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta);
445425

446426
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
447427

448-
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
449-
a static inline version of it (in assembly). If we have to fall
450-
back to compare-exchange 64, that too will be inline */
451-
#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
452-
static inline
453-
#endif
454-
int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta);
455-
456-
#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
457-
static inline
458-
#endif
459-
int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value);
460-
461-
#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
462-
static inline
463-
#endif
464-
int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value);
465-
466-
#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
467-
static inline
468-
#endif
469-
int64_t opal_atomic_xor_fetch_64(volatile int64_t *addr, int64_t value);
470-
471-
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
472-
a static inline version of it (in assembly). If we have to fall
473-
back to compare-exchange 64, that too will be inline */
474-
#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
475-
static inline
476-
#endif
477-
int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta);
428+
static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta);
429+
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta);
430+
static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value);
431+
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value);
432+
static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value);
433+
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value);
434+
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value);
435+
static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta);
436+
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta);
478437

479438
#endif /* OPAL_HAVE_ATOMIC_MATH_64 */
480439

@@ -501,6 +460,19 @@ opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta)
501460
#error "Unknown size_t size"
502461
#endif
503462
}
463+
464+
static inline size_t
465+
opal_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta)
466+
{
467+
#if SIZEOF_SIZE_T == 4
468+
return (size_t) opal_atomic_fetch_add_32((int32_t*) addr, delta);
469+
#elif SIZEOF_SIZE_T == 8
470+
return (size_t) opal_atomic_fetch_add_64((int64_t*) addr, delta);
471+
#else
472+
#error "Unknown size_t size"
473+
#endif
474+
}
475+
504476
static inline size_t
505477
opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta)
506478
{
@@ -512,13 +484,30 @@ opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta)
512484
#error "Unknown size_t size"
513485
#endif
514486
}
487+
488+
static inline size_t
489+
opal_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta)
490+
{
491+
#if SIZEOF_SIZE_T == 4
492+
return (size_t) opal_atomic_fetch_sub_32((int32_t*) addr, delta);
493+
#elif SIZEOF_SIZE_T == 8
494+
return (size_t) opal_atomic_fetch_sub_64((int64_t*) addr, delta);
495+
#else
496+
#error "Unknown size_t size"
497+
#endif
498+
}
499+
515500
#else
516501
#if SIZEOF_SIZE_T == 4
517-
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta))
518-
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((int32_t*) addr, delta))
519-
#elif SIZEOF_SIZE_T ==8
520-
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta))
521-
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((int64_t*) addr, delta))
502+
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((volatile int32_t *) addr, delta))
503+
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_32((volatile int32_t *) addr, delta))
504+
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta))
505+
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta))
506+
#elif SIZEOF_SIZE_T == 8
507+
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((volatile int64_t *) addr, delta))
508+
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_64((volatile int64_t *) addr, delta))
509+
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta))
510+
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta))
522511
#else
523512
#error "Unknown size_t size"
524513
#endif
@@ -599,19 +588,15 @@ static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* a
599588

600589
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64)
601590

602-
static inline void opal_atomic_add_fetch_xx(volatile void* addr,
591+
static inline void opal_atomic_add_xx(volatile void* addr,
603592
int32_t value, size_t length);
604-
static inline void opal_atomic_sub_fetch_xx(volatile void* addr,
593+
static inline void opal_atomic_sub_xx(volatile void* addr,
605594
int32_t value, size_t length);
606-
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
607-
static inline int32_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta );
608-
static inline int32_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta );
609-
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
610-
static inline int64_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta );
611-
static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta );
612-
#else
613-
#error Atomic arithmetic on pointers not supported
614-
#endif
595+
596+
static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta );
597+
static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta );
598+
static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta );
599+
static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta );
615600

616601
/**
617602
* Atomically increment the content depending on the type. This
@@ -623,8 +608,8 @@ static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delt
623608
* @param addr Address of <TYPE>
624609
* @param delta Value to add (converted to <TYPE>).
625610
*/
626-
#define opal_atomic_add_fetch( ADDR, VALUE ) \
627-
opal_atomic_add_fetch_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
611+
#define opal_atomic_add( ADDR, VALUE ) \
612+
opal_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
628613
sizeof(*(ADDR)) )
629614

630615
/**
@@ -637,8 +622,8 @@ static inline int64_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delt
637622
* @param addr Address of <TYPE>
638623
* @param delta Value to subtract (converted to <TYPE>).
639624
*/
640-
#define opal_atomic_sub_fetch( ADDR, VALUE ) \
641-
opal_atomic_sub_fetch_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
625+
#define opal_atomic_sub( ADDR, VALUE ) \
626+
opal_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
642627
sizeof(*(ADDR)) )
643628

644629
#endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */

0 commit comments

Comments
 (0)