diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index add69f9bf70..53aaa00b7ee 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -487,7 +487,8 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, void* recursive_buffer; if (NULL == packed_description) { - if (opal_atomic_bool_cmpset (&datatype->packed_description, NULL, (void *) 1)) { + void *_tmp_ptr = NULL; + if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) { if( ompi_datatype_is_predefined(datatype) ) { packed_description = malloc(2 * sizeof(int)); } else if( NULL == args ) { diff --git a/ompi/group/group.h b/ompi/group/group.h index 4f303c34186..30664f8a4e0 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -14,7 +14,7 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -356,7 +356,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, ompi_proc_t *real_proc = (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc)); - if (opal_atomic_bool_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) { + if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) { OBJ_RETAIN(real_proc); } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index 50cf3c79b72..c68e30f6700 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -296,9 +296,10 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me, int ompi_mtl_portals4_flowctl_trigger(void) { + int32_t _tmp_value = 0; int ret; - if (true == OPAL_ATOMIC_BOOL_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) { + if (true == OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ompi_mtl_portals4.flowctl.flowctl_active, &_tmp_value, 1)) { /* send trigger to root */ ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 660b7c3246a..77eabcc5922 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
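Every call-site conversion in this patch follows the same calling-convention change: the old opal_atomic_bool_cmpset (addr, oldval, newval) took the expected value by value, while opal_atomic_compare_exchange_strong (addr, &oldval, newval) takes a pointer to it and, when the exchange fails, writes the value it actually observed back through that pointer. That is why call sites that used to pass a constant now declare a mutable temporary such as _tmp_ptr or _tmp_value. A minimal standalone sketch of the pattern, using C11 <stdatomic.h> only as a stand-in for the OPAL wrappers (all names below are local to the sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic(void *) slot = NULL;      /* stand-in for a field such as datatype->packed_description */

int main (void)
{
    void *_tmp_ptr = NULL;               /* expected value; overwritten with the observed value on failure */

    /* analogous to opal_atomic_compare_exchange_strong_ptr (&slot, (void *) &_tmp_ptr, (void *) 1) */
    bool won = atomic_compare_exchange_strong (&slot, &_tmp_ptr, (void *) 1);

    printf ("won=%d observed=%p\n", won, _tmp_ptr);
    return 0;
}
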
* All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -145,15 +145,11 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value) { - int32_t peer_flags, new_flags; - do { - peer_flags = peer->flags; - if (value) { - new_flags = peer_flags | flag; - } else { - new_flags = peer_flags & ~flag; - } - } while (!OPAL_ATOMIC_BOOL_CMPSET_32 (&peer->flags, peer_flags, new_flags)); + if (value) { + OPAL_ATOMIC_OR32 (&peer->flags, flag); + } else { + OPAL_ATOMIC_AND32 (&peer->flags, ~flag); + } } static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c index 51a31181a88..632495eb234 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -105,7 +105,7 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om "osc pt2pt: flushing active fragment to target %d. pending: %d", active_frag->target, active_frag->pending)); - if (opal_atomic_bool_cmpset (&peer->active_frag, active_frag, NULL)) { + if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) { if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) { /* communication going on while synchronizing; this is an rma usage bug */ return OMPI_ERR_RMA_SYNC; diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index cddc3c3f07f..0c16cfe690c 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -67,7 +67,7 @@ static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (omp /* to ensure ordering flush the buffer on the peer */ curr = peer->active_frag; - if (NULL != curr && opal_atomic_bool_cmpset (&peer->active_frag, curr, NULL)) { + if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &curr, NULL)) { /* If there's something pending, the pending finish will start the buffer. Otherwise, we need to start it now. 
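The osc_pt2pt peer-flag helper above no longer needs a compare-exchange retry loop to set or clear a single bit; an atomic fetch-or/fetch-and does it in one shot. A sketch of the simplified helper, with C11 atomics standing in for OPAL_ATOMIC_OR32/OPAL_ATOMIC_AND32 (function and variable names are local to the sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static void peer_set_flag (_Atomic int32_t *flags, int32_t flag, bool value)
{
    if (value) {
        atomic_fetch_or (flags, flag);    /* like OPAL_ATOMIC_OR32 (&peer->flags, flag) */
    } else {
        atomic_fetch_and (flags, ~flag);  /* like OPAL_ATOMIC_AND32 (&peer->flags, ~flag) */
    }
}
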
*/ int ret = ompi_osc_pt2pt_frag_finish (module, curr); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 34059a0851c..9d0210c2f8a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -744,14 +744,13 @@ static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, in break; } - if (opal_atomic_bool_cmpset_32 (&module->lock_status, lock_status, lock_status + 1)) { + if (opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) { break; } - - lock_status = module->lock_status; } while (1); } else { - queue = !opal_atomic_bool_cmpset_32 (&module->lock_status, 0, -1); + int32_t _tmp_value = 0; + queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1); } if (queue) { diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index 30e160e93f9..b045ebf3ec6 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -285,7 +285,9 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result); assert (OMPI_SUCCESS == ret); } else { - result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); + ompi_osc_rdma_lock_t _tmp_value = 0; + + result = !ompi_osc_rdma_lock_compare_exchange ((osc_rdma_counter_t *) target, &_tmp_value, 1 + (osc_rdma_counter_t) my_rank); } if (OPAL_LIKELY(0 == result)) { diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 4352c5cbf1c..8c35018badf 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -17,7 +17,8 @@ static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock) { - return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + ompi_osc_rdma_lock_t _tmp_value = 0; + return !ompi_osc_rdma_lock_compare_exchange (lock, &_tmp_value, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); } static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock) diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index c31f27a62cd..21aeecb4078 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -201,14 +201,13 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer, int32_t flags; opal_atomic_mb (); + flags = peer->flags; do { - flags = peer->flags; if (flags & flag) { return false; } - - } while (!OPAL_THREAD_BOOL_CMPSET_32 (&peer->flags, flags, flags | flag)); + } while (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&peer->flags, &flags, flags | flag)); return true; } diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index d6dfb0d0188..4fed013cbf4 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -1,6 +1,6 @@ /* 
-*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -54,12 +54,12 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value return new; } -static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value) +static inline int ompi_osc_rdma_lock_compare_exchange (volatile int64_t *p, int64_t *comp, int64_t value) { int ret; opal_atomic_mb (); - ret = opal_atomic_bool_cmpset_64 (p, comp, value); + ret = opal_atomic_compare_exchange_strong_64 (p, comp, value); opal_atomic_mb (); return ret; @@ -83,12 +83,12 @@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value return new; } -static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value) +static inline int ompi_osc_rdma_lock_compare_exchange (volatile int32_t *p, int32_t *comp, int32_t value) { int ret; opal_atomic_mb (); - ret = opal_atomic_bool_cmpset_32 (p, comp, value); + ret = opal_atomic_compare_exchange_strong_32 (p, comp, value); opal_atomic_mb (); return ret; diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 083992d8331..6c1e00263f2 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -130,10 +130,11 @@ ompi_osc_sm_start(struct ompi_group_t *group, ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; int my_rank = ompi_comm_rank (module->comm); + void *_tmp_ptr = NULL; OBJ_RETAIN(group); - if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, NULL, group)) { + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) { OBJ_RELEASE(group); return OMPI_ERR_RMA_SYNC; } @@ -160,9 +161,11 @@ ompi_osc_sm_start(struct ompi_group_t *group, opal_atomic_rmb (); - do { - old = module->posts[my_rank][rank_byte]; - } while (!opal_atomic_bool_cmpset ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit)); +#if OPAL_HAVE_ATOMIC_MATH_64 + opal_atomic_xor_64 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); +#else + opal_atomic_xor_32 ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, rank_bit); +#endif } free (ranks); @@ -185,7 +188,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win) opal_atomic_mb(); group = module->start_group; - if (NULL == group || !OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, group, NULL)) { + if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) { return OMPI_ERR_RMA_SYNC; } diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index 233d1dd30d0..e4d4d5e68a6 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. 
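The osc/rdma lock helpers above apply the new interface to a try-lock: the expected value is an explicit local initialized to 0 (unlocked), and the helper returns 0 on success. A standalone C11 sketch of that idiom; LOCK_EXCLUSIVE below is a placeholder constant, not the real OMPI_OSC_RDMA_LOCK_EXCLUSIVE value, and the memory barriers that ompi_osc_rdma_lock_compare_exchange adds around the operation are folded into the sequentially consistent default here:

#include <stdatomic.h>
#include <stdint.h>

#define LOCK_EXCLUSIVE 0x20000000        /* placeholder for the exclusive-lock sentinel */

/* returns 0 on success, non-zero if the lock word was not 0 (already held) */
static int trylock_local (_Atomic int64_t *lock)
{
    int64_t expected = 0;                /* only an unlocked (0) word may be claimed */
    return !atomic_compare_exchange_strong (lock, &expected, LOCK_EXCLUSIVE);
}
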
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -100,6 +100,8 @@ int ompi_request_default_wait_any(size_t count, num_requests_null_inactive = 0; for (i = 0; i < count; i++) { + void *_tmp_ptr = REQUEST_PENDING; + request = requests[i]; /* Check for null or completed persistent request. For @@ -110,7 +112,7 @@ int ompi_request_default_wait_any(size_t count, continue; } - if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { + if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync) ) { assert(REQUEST_COMPLETE(request)); completed = i; *index = i; @@ -136,6 +138,8 @@ int ompi_request_default_wait_any(size_t count, * user. */ for(i = completed-1; (i+1) > 0; i--) { + void *tmp_ptr = &sync; + request = requests[i]; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -146,7 +150,7 @@ int ompi_request_default_wait_any(size_t count, * Otherwise, the request has been completed meanwhile, and it * has been atomically marked as REQUEST_COMPLETE. */ - if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &tmp_ptr, REQUEST_PENDING) ) { *index = i; } } @@ -211,6 +215,8 @@ int ompi_request_default_wait_all( size_t count, WAIT_SYNC_INIT(&sync, count); rptr = requests; for (i = 0; i < count; i++) { + void *_tmp_ptr = REQUEST_PENDING; + request = *rptr++; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -218,7 +224,7 @@ int ompi_request_default_wait_all( size_t count, continue; } - if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { failed++; } @@ -246,6 +252,8 @@ int ompi_request_default_wait_all( size_t count, if (MPI_STATUSES_IGNORE != statuses) { /* fill out status and free request if required */ for( i = 0; i < count; i++, rptr++ ) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -260,7 +268,7 @@ int ompi_request_default_wait_all( size_t count, * mark the request as pending then it is neither failed nor complete, and * we must stop altering it. */ - if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) { /* * Per MPI 2.2 p 60: * Allows requests to be marked as MPI_ERR_PENDING if they are @@ -306,6 +314,8 @@ int ompi_request_default_wait_all( size_t count, int rc; /* free request if required */ for( i = 0; i < count; i++, rptr++ ) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -320,7 +330,7 @@ int ompi_request_default_wait_all( size_t count, /* If the request is still pending due to a failed request * then skip it in this loop. 
*/ - if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) { /* * Per MPI 2.2 p 60: * Allows requests to be marked as MPI_ERR_PENDING if they are @@ -398,6 +408,8 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive = 0; num_requests_done = 0; for (size_t i = 0; i < count; i++, rptr++) { + void *_tmp_ptr = REQUEST_PENDING; + request = *rptr; /* * Check for null or completed persistent request. @@ -407,7 +419,7 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive++; continue; } - indices[i] = OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync); + indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync); if( !indices[i] ) { /* If the request is completed go ahead and mark it as such */ assert( REQUEST_COMPLETE(request) ); @@ -434,6 +446,8 @@ int ompi_request_default_wait_some(size_t count, rptr = requests; num_requests_done = 0; for (size_t i = 0; i < count; i++, rptr++) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -454,7 +468,7 @@ int ompi_request_default_wait_some(size_t count, */ if( !indices[i] ){ indices[num_requests_done++] = i; - } else if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + } else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) { indices[num_requests_done++] = i; } } diff --git a/ompi/request/request.h b/ompi/request/request.h index 8f472c1f5cd..5a1c02c4b65 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -13,7 +13,7 @@ * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -396,10 +396,12 @@ static inline int ompi_request_free(ompi_request_t** request) static inline void ompi_request_wait_completion(ompi_request_t *req) { if (opal_using_threads () && !REQUEST_COMPLETE(req)) { + void *_tmp_ptr = REQUEST_PENDING; ompi_wait_sync_t sync; + WAIT_SYNC_INIT(&sync, 1); - if (OPAL_ATOMIC_BOOL_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) { + if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { SYNC_WAIT(&sync); } else { /* completed before we had a chance to swap in the sync object */ @@ -439,7 +441,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa if (0 == rc) { if( OPAL_LIKELY(with_signal) ) { - if(!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) { + void *_tmp_ptr = REQUEST_PENDING; + + if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) { ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, REQUEST_COMPLETED); /* In the case where another thread concurrently changed the request to REQUEST_PENDING */ diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 67aa479deb5..ad67c77a6ff 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. 
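The req_complete updates above form a small handshake: a waiter tries to move the field from REQUEST_PENDING to its own sync object, the completer tries to move it from REQUEST_PENDING to REQUEST_COMPLETED, and whichever compare-exchange loses learns, through the written-back expected value, what the other side installed. A condensed, single-threaded sketch of those transitions; the sentinel definitions and the int "sync" are simplified stand-ins for the real request.h sentinels and ompi_wait_sync_t:

#include <stdatomic.h>
#include <stdio.h>

#define REQUEST_PENDING   ((void *) 0L)  /* sketch-local stand-ins */
#define REQUEST_COMPLETED ((void *) 1L)

static _Atomic(void *) req_complete = REQUEST_PENDING;

int main (void)
{
    int sync = 0;                        /* stand-in for ompi_wait_sync_t */
    void *expected = REQUEST_PENDING;

    /* completer: mark the request complete; if this fails, a waiter already
     * parked a sync object here and would be signalled instead */
    if (!atomic_compare_exchange_strong (&req_complete, &expected, REQUEST_COMPLETED)) {
        /* wake ((ompi_wait_sync_t *) expected) */
    }

    /* waiter: try to park &sync; it fails here because the request is already
     * complete, so no blocking is needed */
    expected = REQUEST_PENDING;
    if (!atomic_compare_exchange_strong (&req_complete, &expected, (void *) &sync)) {
        printf ("already complete, observed %p\n", expected);
    }

    return 0;
}
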
* Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reseved. * $COPYRIGHT$ * @@ -76,7 +76,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost; } -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the @@ -85,14 +85,12 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_list_item_t *item) { - opal_counted_pointer_t tail; + opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value}; item->opal_list_next = &fifo->opal_fifo_ghost; do { - tail.value = fifo->opal_fifo_tail.value; - - if (opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, item)) { + if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) { break; } } while (1); @@ -102,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, if (&fifo->opal_fifo_ghost == tail.data.item) { /* update the head */ opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; - opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); + opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item); } else { /* update previous item */ tail.data.item->opal_list_next = item; @@ -116,29 +114,28 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; - opal_counted_pointer_t head, tail; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; + opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail; do { - head.value = fifo->opal_fifo_head.value; tail.value = fifo->opal_fifo_tail.value; opal_atomic_rmb (); item = (opal_list_item_t *) head.data.item; next = (opal_list_item_t *) item->opal_list_next; - if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) { + if (ghost == tail.data.item && ghost == item) { return NULL; } /* the head or next pointer are in an inconsistent state. keep looping. */ - if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item && - &fifo->opal_fifo_ghost == next) { + if (tail.data.item != item && ghost != tail.data.item && ghost == next) { + head.value = fifo->opal_fifo_head.value; continue; } /* try popping the head */ - if (opal_update_counted_pointer (&fifo->opal_fifo_head, head, next)) { + if (opal_update_counted_pointer (&fifo->opal_fifo_head, &head, next)) { break; } } while (1); @@ -146,14 +143,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb (); /* check for tail and head consistency */ - if (&fifo->opal_fifo_ghost == next) { + if (ghost == next) { /* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */ - if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) { + if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, ghost)) { /* tail was changed by a push operation. 
wait for the item's next pointer to be se then * update the head */ /* wait for next pointer to be updated by push */ - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } @@ -166,7 +163,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) head.value = fifo->opal_fifo_head.value; next = (opal_list_item_t *) item->opal_list_next; - assert (&fifo->opal_fifo_ghost == head.data.item); + assert (ghost == head.data.item); fifo->opal_fifo_head.data.item = next; opal_atomic_wmb (); @@ -215,14 +212,14 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; #if OPAL_HAVE_ATOMIC_LLSC_PTR /* use load-linked store-conditional to avoid ABA issues */ do { item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); - if (&fifo->opal_fifo_ghost == item) { - if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) { + if (ghost == item) { + if (ghost == fifo->opal_fifo_tail.data.item) { return NULL; } @@ -239,7 +236,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) #else /* protect against ABA issues by "locking" the head */ do { - if (opal_atomic_bool_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) { + if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) { break; } @@ -249,7 +246,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb(); item = opal_fifo_head (fifo); - if (&fifo->opal_fifo_ghost == item) { + if (ghost == item) { fifo->opal_fifo_head.data.counter = 0; return NULL; } @@ -258,9 +255,11 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) fifo->opal_fifo_head.data.item = next; #endif - if (&fifo->opal_fifo_ghost == next) { - if (!opal_atomic_bool_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) { - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + if (ghost == next) { + void *tmp = item; + + if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index 73caf32cb0c..e5a3f9110cb 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reseved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
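In the non-128-bit FIFO pop above, the head "mini lock" is now taken with an unconditional swap rather than a compare-exchange of 0 to 1: the returned old value already tells the caller whether the lock was free. A sketch of that idiom, with C11 atomic_exchange standing in for opal_atomic_swap_32 (names are local to the sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool try_lock_head (_Atomic int32_t *counter)
{
    /* true only if the previous value was 0, i.e. the lock was free */
    return 0 == atomic_exchange (counter, 1);
}

static void unlock_head (_Atomic int32_t *counter)
{
    atomic_store (counter, 0);
}
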
@@ -36,8 +36,8 @@ BEGIN_C_DECLS /* NTH: temporarily suppress warnings about this not being defined */ -#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128) -#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 #endif /** @@ -50,7 +50,7 @@ union opal_counted_pointer_t { /** list item pointer */ volatile opal_list_item_t * volatile item; } data; -#if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T /** used for atomics when there is a cmpset that can operate on * two 64-bit values */ opal_int128_t value; @@ -59,19 +59,19 @@ union opal_counted_pointer_t { typedef union opal_counted_pointer_t opal_counted_pointer_t; -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the * list (if the list was empty before this operation). */ -static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t old, +static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *old, opal_list_item_t *item) { opal_counted_pointer_t new_p; new_p.data.item = item; - new_p.data.counter = old.data.counter + 1; - return opal_atomic_bool_cmpset_128 (&addr->value, old.value, new_p.value); + new_p.data.counter = old->data.counter + 1; + return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value); } #endif @@ -110,7 +110,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) } -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the LIFO. 
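opal_update_counted_pointer() now takes the expected {pointer, counter} pair by address, so a failed 128-bit compare-exchange refreshes the caller's copy and the retry loops in opal_fifo.h and opal_lifo.h no longer reload the head themselves. The counter is what defeats ABA: every successful update increments it, so a stale expected value cannot match even if the same pointer reappears. A reduced sketch of the idea, shrunk to a 32-bit slot index plus a 32-bit counter so portable C11 64-bit atomics suffice in place of the 128-bit operation (types and names are local to the sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef union {
    struct { uint32_t index; uint32_t counter; } data;
    uint64_t value;
} counted_ref_t;

static bool update_counted_ref (_Atomic uint64_t *addr, counted_ref_t *old, uint32_t new_index)
{
    counted_ref_t next = { .data = { .index = new_index, .counter = old->data.counter + 1 } };

    /* on failure the observed {index, counter} pair is written back into *old */
    return atomic_compare_exchange_strong (addr, &old->value, next.value);
}
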
We will return the last head of the list * to allow the upper level to detect if this element is the first one in the @@ -119,14 +119,14 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, opal_list_item_t *item) { - do { - opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + do { item->opal_list_next = next; opal_atomic_wmb (); /* to protect against ABA issues it is sufficient to only update the counter in pop */ - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) { return next; } /* DO some kind of pause to release the bus */ @@ -141,17 +141,17 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_counted_pointer_t old_head; opal_list_item_t *item; - do { - - old_head.data.counter = lifo->opal_lifo_head.data.counter; - opal_atomic_rmb (); - old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item; + old_head.data.counter = lifo->opal_lifo_head.data.counter; + opal_atomic_rmb (); + old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + do { + item = (opal_list_item_t *) old_head.data.item; if (item == &lifo->opal_lifo_ghost) { return NULL; } - if (opal_update_counted_pointer (&lifo->opal_lifo_head, old_head, + if (opal_update_counted_pointer (&lifo->opal_lifo_head, &old_head, (opal_list_item_t *) item->opal_list_next)) { opal_atomic_wmb (); item->opal_list_next = NULL; @@ -169,13 +169,15 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, opal_list_item_t *item) { + opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + /* item free acts as a mini lock to avoid ABA problems */ item->item_free = 1; + do { - opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; item->opal_list_next = next; opal_atomic_wmb(); - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) { opal_atomic_wmb (); /* now safe to pop this item */ item->item_free = 0; @@ -236,8 +238,11 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) */ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) { - opal_list_item_t *item; - while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) { + opal_list_item_t *item, *head, *ghost = &lifo->opal_lifo_ghost; + + item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + + while (item != ghost) { /* ensure it is safe to pop the head */ if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) { continue; @@ -245,14 +250,16 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_atomic_wmb (); + head = item; /* try to swap out the head pointer */ - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, item, - (void *) item->opal_list_next)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &head, + (void *) item->opal_list_next)) { break; } /* NTH: don't need another atomic here */ item->item_free = 0; + item = head; /* Do some kind of pause to release the bus */ } diff --git 
a/opal/include/opal/sys/arm/atomic.h b/opal/include/opal/sys/arm/atomic.h index fa3b35d18b2..89e84c653c0 100644 --- a/opal/include/opal/sys/arm/atomic.h +++ b/opal/include/opal/sys/arm/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -104,12 +107,12 @@ void opal_atomic_isync(void) #if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6)) -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ( "1: ldrex %0, [%2] \n" @@ -120,11 +123,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, " bne 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } /* these two functions aren't inlined in the non-gcc case because then @@ -132,51 +137,50 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } #if (OPAL_ASM_SUPPORT_64BIT == 1) -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; - int tmp; - - - __asm__ __volatile__ ( - "1: ldrexd %0, %H0, [%2] \n" - " cmp %0, %3 \n" - " it eq \n" - " cmpeq %H0, %H3 \n" - " bne 2f \n" - " strexd %1, %4, %H4, [%2] \n" - " cmp %1, #0 \n" - " bne 1b \n" - "2: \n" - - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) - : "cc", "memory"); - - return (ret == oldval); + int64_t prev; + int tmp; + bool ret; + + __asm__ __volatile__ ( + "1: ldrexd %0, %H0, [%2] \n" + " cmp %0, %3 \n" + " it eq \n" + " cmpeq %H0, %H3 \n" + " bne 2f \n" + " strexd %1, %4, %H4, [%2] \n" + " cmp %1, #0 \n" + " bne 1b \n" + "2: \n" + + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) + : "cc", "memory"); + + ret = (prev == *oldval); + *oldval = prev; + return ret; } /* these two functions aren't inlined in the non-gcc case because then @@ -184,23 +188,21 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, atomic_?mb can be inlined). 
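The ldrex/strex loops above (and the ldaxr/stxr loops in the arm64 header that follows) hand-code a strong compare-exchange: they return whether the store succeeded and, on every path, write the previously observed value back through *oldval, while the _acq/_rel variants just add a barrier after or before the plain operation. The same contract written with C11 atomics, purely as an illustration of what the assembly implements (the relaxed/fence mapping is an approximation of opal_atomic_rmb/opal_atomic_wmb):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool cmpxchg_strong_32 (_Atomic int32_t *addr, int32_t *oldval, int32_t newval)
{
    return atomic_compare_exchange_strong_explicit (addr, oldval, newval,
                                                    memory_order_relaxed,
                                                    memory_order_relaxed);
}

static bool cmpxchg_strong_acq_32 (_Atomic int32_t *addr, int32_t *oldval, int32_t newval)
{
    bool rc = cmpxchg_strong_32 (addr, oldval, newval);
    atomic_thread_fence (memory_order_acquire);   /* read barrier after the exchange */
    return rc;
}

static bool cmpxchg_strong_rel_32 (_Atomic int32_t *addr, int32_t *oldval, int32_t newval)
{
    atomic_thread_fence (memory_order_release);   /* write barrier before the exchange */
    return cmpxchg_strong_32 (addr, oldval, newval);
}
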
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } #endif @@ -247,30 +249,6 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) return t; } -#else /* OPAL_ASM_ARM_VERSION <=5 or no GCC inline assembly */ - -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0))) -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return !(__kuser_cmpxchg(oldval, newval, addr)); -} - -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - /* kernel function includes all necessary memory barriers */ - return opal_atomic_bool_cmpset_32(addr, oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - /* kernel function includes all necessary memory barriers */ - return opal_atomic_bool_cmpset_32(addr, oldval, newval); -} - #endif #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index c95c3cdc6ad..6ef7776ea6f 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -29,10 +29,10 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -82,10 +82,10 @@ static inline void opal_atomic_isync (void) * *********************************************************************/ -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -93,11 +93,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) @@ -119,10 +121,10 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -130,18 +132,20 @@ static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -149,11 +153,13 @@ static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, " stlxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) @@ -179,11 +185,11 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) return 
ret == 0; } -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; + int64_t prev; int tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" " cmp %0, %3 \n" @@ -191,11 +197,13 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, " stxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) @@ -218,11 +226,11 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; + int64_t prev; int tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" " cmp %0, %3 \n" @@ -230,19 +238,21 @@ static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, " stxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; + int64_t prev; int tmp; + bool ret; __asm__ __volatile__ ("1: ldxr %0, [%2] \n" " cmp %0, %3 \n" @@ -250,11 +260,13 @@ static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, " stlxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr) diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 961ebac0114..3a2a05a2277 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -40,11 +40,11 @@ * * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks - * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly" - * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly" + * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/compare-exchange can be done "atomicly" + * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/compare-exchange can be done "atomicly" * * Note that for the Atomic math, atomic add/sub may be implemented as - * C code using opal_atomic_bool_cmpset. The appearance of atomic + * C code using opal_atomic_compare_exchange. The appearance of atomic * operation will be upheld in these cases.
*/ @@ -107,8 +107,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; *********************************************************************/ #if !OPAL_GCC_INLINE_ASSEMBLY #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 0 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 0 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 0 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 0 @@ -123,8 +123,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0 #else #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 @@ -187,14 +187,14 @@ enum { /* compare and set operations can't really be emulated from software, so if these defines aren't already set, they should be set to 0 now */ -#ifndef OPAL_HAVE_ATOMIC_CMPSET_32 -#define OPAL_HAVE_ATOMIC_CMPSET_32 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0 #endif -#ifndef OPAL_HAVE_ATOMIC_CMPSET_64 -#define OPAL_HAVE_ATOMIC_CMPSET_64 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 #endif -#ifndef OPAL_HAVE_ATOMIC_CMPSET_128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 #endif #ifndef OPAL_HAVE_ATOMIC_LLSC_32 #define OPAL_HAVE_ATOMIC_LLSC_32 0 @@ -270,7 +270,7 @@ void opal_atomic_wmb(void); /********************************************************************** * - * Atomic spinlocks - always inlined, if have atomic cmpset + * Atomic spinlocks - always inlined, if have atomic compare-and-swap * *********************************************************************/ @@ -280,7 +280,7 @@ void opal_atomic_wmb(void); #define OPAL_HAVE_ATOMIC_SPINLOCKS 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) /** * Initialize a lock to value @@ -330,7 +330,7 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 #undef OPAL_HAVE_ATOMIC_SPINLOCKS -#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) #define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1 #endif @@ -347,48 +347,48 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #endif #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool 
opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); #endif -#if !defined(OPAL_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN) -#define OPAL_HAVE_ATOMIC_CMPSET_64 0 +#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); +bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); +bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); +bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); #endif @@ -397,35 +397,35 @@ bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, #define OPAL_HAVE_ATOMIC_MATH_32 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_CMPSET_32 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 /* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back on cmpset 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) + back on compare-exchange 32, that too will be inline. 
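As the comments here note, when no native fetch-and-add (or and/or/xor/sub) exists the 32- and 64-bit math operations fall back to a compare-exchange loop; because the new interface writes the observed value back into the expected argument, that loop needs no separate reload, which is exactly the shape of the OPAL_ATOMIC_DEFINE_CMPXCG_OP macro added to atomic_impl.h later in this patch. A standalone sketch of the fallback for add, with C11 atomics as the stand-in:

#include <stdatomic.h>
#include <stdint.h>

static int32_t add_32_via_cmpxchg (_Atomic int32_t *addr, int delta)
{
    int32_t oldval = atomic_load (addr);

    /* on failure, oldval is refreshed with the current contents of *addr */
    while (!atomic_compare_exchange_strong (addr, &oldval, oldval + delta)) {
        /* retry with the updated oldval */
    }

    return oldval + delta;
}
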
*/ +#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_add_32(volatile int32_t *addr, int delta); -#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_CMPSET_32) +#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value); -#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_CMPSET_32) +#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value); -#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_CMPSET_32) +#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value); /* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back to cmpset 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) + back to compare-exchange 32, that too will be inline. */ +#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32) static inline #endif int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); @@ -435,7 +435,7 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); #if ! OPAL_HAVE_ATOMIC_MATH_32 /* fix up the value of opal_have_atomic_math_32 to allow for C versions */ #undef OPAL_HAVE_ATOMIC_MATH_32 -#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_CMPSET_32 +#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #endif #ifndef OPAL_HAVE_ATOMIC_MATH_64 @@ -443,35 +443,35 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); #define OPAL_HAVE_ATOMIC_MATH_64 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_CMPSET_64 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 /* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides a static inline version of it (in assembly). 
If we have to fall - back to cmpset 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) + back to compare-exchange 64, that too will be inline */ +#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta); -#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_CMPSET_64) +#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value); -#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_CMPSET_64) +#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value); -#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_CMPSET_64) +#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value); /* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides a static inline version of it (in assembly). If we have to fall - back to cmpset 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) + back to compare-exchange 64, that too will be inline */ +#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) static inline #endif int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); @@ -481,7 +481,7 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); #if ! OPAL_HAVE_ATOMIC_MATH_64 /* fix up the value of opal_have_atomic_math_64 to allow for C versions */ #undef OPAL_HAVE_ATOMIC_MATH_64 -#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_CMPSET_64 +#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #endif /* provide a size_t add/subtract. 
When in debug mode, make it an @@ -524,81 +524,78 @@ opal_atomic_sub_size_t(volatile size_t *addr, size_t delta) #endif #endif -#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) /* these are always done with inline functions, so always mark as static inline */ -static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length); -static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr, - int64_t oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_bool_cmpset_rel_xx(volatile void* addr, - int64_t oldval, int64_t newval, - size_t length); - -static inline bool opal_atomic_bool_cmpset_ptr(volatile void* addr, - void* oldval, - void* newval); -static inline bool opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, - void* oldval, - void* newval); -static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, - void* oldval, - void* newval); + +static inline bool opal_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval, + int64_t newval, size_t length); +static inline bool opal_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval, + int64_t newval, size_t length); +static inline bool opal_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval, + int64_t newval, size_t length); + + +static inline bool opal_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval, + void *newval); +static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval, + void *newval); +static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval, + void *newval); /** - * Atomic compare and set of pointer with relaxed semantics. This + * Atomic compare and set of generic type with relaxed semantics. This * macro detect at compile time the type of the first argument and * choose the correct function to be called. * * \note This macro should only be used for integer types. * * @param addr Address of . - * @param oldval Comparison value . + * @param oldval Comparison value address of . * @param newval New value to set if comparision is true . * - * See opal_atomic_bool_cmpset_* for pseudo-code. + * See opal_atomic_compare_exchange_* for pseudo-code. */ -#define opal_atomic_bool_cmpset( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \ - (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) +#define opal_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) /** - * Atomic compare and set of pointer with acquire semantics. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. + * Atomic compare and set of generic type with acquire semantics. This + * macro detect at compile time the type of the first argument and + * choose the correct function to be called. * * \note This macro should only be used for integer types. * * @param addr Address of . - * @param oldval Comparison value . + * @param oldval Comparison value address of . * @param newval New value to set if comparision is true . * - * See opal_atomic_bool_cmpset_acq_* for pseudo-code. + * See opal_atomic_compare_exchange_acq_* for pseudo-code. 
*/ -#define opal_atomic_bool_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ - (int64_t)(NEWVAL), sizeof(*(ADDR)) ) - +#define opal_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) /** - * Atomic compare and set of pointer with release semantics. This - * macro detect at compile time the type of the first argument - * and choose the correct function to b + * Atomic compare and set of generic type with release semantics. This + * macro detect at compile time the type of the first argument and + * choose the correct function to be called. * * \note This macro should only be used for integer types. * * @param addr Address of . - * @param oldval Comparison value . + * @param oldval Comparison value address of . * @param newval New value to set if comparision is true . * - * See opal_atomic_bool_cmpsetrel_* for pseudo-code. + * See opal_atomic_compare_exchange_rel_* for pseudo-code. */ -#define opal_atomic_bool_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ - (int64_t)(NEWVAL), sizeof(*(ADDR)) ) +#define opal_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) + -#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */ +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) @@ -606,10 +603,10 @@ static inline void opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length); static inline void opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length); -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ); static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta ); static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); #else diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index c066d831cb9..576fc8ed2e5 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -34,20 +34,31 @@ * * Some architectures do not provide support for the 64 bits * atomic operations. 
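The arithmetic fallbacks that the atomic_impl.h hunk below collapses into OPAL_ATOMIC_DEFINE_CMPXCG_OP all share one compare-exchange retry loop. Expanding the macro by hand for OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add) gives, up to whitespace, the sketch below; note that the loop still reloads *addr at the top of every iteration even though a failed exchange already refreshes oldval, which is redundant but harmless:

static inline int32_t opal_atomic_add_32 (volatile int32_t *addr, int32_t value)
{
    int32_t oldval, newval;
    do {
        oldval = *addr;               /* snapshot the current value */
        newval = oldval + value;      /* compute the desired result */
    } while (!opal_atomic_compare_exchange_strong_32 (addr, &oldval, newval));

    return newval;                    /* same as the old add_32: return the updated value */
}
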
Until we find a better solution let's just - * undefine all those functions if there is no 64 bit cmpset + * undefine all those functions if there is no 64 bit compare-exchange * *********************************************************************/ -#if OPAL_HAVE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 + +#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operand, name) \ + static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ + { \ + type oldval, newval; \ + do { \ + oldval = *addr; \ + newval = oldval operand value; \ + } while (!opal_atomic_compare_exchange_strong_ ## bits (addr, &oldval, newval)); \ + \ + return newval; \ + } #if !defined(OPAL_HAVE_ATOMIC_SWAP_32) #define OPAL_HAVE_ATOMIC_SWAP_32 1 static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) { - int32_t old; + int32_t old = *addr; do { - old = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, old, newval)); + } while (!opal_atomic_compare_exchange_strong_32 (addr, &old, newval)); return old; } @@ -55,161 +66,91 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, #if !defined(OPAL_HAVE_ATOMIC_ADD_32) #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t -opal_atomic_add_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval + delta)); - return (oldval + delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add) + #endif /* OPAL_HAVE_ATOMIC_ADD_32 */ #if !defined(OPAL_HAVE_ATOMIC_AND_32) #define OPAL_HAVE_ATOMIC_AND_32 1 -static inline int32_t -opal_atomic_and_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval & value)); - return (oldval & value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and) + #endif /* OPAL_HAVE_ATOMIC_AND_32 */ #if !defined(OPAL_HAVE_ATOMIC_OR_32) #define OPAL_HAVE_ATOMIC_OR_32 1 -static inline int32_t -opal_atomic_or_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval | value)); - return (oldval | value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or) + #endif /* OPAL_HAVE_ATOMIC_OR_32 */ #if !defined(OPAL_HAVE_ATOMIC_XOR_32) #define OPAL_HAVE_ATOMIC_XOR_32 1 -static inline int32_t -opal_atomic_xor_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval ^ value)); - return (oldval ^ value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor) + #endif /* OPAL_HAVE_ATOMIC_XOR_32 */ #if !defined(OPAL_HAVE_ATOMIC_SUB_32) #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t -opal_atomic_sub_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval - delta)); - return (oldval - delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) + #endif /* OPAL_HAVE_ATOMIC_SUB_32 */ -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ -#if OPAL_HAVE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #if !defined(OPAL_HAVE_ATOMIC_SWAP_64) #define OPAL_HAVE_ATOMIC_SWAP_64 1 static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) { - int64_t old; + int64_t old = *addr; do { - old = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, old, newval)); + } while 
(!opal_atomic_compare_exchange_strong_64 (addr, &old, newval)); + return old; } #endif /* OPAL_HAVE_ATOMIC_SWAP_32 */ #if !defined(OPAL_HAVE_ATOMIC_ADD_64) #define OPAL_HAVE_ATOMIC_ADD_64 1 -static inline int64_t -opal_atomic_add_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval + delta)); - return (oldval + delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add) + #endif /* OPAL_HAVE_ATOMIC_ADD_64 */ #if !defined(OPAL_HAVE_ATOMIC_AND_64) #define OPAL_HAVE_ATOMIC_AND_64 1 -static inline int64_t -opal_atomic_and_64(volatile int64_t *addr, int64_t value) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval & value)); - return (oldval & value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and) + #endif /* OPAL_HAVE_ATOMIC_AND_64 */ #if !defined(OPAL_HAVE_ATOMIC_OR_64) #define OPAL_HAVE_ATOMIC_OR_64 1 -static inline int64_t -opal_atomic_or_64(volatile int64_t *addr, int64_t value) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval | value)); - return (oldval | value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or) + #endif /* OPAL_HAVE_ATOMIC_OR_64 */ #if !defined(OPAL_HAVE_ATOMIC_XOR_64) #define OPAL_HAVE_ATOMIC_XOR_64 1 -static inline int64_t -opal_atomic_xor_64(volatile int64_t *addr, int64_t value) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval ^ value)); - return (oldval ^ value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor) + #endif /* OPAL_HAVE_ATOMIC_XOR_64 */ #if !defined(OPAL_HAVE_ATOMIC_SUB_64) #define OPAL_HAVE_ATOMIC_SUB_64 1 -static inline int64_t -opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval - delta)); - return (oldval - delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) + #endif /* OPAL_HAVE_ATOMIC_SUB_64 */ #else @@ -222,130 +163,70 @@ opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) #define OPAL_HAVE_ATOMIC_SUB_64 0 #endif -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - - -#if (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) - -static inline bool -opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 - case 4: - return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ - -#if OPAL_HAVE_ATOMIC_CMPSET_64 - case 8: - return opal_atomic_bool_cmpset_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - } - abort(); - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ -} - - -static inline bool -opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 - case 4: - return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ - -#if OPAL_HAVE_ATOMIC_CMPSET_64 - case 8: - return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - } - /* This should never happen, so deliberately abort (hopefully - leaving a corefile 
for analysis) */ - abort(); -} - - -static inline bool -opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 - case 4: - return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ - -#if OPAL_HAVE_ATOMIC_CMPSET_64 - case 8: - return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - } - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); -} - - -static inline bool -opal_atomic_bool_cmpset_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ + +#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) + +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \ + int64_t newval, const size_t length) \ + { \ + switch (length) { \ + case 4: \ + return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \ + (int32_t *) oldval, (int32_t) newval); \ + case 8: \ + return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, \ + (int64_t *) oldval, (int64_t) newval); \ + } \ + abort(); \ + } +#elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \ + int64_t newval, const size_t length) \ + { \ + switch (length) { \ + case 4: \ + return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \ + (int32_t *) oldval, (int32_t) newval); \ + abort(); \ + } #else - abort(); +#error "Platform does not have required atomic compare-and-swap functionality" #endif -} - -static inline bool -opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); +OPAL_ATOMIC_DEFINE_CMPXCG_XX(_) +OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_) +OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_) + +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \ + { \ + return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, (int32_t *) oldval, (int32_t) newval); \ + } +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, 
void *newval) \ + { \ + return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, (int64_t *) oldval, (int64_t) newval); \ + } #else - abort(); +#error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics" #endif -} +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_) +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_) +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) -static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#else - abort(); -#endif -} +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ -#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */ #if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) @@ -392,7 +273,7 @@ opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) case 4: opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value ); break; -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ #if OPAL_HAVE_ATOMIC_ADD_64 case 8: @@ -493,21 +374,20 @@ opal_atomic_lock_init( opal_atomic_lock_t* lock, int32_t value ) static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) { - bool ret = opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), - OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED); - return (ret == 0) ? 1 : 0; + int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; + bool ret = opal_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED); + return (ret == false) ? 
1 : 0; } static inline void opal_atomic_lock(opal_atomic_lock_t *lock) { - while( !opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), - OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED) ) { - while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { - /* spin */ ; - } - } + while (opal_atomic_trylock (lock)) { + while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { + /* spin */ ; + } + } } diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 2425bbf2509..6521a50ed98 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -33,7 +33,7 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1 @@ -41,7 +41,7 @@ #define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_64 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1 @@ -81,26 +81,20 @@ static inline void opal_atomic_wmb(void) #pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) @@ -135,26 +129,20 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, 
int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) @@ -191,25 +179,28 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } #elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 /* __atomic version is not lock-free so use legacy __sync version */ -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __sync_bool_compare_and_swap (addr, oldval, newval); + opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } #endif diff --git a/opal/include/opal/sys/ia32/atomic.h b/opal/include/opal/sys/ia32/atomic.h index 35da400ef04..009256f3cce 100644 --- a/opal/include/opal/sys/ia32/atomic.h +++ b/opal/include/opal/sys/ia32/atomic.h @@ -40,7 +40,7 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -84,15 +84,13 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, - int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgl %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "=qm" (ret), "+a" (*oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); @@ -101,8 +99,8 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 -#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 +#define 
opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 #if OPAL_GCC_INLINE_ASSEMBLY diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 34c3a689fab..31cf96b1f7c 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -40,7 +40,7 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 @@ -53,7 +53,7 @@ #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_MATH_64 1 @@ -144,24 +144,25 @@ void opal_atomic_isync(void) #define OPAL_ASM_VALUE64(x) x #endif - -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret; - - __asm__ __volatile__ ( - "1: lwarx %0, 0, %2 \n\t" - " cmpw 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stwcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (ret), "=m" (*addr) - : "r" OPAL_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr) - : "cc", "memory"); + int32_t prev; + bool ret; + + __asm__ __volatile__ ( + "1: lwarx %0, 0, %2 \n\t" + " cmpw 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stwcx. %4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r" (prev), "=m" (*addr) + : "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr) + : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) @@ -195,23 +196,21 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) atomic_?mb can be inlined). 
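The acquire and release flavours just below keep their old structure: a relaxed compare-exchange plus an explicit barrier on the appropriate side. A generic illustration of that layering is sketched here (the helper names cas32_acquire/cas32_release are made up, and the opal atomic header is assumed to be on the include path):

/* Illustration only: how _acq/_rel variants can be layered on the relaxed
 * compare-exchange when the platform does not provide them natively. */
static inline bool cas32_acquire (volatile int32_t *addr, int32_t *expected, int32_t desired)
{
    bool rc = opal_atomic_compare_exchange_strong_32 (addr, expected, desired);
    opal_atomic_rmb ();    /* read barrier: later loads may not move before the exchange */
    return rc;
}

static inline bool cas32_release (volatile int32_t *addr, int32_t *expected, int32_t desired)
{
    opal_atomic_wmb ();    /* write barrier: earlier stores complete before the exchange */
    return opal_atomic_compare_exchange_strong_32 (addr, expected, desired);
}
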
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) @@ -258,23 +257,25 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; - - __asm__ __volatile__ ( - "1: ldarx %0, 0, %2 \n\t" - " cmpd 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stdcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) - : "cc", "memory"); + int64_t prev; + bool ret; + + __asm__ __volatile__ ( + "1: ldarx %0, 0, %2 \n\t" + " cmpd 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stdcx. %4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r" (prev), "=m" (*addr) + : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) + : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) @@ -303,29 +304,6 @@ static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval) return ret; } -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). 
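Every port touched by this patch is expected to honour the contract that the C11 compare_exchange_strong family defines: the boolean result says whether the store happened, the expected value is left alone on success, and it is overwritten with the observed value on failure. A standalone check written against plain C11 atomics (no Open MPI headers involved) makes that contract explicit:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

int main (void)
{
    _Atomic int64_t target = 42;
    int64_t expected = 42;

    /* success: target becomes 50, expected is untouched */
    assert (atomic_compare_exchange_strong (&target, &expected, 50));
    assert (atomic_load (&target) == 50 && expected == 42);

    /* failure: target unchanged, expected updated to the observed value */
    expected = 42;
    assert (!atomic_compare_exchange_strong (&target, &expected, 60));
    assert (atomic_load (&target) == 50 && expected == 50);

    return 0;
}
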
Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - bool rc; - - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); -} static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) { @@ -352,9 +330,9 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval #if OPAL_GCC_INLINE_ASSEMBLY -static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { + int64_t prev; int ret; /* @@ -369,55 +347,53 @@ static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, * is very similar to the pure 64 bit version. */ __asm__ __volatile__ ( - "ld r4,%2 \n\t" - "ld r5,%3 \n\t" - "1: ldarx r9, 0, %1 \n\t" - " cmpd 0, r9, r4 \n\t" + "ld r4,%3 \n\t" + "ld r5,%4 \n\t" + "1: ldarx %1, 0, %2 \n\t" + " cmpd 0, %1, r4 \n\t" " bne- 2f \n\t" - " stdcx. r5, 0, %1 \n\t" + " stdcx. r5, 0, %2 \n\t" " bne- 1b \n\t" "2: \n\t" - "xor r5,r4,r9 \n\t" + "xor r5,r4,%1 \n\t" "subfic r9,r5,0 \n\t" "adde %0,r9,r5 \n\t" - : "=&r" (ret) + : "=&r" (ret), "+r" (prev) : "r"OPAL_ASM_ADDR(addr), - "m"(oldval), "m"(newval) + "m"(*oldval), "m"(newval) : "r4", "r5", "r9", "cc", "memory"); - - return ret; + *oldval = prev; + return (bool) ret; } +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#endif /* OPAL_ASM_SUPPORT_64BIT */ + +#if OPAL_GCC_INLINE_ASSEMBLY + /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int rc; + bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* OPAL_ASM_SUPPORT_64BIT */ - - -#if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \ diff --git a/opal/include/opal/sys/sparcv9/atomic.h b/opal/include/opal/sys/sparcv9/atomic.h index 098cf875ce9..c79e32b1ebb 100644 --- a/opal/include/opal/sys/sparcv9/atomic.h +++ b/opal/include/opal/sys/sparcv9/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,9 +41,9 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** @@ -82,50 +85,49 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) - * - * if (*(reg(rs1)) == reg(rs2) ) - * swap reg(rd), *(reg(rs1)) - * else - * reg(rd) = *(reg(rs1)) - */ - - int32_t ret = newval; - - __asm__ __volatile__("casa [%1] " ASI_P ", %2, %0" - : "+r" (ret) - : "r" (addr), "r" (oldval)); - return (ret == oldval); + /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) + * + * if (*(reg(rs1)) == reg(rs2) ) + * swap reg(rd), *(reg(rs1)) + * else + * reg(rd) = *(reg(rs1)) + */ + + int32_t prev = newval; + bool ret; + + __asm__ __volatile__("casa [%1] " ASI_P ", %2, %0" + : "+r" (prev) + : "r" (addr), "r" (*oldval)); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - bool rc; + bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); - opal_atomic_rmb(); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); + opal_atomic_rmb(); - return rc; + return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } #if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -134,18 +136,20 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, * else * reg(rd) = *(reg(rs1)) */ - int64_t ret = newval; - - __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" - : "+r" (ret) - : "r" (addr), "r" (oldval)); - return (ret == oldval); + int64_t prev = newval; + bool ret; + + __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" + : "+r" (prev) + : "r" (addr), "r" (*oldval)); + ret = (prev == *oldval); + *oldval = prev; + return ret; } #else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -155,40 +159,41 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, * reg(rd) = 
*(reg(rs1)) * */ - long long ret = newval; + int64_t prev = newval; + bool ret; __asm__ __volatile__( "ldx %0, %%g1 \n\t" /* g1 = prev */ "ldx %2, %%g2 \n\t" /* g2 = oldval */ "casxa [%1] " ASI_P ", %%g2, %%g1 \n\t" "stx %%g1, %0 \n" - : "+m"(ret) - : "r"(addr), "m"(oldval) + : "+m"(prev) + : "r"(addr), "m"(*oldval) : "%g1", "%g2" ); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } #endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - bool rc; + bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); + opal_atomic_rmb(); - return rc; + return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h index 0a95048079f..f8557a69451 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -53,25 +53,19 @@ static inline void opal_atomic_wmb(void) * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval);} - -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); + int32_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 + #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -106,25 +100,19 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) #if OPAL_ASM_SYNC_HAVE_64BIT -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval);} - +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval)
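The sync_builtin port above has to switch from __sync_bool_compare_and_swap to the value-returning __sync_val_compare_and_swap precisely because the boolean builtin cannot report what it found in memory. A standalone GCC/Clang sketch of the same wrapper pattern, written independently of the Open MPI tree, looks like this:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Derive the boolean result from the value-returning builtin and hand the
 * observed value back to the caller, as the sync_builtin port does. */
static bool cas32_sync (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    int32_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
    bool ret = (prev == *oldval);
    *oldval = prev;           /* caller sees what was actually in memory */
    return ret;
}

int main (void)
{
    volatile int32_t x = 1;
    int32_t expected = 2;
    /* The exchange misses (x is 1, not 2) and 'expected' becomes 1. */
    printf ("cas failed as expected: %d, observed %d\n",
            (int) !cas32_sync (&x, &expected, 3), (int) expected);
    return 0;
}
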
+static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval); + int64_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } +#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 +#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 + #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) @@ -159,13 +147,16 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) #endif #if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval); + opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 #endif diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index b56dd939b49..a9d881f6552 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -40,9 +40,9 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** * @@ -82,14 +82,13 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgl %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "=qm" (ret), "+a" (*oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); @@ -98,19 +97,18 @@ static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 -#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgq %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) + : "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr)) : "q"(newval) : "memory", "cc" ); @@ -120,13 +118,12 @@ static inline bool opal_atomic_bool_cmpset_64(
volatile int64_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_64 opal_atomic_bool_cmpset_64 -#define opal_atomic_bool_cmpset_rel_64 opal_atomic_bool_cmpset_64 +#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 +#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 #if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, - opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval) { unsigned char ret; @@ -135,15 +132,14 @@ static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, op * at the address is returned in eax:edx. */ __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" "sete %0 \n\t" - : "=qm" (ret) - : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), - "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) - : "memory", "cc"); + : "=qm" (ret), "+a" (((int64_t *)oldval)[0]), "+d" (((int64_t *)oldval)[1]) + : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]) + : "memory", "cc", "eax", "edx"); return (bool) ret; } -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index eaefb7e66b4..8700a204ebb 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -373,11 +373,12 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) /* Release memory resources */ do { + void *_tmp_ptr = NULL; /* Make sure that mca_btl_openib_endpoint_connect_eager_rdma () * was not in "connect" or "bad" flow (failed to allocate memory) * and changed the pointer back to NULL */ - if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) { + if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) { if (NULL != endpoint->eager_rdma_local.reg) { endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache, &endpoint->eager_rdma_local.reg->base); @@ -894,12 +895,14 @@ void mca_btl_openib_endpoint_connect_eager_rdma( mca_btl_openib_recv_frag_t *headers_buf; int i, rc; uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS; + void *_tmp_ptr = NULL; /* Set local rdma pointer to 1 temporarily so other threads will not try * to enter the function */ - if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, - (void*)1)) + if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, + (void *) 1)) { return; + } headers_buf = (mca_btl_openib_recv_frag_t*) malloc(sizeof(mca_btl_openib_recv_frag_t) * @@ -975,18 +978,19 @@ void mca_btl_openib_endpoint_connect_eager_rdma( endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1; /* set local rdma pointer to real value */ - (void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, - (void*)1, buf); + endpoint->eager_rdma_local.base.pval = buf; endpoint->eager_rdma_local.alloc_base = alloc_base; if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) { mca_btl_openib_device_t *device = 
endpoint->endpoint_btl->device; mca_btl_openib_endpoint_t **p; + void *_tmp_ptr; OBJ_RETAIN(endpoint); assert(((opal_object_t*)endpoint)->obj_reference_count == 2); do { + _tmp_ptr = NULL; p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count]; - } while(!opal_atomic_bool_cmpset_ptr(p, NULL, endpoint)); + } while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint)); OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1); /* from this point progress function starts to poll new buffer */ @@ -1001,8 +1005,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma( free(headers_buf); unlock_rdma_local: /* set local rdma pointer back to zero. Will retry later */ - (void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, - endpoint->eager_rdma_local.base.pval, NULL); + endpoint->eager_rdma_local.base.pval = NULL; endpoint->eager_rdma_local.frags = NULL; } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index f580476abdb..285221bba35 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -446,14 +446,19 @@ static inline int mca_btl_openib_endpoint_post_rr( return ret; } -#define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \ - OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1) -#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ - OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0) -#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ - do { \ - TO = FROM; \ - } while(0 == OPAL_ATOMIC_BOOL_CMPSET_32(&FROM, TO, 0)) +static inline __opal_attribute_always_inline__ bool btl_openib_credits_send_trylock (mca_btl_openib_endpoint_t *ep, int qp) +{ + int32_t _tmp_value = 0; + return OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ep->qps[qp].rd_credit_send_lock, &_tmp_value, 1); +} + +#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ + do { \ + int32_t _tmp_value = 1; \ + OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&(E)->qps[(Q)].rd_credit_send_lock, &_tmp_value, 0); \ + } while (0) +#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ + TO = OPAL_ATOMIC_SWAP_32(&FROM, 0) static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep) @@ -486,7 +491,7 @@ static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp) return; try_send: - if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp)) + if(btl_openib_credits_send_trylock(ep, qp)) mca_btl_openib_endpoint_send_credits(ep, qp); } diff --git a/opal/mca/btl/ugni/btl_ugni_smsg.c b/opal/mca/btl/ugni/btl_ugni_smsg.c index bc8858baec5..b90c95a6a9e 100644 --- a/opal/mca/btl/ugni/btl_ugni_smsg.c +++ b/opal/mca/btl/ugni/btl_ugni_smsg.c @@ -59,12 +59,13 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep) mca_btl_ugni_base_frag_t frag; mca_btl_base_segment_t seg; bool disconnect = false; + int32_t _tmp_value = 0; uintptr_t data_ptr; gni_return_t rc; uint32_t len; int count = 0; - if (!opal_atomic_bool_cmpset_32 (&ep->smsg_progressing, 0, 1)) { + if (!opal_atomic_compare_exchange_strong_32 (&ep->smsg_progressing, &_tmp_value, 1)) { /* already progressing (we can't support reentry here) */ return 0; } diff --git a/opal/mca/btl/vader/btl_vader_fifo.h b/opal/mca/btl/vader/btl_vader_fifo.h index 8304841cf84..0dc70bc8a13 100644 --- a/opal/mca/btl/vader/btl_vader_fifo.h +++ b/opal/mca/btl/vader/btl_vader_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. 
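A pattern worth noting in the call-site changes above (openib, ugni, and the flow-control code earlier): because the expected value is now passed by address and is clobbered on a miss, literal arguments such as NULL or 0 have to be replaced by a temporary, and a caller that retries in a loop has to re-arm that temporary before every attempt, exactly as the do/while in mca_btl_openib_endpoint_connect_eager_rdma does. A reduced sketch, with a made-up helper name and slot variable and an assumed include path:

#include "opal/sys/atomic.h"   /* assumed include path */

/* Publish 'ep' into a slot that is expected to be NULL; illustrative only. */
static void publish_endpoint (void *volatile *slot, void *ep)
{
    void *expected;
    do {
        expected = NULL;   /* must be reset before every retry: a failed
                            * exchange overwrites it with the current owner */
    } while (!opal_atomic_compare_exchange_strong_ptr ((volatile void *) slot, (void *) &expected, ep));
}
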
- * Copyright (c) 2010-2014 Los Alamos National Security, LLC. + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -30,8 +30,9 @@ #include "btl_vader_endpoint.h" #include "btl_vader_frag.h" +#define vader_item_compare_exchange(x, y, z) opal_atomic_compare_exchange_strong_ptr ((volatile void **) (x), (void **) (y), (void *) (z)) + #if SIZEOF_VOID_P == 8 - #define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z)) #define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y)) #define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll @@ -40,7 +41,6 @@ typedef int64_t fifo_value_t; #else - #define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z)) #define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y)) #define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl @@ -138,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) { opal_atomic_rmb(); - if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) { + if (!vader_item_compare_exchange (&fifo->fifo_tail, &value, VADER_FIFO_FREE)) { while (VADER_FIFO_FREE == hdr->next) { opal_atomic_rmb (); } diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index ef9a73d3ceb..96a37156786 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. @@ -171,15 +171,16 @@ static const uint32_t ProcInc = 0x2; opal_cr_thread_in_library = false; \ } \ } -#define OPAL_CR_THREAD_LOCK() \ - { \ - while(!OPAL_ATOMIC_BOOL_CMPSET_32(&opal_cr_thread_num_in_library, 0, ThreadFlag)) { \ - if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ - break; \ - } \ - sched_yield(); \ - usleep(opal_cr_thread_sleep_check); \ - } \ +#define OPAL_CR_THREAD_LOCK() \ + { \ + int32_t _tmp_value = 0; \ + while(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&opal_cr_thread_num_in_library, &_tmp_value, ThreadFlag)) { \ + if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ + break; \ + } \ + sched_yield(); \ + usleep(opal_cr_thread_sleep_check); \ + } \ } #define OPAL_CR_THREAD_UNLOCK() \ { \ diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 6976d0b555f..248735ae3aa 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. 
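In the thread_usage.h hunk that follows, the single-threaded fast path has to mimic the new failure behaviour of the real atomic, otherwise callers that rely on the written-back value would misbehave whenever opal_using_threads() is false. Expanding OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int32_t, int32_t, 32) by hand gives, apart from the added comment, the following:

static inline bool opal_thread_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *compare, int32_t value)
{
    if (OPAL_UNLIKELY(opal_using_threads())) {
        return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, compare, value);
    }

    if ((int32_t) *addr == *compare) {
        ((int32_t *) addr)[0] = value;
        return true;
    }

    /* mirror the atomic path: report the value actually seen in memory */
    *compare = ((int32_t *) addr)[0];

    return false;
}
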
  * $COPYRIGHT$
  *
@@ -143,18 +143,20 @@ static inline type opal_thread_sub_ ## suffix (volatile type *addr, type delta)
     return (*addr -= delta); \
 }
 
-#define OPAL_THREAD_DEFINE_ATOMIC_CMPSET(type, addr_type, suffix) \
-static inline bool opal_thread_cmpset_bool_ ## suffix (volatile addr_type *addr, type compare, type value) \
+#define OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \
+static inline bool opal_thread_compare_exchange_strong_ ## suffix (volatile addr_type *addr, type *compare, type value) \
 { \
     if (OPAL_UNLIKELY(opal_using_threads())) { \
-        return opal_atomic_bool_cmpset_ ## suffix ((volatile type *) addr, compare, value); \
+        return opal_atomic_compare_exchange_strong_ ## suffix ((volatile type *) addr, compare, value); \
     } \
     \
-    if ((type) *addr == compare) { \
+    if ((type) *addr == *compare) { \
         ((type *) addr)[0] = value; \
         return true; \
     } \
     \
+    *compare = ((type *) addr)[0]; \
+    \
     return false; \
 }
 
@@ -178,8 +180,8 @@ OPAL_THREAD_DEFINE_ATOMIC_OR(int32_t, 32)
 OPAL_THREAD_DEFINE_ATOMIC_XOR(int32_t, 32)
 OPAL_THREAD_DEFINE_ATOMIC_SUB(int32_t, 32)
 OPAL_THREAD_DEFINE_ATOMIC_SUB(size_t, size_t)
-OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int32_t, int32_t, 32)
-OPAL_THREAD_DEFINE_ATOMIC_CMPSET(void *, intptr_t, ptr)
+OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int32_t, int32_t, 32)
+OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(void *, intptr_t, ptr)
 OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32)
 OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr)
 
@@ -201,11 +203,11 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr)
 #define OPAL_THREAD_SUB_SIZE_T opal_thread_sub_size_t
 #define OPAL_ATOMIC_SUB_SIZE_T opal_thread_sub_size_t
 
-#define OPAL_THREAD_BOOL_CMPSET_32 opal_thread_cmpset_bool_32
-#define OPAL_ATOMIC_BOOL_CMPSET_32 opal_thread_cmpset_bool_32
+#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32
+#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32
 
-#define OPAL_THREAD_BOOL_CMPSET_PTR(x, y, z) opal_thread_cmpset_bool_ptr ((volatile intptr_t *) x, (void *) y, (void *) z)
-#define OPAL_ATOMIC_BOOL_CMPSET_PTR OPAL_THREAD_BOOL_CMPSET_PTR
+#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR(x, y, z) opal_thread_compare_exchange_strong_ptr ((volatile intptr_t *) x, (void *) y, (void *) z)
+#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR
 
 #define OPAL_THREAD_SWAP_32 opal_thread_swap_32
 #define OPAL_ATOMIC_SWAP_32 opal_thread_swap_32
 
@@ -220,7 +222,7 @@ OPAL_THREAD_DEFINE_ATOMIC_ADD(int64_t, 64)
 OPAL_THREAD_DEFINE_ATOMIC_AND(int64_t, 64)
 OPAL_THREAD_DEFINE_ATOMIC_OR(int64_t, 64)
 OPAL_THREAD_DEFINE_ATOMIC_XOR(int64_t, 64)
-OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int64_t, int64_t, 64)
+OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int64_t, int64_t, 64)
 OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64)
 
 #define OPAL_THREAD_ADD64 opal_thread_add_64
 
@@ -235,8 +237,8 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64)
 #define OPAL_THREAD_XOR64 opal_thread_xor_64
 #define OPAL_ATOMIC_XOR64 opal_thread_xor_64
 
-#define OPAL_THREAD_BOOL_CMPSET_64 opal_thread_cmpset_bool_64
-#define OPAL_ATOMIC_BOOL_CMPSET_64 opal_thread_cmpset_bool_64
+#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64
+#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64
 
 #define OPAL_THREAD_SWAP_64 opal_thread_swap_64
 #define OPAL_ATOMIC_SWAP_64 opal_thread_swap_64
diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c
index 6f8601da753..b05774278ab 100644
--- a/oshmem/runtime/oshmem_shmem_finalize.c
+++ b/oshmem/runtime/oshmem_shmem_finalize.c
@@ -64,8 +64,9 @@ int oshmem_shmem_finalize(void)
 {
     int ret = OSHMEM_SUCCESS;
     static int32_t finalize_has_already_started = 0;
+    int32_t _tmp = 0;
 
-    if (opal_atomic_bool_cmpset_32(&finalize_has_already_started, 0, 1)
+    if (opal_atomic_compare_exchange_strong_32 (&finalize_has_already_started, &_tmp, 1)
         && oshmem_shmem_initialized && !oshmem_shmem_aborted) {
         /* Should be called first because ompi_mpi_finalize makes orte and opal finalization */
         ret = _shmem_finalize();
diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c
index 3e467ff7412..75288feee7f 100644
--- a/test/asm/atomic_cmpset.c
+++ b/test/asm/atomic_cmpset.c
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
@@ -12,6 +13,8 @@
  * Copyright (c) 2010      Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2017      Los Alamos National Security, LLC. All rights
+ *                         reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -54,6 +57,13 @@ int64_t old64 = 0;
 int64_t new64 = 0;
 #endif
 
+#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
+volatile opal_int128_t vol128;
+opal_int128_t val128;
+opal_int128_t old128;
+opal_int128_t new128;
+#endif
+
 volatile int volint = 0;
 int valint = 0;
 int oldint = 0;
@@ -99,124 +109,165 @@ int main(int argc, char *argv[])
 
     /* -- cmpset 32-bit tests -- */
 
     vol32 = 42, old32 = 42, new32 = 50;
-    assert(opal_atomic_bool_cmpset_32(&vol32, old32, new32) == 1);
+    assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == true);
     opal_atomic_rmb();
     assert(vol32 == new32);
+    assert(old32 == 42);
 
     vol32 = 42, old32 = 420, new32 = 50;
-    assert(opal_atomic_bool_cmpset_32(&vol32, old32, new32) == 0);
+    assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == false);
     opal_atomic_rmb();
     assert(vol32 == 42);
+    assert(old32 == 42);
 
     vol32 = 42, old32 = 42, new32 = 50;
-    assert(opal_atomic_bool_cmpset_acq_32(&vol32, old32, new32) == 1);
+    assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == true);
     assert(vol32 == new32);
+    assert(old32 == 42);
 
     vol32 = 42, old32 = 420, new32 = 50;
-    assert(opal_atomic_bool_cmpset_acq_32(&vol32, old32, new32) == 0);
+    assert(opal_atomic_compare_exchange_strong_acq_32 (&vol32, &old32, new32) == false);
     assert(vol32 == 42);
+    assert(old32 == 42);
 
     vol32 = 42, old32 = 42, new32 = 50;
-    assert(opal_atomic_bool_cmpset_rel_32(&vol32, old32, new32) == 1);
+    assert(opal_atomic_compare_exchange_strong_rel_32 (&vol32, &old32, new32) == true);
     opal_atomic_rmb();
     assert(vol32 == new32);
+    assert(old32 == 42);
 
     vol32 = 42, old32 = 420, new32 = 50;
-    assert(opal_atomic_bool_cmpset_rel_32(&vol32, old32, new32) == 0);
+    assert(opal_atomic_compare_exchange_strong_rel_32 (&vol32, &old32, new32) == false);
     opal_atomic_rmb();
     assert(vol32 == 42);
+    assert(old32 == 42);
 
     /* -- cmpset 64-bit tests -- */
 
 #if OPAL_HAVE_ATOMIC_MATH_64
     vol64 = 42, old64 = 42, new64 = 50;
-    assert(1 == opal_atomic_bool_cmpset_64(&vol64, old64, new64));
+    assert(opal_atomic_compare_exchange_strong_64 (&vol64, &old64, new64) == true);
     opal_atomic_rmb();
     assert(new64 == vol64);
+    assert(old64 == 42);
 
     vol64 = 42, old64 = 420, new64 = 50;
-    assert(opal_atomic_bool_cmpset_64(&vol64, old64, new64) == 0);
+    assert(opal_atomic_compare_exchange_strong_64 (&vol64, &old64, new64) == false);
     opal_atomic_rmb();
     assert(vol64 == 42);
+    assert(old64 == 42);
 
     vol64 = 42, old64 = 42, new64 = 50;
-    assert(opal_atomic_bool_cmpset_acq_64(&vol64, old64, new64) == 1);
+    assert(opal_atomic_compare_exchange_strong_acq_64 (&vol64, &old64, new64) == true);
     assert(vol64 == new64);
+    assert(old64 == 42);
 
     vol64 = 42, old64 = 420, new64 = 50;
-    assert(opal_atomic_bool_cmpset_acq_64(&vol64, old64, new64) == 0);
+    assert(opal_atomic_compare_exchange_strong_acq_64 (&vol64, &old64, new64) == false);
     assert(vol64 == 42);
+    assert(old64 == 42);
 
     vol64 = 42, old64 = 42, new64 = 50;
-    assert(opal_atomic_bool_cmpset_rel_64(&vol64, old64, new64) == 1);
+    assert(opal_atomic_compare_exchange_strong_rel_64 (&vol64, &old64, new64) == true);
     opal_atomic_rmb();
     assert(vol64 == new64);
+    assert(old64 == 42);
 
     vol64 = 42, old64 = 420, new64 = 50;
-    assert(opal_atomic_bool_cmpset_rel_64(&vol64, old64, new64) == 0);
+    assert(opal_atomic_compare_exchange_strong_rel_64 (&vol64, &old64, new64) == false);
     opal_atomic_rmb();
     assert(vol64 == 42);
+    assert(old64 == 42);
 #endif
+
+    /* -- cmpset 128-bit tests -- */
+
+#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
+    vol128 = 42, old128 = 42, new128 = 50;
+    assert(opal_atomic_compare_exchange_strong_128 (&vol128, &old128, new128) == true);
+    opal_atomic_rmb();
+    assert(new128 == vol128);
+    assert(old128 == 42);
+
+    vol128 = 42, old128 = 420, new128 = 50;
+    assert(opal_atomic_compare_exchange_strong_128 (&vol128, &old128, new128) == false);
+    opal_atomic_rmb();
+    assert(vol128 == 42);
+    assert(old128 == 42);
+#endif
+
     /* -- cmpset int tests -- */
 
     volint = 42, oldint = 42, newint = 50;
-    assert(opal_atomic_bool_cmpset(&volint, oldint, newint) == 1);
+    assert(opal_atomic_compare_exchange_strong (&volint, &oldint, newint) == true);
     opal_atomic_rmb();
-    assert(volint ==newint);
+    assert(volint == newint);
+    assert(oldint == 42);
 
     volint = 42, oldint = 420, newint = 50;
-    assert(opal_atomic_bool_cmpset(&volint, oldint, newint) == 0);
+    assert(opal_atomic_compare_exchange_strong (&volint, &oldint, newint) == false);
     opal_atomic_rmb();
     assert(volint == 42);
+    assert(oldint == 42);
 
     volint = 42, oldint = 42, newint = 50;
-    assert(opal_atomic_bool_cmpset_acq(&volint, oldint, newint) == 1);
+    assert(opal_atomic_compare_exchange_strong_acq (&volint, &oldint, newint) == true);
     assert(volint == newint);
+    assert(oldint == 42);
 
     volint = 42, oldint = 420, newint = 50;
-    assert(opal_atomic_bool_cmpset_acq(&volint, oldint, newint) == 0);
+    assert(opal_atomic_compare_exchange_strong_acq (&volint, &oldint, newint) == false);
     assert(volint == 42);
+    assert(oldint == 42);
 
     volint = 42, oldint = 42, newint = 50;
-    assert(opal_atomic_bool_cmpset_rel(&volint, oldint, newint) == 1);
+    assert(opal_atomic_compare_exchange_strong_rel (&volint, &oldint, newint) == true);
     opal_atomic_rmb();
     assert(volint == newint);
+    assert(oldint == 42);
 
     volint = 42, oldint = 420, newint = 50;
-    assert(opal_atomic_bool_cmpset_rel(&volint, oldint, newint) == 0);
+    assert(opal_atomic_compare_exchange_strong_rel (&volint, &oldint, newint) == false);
     opal_atomic_rmb();
     assert(volint == 42);
+    assert(oldint == 42);
 
     /* -- cmpset ptr tests -- */
 
     volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
-    assert(opal_atomic_bool_cmpset_ptr(&volptr, oldptr, newptr) == 1);
+    assert(opal_atomic_compare_exchange_strong_ptr (&volptr, &oldptr, newptr) == true);
     opal_atomic_rmb();
     assert(volptr == newptr);
+    assert(oldptr == (void *) 42);
 
     volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
-    assert(opal_atomic_bool_cmpset_ptr(&volptr, oldptr, newptr) == 0);
+    assert(opal_atomic_compare_exchange_strong_ptr (&volptr, &oldptr, newptr) == false);
     opal_atomic_rmb();
     assert(volptr == (void *) 42);
+    assert(oldptr == (void *) 42);
 
     volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
-    assert(opal_atomic_bool_cmpset_acq_ptr(&volptr, oldptr, newptr) == 1);
+    assert(opal_atomic_compare_exchange_strong_acq_ptr (&volptr, &oldptr, newptr) == true);
     assert(volptr == newptr);
+    assert(oldptr == (void *) 42);
 
     volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
-    assert(opal_atomic_bool_cmpset_acq_ptr(&volptr, oldptr, newptr) == 0);
+    assert(opal_atomic_compare_exchange_strong_acq_ptr (&volptr, &oldptr, newptr) == false);
     assert(volptr == (void *) 42);
+    assert(oldptr == (void *) 42);
 
     volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
-    assert(opal_atomic_bool_cmpset_rel_ptr(&volptr, oldptr, newptr) == 1);
+    assert(opal_atomic_compare_exchange_strong_rel_ptr (&volptr, &oldptr, newptr) == true);
     opal_atomic_rmb();
     assert(volptr == newptr);
+    assert(oldptr == (void *) 42);
 
     volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
-    assert(opal_atomic_bool_cmpset_rel_ptr(&volptr, oldptr, newptr) == 0);
+    assert(opal_atomic_compare_exchange_strong_rel_ptr (&volptr, &oldptr, newptr) == false);
     opal_atomic_rmb();
     assert(volptr == (void *) 42);
+    assert(oldptr == (void *) 42);
 
     /* -- add_32 tests -- */
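The new assert(old32 == 42), assert(old64 == 42), and assert(oldptr == (void *) 42) checks added above pin down the one behavioral difference between the old boolean cmpset and compare-exchange-strong: the expected value is now passed by pointer, and when the exchange fails the currently observed value of the target is written back through that pointer (the same contract the `*compare = ((type *) addr)[0];` fallback added to thread_usage.h implements). Below is a minimal sketch of that contract written against C11 <stdatomic.h> as a stand-in for the OPAL wrappers; the variable names are illustrative only and a C11-capable compiler is assumed.

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

int main(void)
{
    _Atomic int32_t counter = 42;   /* illustrative target, not an OPAL symbol */
    int32_t expected = 42;

    /* Success: the target matches `expected`, so 50 is stored and `expected`
     * is left untouched -- the analogue of assert(old32 == 42) after the
     * successful exchanges in the test above. */
    assert(atomic_compare_exchange_strong(&counter, &expected, 50));
    assert(atomic_load(&counter) == 50);
    assert(expected == 42);

    /* Failure: the target (50) does not match `expected` (420); nothing is
     * stored, but the observed value is written back into `expected`. */
    expected = 420;
    assert(!atomic_compare_exchange_strong(&counter, &expected, 60));
    assert(atomic_load(&counter) == 50);
    assert(expected == 50);

    /* The write-back on failure is what makes retry loops cheap: no separate
     * re-read of the target is needed between attempts. */
    int32_t old = atomic_load(&counter);
    int32_t desired;
    do {
        desired = old | 0x1;   /* recompute from the freshest observed value */
    } while (!atomic_compare_exchange_strong(&counter, &old, desired));
    assert(atomic_load(&counter) == (50 | 0x1));

    return 0;
}

Compiled with any C11 toolchain (for example, cc -std=c11), all of the asserts in this sketch hold; the opal_atomic_compare_exchange_strong_* family exercised by the patched test follows the same convention.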