diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index add69f9bf70..d301aa44e78 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -378,7 +378,7 @@ int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data, * a read only memory). */ if( NULL != pArgs ) { - OPAL_THREAD_ADD32(&pArgs->ref_count, 1); + OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, 1); dest_data->args = pArgs; } return OMPI_SUCCESS; @@ -396,7 +396,7 @@ int32_t ompi_datatype_release_args( ompi_datatype_t* pData ) ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; assert( 0 < pArgs->ref_count ); - OPAL_THREAD_ADD32(&pArgs->ref_count, -1); + OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, -1); if( 0 == pArgs->ref_count ) { /* There are some duplicated datatypes around that have a pointer to this * args. We will release them only when the last datatype will dissapear. 
@@ -487,7 +487,8 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, void* recursive_buffer; if (NULL == packed_description) { - if (opal_atomic_bool_cmpset (&datatype->packed_description, NULL, (void *) 1)) { + void *_tmp_ptr = NULL; + if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) { if( ompi_datatype_is_predefined(datatype) ) { packed_description = malloc(2 * sizeof(int)); } else if( NULL == args ) { diff --git a/ompi/group/group.h b/ompi/group/group.h index 4f303c34186..30664f8a4e0 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -14,7 +14,7 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -356,7 +356,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, ompi_proc_t *real_proc = (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc)); - if (opal_atomic_bool_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) { + if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) { OBJ_RETAIN(real_proc); } diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index cf55d0dbd0e..1a611eaee5f 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -314,7 +314,7 @@ libnbc_module_destruct(ompi_coll_libnbc_module_t *module) /* if we ever were used for a collective op, do the progress cleanup. 
*/ if (true == module->comm_registered) { int32_t tmp = - OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, -1); + OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, -1); if (0 == tmp) { opal_progress_unregister(ompi_coll_libnbc_progress); } diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index dff6362bee7..28f022e5c99 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -618,7 +618,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t /* register progress */ if (need_register) { int32_t tmp = - OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1); + OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, 1); if (tmp == 1) { opal_progress_register(ompi_coll_libnbc_progress); } diff --git a/ompi/mca/coll/monitoring/coll_monitoring_component.c b/ompi/mca/coll/monitoring/coll_monitoring_component.c index 995757ddedc..47d14375e10 100644 --- a/ompi/mca/coll/monitoring/coll_monitoring_component.c +++ b/ompi/mca/coll/monitoring/coll_monitoring_component.c @@ -120,7 +120,7 @@ static int mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) { mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; - if( 1 == opal_atomic_add_32(&monitoring_module->is_initialized, 1) ) { + if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) { MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm); monitoring_module->data = mca_common_monitoring_coll_new(comm); OPAL_MONITORING_PRINT_INFO("coll_module_enabled"); @@ -132,7 +132,7 @@ static int mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) { mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; - if( 0 == opal_atomic_sub_32(&monitoring_module->is_initialized, 1) ) { + if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) 
{ MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm); mca_common_monitoring_coll_release(monitoring_module->data); monitoring_module->data = NULL; diff --git a/ompi/mca/coll/portals4/coll_portals4_allreduce.c b/ompi/mca/coll/portals4/coll_portals4_allreduce.c index 935ce6cd9d3..56f1ea30621 100644 --- a/ompi/mca/coll/portals4/coll_portals4_allreduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_allreduce.c @@ -68,7 +68,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1); /* ** DATATYPE and SIZES diff --git a/ompi/mca/coll/portals4/coll_portals4_barrier.c b/ompi/mca/coll/portals4/coll_portals4_barrier.c index 9d5c4f3c164..f2544ce0cd1 100644 --- a/ompi/mca/coll/portals4/coll_portals4_barrier.c +++ b/ompi/mca/coll/portals4/coll_portals4_barrier.c @@ -44,7 +44,7 @@ barrier_hypercube_top(struct ompi_communicator_t *comm, request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER; - count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.barrier.rtr_ct_h); diff --git a/ompi/mca/coll/portals4/coll_portals4_bcast.c b/ompi/mca/coll/portals4/coll_portals4_bcast.c index 11132f6ce4c..8432d5823cd 100644 --- a/ompi/mca/coll/portals4/coll_portals4_bcast.c +++ b/ompi/mca/coll/portals4/coll_portals4_bcast.c @@ -176,7 +176,7 @@ bcast_kary_tree_top(void *buff, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); /* @@ -513,7 +513,7 @@ bcast_pipeline_top(void *buff, int 
count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); /* ** DATATYPE and SIZES diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c index 45ff4c07728..7e38e27c009 100644 --- a/ompi/mca/coll/portals4/coll_portals4_gather.c +++ b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -582,7 +582,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc /* Setup Common Parameters */ /**********************************/ - request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank ); bmtree = portals4_module->cached_in_order_bmtree; @@ -879,7 +879,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank); - request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); ret = setup_gather_buffers_linear(comm, request, portals4_module); if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } diff --git a/ompi/mca/coll/portals4/coll_portals4_reduce.c b/ompi/mca/coll/portals4/coll_portals4_reduce.c index 1a55a5c3f70..2fdb36b739c 100644 --- a/ompi/mca/coll/portals4/coll_portals4_reduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_reduce.c @@ -69,7 +69,7 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, zero_md_h = mca_coll_portals4_component.zero_md_h; data_md_h = mca_coll_portals4_component.data_md_h; - internal_count = 
opal_atomic_add_size_t(&module->coll_count, 1); + internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1); /* ** DATATYPE and SIZES diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c b/ompi/mca/coll/portals4/coll_portals4_scatter.c index d1cfbbaa0d2..4f3351ac784 100644 --- a/ompi/mca/coll/portals4/coll_portals4_scatter.c +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -399,7 +399,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); - request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + request->u.scatter.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1); ret = setup_scatter_buffers_linear(comm, request, portals4_module); if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index baaa510ed19..b2da6ede425 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one; * Macro to release an in-use flag from this process */ #define FLAG_RELEASE(flag) \ - (void)opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1) + opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1) /** * Macro to copy a single segment in from a user buffer to a shared diff --git a/ompi/mca/coll/sm/coll_sm_barrier.c b/ompi/mca/coll/sm/coll_sm_barrier.c index a3000b7d847..2722bbf09f5 100644 --- a/ompi/mca/coll/sm/coll_sm_barrier.c +++ b/ompi/mca/coll/sm/coll_sm_barrier.c @@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm, if (0 != rank) { /* Get parent *in* buffer */ parent = &data->mcb_barrier_control_parent[buffer_set]; - (void)opal_atomic_add(parent, 1); + opal_atomic_add (parent, 1); SPIN_CONDITION(0 != *me_out, exit_label2); *me_out = 0; diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 
6c34851ee46..8922a70eafe 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, OBJ_RETAIN(sm_module->previous_reduce_module); /* Indicate that we have successfully attached and setup */ - (void)opal_atomic_add(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); + opal_atomic_add (&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); /* Wait for everyone in this communicator to attach and setup */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c index 519da37318e..e521ca56417 100644 --- a/ompi/mca/common/monitoring/common_monitoring.c +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -209,7 +209,7 @@ static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar, int mca_common_monitoring_init( void ) { if( !mca_common_monitoring_enabled ) return OMPI_ERROR; - if( 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */ + if( 1 < opal_atomic_add_fetch_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */ char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; /* Initialize constant */ @@ -229,7 +229,7 @@ int mca_common_monitoring_init( void ) void mca_common_monitoring_finalize( void ) { if( ! 
mca_common_monitoring_enabled || /* Don't release if not last */ - 0 < opal_atomic_sub_32(&mca_common_monitoring_hold, 1) ) return; + 0 < opal_atomic_sub_fetch_32(&mca_common_monitoring_hold, 1) ) return; OPAL_MONITORING_PRINT_INFO("common_component_finish"); /* Dump monitoring informations */ @@ -503,21 +503,21 @@ void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag) /* Keep tracks of the data_size distribution */ if( 0 == data_size ) { - opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram], 1); + opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram], 1); } else { int log2_size = log10(data_size)/log10_2; if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */ log2_size = max_size_histogram - 2; - opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1); + opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1); } /* distinguishses positive and negative tags if requested */ if( (tag < 0) && (mca_common_monitoring_filter()) ) { - opal_atomic_add_size_t(&filtered_pml_data[world_rank], data_size); - opal_atomic_add_size_t(&filtered_pml_count[world_rank], 1); + opal_atomic_add_fetch_size_t(&filtered_pml_data[world_rank], data_size); + opal_atomic_add_fetch_size_t(&filtered_pml_count[world_rank], 1); } else { /* if filtered monitoring is not activated data is aggregated indifferently */ - opal_atomic_add_size_t(&pml_data[world_rank], data_size); - opal_atomic_add_size_t(&pml_count[world_rank], 1); + opal_atomic_add_fetch_size_t(&pml_data[world_rank], data_size); + opal_atomic_add_fetch_size_t(&pml_count[world_rank], 1); } } @@ -564,11 +564,11 @@ void mca_common_monitoring_record_osc(int world_rank, size_t data_size, if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ if( SEND == dir ) { - opal_atomic_add_size_t(&osc_data_s[world_rank], data_size); - 
opal_atomic_add_size_t(&osc_count_s[world_rank], 1); + opal_atomic_add_fetch_size_t(&osc_data_s[world_rank], data_size); + opal_atomic_add_fetch_size_t(&osc_count_s[world_rank], 1); } else { - opal_atomic_add_size_t(&osc_data_r[world_rank], data_size); - opal_atomic_add_size_t(&osc_count_r[world_rank], 1); + opal_atomic_add_fetch_size_t(&osc_data_r[world_rank], data_size); + opal_atomic_add_fetch_size_t(&osc_count_r[world_rank], 1); } } @@ -650,8 +650,8 @@ void mca_common_monitoring_record_coll(int world_rank, size_t data_size) { if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ - opal_atomic_add_size_t(&coll_data[world_rank], data_size); - opal_atomic_add_size_t(&coll_count[world_rank], 1); + opal_atomic_add_fetch_size_t(&coll_data[world_rank], data_size); + opal_atomic_add_fetch_size_t(&coll_count[world_rank], 1); } static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar, diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.c b/ompi/mca/common/monitoring/common_monitoring_coll.c index e37d5bb6c34..5af3059320b 100644 --- a/ompi/mca/common/monitoring/common_monitoring_coll.c +++ b/ompi/mca/common/monitoring/common_monitoring_coll.c @@ -236,8 +236,8 @@ void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data return; } #endif /* OPAL_ENABLE_DEBUG */ - opal_atomic_add_size_t(&data->o2a_size, size); - opal_atomic_add_size_t(&data->o2a_count, 1); + opal_atomic_add_fetch_size_t(&data->o2a_size, size); + opal_atomic_add_fetch_size_t(&data->o2a_count, 1); } int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar, @@ -277,8 +277,8 @@ void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data return; } #endif /* OPAL_ENABLE_DEBUG */ - opal_atomic_add_size_t(&data->a2o_size, size); - opal_atomic_add_size_t(&data->a2o_count, 1); + opal_atomic_add_fetch_size_t(&data->a2o_size, size); + 
opal_atomic_add_fetch_size_t(&data->a2o_count, 1); } int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar, @@ -318,8 +318,8 @@ void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data return; } #endif /* OPAL_ENABLE_DEBUG */ - opal_atomic_add_size_t(&data->a2a_size, size); - opal_atomic_add_size_t(&data->a2a_count, 1); + opal_atomic_add_fetch_size_t(&data->a2a_size, size); + opal_atomic_add_fetch_size_t(&data->a2a_count, 1); } int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index 50cf3c79b72..19d3b600b36 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -296,9 +296,10 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me, int ompi_mtl_portals4_flowctl_trigger(void) { + int32_t _tmp_value = 0; int ret; - if (true == OPAL_ATOMIC_BOOL_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) { + if (true == OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ompi_mtl_portals4.flowctl.flowctl_active, &_tmp_value, 1)) { /* send trigger to root */ ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, @@ -346,7 +347,7 @@ start_recover(void) int64_t epoch_counter; ompi_mtl_portals4.flowctl.flowctl_active = true; - epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); + epoch_counter = opal_atomic_add_fetch_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Entering flowctl_start_recover %ld", diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 230b3785532..f2737428e26 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -53,14 +53,14 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, int32_t frag_count; #if OMPI_MTL_PORTALS4_FLOW_CONTROL - while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + while (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); ompi_mtl_portals4_progress(); } #endif frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl; - ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count); + ret = OPAL_THREAD_ADD_FETCH32(&(request->pending_reply), frag_count); for (i = 0 ; i < frag_count ; i++) { opal_free_list_item_t *tmp; @@ -385,14 +385,14 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, opal_free_list_return 
(&ompi_mtl_portals4.fl_rndv_get_frag, &rndv_get_frag->super); - ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1); + ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_reply), -1); if (ret > 0) { return OMPI_SUCCESS; } assert(ptl_request->pending_reply == 0); #if OMPI_MTL_PORTALS4_FLOW_CONTROL - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); #endif /* make sure the data is in the right place. Use _ucount for @@ -468,7 +468,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_request->super.type = portals4_req_recv; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; #if OPAL_ENABLE_DEBUG - ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->buffer_ptr = (free_after) ? start : NULL; @@ -549,7 +549,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl, } #if OPAL_ENABLE_DEBUG - ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->super.type = portals4_req_recv; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 6393b9a465b..27291eed559 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -45,7 +45,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, (ompi_mtl_portals4_isend_request_t*) ptl_base_request; if (PTL_EVENT_GET == ev->type) { - ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); + ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1); if (ret > 0) { /* wait for other gets */ OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, 
"PTL_EVENT_GET received now pending_get=%d",ret)); @@ -94,7 +94,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); ompi_mtl_portals4_flowctl_trigger(); return OMPI_SUCCESS; @@ -124,7 +124,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, if ((eager == ompi_mtl_portals4.protocol) || (ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) { - val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); + val = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1); } if (0 == val) { add = 2; /* We haven't to wait for any get, so we have to add an extra count to cause the message to complete */ @@ -161,7 +161,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, ptl_request->me_h = PTL_INVALID_HANDLE; add++; } - val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add); + val = OPAL_THREAD_ADD_FETCH32((int32_t*)&ptl_request->event_count, add); assert(val <= 3); if (val == 3) { @@ -174,7 +174,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, *complete = true; #if OMPI_MTL_PORTALS4_FLOW_CONTROL - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl, &ptl_request->pending->super); @@ -422,15 +422,15 @@ ompi_mtl_portals4_pending_list_progress() while ((!ompi_mtl_portals4.flowctl.flowctl_active) && (0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { - val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1); + val = OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1); if (val < 0) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); return; } item = 
opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends); if (OPAL_UNLIKELY(NULL == item)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); return; } @@ -456,7 +456,7 @@ ompi_mtl_portals4_pending_list_progress() if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); } } } @@ -492,7 +492,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ret) return ret; - ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1); + ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*)&ompi_mtl_portals4.opcount, 1); ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->length = length; ptl_request->event_count = 0; @@ -520,15 +520,15 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, pending->ptl_proc = ptl_proc; pending->ptl_request = ptl_request; - if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + if (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); return OMPI_SUCCESS; } if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); ompi_mtl_portals4_pending_list_progress(); @@ -536,7 +536,7 @@ 
ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, } if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) { - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); + OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1); opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); return OMPI_SUCCESS; diff --git a/ompi/mca/osc/monitoring/osc_monitoring_template.h b/ompi/mca/osc/monitoring/osc_monitoring_template.h index 7d56421b3d8..6724a6b10c3 100644 --- a/ompi/mca/osc/monitoring/osc_monitoring_template.h +++ b/ompi/mca/osc/monitoring/osc_monitoring_template.h @@ -61,7 +61,7 @@ static inline void* \ ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \ { \ - if( 1 == opal_atomic_add_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \ + if( 1 == opal_atomic_add_fetch_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \ /* Saves the original module functions in \ * ompi_osc_monitoring_module_## template ##_template \ */ \ diff --git a/ompi/mca/osc/portals4/osc_portals4_active_target.c b/ompi/mca/osc/portals4/osc_portals4_active_target.c index e2bd9a9da20..23a763efe8e 100644 --- a/ompi/mca/osc/portals4/osc_portals4_active_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_active_target.c @@ -99,7 +99,7 @@ ompi_osc_portals4_complete(struct ompi_win_t *win) PTL_SUM, PTL_INT32_T); if (ret != OMPI_SUCCESS) return ret; - OPAL_THREAD_ADD64(&module->opcount, 1); + OPAL_THREAD_ADD_FETCH64(&module->opcount, 1); } ret = ompi_osc_portals4_complete_all(module); @@ -144,7 +144,7 @@ ompi_osc_portals4_post(struct ompi_group_t *group, PTL_SUM, PTL_INT32_T); if (ret != OMPI_SUCCESS) return ret; - OPAL_THREAD_ADD64(&module->opcount, 1); + OPAL_THREAD_ADD_FETCH64(&module->opcount, 1); } } else { module->post_group = NULL; diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index b792d20f8b7..b125f2aee50 100644 --- 
a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -206,7 +206,7 @@ segmentedPut(int64_t *opcount, ptl_size_t bytes_put = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(put_length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -222,7 +222,7 @@ segmentedPut(int64_t *opcount, user_ptr, hdr_data); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlPut failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -251,7 +251,7 @@ segmentedGet(int64_t *opcount, ptl_size_t bytes_gotten = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(get_length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -266,7 +266,7 @@ segmentedGet(int64_t *opcount, target_offset + bytes_gotten, user_ptr); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlGet failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -297,7 +297,7 @@ segmentedAtomic(int64_t *opcount, ptl_size_t sent = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -315,7 +315,7 @@ segmentedAtomic(int64_t *opcount, ptl_op, ptl_dt); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlAtomic failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -348,7 +348,7 @@ segmentedFetchAtomic(int64_t *opcount, ptl_size_t sent = 0; do { - opal_atomic_add_64(opcount, 1); + 
opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -367,7 +367,7 @@ segmentedFetchAtomic(int64_t *opcount, ptl_op, ptl_dt); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlFetchAtomic failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -399,7 +399,7 @@ segmentedSwap(int64_t *opcount, ptl_size_t sent = 0; do { - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); ptl_size_t frag_length = MIN(length, segment_length); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, @@ -419,7 +419,7 @@ segmentedSwap(int64_t *opcount, PTL_SWAP, ptl_dt); if (PTL_OK != ret) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlSwap failed with return value %d", __FUNCTION__, __LINE__, ret); @@ -547,7 +547,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, return ret; } - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", @@ -564,7 +564,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d PtlGet() failed: ret = %d", __FUNCTION__, __LINE__, ret)); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -716,7 +716,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, return ret; } - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d 
Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", @@ -735,7 +735,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d PtlPut() failed: ret = %d", __FUNCTION__, __LINE__, ret)); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1252,7 +1252,7 @@ put_to_noncontig(int64_t *opcount, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1270,7 +1270,7 @@ put_to_noncontig(int64_t *opcount, user_ptr, 0); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); return ret; } @@ -1361,7 +1361,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. 
local: %p, remote: %p, len: %lu", @@ -1379,7 +1379,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module, user_ptr, 0); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1479,7 +1479,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1501,7 +1501,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module, ptl_op, ptl_dt); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1586,7 +1586,7 @@ get_from_noncontig(int64_t *opcount, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(opcount, 1); + opal_atomic_add_fetch_64(opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. 
local: %p, remote: %p, len: %lu", @@ -1602,7 +1602,7 @@ get_from_noncontig(int64_t *opcount, offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, user_ptr); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(opcount, -1); + opal_atomic_add_fetch_64(opcount, -1); return ret; } @@ -1687,7 +1687,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", @@ -1703,7 +1703,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module, offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, user_ptr); if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1817,7 +1817,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing swap on contiguous region. 
result: %p origin: %p, target: %p, len: %lu", @@ -1844,7 +1844,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module, opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlSwap failed with return value %d", __FUNCTION__, __LINE__, ret); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -1969,7 +1969,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module, /* determine how much to transfer in this operation */ rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); - opal_atomic_add_64(&module->opcount, 1); + opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu", @@ -1995,7 +1995,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module, opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d PtlFetchAtomic failed with return value %d", __FUNCTION__, __LINE__, ret); - opal_atomic_add_64(&module->opcount, -1); + opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } @@ -2411,7 +2411,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Atomic", __FUNCTION__, __LINE__)); @@ -2428,7 +2428,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, ptl_op, ptl_dt); if (OMPI_SUCCESS != ret) { - (void)opal_atomic_add_64(&module->opcount, -1); + (void)opal_atomic_add_fetch_64(&module->opcount, -1); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } @@ -3149,7 +3149,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - 
(void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Atomic", __FUNCTION__, __LINE__)); @@ -3166,7 +3166,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, ptl_op, ptl_dt); if (OMPI_SUCCESS != ret) { - (void)opal_atomic_add_64(&module->opcount, -1); + (void)opal_atomic_add_fetch_64(&module->opcount, -1); return ret; } sent += msg_length; @@ -3541,7 +3541,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "%s,%d Swap", __FUNCTION__, __LINE__)); @@ -3613,7 +3613,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, @@ -3635,7 +3635,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, md_offset = (ptl_size_t) result_addr; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get", __FUNCTION__, __LINE__)); ret = PtlGet(module->md_h, @@ -3648,7 +3648,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, NULL); } else { ptl_size_t result_md_offset, origin_md_offset; - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) { diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c 
b/ompi/mca/osc/portals4/osc_portals4_component.c index 38c36fec6d9..8a4781e3af6 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -230,8 +230,8 @@ progress_callback(void) } req = (ompi_osc_portals4_request_t*) ev.user_ptr; - opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); - ops = opal_atomic_add_32(&req->ops_committed, 1); + opal_atomic_add_fetch_size_t(&req->super.req_status._ucount, ev.mlength); + ops = opal_atomic_add_fetch_32(&req->ops_committed, 1); if (ops == req->ops_expected) { ompi_request_complete(&req->super, true); } diff --git a/ompi/mca/osc/portals4/osc_portals4_passive_target.c b/ompi/mca/osc/portals4/osc_portals4_passive_target.c index b39d4d904fe..b9baeea6f1c 100644 --- a/ompi/mca/osc/portals4/osc_portals4_passive_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_passive_target.c @@ -43,7 +43,7 @@ lk_cas64(ompi_osc_portals4_module_t *module, int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = PtlSwap(module->md_h, (ptl_size_t) result_val, @@ -76,7 +76,7 @@ lk_write64(ompi_osc_portals4_module_t *module, int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = PtlPut(module->md_h, (ptl_size_t) &write_val, @@ -106,7 +106,7 @@ lk_add64(ompi_osc_portals4_module_t *module, int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - (void)opal_atomic_add_64(&module->opcount, 1); + (void)opal_atomic_add_fetch_64(&module->opcount, 1); ret = PtlFetchAtomic(module->md_h, (ptl_size_t) result_val, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 660b7c3246a..4b1a423ded1 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -8,7 +8,7 @@ * 
University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -145,15 +145,11 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value) { - int32_t peer_flags, new_flags; - do { - peer_flags = peer->flags; - if (value) { - new_flags = peer_flags | flag; - } else { - new_flags = peer_flags & ~flag; - } - } while (!OPAL_ATOMIC_BOOL_CMPSET_32 (&peer->flags, peer_flags, new_flags)); + if (value) { + OPAL_ATOMIC_OR_FETCH32 (&peer->flags, flag); + } else { + OPAL_ATOMIC_AND_FETCH32 (&peer->flags, ~flag); + } } static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value) @@ -518,7 +514,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_incoming_completion marking active incoming complete. module %p, count = %d", (void *) module, (int) module->active_incoming_frag_count + 1)); - new_value = OPAL_THREAD_ADD32(&module->active_incoming_frag_count, 1); + new_value = OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, 1); if (new_value >= 0) { OPAL_THREAD_LOCK(&module->lock); opal_condition_broadcast(&module->cond); @@ -530,7 +526,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_incoming_completion marking passive incoming complete. 
module %p, source = %d, count = %d", (void *) module, source, (int) peer->passive_incoming_frag_count + 1)); - new_value = OPAL_THREAD_ADD32((int32_t *) &peer->passive_incoming_frag_count, 1); + new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &peer->passive_incoming_frag_count, 1); if (0 == new_value) { OPAL_THREAD_LOCK(&module->lock); opal_condition_broadcast(&module->cond); @@ -554,7 +550,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in */ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module) { - int32_t new_value = OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, 1); + int32_t new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, 1); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_outgoing_completion: outgoing_frag_count = %d", new_value)); if (new_value >= 0) { @@ -578,12 +574,12 @@ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module) */ static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count) { - OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, -count); + OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, -count); if (MPI_PROC_NULL != target) { OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target, count, module->epoch_outgoing_frag_count[target] + count)); - OPAL_THREAD_ADD32((int32_t *) (module->epoch_outgoing_frag_count + target), count); + OPAL_THREAD_ADD_FETCH32((int32_t *) (module->epoch_outgoing_frag_count + target), count); } } @@ -721,7 +717,7 @@ static inline int get_tag(ompi_osc_pt2pt_module_t *module) /* the LSB of the tag is used be the receiver to determine if the message is a passive or active target (ie, where to mark completion). 
*/ - int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4); + int32_t tmp = OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &module->tag_counter, 4); return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c index 501c126fd14..33df9440a62 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c @@ -183,7 +183,7 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) incoming_reqs)); /* set our complete condition for incoming requests */ - OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -incoming_reqs); + OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -incoming_reqs); /* wait for completion */ while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) { @@ -272,7 +272,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "found unexpected post from %d", peer->rank)); - OPAL_THREAD_ADD32 (&sync->sync_expected, -1); + OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1); ompi_osc_pt2pt_peer_set_unex (peer, false); } } @@ -574,12 +574,12 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i frag_count, module->active_incoming_frag_count, module->num_complete_msgs)); /* the current fragment is not part of the frag_count so we need to add it here */ - OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -frag_count); + OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -frag_count); /* make sure the signal count is written before changing the complete message count */ opal_atomic_wmb (); - if (0 == OPAL_THREAD_ADD32(&module->num_complete_msgs, 1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&module->num_complete_msgs, 1)) { OPAL_THREAD_LOCK(&module->lock); opal_condition_broadcast (&module->cond); 
OPAL_THREAD_UNLOCK(&module->lock); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index a8c218c4cf0..bfe67ea3d8f 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -62,7 +62,7 @@ static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request) /* update the cbdata for ompi_osc_pt2pt_comm_complete */ request->req_complete_cb_data = pt2pt_request->module; - if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&pt2pt_request->outstanding_requests, -1)) { ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 8aef87566f9..6a4205499bd 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -667,7 +667,7 @@ static int accumulate_cb (ompi_request_t *request) rank = acc_data->peer; } - if (0 == OPAL_THREAD_ADD32(&acc_data->request_count, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&acc_data->request_count, -1)) { /* no more requests needed before the buffer can be accumulated */ if (acc_data->source) { @@ -716,9 +716,9 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os /* NTH: ensure we don't leave wait/process_flush/etc until this * accumulate operation is complete. 
*/ if (active_target) { - OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -1); } else { - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1); } pending_acc->active_target = active_target; @@ -1353,7 +1353,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source, "process_flush header = {.frag_count = %d}", flush_header->frag_count)); /* increase signal count by incoming frags */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "%d: process_flush: received message from %d. passive_incoming_frag_count = %d", @@ -1372,7 +1372,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source, } /* signal incomming will increment this counter */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1); return sizeof (*flush_header); } @@ -1387,7 +1387,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source, "process_unlock header = {.frag_count = %d}", unlock_header->frag_count)); /* increase signal count by incoming frags */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count); OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc pt2pt: processing unlock request from %d. 
frag count = %d, processed_count = %d", @@ -1406,7 +1406,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source, } /* signal incoming will increment this counter */ - OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); + OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1); return sizeof (*unlock_header); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c index 51a31181a88..4db4259a6fd 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -105,8 +105,8 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om "osc pt2pt: flushing active fragment to target %d. 
pending: %d", active_frag->target, active_frag->pending)); - if (opal_atomic_bool_cmpset (&peer->active_frag, active_frag, NULL)) { - if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) { + if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) { + if (0 != OPAL_THREAD_ADD_FETCH32(&active_frag->pending, -1)) { /* communication going on while synchronizing; this is an rma usage bug */ return OMPI_ERR_RMA_SYNC; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index cddc3c3f07f..4ed38930d5a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -51,7 +51,7 @@ static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t* buffer) { opal_atomic_wmb (); - if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&buffer->pending, -1)) { opal_atomic_mb (); return ompi_osc_pt2pt_frag_start(module, buffer); } @@ -67,7 +67,7 @@ static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (omp /* to ensure ordering flush the buffer on the peer */ curr = peer->active_frag; - if (NULL != curr && opal_atomic_bool_cmpset (&peer->active_frag, curr, NULL)) { + if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &curr, NULL)) { /* If there's something pending, the pending finish will start the buffer. Otherwise, we need to start it now. 
*/ int ret = ompi_osc_pt2pt_frag_finish (module, curr); @@ -142,11 +142,11 @@ static inline int _ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, i curr->pending_long_sends = long_send; peer->active_frag = curr; } else { - OPAL_THREAD_ADD32(&curr->header->num_ops, 1); + OPAL_THREAD_ADD_FETCH32(&curr->header->num_ops, 1); curr->pending_long_sends += long_send; } - OPAL_THREAD_ADD32(&curr->pending, 1); + OPAL_THREAD_ADD_FETCH32(&curr->pending, 1); } else { curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); if (OPAL_UNLIKELY(NULL == curr)) { diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 34059a0851c..091757511f3 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -64,7 +64,7 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock); if (!acquired) { @@ -91,7 +91,7 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank); int lock_type = lock->sync.lock.type; - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -99,9 +99,9 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, "ompi_osc_pt2pt_unlock_self: unlocking myself. 
lock state = %d", module->lock_status)); if (MPI_LOCK_EXCLUSIVE == lock_type) { - OPAL_THREAD_ADD32(&module->lock_status, 1); + OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1); ompi_osc_pt2pt_activate_next_lock (module); - } else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) { + } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) { ompi_osc_pt2pt_activate_next_lock (module); } @@ -128,7 +128,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp return OMPI_SUCCESS; } - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -145,7 +145,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req)); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - OPAL_THREAD_ADD32(&lock->sync_expected, -1); + OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, -1); } else { ompi_osc_pt2pt_peer_set_locked (peer, true); } @@ -163,7 +163,7 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_header_unlock_t unlock_req; int ret; - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -207,7 +207,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module, int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1); int ret; - (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); @@ -744,14 +744,13 @@ static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, in break; } - if (opal_atomic_bool_cmpset_32 (&module->lock_status, lock_status, lock_status + 1)) { + if 
(opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) { break; } - - lock_status = module->lock_status; } while (1); } else { - queue = !opal_atomic_bool_cmpset_32 (&module->lock_status, 0, -1); + int32_t _tmp_value = 0; + queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1); } if (queue) { @@ -909,9 +908,9 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source, } if (-1 == module->lock_status) { - OPAL_THREAD_ADD32(&module->lock_status, 1); + OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1); ompi_osc_pt2pt_activate_next_lock (module); - } else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) { + } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) { ompi_osc_pt2pt_activate_next_lock (module); } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h index 10398926e84..fe359bf6cf9 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h @@ -166,7 +166,7 @@ static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *syn static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync) { - int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1); + int32_t new_value = OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1); if (0 == new_value) { OPAL_THREAD_LOCK(&sync->lock); if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) { diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 9d22bde8ab9..dc49668d164 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -516,7 +516,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v subreq->internal = true; subreq->parent_request = request; if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 
(&request->outstanding_requests, 1); } if (result_datatype) { @@ -557,7 +557,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v if (request) { /* release our reference so the request can complete */ - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } if (source_datatype) { diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index 30e160e93f9..b045ebf3ec6 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. 
@@ -285,7 +285,9 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result); assert (OMPI_SUCCESS == ret); } else { - result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); + ompi_osc_rdma_lock_t _tmp_value = 0; + + result = !ompi_osc_rdma_lock_compare_exchange ((osc_rdma_counter_t *) target, &_tmp_value, 1 + (osc_rdma_counter_t) my_rank); } if (OPAL_LIKELY(0 == result)) { diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 1a0450bd288..3357d1049c9 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -217,7 +217,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc subreq->parent_request = request; if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); } } else if (!alloc_reqs) { subreq = request; @@ -232,7 +232,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) { if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } if (alloc_reqs) { @@ -266,7 +266,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc ompi_osc_rdma_request_complete (request, OMPI_SUCCESS); } - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)"); @@ -551,7 +551,7 @@ static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p /* increment the outstanding request counter in the 
request object */ if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); cbcontext = (void *) ((intptr_t) request | 1); request->sync = sync; } else { @@ -643,12 +643,12 @@ static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_ subreq->internal = true; subreq->type = OMPI_OSC_RDMA_TYPE_RDMA; subreq->parent_request = request; - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OMPI_OSC_RDMA_REQUEST_RETURN(subreq); - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } return ret; diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.h b/ompi/mca/osc/rdma/osc_rdma_comm.h index 0e2daf2b4e8..e6d69505753 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.h +++ b/ompi/mca/osc/rdma/osc_rdma_comm.h @@ -35,7 +35,7 @@ static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_ } if (request) { - (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1); } ompi_osc_rdma_sync_rdma_dec (sync); diff --git a/ompi/mca/osc/rdma/osc_rdma_frag.h b/ompi/mca/osc/rdma/osc_rdma_frag.h index e9636a24d25..610ce447006 100644 --- a/ompi/mca/osc/rdma/osc_rdma_frag.h +++ b/ompi/mca/osc/rdma/osc_rdma_frag.h @@ -37,7 +37,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t); static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag) { - if (0 == OPAL_THREAD_ADD32(&frag->pending, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32(&frag->pending, -1)) { opal_atomic_rmb (); ompi_osc_rdma_deregister (frag->module, frag->handle); @@ -113,7 +113,7 @@ static inline int ompi_osc_rdma_frag_alloc 
(ompi_osc_rdma_module_t *module, size curr->top += request_len; curr->remain_len -= request_len; - OPAL_THREAD_ADD32(&curr->pending, 1); + OPAL_THREAD_ADD_FETCH32(&curr->pending, 1); OPAL_THREAD_UNLOCK(&module->lock); diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 4352c5cbf1c..8c35018badf 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -17,7 +17,8 @@ static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock) { - return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + ompi_osc_rdma_lock_t _tmp_value = 0; + return !ompi_osc_rdma_lock_compare_exchange (lock, &_tmp_value, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); } static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock) diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index c31f27a62cd..ad661238154 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -201,14 +201,13 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer, int32_t flags; opal_atomic_mb (); + flags = peer->flags; do { - flags = peer->flags; if (flags & flag) { return false; } - - } while (!OPAL_THREAD_BOOL_CMPSET_32 (&peer->flags, flags, flags | flag)); + } while (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&peer->flags, &flags, flags | flag)); return true; } @@ -221,7 +220,7 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer, */ static inline void ompi_osc_rdma_peer_clear_flag (ompi_osc_rdma_peer_t *peer, int flag) { - OPAL_ATOMIC_AND32(&peer->flags, ~flag); + OPAL_ATOMIC_AND_FETCH32(&peer->flags, ~flag); opal_atomic_mb (); } diff --git a/ompi/mca/osc/rdma/osc_rdma_request.c b/ompi/mca/osc/rdma/osc_rdma_request.c index 625b4d380ed..9c032ca4028 100644 --- a/ompi/mca/osc/rdma/osc_rdma_request.c +++ b/ompi/mca/osc/rdma/osc_rdma_request.c @@ -48,7 +48,7 @@ static int request_complete 
(struct ompi_request_t *request) { ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request; - if (parent_request && 0 == OPAL_THREAD_ADD32 (&parent_request->outstanding_requests, -1)) { + if (parent_request && 0 == OPAL_THREAD_ADD_FETCH32 (&parent_request->outstanding_requests, -1)) { ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS); } diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index d6dfb0d0188..fc23f0f343f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -25,7 +25,7 @@ typedef int64_t osc_rdma_base_t; typedef int64_t osc_rdma_size_t; typedef int64_t osc_rdma_counter_t; -#define ompi_osc_rdma_counter_add opal_atomic_add_64 +#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_64 #else @@ -33,7 +33,7 @@ typedef int32_t osc_rdma_base_t; typedef int32_t osc_rdma_size_t; typedef int32_t osc_rdma_counter_t; -#define ompi_osc_rdma_counter_add opal_atomic_add_32 +#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_32 #endif @@ -48,18 +48,18 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value int64_t new; opal_atomic_mb (); - new = opal_atomic_add_64 (p, value) - value; + new = opal_atomic_add_fetch_64 (p, value) - value; opal_atomic_mb (); return new; } -static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value) +static inline int ompi_osc_rdma_lock_compare_exchange (volatile int64_t *p, int64_t *comp, int64_t value) { int ret; opal_atomic_mb (); - ret = opal_atomic_bool_cmpset_64 (p, comp, value); + ret = opal_atomic_compare_exchange_strong_64 (p, comp, value); opal_atomic_mb (); return ret; @@ -76,19 +76,19 
@@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value int32_t new; opal_atomic_mb (); - /* opal_atomic_add_32 differs from normal atomics in that is returns the new value */ - new = opal_atomic_add_32 (p, value) - value; + /* opal_atomic_add_fetch_32 differs from normal atomics in that it returns the new value */ + new = opal_atomic_add_fetch_32 (p, value) - value; opal_atomic_mb (); return new; } -static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value) +static inline int ompi_osc_rdma_lock_compare_exchange (volatile int32_t *p, int32_t *comp, int32_t value) { int ret; opal_atomic_mb (); - ret = opal_atomic_bool_cmpset_32 (p, comp, value); + ret = opal_atomic_compare_exchange_strong_32 (p, comp, value); opal_atomic_mb (); return ret; diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 083992d8331..ab0f73f87c6 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2017 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights @@ -130,10 +130,11 @@ ompi_osc_sm_start(struct ompi_group_t *group, ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; int my_rank = ompi_comm_rank (module->comm); + void *_tmp_ptr = NULL; OBJ_RETAIN(group); - if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, NULL, group)) { + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) { OBJ_RELEASE(group); return OMPI_ERR_RMA_SYNC; } @@ -150,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group, for (int i = 0 ; i < size ; ++i) { int rank_byte = ranks[i] >> OSC_SM_POST_BITS; - osc_sm_post_type_t old, rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); + osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); /* wait for rank to post */ while (!(module->posts[my_rank][rank_byte] & rank_bit)) { @@ -160,9 +161,11 @@ ompi_osc_sm_start(struct ompi_group_t *group, opal_atomic_rmb (); - do { - old = module->posts[my_rank][rank_byte]; - } while (!opal_atomic_bool_cmpset ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit)); +#if OPAL_HAVE_ATOMIC_MATH_64 + (void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit); +#else + (void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit); +#endif } free (ranks); @@ -185,7 +188,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win) opal_atomic_mb(); group = module->start_group; - if (NULL == group || !OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, group, NULL)) { + if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) { return OMPI_ERR_RMA_SYNC; } @@ -198,7 +201,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win) gsize = ompi_group_size(group); for (int i = 0 ; i < gsize ; ++i) { - (void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1); + (void) 
opal_atomic_add_fetch_32(&module->node_states[ranks[i]].complete_count, 1); } free (ranks); @@ -244,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group, gsize = ompi_group_size(module->post_group); for (int i = 0 ; i < gsize ; ++i) { - (void) opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); + opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); } opal_atomic_wmb (); diff --git a/ompi/mca/osc/sm/osc_sm_passive_target.c b/ompi/mca/osc/sm/osc_sm_passive_target.c index 889ac829dd1..a3388b776a4 100644 --- a/ompi/mca/osc/sm/osc_sm_passive_target.c +++ b/ompi/mca/osc/sm/osc_sm_passive_target.c @@ -26,9 +26,9 @@ lk_fetch_add32(ompi_osc_sm_module_t *module, size_t offset, uint32_t delta) { - /* opal_atomic_add_32 is an add then fetch so delta needs to be subtracted out to get the + /* opal_atomic_add_fetch_32 is an add then fetch so delta needs to be subtracted out to get the * old value */ - return opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset), + return opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset), delta) - delta; } @@ -39,7 +39,7 @@ lk_add32(ompi_osc_sm_module_t *module, size_t offset, uint32_t delta) { - opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset), + opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset), delta); } diff --git a/ompi/mca/pml/base/pml_base_bsend.c b/ompi/mca/pml/base/pml_base_bsend.c index f683570f708..ef6be82599a 100644 --- a/ompi/mca/pml/base/pml_base_bsend.c +++ b/ompi/mca/pml/base/pml_base_bsend.c @@ -81,7 +81,7 @@ int mca_pml_base_bsend_init(bool thread_safe) { size_t tmp; - if(OPAL_THREAD_ADD32(&mca_pml_bsend_init, 1) > 1) + if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init, 1) > 1) return OMPI_SUCCESS; /* initialize static objects */ @@ -109,7 +109,7 @@ int mca_pml_base_bsend_init(bool thread_safe) */ int 
mca_pml_base_bsend_fini(void) { - if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0) + if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0) return OMPI_SUCCESS; if(NULL != mca_pml_bsend_allocator) diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.h b/ompi/mca/pml/bfo/pml_bfo_failover.h index d1b97807adb..ea4f70fdc48 100644 --- a/ompi/mca/pml/bfo/pml_bfo_failover.h +++ b/ompi/mca/pml/bfo/pml_bfo_failover.h @@ -261,7 +261,7 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t */ #define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \ if (sendreq->req_state == -1) { \ - OPAL_THREAD_ADD32(&sendreq->req_state, 1); \ + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \ } /* Now check the error state. This request can be in error if the diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c index ce6827d5385..c7216c0d538 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c @@ -328,7 +328,7 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl, * protocol has req_state == 0 and as such should not be * decremented. 
*/ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } if(send_request_pml_complete_check(sendreq) == false) diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.c b/ompi/mca/pml/bfo/pml_bfo_recvreq.c index 969420efc0b..c0658f10ef3 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.c @@ -206,7 +206,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, (void *) des->des_remote, des->des_remote_count, 0); } - OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1); + OPAL_THREAD_SUB_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1); #if PML_BFO btl->btl_free(btl, des); @@ -217,7 +217,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, #endif /* PML_BFO */ /* check completion status */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { /* schedule additional rdma operations */ @@ -388,7 +388,7 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, #endif /* PML_BFO */ /* is receive request complete */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); recv_request_pml_complete_check(recvreq); MCA_PML_BFO_RDMA_FRAG_RETURN(frag); @@ -506,7 +506,7 @@ void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -668,7 +668,7 @@ void mca_pml_bfo_recv_request_progress_rndv( 
mca_pml_bfo_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -903,7 +903,7 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq, #endif /* PML_BFO */ /* update request state */ recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1); recvreq->req_rdma[rdma_idx].length -= size; bytes_remaining -= size; } else { diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.h b/ompi/mca/pml/bfo/pml_bfo_recvreq.h index 9c3f53989a4..7b3a6db6271 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.h +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.h @@ -70,12 +70,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t); static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1; } static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0; } /** diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.c b/ompi/mca/pml/bfo/pml_bfo_sendreq.c index 67208a9fe4a..176eadf4f6e 100644 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.c @@ -207,10 +207,10 @@ mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl, &(sendreq->req_send.req_base), PERUSE_SEND ); } - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); /* advance the request */ - 
OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); send_request_pml_complete_check(sendreq); @@ -287,7 +287,7 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, (void *) des->des_local, des->des_local_count, 0); if (OPAL_LIKELY(0 < req_bytes_delivered)) { - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); } send_request_pml_complete_check(sendreq); @@ -360,8 +360,8 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl, des->des_local_count, sizeof(mca_pml_bfo_frag_hdr_t)); - OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_SUB_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); #if PML_BFO MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, @@ -1164,7 +1164,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq) range->range_btls[btl_idx].length -= size; range->range_send_length -= size; range->range_send_offset += size; - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1); if(range->range_send_length == 0) { range = get_next_send_range(sendreq, range); prev_bytes_remaining = 0; @@ -1226,7 +1226,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, #endif /* PML_BFO */ /* check for request completion */ - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); send_request_pml_complete_check(sendreq); @@ -1335,7 +1335,7 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq, size_t i, size = 0; if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) { - 
OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } #if PML_BFO MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq); diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.h b/ompi/mca/pml/bfo/pml_bfo_sendreq.h index 37f15af4578..170512ffe3e 100644 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.h +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.h @@ -78,12 +78,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t); static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1; } static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0; } static inline void @@ -445,7 +445,7 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq ) sendreq->req_pipeline_depth = 0; sendreq->req_bytes_delivered = 0; sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( + sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32( &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); #if PML_BFO sendreq->req_restartseq = 0; /* counts up restarts */ diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 3a5b0c2d7a0..be673382761 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -151,7 +151,7 @@ int mca_pml_ob1_isend(const void *buf, } if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1); } if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { @@ -220,7 +220,7 @@ int mca_pml_ob1_send(const void *buf, } if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { - seqn = (uint16_t) 
OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1); } /** diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c index 276f089938f..e1f84e796b4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_progress.c +++ b/ompi/mca/pml/ob1/pml_ob1_progress.c @@ -56,7 +56,7 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void) static int mca_pml_ob1_progress_needed = 0; int mca_pml_ob1_enable_progress(int32_t count) { - int32_t progress_count = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count); + int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count); if( 1 < progress_count ) return 0; /* progress was already on */ @@ -119,7 +119,7 @@ int mca_pml_ob1_progress(void) } if( 0 != completed_requests ) { - j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests); + j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests); if( 0 == j ) { opal_progress_unregister(mca_pml_ob1_progress); } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index f85964f653b..83b7a44902e 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -445,7 +445,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, * protocol has req_state == 0 and as such should not be * decremented. 
*/ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } #if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */ diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 6616e8eacca..9ccb27e1af4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; - OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1); + OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, -1); assert ((uint64_t) rdma_size == frag->rdma_length); MCA_PML_OB1_RDMA_FRAG_RETURN(frag); @@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r if (OPAL_LIKELY(0 < rdma_size)) { /* check completion status */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, rdma_size); if (recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { /* schedule additional rdma operations */ @@ -373,7 +373,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_ } } else { /* is receive request complete */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); /* TODO: re-add order */ mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc, bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag, @@ -524,7 +524,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status 
*/ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -601,7 +601,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl, * known that the data has been copied out of the descriptor. */ des->des_cbfunc(NULL, NULL, des, 0); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -815,7 +815,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq recvreq->req_recv.req_base.req_count, recvreq->req_recv.req_base.req_datatype); ); - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); } /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -1024,7 +1024,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq, if (OPAL_LIKELY(OMPI_SUCCESS == rc)) { /* update request state */ recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, 1); recvreq->req_rdma[rdma_idx].length -= size; bytes_remaining -= size; } else { diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index 82c4767d834..0ced47e2915 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -64,12 +64,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t); static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1; } static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq) { - return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; + return 
OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0; } /** diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index f358d733dab..a2aecae09ac 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -205,10 +205,10 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl, &(sendreq->req_send.req_base), PERUSE_SEND ); } - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); /* advance the request */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); send_request_pml_complete_check(sendreq); @@ -261,7 +261,7 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length) /* count bytes of user data actually delivered and check for request completion */ if (OPAL_LIKELY(0 < rdma_length)) { - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); } send_request_pml_complete_check(sendreq); @@ -313,8 +313,8 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl, des->des_segment_count, sizeof(mca_pml_ob1_frag_hdr_t)); - OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, -1); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); if(send_request_pml_complete_check(sendreq) == false) { mca_pml_ob1_send_request_schedule(sendreq); @@ -1044,7 +1044,7 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) range->range_btls[btl_idx].length -= size; range->range_send_length -= size; range->range_send_offset += size; - OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1); 
if(range->range_send_length == 0) { range = get_next_send_range(sendreq, range); prev_bytes_remaining = 0; @@ -1060,7 +1060,7 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) range->range_btls[btl_idx].length -= size; range->range_send_length -= size; range->range_send_offset += size; - OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1); if(range->range_send_length == 0) { range = get_next_send_range(sendreq, range); prev_bytes_remaining = 0; @@ -1126,7 +1126,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b 0, 0); /* check for request completion */ - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); send_request_pml_complete_check(sendreq); } else { @@ -1200,7 +1200,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, mca_pml_ob1_rdma_frag_t* frag; if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) { - OPAL_THREAD_ADD32(&sendreq->req_state, -1); + OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); } sendreq->req_recv.pval = hdr->hdr_recv_req.pval; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 5cb21f6aba6..be36c3f2ac4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -76,12 +76,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_range_t); static inline bool lock_send_request(mca_pml_ob1_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1; } static inline bool unlock_send_request(mca_pml_ob1_send_request_t *sendreq) { - return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; + return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0; } static inline void @@ -485,7 +485,7 @@ mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* 
sendreq ) return OMPI_ERR_UNREACH; } - seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + seqn = OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1); return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn); } diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index 233d1dd30d0..e4d4d5e68a6 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -100,6 +100,8 @@ int ompi_request_default_wait_any(size_t count, num_requests_null_inactive = 0; for (i = 0; i < count; i++) { + void *_tmp_ptr = REQUEST_PENDING; + request = requests[i]; /* Check for null or completed persistent request. For @@ -110,7 +112,7 @@ int ompi_request_default_wait_any(size_t count, continue; } - if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { + if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync) ) { assert(REQUEST_COMPLETE(request)); completed = i; *index = i; @@ -136,6 +138,8 @@ int ompi_request_default_wait_any(size_t count, * user. */ for(i = completed-1; (i+1) > 0; i--) { + void *tmp_ptr = &sync; + request = requests[i]; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -146,7 +150,7 @@ int ompi_request_default_wait_any(size_t count, * Otherwise, the request has been completed meanwhile, and it * has been atomically marked as REQUEST_COMPLETE. 
*/ - if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &tmp_ptr, REQUEST_PENDING) ) { *index = i; } } @@ -211,6 +215,8 @@ int ompi_request_default_wait_all( size_t count, WAIT_SYNC_INIT(&sync, count); rptr = requests; for (i = 0; i < count; i++) { + void *_tmp_ptr = REQUEST_PENDING; + request = *rptr++; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -218,7 +224,7 @@ int ompi_request_default_wait_all( size_t count, continue; } - if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { failed++; } @@ -246,6 +252,8 @@ int ompi_request_default_wait_all( size_t count, if (MPI_STATUSES_IGNORE != statuses) { /* fill out status and free request if required */ for( i = 0; i < count; i++, rptr++ ) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -260,7 +268,7 @@ int ompi_request_default_wait_all( size_t count, * mark the request as pending then it is neither failed nor complete, and * we must stop altering it. */ - if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) { /* * Per MPI 2.2 p 60: * Allows requests to be marked as MPI_ERR_PENDING if they are @@ -306,6 +314,8 @@ int ompi_request_default_wait_all( size_t count, int rc; /* free request if required */ for( i = 0; i < count; i++, rptr++ ) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -320,7 +330,7 @@ int ompi_request_default_wait_all( size_t count, /* If the request is still pending due to a failed request * then skip it in this loop. 
*/ - if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) { /* * Per MPI 2.2 p 60: * Allows requests to be marked as MPI_ERR_PENDING if they are @@ -398,6 +408,8 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive = 0; num_requests_done = 0; for (size_t i = 0; i < count; i++, rptr++) { + void *_tmp_ptr = REQUEST_PENDING; + request = *rptr; /* * Check for null or completed persistent request. @@ -407,7 +419,7 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive++; continue; } - indices[i] = OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync); + indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync); if( !indices[i] ) { /* If the request is completed go ahead and mark it as such */ assert( REQUEST_COMPLETE(request) ); @@ -434,6 +446,8 @@ int ompi_request_default_wait_some(size_t count, rptr = requests; num_requests_done = 0; for (size_t i = 0; i < count; i++, rptr++) { + void *_tmp_ptr = &sync; + request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE ) { @@ -454,7 +468,7 @@ int ompi_request_default_wait_some(size_t count, */ if( !indices[i] ){ indices[num_requests_done++] = i; - } else if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + } else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) { indices[num_requests_done++] = i; } } diff --git a/ompi/request/request.h b/ompi/request/request.h index 8f472c1f5cd..5a1c02c4b65 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -13,7 +13,7 @@ * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. 
- * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -396,10 +396,12 @@ static inline int ompi_request_free(ompi_request_t** request) static inline void ompi_request_wait_completion(ompi_request_t *req) { if (opal_using_threads () && !REQUEST_COMPLETE(req)) { + void *_tmp_ptr = REQUEST_PENDING; ompi_wait_sync_t sync; + WAIT_SYNC_INIT(&sync, 1); - if (OPAL_ATOMIC_BOOL_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) { + if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { SYNC_WAIT(&sync); } else { /* completed before we had a chance to swap in the sync object */ @@ -439,7 +441,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa if (0 == rc) { if( OPAL_LIKELY(with_signal) ) { - if(!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) { + void *_tmp_ptr = REQUEST_PENDING; + + if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) { ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, REQUEST_COMPLETED); /* In the case where another thread concurrently changed the request to REQUEST_PENDING */ diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 67aa479deb5..ad67c77a6ff 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reseved. 
* $COPYRIGHT$ * @@ -76,7 +76,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost; } -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the @@ -85,14 +85,12 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_list_item_t *item) { - opal_counted_pointer_t tail; + opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value}; item->opal_list_next = &fifo->opal_fifo_ghost; do { - tail.value = fifo->opal_fifo_tail.value; - - if (opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, item)) { + if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) { break; } } while (1); @@ -102,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, if (&fifo->opal_fifo_ghost == tail.data.item) { /* update the head */ opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; - opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); + opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item); } else { /* update previous item */ tail.data.item->opal_list_next = item; @@ -116,29 +114,28 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; - opal_counted_pointer_t head, tail; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; + opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail; do { - head.value = fifo->opal_fifo_head.value; tail.value = fifo->opal_fifo_tail.value; opal_atomic_rmb (); item = (opal_list_item_t *) head.data.item; next = (opal_list_item_t *) item->opal_list_next; - if (&fifo->opal_fifo_ghost == 
tail.data.item && &fifo->opal_fifo_ghost == item) { + if (ghost == tail.data.item && ghost == item) { return NULL; } /* the head or next pointer are in an inconsistent state. keep looping. */ - if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item && - &fifo->opal_fifo_ghost == next) { + if (tail.data.item != item && ghost != tail.data.item && ghost == next) { + head.value = fifo->opal_fifo_head.value; continue; } /* try popping the head */ - if (opal_update_counted_pointer (&fifo->opal_fifo_head, head, next)) { + if (opal_update_counted_pointer (&fifo->opal_fifo_head, &head, next)) { break; } } while (1); @@ -146,14 +143,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb (); /* check for tail and head consistency */ - if (&fifo->opal_fifo_ghost == next) { + if (ghost == next) { /* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */ - if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) { + if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, ghost)) { /* tail was changed by a push operation. 
wait for the item's next pointer to be se then * update the head */ /* wait for next pointer to be updated by push */ - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } @@ -166,7 +163,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) head.value = fifo->opal_fifo_head.value; next = (opal_list_item_t *) item->opal_list_next; - assert (&fifo->opal_fifo_ghost == head.data.item); + assert (ghost == head.data.item); fifo->opal_fifo_head.data.item = next; opal_atomic_wmb (); @@ -215,14 +212,14 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; #if OPAL_HAVE_ATOMIC_LLSC_PTR /* use load-linked store-conditional to avoid ABA issues */ do { item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); - if (&fifo->opal_fifo_ghost == item) { - if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) { + if (ghost == item) { + if (ghost == fifo->opal_fifo_tail.data.item) { return NULL; } @@ -239,7 +236,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) #else /* protect against ABA issues by "locking" the head */ do { - if (opal_atomic_bool_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) { + if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) { break; } @@ -249,7 +246,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb(); item = opal_fifo_head (fifo); - if (&fifo->opal_fifo_ghost == item) { + if (ghost == item) { fifo->opal_fifo_head.data.counter = 0; return NULL; } @@ -258,9 +255,11 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) fifo->opal_fifo_head.data.item = next; #endif - if (&fifo->opal_fifo_ghost == next) { - if 
(!opal_atomic_bool_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) { - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + if (ghost == next) { + void *tmp = item; + + if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index 73caf32cb0c..e5a3f9110cb 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reseved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -36,8 +36,8 @@ BEGIN_C_DECLS /* NTH: temporarily suppress warnings about this not being defined */ -#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128) -#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 #endif /** @@ -50,7 +50,7 @@ union opal_counted_pointer_t { /** list item pointer */ volatile opal_list_item_t * volatile item; } data; -#if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T /** used for atomics when there is a cmpset that can operate on * two 64-bit values */ opal_int128_t value; @@ -59,19 +59,19 @@ union opal_counted_pointer_t { typedef union opal_counted_pointer_t opal_counted_pointer_t; -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the * list (if the list was empty before this operation). 
*/ -static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t old, +static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *old, opal_list_item_t *item) { opal_counted_pointer_t new_p; new_p.data.item = item; - new_p.data.counter = old.data.counter + 1; - return opal_atomic_bool_cmpset_128 (&addr->value, old.value, new_p.value); + new_p.data.counter = old->data.counter + 1; + return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value); } #endif @@ -110,7 +110,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) } -#if OPAL_HAVE_ATOMIC_CMPSET_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 /* Add one element to the LIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the @@ -119,14 +119,14 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, opal_list_item_t *item) { - do { - opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + do { item->opal_list_next = next; opal_atomic_wmb (); /* to protect against ABA issues it is sufficient to only update the counter in pop */ - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) { return next; } /* DO some kind of pause to release the bus */ @@ -141,17 +141,17 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_counted_pointer_t old_head; opal_list_item_t *item; - do { - - old_head.data.counter = lifo->opal_lifo_head.data.counter; - opal_atomic_rmb (); - old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item; + old_head.data.counter = 
lifo->opal_lifo_head.data.counter; + opal_atomic_rmb (); + old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + do { + item = (opal_list_item_t *) old_head.data.item; if (item == &lifo->opal_lifo_ghost) { return NULL; } - if (opal_update_counted_pointer (&lifo->opal_lifo_head, old_head, + if (opal_update_counted_pointer (&lifo->opal_lifo_head, &old_head, (opal_list_item_t *) item->opal_list_next)) { opal_atomic_wmb (); item->opal_list_next = NULL; @@ -169,13 +169,15 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, opal_list_item_t *item) { + opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + /* item free acts as a mini lock to avoid ABA problems */ item->item_free = 1; + do { - opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; item->opal_list_next = next; opal_atomic_wmb(); - if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) { opal_atomic_wmb (); /* now safe to pop this item */ item->item_free = 0; @@ -236,8 +238,11 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) */ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) { - opal_list_item_t *item; - while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) { + opal_list_item_t *item, *head, *ghost = &lifo->opal_lifo_ghost; + + item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; + + while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != ghost) { /* ensure it is safe to pop the head */ if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) { continue; @@ -245,14 +250,16 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_atomic_wmb (); + head = item; /* try to swap out the head pointer */ - if (opal_atomic_bool_cmpset_ptr 
(&lifo->opal_lifo_head.data.item, item, - (void *) item->opal_list_next)) { + if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &head, + (void *) item->opal_list_next)) { break; } /* NTH: don't need another atomic here */ item->item_free = 0; + item = head; /* Do some kind of pause to release the bus */ } diff --git a/opal/class/opal_list.c b/opal/class/opal_list.c index e0a5112c38a..87cb1192b1b 100644 --- a/opal/class/opal_list.c +++ b/opal/class/opal_list.c @@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx) /* Spot check: ensure this item is only on the list that we just insertted it into */ - (void)opal_atomic_add( &(item->opal_list_item_refcount), 1 ); + opal_atomic_add ( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif diff --git a/opal/class/opal_list.h b/opal/class/opal_list.h index cafc96dfb78..5edd6730d54 100644 --- a/opal/class/opal_list.h +++ b/opal/class/opal_list.h @@ -509,7 +509,7 @@ static inline opal_list_item_t *opal_list_remove_item #if OPAL_ENABLE_DEBUG /* Spot check: ensure that this item is still only on one list */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 ); assert(0 == item->opal_list_item_refcount); item->opal_list_item_belong_to = NULL; #endif @@ -575,7 +575,7 @@ static inline void _opal_list_append(opal_list_t *list, opal_list_item_t *item /* Spot check: ensure this item is only on the list that we just appended it to */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif @@ -625,7 +625,7 @@ static inline void opal_list_prepend(opal_list_t *list, /* Spot check: ensure this item is only on the list that we just prepended it to */ - 
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif @@ -686,7 +686,7 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list) /* Spot check: ensure that the item we're returning is now on no lists */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 ); assert(0 == item->opal_list_item_refcount); #endif @@ -746,7 +746,7 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list) /* Spot check: ensure that the item we're returning is now on no lists */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 ); assert(0 == item->opal_list_item_refcount); item->opal_list_item_belong_to = NULL; #endif @@ -789,7 +789,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos /* Spot check: double check that this item is only on the list that we just added it to */ - OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 ); assert(1 == item->opal_list_item_refcount); item->opal_list_item_belong_to = list; #endif diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 8539f2bf872..4e2da95c204 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -510,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__; static inline int opal_obj_update(opal_object_t *object, int inc) { - return OPAL_THREAD_ADD32(&object->obj_reference_count, inc); + return OPAL_THREAD_ADD_FETCH32(&object->obj_reference_count, inc); } END_C_DECLS diff --git a/opal/class/opal_tree.c b/opal/class/opal_tree.c index 
fdd41ea20a1..d56813f1dd3 100644 --- a/opal/class/opal_tree.c +++ b/opal/class/opal_tree.c @@ -210,7 +210,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item, /* Spot check: ensure this item is only on the list that we just appended it to */ - OPAL_THREAD_ADD32( &(new_item->opal_tree_item_refcount), 1 ); + OPAL_THREAD_ADD_FETCH32( &(new_item->opal_tree_item_refcount), 1 ); assert(1 == new_item->opal_tree_item_refcount); new_item->opal_tree_item_belong_to = new_item->opal_tree_container; #endif diff --git a/opal/include/opal/sys/arm/atomic.h b/opal/include/opal/sys/arm/atomic.h index fa3b35d18b2..6d4db3ad7a4 100644 --- a/opal/include/opal/sys/arm/atomic.h +++ b/opal/include/opal/sys/arm/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -104,12 +107,12 @@ void opal_atomic_isync(void) #if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6)) -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ( "1: ldrex %0, [%2] \n" @@ -120,11 +123,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, " bne 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } /* these two functions aren't inlined in the non-gcc case because then @@ -132,51 +137,50 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } #if (OPAL_ASM_SUPPORT_64BIT == 1) -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; - int tmp; - - - __asm__ __volatile__ ( - "1: ldrexd %0, %H0, [%2] \n" - " cmp %0, %3 \n" - " it eq \n" - " cmpeq %H0, %H3 \n" - " bne 2f \n" - " strexd %1, %4, %H4, [%2] \n" - " cmp %1, #0 \n" - " bne 1b \n" - "2: \n" - - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) - : "cc", "memory"); - - return (ret == oldval); + int64_t prev; + int tmp; + bool ret; + + __asm__ __volatile__ ( + "1: ldrexd %0, %H0, [%2] \n" + " cmp %0, %3 \n" + " it eq \n" + " cmpeq %H0, %H3 \n" + " bne 2f \n" + " strexd %1, %4, %H4, [%2] \n" + " cmp %1, #0 \n" + " bne 1b \n" + "2: \n" + + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) + : "cc", "memory"); + + ret = (prev == *oldval); + *oldval = prev; + return 
ret; } /* these two functions aren't inlined in the non-gcc case because then @@ -184,91 +188,65 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } #endif #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int inc) { - int32_t t; - int tmp; - - __asm__ __volatile__( - "1: ldrex %0, [%2] \n" - " add %0, %0, %3 \n" - " strex %1, %0, [%2] \n" - " cmp %1, #0 \n" + int32_t t, old; + int tmp; + + __asm__ __volatile__( + "1: ldrex %1, [%3] \n" + " add %0, %1, %4 \n" + " strex %2, %0, [%3] \n" + " cmp %2, #0 \n" " bne 1b \n" - : "=&r" (t), "=&r" (tmp) + : "=&r" (t), "=&r" (old), "=&r" (tmp) : "r" (v), "r" (inc) : "cc", "memory"); - return t; + return old; } #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int dec) { - int32_t t; - int tmp; - - __asm__ __volatile__( - "1: ldrex %0, [%2] \n" - " sub 
%0, %0, %3 \n" - " strex %1, %0, [%2] \n" - " cmp %1, #0 \n" + int32_t t, old; + int tmp; + + __asm__ __volatile__( + "1: ldrex %1, [%3] \n" + " sub %0, %1, %4 \n" + " strex %2, %0, [%3] \n" + " cmp %2, #0 \n" " bne 1b \n" - : "=&r" (t), "=&r" (tmp) + : "=&r" (t), "=&r" (old), "=&r" (tmp) : "r" (v), "r" (dec) : "cc", "memory"); - return t; -} - -#else /* OPAL_ASM_ARM_VERSION <=5 or no GCC inline assembly */ - -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0))) -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return !(__kuser_cmpxchg(oldval, newval, addr)); -} - -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - /* kernel function includes all necessary memory barriers */ - return opal_atomic_bool_cmpset_32(addr, oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - /* kernel function includes all necessary memory barriers */ - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return old; } #endif diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index c95c3cdc6ad..fd5a773a4f4 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -29,10 +29,10 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -82,10 +82,10 @@ static inline void opal_atomic_isync (void) * *********************************************************************/ -static inline bool 
opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -93,11 +93,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) @@ -119,10 +121,10 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -130,18 +132,20 @@ static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t 
*oldval, int32_t newval) { - int32_t ret, tmp; + int32_t prev, tmp; + bool ret; __asm__ __volatile__ ("1: ldxr %w0, [%2] \n" " cmp %w0, %w3 \n" @@ -149,11 +153,13 @@ static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, " stlxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) @@ -179,11 +185,11 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) return ret == 0; } -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; + int64_t prev; int tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" " cmp %0, %3 \n" @@ -191,11 +197,13 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, " stxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) @@ -218,11 +226,11 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; + int64_t prev; int tmp; + bool ret; __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" " cmp %0, %3 \n" @@ -230,19 +238,21 @@ static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, " stxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; + int64_t prev; int tmp; + bool ret; __asm__ __volatile__ ("1: ldxr %0, [%2] \n" " cmp %0, %3 \n" @@ -250,11 +260,13 @@ static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, " stlxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2: \n" - : "=&r" (ret), "=&r" (tmp) - : "r" (addr), "r" (oldval), "r" (newval) + : "=&r" (prev), "=&r" (tmp) + : "r" (addr), "r" (*oldval), "r" (newval) : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr) @@ -281,20 +293,20 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval) } #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ - static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ + static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type 
*addr, type value) \ { \ - type newval; \ + type newval, old; \ int32_t tmp; \ \ - __asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \ - " " inst " %" reg "0, %" reg "0, %" reg "3 \n" \ - " stxr %w1, %" reg "0, [%2] \n" \ - " cbnz %w1, 1b \n" \ - : "=&r" (newval), "=&r" (tmp) \ + __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ + " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ + " stxr %w2, %" reg "0, [%3] \n" \ + " cbnz %w2, 1b \n" \ + : "=&r" (newval), "=&r" (old), "=&r" (tmp) \ : "r" (addr), "r" (value) \ : "cc", "memory"); \ \ - return newval; \ + return old; \ } OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 961ebac0114..53e34333d8c 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -40,11 +40,11 @@ * * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks - * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly" - * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly" + * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/compare-exchange can be done "atomicly" + * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/compare-exchange can be done "atomicly" * * Note that for the Atomic math, atomic add/sub may be implemented as - * C code using opal_atomic_bool_cmpset. The appearance of atomic + * C code using opal_atomic_compare_exchange. The appearance of atomic * operation will be upheld in these cases. 
*/ @@ -107,8 +107,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; *********************************************************************/ #if !OPAL_GCC_INLINE_ASSEMBLY #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 0 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 0 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 0 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 0 @@ -123,8 +123,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0 #else #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 @@ -187,14 +187,14 @@ enum { /* compare and set operations can't really be emulated from software, so if these defines aren't already set, they should be set to 0 now */ -#ifndef OPAL_HAVE_ATOMIC_CMPSET_32 -#define OPAL_HAVE_ATOMIC_CMPSET_32 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0 #endif -#ifndef OPAL_HAVE_ATOMIC_CMPSET_64 -#define OPAL_HAVE_ATOMIC_CMPSET_64 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 #endif -#ifndef OPAL_HAVE_ATOMIC_CMPSET_128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 #endif #ifndef OPAL_HAVE_ATOMIC_LLSC_32 #define OPAL_HAVE_ATOMIC_LLSC_32 0 @@ -270,7 +270,7 @@ void opal_atomic_wmb(void); /********************************************************************** * - * Atomic spinlocks - always inlined, if have atomic cmpset + * Atomic 
spinlocks - always inlined, if have atomic compare-and-swap * *********************************************************************/ @@ -280,7 +280,7 @@ void opal_atomic_wmb(void); #define OPAL_HAVE_ATOMIC_SPINLOCKS 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) /** * Initialize a lock to value @@ -330,7 +330,7 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 #undef OPAL_HAVE_ATOMIC_SPINLOCKS -#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) #define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1 #endif @@ -347,48 +347,48 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, - int32_t newval); +bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, + int32_t newval); #endif -#if 
!defined(OPAL_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN) -#define OPAL_HAVE_ATOMIC_CMPSET_64 0 +#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN) +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); +bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); +bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); -#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, - int64_t newval); +bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, + int64_t newval); #endif @@ -397,45 +397,25 @@ bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, #define OPAL_HAVE_ATOMIC_MATH_32 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_CMPSET_32 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides - a static inline version of it (in assembly). If we have to fall - back on cmpset 32, that too will be inline. 
*/ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) -static inline -#endif -int32_t opal_atomic_add_32(volatile int32_t *addr, int delta); - -#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_CMPSET_32) -static inline -#endif -int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_CMPSET_32) -static inline -#endif -int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_CMPSET_32) -static inline -#endif -int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value); - -/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if /atomic.h provides - a static inline version of it (in assembly). If we have to fall - back to cmpset 32, that too will be inline. */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) -static inline -#endif -int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value); +static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta); +static inline int32_t opal_atomic_fetch_sub_32(volatile 
int32_t *addr, int delta); #endif /* OPAL_HAVE_ATOMIC_MATH_32 */ #if ! OPAL_HAVE_ATOMIC_MATH_32 /* fix up the value of opal_have_atomic_math_32 to allow for C versions */ #undef OPAL_HAVE_ATOMIC_MATH_32 -#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_CMPSET_32 +#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #endif #ifndef OPAL_HAVE_ATOMIC_MATH_64 @@ -443,45 +423,24 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta); #define OPAL_HAVE_ATOMIC_MATH_64 0 #endif -#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_CMPSET_64 +#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides - a static inline version of it (in assembly). If we have to fall - back to cmpset 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) -static inline -#endif -int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta); - -#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_CMPSET_64) -static inline -#endif -int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_CMPSET_64) -static inline -#endif -int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value); - -#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_CMPSET_64) -static inline -#endif -int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value); - -/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if /atomic.h provides - a static inline version of it (in assembly). 
If we have to fall - back to cmpset 64, that too will be inline */ -#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) -static inline -#endif -int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value); +static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta); +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta); #endif /* OPAL_HAVE_ATOMIC_MATH_32 */ #if ! OPAL_HAVE_ATOMIC_MATH_64 /* fix up the value of opal_have_atomic_math_64 to allow for C versions */ #undef OPAL_HAVE_ATOMIC_MATH_64 -#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_CMPSET_64 +#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #endif /* provide a size_t add/subtract. 
When in debug mode, make it an @@ -491,114 +450,141 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta); */ #if defined(DOXYGEN) || OPAL_ENABLE_DEBUG static inline size_t -opal_atomic_add_size_t(volatile size_t *addr, size_t delta) +opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_add_32((int32_t*) addr, delta); + return (size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta); #elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_add_64((int64_t*) addr, delta); + return (size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta); #else #error "Unknown size_t size" #endif } + static inline size_t -opal_atomic_sub_size_t(volatile size_t *addr, size_t delta) +opal_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_sub_32((int32_t*) addr, delta); + return (size_t) opal_atomic_fetch_add_32((int32_t*) addr, delta); #elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_sub_64((int64_t*) addr, delta); + return (size_t) opal_atomic_fetch_add_64((int64_t*) addr, delta); #else #error "Unknown size_t size" #endif } + +static inline size_t +opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t) opal_atomic_sub_fetch_32((int32_t*) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t) opal_atomic_sub_fetch_64((int64_t*) addr, delta); +#else +#error "Unknown size_t size" +#endif +} + +static inline size_t +opal_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t) opal_atomic_fetch_sub_32((int32_t*) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t) opal_atomic_fetch_sub_64((int64_t*) addr, delta); +#else +#error "Unknown size_t size" +#endif +} + #else #if SIZEOF_SIZE_T == 4 -#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_32((int32_t*) addr, delta)) -#define opal_atomic_sub_size_t(addr, 
delta) ((size_t) opal_atomic_sub_32((int32_t*) addr, delta)) -#elif SIZEOF_SIZE_T ==8 -#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_64((int64_t*) addr, delta)) -#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_64((int64_t*) addr, delta)) +#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((volatile int32_t *) addr, delta)) +#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_32((volatile int32_t *) addr, delta)) +#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta)) +#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta)) +#elif SIZEOF_SIZE_T == 8 +#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((volatile int64_t *) addr, delta)) +#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_64((volatile int64_t *) addr, delta)) +#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta)) +#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta)) #else #error "Unknown size_t size" #endif #endif -#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) +#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) /* these are always done with inline functions, so always mark as static inline */ -static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length); -static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr, - int64_t oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_bool_cmpset_rel_xx(volatile void* addr, - int64_t oldval, int64_t newval, - size_t length); - -static inline bool 
opal_atomic_bool_cmpset_ptr(volatile void* addr, - void* oldval, - void* newval); -static inline bool opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, - void* oldval, - void* newval); -static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, - void* oldval, - void* newval); + +static inline bool opal_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval, + int64_t newval, size_t length); +static inline bool opal_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval, + int64_t newval, size_t length); +static inline bool opal_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval, + int64_t newval, size_t length); + + +static inline bool opal_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval, + void *newval); +static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval, + void *newval); +static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval, + void *newval); /** - * Atomic compare and set of pointer with relaxed semantics. This + * Atomic compare and set of generic type with relaxed semantics. This * macro detect at compile time the type of the first argument and * choose the correct function to be called. * * \note This macro should only be used for integer types. * * @param addr Address of . - * @param oldval Comparison value . + * @param oldval Comparison value address of . * @param newval New value to set if comparision is true . * - * See opal_atomic_bool_cmpset_* for pseudo-code. + * See opal_atomic_compare_exchange_* for pseudo-code. 
*/ -#define opal_atomic_bool_cmpset( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \ - (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) +#define opal_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) /** - * Atomic compare and set of pointer with acquire semantics. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. + * Atomic compare and set of generic type with acquire semantics. This + * macro detect at compile time the type of the first argument and + * choose the correct function to be called. * * \note This macro should only be used for integer types. * * @param addr Address of . - * @param oldval Comparison value . + * @param oldval Comparison value address of . * @param newval New value to set if comparision is true . * - * See opal_atomic_bool_cmpset_acq_* for pseudo-code. + * See opal_atomic_compare_exchange_acq_* for pseudo-code. */ -#define opal_atomic_bool_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ - (int64_t)(NEWVAL), sizeof(*(ADDR)) ) - +#define opal_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) /** - * Atomic compare and set of pointer with release semantics. This - * macro detect at compile time the type of the first argument - * and choose the correct function to b + * Atomic compare and set of generic type with release semantics. This + * macro detect at compile time the type of the first argument and + * choose the correct function to be called. * * \note This macro should only be used for integer types. * * @param addr Address of . - * @param oldval Comparison value . 
+ * @param oldval Comparison value address of . * @param newval New value to set if comparision is true . * - * See opal_atomic_bool_cmpsetrel_* for pseudo-code. + * See opal_atomic_compare_exchange_rel_* for pseudo-code. */ -#define opal_atomic_bool_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \ - opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ - (int64_t)(NEWVAL), sizeof(*(ADDR)) ) +#define opal_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \ + opal_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) -#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */ + +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) @@ -606,15 +592,11 @@ static inline void opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length); static inline void opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length); -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 -static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ); -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 -static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta ); -static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); -#else -#error Atomic arithmetic on pointers not supported -#endif + +static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ); +static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta ); +static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); +static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta ); /** * Atomically increment the content depending on the type. 
This diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index c066d831cb9..b3aba9af66b 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -34,20 +34,30 @@ * * Some architectures do not provide support for the 64 bits * atomic operations. Until we find a better solution let's just - * undefine all those functions if there is no 64 bit cmpset + * undefine all those functions if there is no 64 bit compare-exchange * *********************************************************************/ -#if OPAL_HAVE_ATOMIC_CMPSET_32 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 + +#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \ + static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \ + { \ + type oldval; \ + do { \ + oldval = *addr; \ + } while (!opal_atomic_compare_exchange_strong_ ## bits (addr, &oldval, oldval operation value)); \ + \ + return oldval; \ + } #if !defined(OPAL_HAVE_ATOMIC_SWAP_32) #define OPAL_HAVE_ATOMIC_SWAP_32 1 static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) { - int32_t old; + int32_t old = *addr; do { - old = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, old, newval)); + } while (!opal_atomic_compare_exchange_strong_32 (addr, &old, newval)); return old; } @@ -55,161 +65,91 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, #if !defined(OPAL_HAVE_ATOMIC_ADD_32) #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t -opal_atomic_add_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval + delta)); - return (oldval + delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add) + #endif /* OPAL_HAVE_ATOMIC_ADD_32 */ #if !defined(OPAL_HAVE_ATOMIC_AND_32) #define OPAL_HAVE_ATOMIC_AND_32 1 -static inline int32_t -opal_atomic_and_32(volatile int32_t *addr, int32_t value) -{ - int32_t 
oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval & value)); - return (oldval & value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and) + #endif /* OPAL_HAVE_ATOMIC_AND_32 */ #if !defined(OPAL_HAVE_ATOMIC_OR_32) #define OPAL_HAVE_ATOMIC_OR_32 1 -static inline int32_t -opal_atomic_or_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval | value)); - return (oldval | value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or) + #endif /* OPAL_HAVE_ATOMIC_OR_32 */ #if !defined(OPAL_HAVE_ATOMIC_XOR_32) #define OPAL_HAVE_ATOMIC_XOR_32 1 -static inline int32_t -opal_atomic_xor_32(volatile int32_t *addr, int32_t value) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval ^ value)); - return (oldval ^ value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor) + #endif /* OPAL_HAVE_ATOMIC_XOR_32 */ #if !defined(OPAL_HAVE_ATOMIC_SUB_32) #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t -opal_atomic_sub_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval - delta)); - return (oldval - delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) + #endif /* OPAL_HAVE_ATOMIC_SUB_32 */ -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ -#if OPAL_HAVE_ATOMIC_CMPSET_64 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #if !defined(OPAL_HAVE_ATOMIC_SWAP_64) #define OPAL_HAVE_ATOMIC_SWAP_64 1 static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) { - int64_t old; + int64_t old = *addr; do { - old = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, old, newval)); + } while (!opal_atomic_compare_exchange_strong_64 (addr, &old, newval)); + return old; } #endif /* OPAL_HAVE_ATOMIC_SWAP_32 */ #if !defined(OPAL_HAVE_ATOMIC_ADD_64) 
#define OPAL_HAVE_ATOMIC_ADD_64 1 -static inline int64_t -opal_atomic_add_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval + delta)); - return (oldval + delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add) + #endif /* OPAL_HAVE_ATOMIC_ADD_64 */ #if !defined(OPAL_HAVE_ATOMIC_AND_64) #define OPAL_HAVE_ATOMIC_AND_64 1 -static inline int64_t -opal_atomic_and_64(volatile int64_t *addr, int64_t value) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval & value)); - return (oldval & value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and) + #endif /* OPAL_HAVE_ATOMIC_AND_64 */ #if !defined(OPAL_HAVE_ATOMIC_OR_64) #define OPAL_HAVE_ATOMIC_OR_64 1 -static inline int64_t -opal_atomic_or_64(volatile int64_t *addr, int64_t value) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval | value)); - return (oldval | value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or) + #endif /* OPAL_HAVE_ATOMIC_OR_64 */ #if !defined(OPAL_HAVE_ATOMIC_XOR_64) #define OPAL_HAVE_ATOMIC_XOR_64 1 -static inline int64_t -opal_atomic_xor_64(volatile int64_t *addr, int64_t value) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval ^ value)); - return (oldval ^ value); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor) + #endif /* OPAL_HAVE_ATOMIC_XOR_64 */ #if !defined(OPAL_HAVE_ATOMIC_SUB_64) #define OPAL_HAVE_ATOMIC_SUB_64 1 -static inline int64_t -opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - do { - oldval = *addr; - } while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval - delta)); - return (oldval - delta); -} +OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) + #endif /* OPAL_HAVE_ATOMIC_SUB_64 */ #else @@ -222,130 +162,70 @@ opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) 
#define OPAL_HAVE_ATOMIC_SUB_64 0 #endif -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - - -#if (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) - -static inline bool -opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 - case 4: - return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ - -#if OPAL_HAVE_ATOMIC_CMPSET_64 - case 8: - return opal_atomic_bool_cmpset_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - } - abort(); - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ -} - - -static inline bool -opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 - case 4: - return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ - -#if OPAL_HAVE_ATOMIC_CMPSET_64 - case 8: - return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - } - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); -} - - -static inline bool -opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval, - int64_t newval, size_t length) -{ - switch( length ) { -#if OPAL_HAVE_ATOMIC_CMPSET_32 - case 4: - return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ - -#if OPAL_HAVE_ATOMIC_CMPSET_64 - case 8: - return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ - } - /* This should never happen, so 
deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); -} - - -static inline bool -opal_atomic_bool_cmpset_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); +#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ + +#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) + +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \ + int64_t newval, const size_t length) \ + { \ + switch (length) { \ + case 4: \ + return opal_atomic_compare_exchange_strong ## semantics ## 32 ((volatile int32_t *) addr, \ + (int32_t *) oldval, (int32_t) newval); \ + case 8: \ + return opal_atomic_compare_exchange_strong ## semantics ## 64 ((volatile int64_t *) addr, \ + (int64_t *) oldval, (int64_t) newval); \ + } \ + abort(); \ + } +#elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \ + int64_t newval, const size_t length) \ + { \ + switch (length) { \ + case 4: \ + return opal_atomic_compare_exchange_strong ## semantics ## 32 ((volatile int32_t *) addr, \ + (int32_t *) oldval, (int32_t) newval); \ + } \ + abort(); \ + } #else - abort(); +#error "Platform does not have required atomic compare-and-swap functionality" #endif -} - -static inline bool -opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return
opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); +OPAL_ATOMIC_DEFINE_CMPXCG_XX(_) +OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_) +OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_) + +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 +#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \ + { \ + return opal_atomic_compare_exchange_strong ## semantics ## 32 ((volatile int32_t *) addr, (int32_t *) oldval, (int32_t) newval); \ + } +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 +#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ + static inline bool \ + opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \ + { \ + return opal_atomic_compare_exchange_strong ## semantics ## 64 ((volatile int64_t *) addr, (int64_t *) oldval, (int64_t) newval); \ + } #else - abort(); +#error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics" #endif -} +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_) +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_) +OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) -static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, - void* oldval, - void* newval) -{ -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, - (unsigned long) newval); -#else - abort(); -#endif -} +#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ -#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */ #if
(OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) @@ -383,20 +263,19 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, #if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 - static inline void -opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) + opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) { switch( length ) { #if OPAL_HAVE_ATOMIC_ADD_32 case 4: - opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value ); + (void) opal_atomic_fetch_add_32( (volatile int32_t*)addr, (int32_t)value ); break; -#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ +#endif /* OPAL_HAVE_ATOMIC_ADD_32 */ #if OPAL_HAVE_ATOMIC_ADD_64 case 8: - opal_atomic_add_64( (volatile int64_t*)addr, (int64_t)value ); + (void) opal_atomic_fetch_add_64( (volatile int64_t*)addr, (int64_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_ADD_64 */ default: @@ -413,13 +292,13 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) switch( length ) { #if OPAL_HAVE_ATOMIC_SUB_32 case 4: - opal_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value ); + (void) opal_atomic_fetch_sub_32( (volatile int32_t*)addr, (int32_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_SUB_32 */ #if OPAL_HAVE_ATOMIC_SUB_64 case 8: - opal_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value ); + (void) opal_atomic_fetch_sub_64( (volatile int64_t*)addr, (int64_t)value ); break; #endif /* OPAL_HAVE_ATOMIC_SUB_64 */ default: @@ -429,47 +308,77 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) } } -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 -static inline int32_t opal_atomic_add_ptr( volatile void* addr, +#define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \ + static inline type opal_atomic_ ## op ## _fetch_ ## suffix (volatile ptr_type *addr, type value) \ + { \ + return opal_atomic_fetch_ ## op ## _ ## suffix (addr, value) operation value; \ + } + +OPAL_ATOMIC_DEFINE_OP_FETCH(add, +,
int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32) +OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32) + +#if OPAL_HAVE_ATOMIC_MATH_64 +OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64) +OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64) +#endif + +static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta ) { - return opal_atomic_add_32((int32_t*) addr, (unsigned long) delta); -} +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 + return opal_atomic_fetch_add_32((int32_t*) addr, (unsigned long) delta); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 -static inline int64_t opal_atomic_add_ptr( volatile void* addr, + return opal_atomic_fetch_add_64((int64_t*) addr, (unsigned long) delta); +#else + abort (); + return 0; +#endif +} + +static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta ) { - return opal_atomic_add_64((int64_t*) addr, (unsigned long) delta); -} +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 + return opal_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta); +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 + return opal_atomic_add_fetch_64((int64_t*) addr, (unsigned long) delta); #else -static inline int32_t opal_atomic_add_ptr( volatile void* addr, + abort (); + return 0; +#endif +} + +static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta ) { +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 + return opal_atomic_fetch_sub_32((int32_t*) addr, (unsigned long) delta); +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_64 + return opal_atomic_fetch_sub_64((int64_t*) addr, (unsigned long) delta); +#else abort(); return 0; -}
#endif +} -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, +static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta ) { - return opal_atomic_sub_32((int32_t*) addr, (unsigned long) delta); -} +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 + return opal_atomic_sub_fetch_32((int32_t*) addr, (unsigned long) delta); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int64_t opal_atomic_sub_ptr( volatile void* addr, - void* delta ) -{ - return opal_atomic_sub_64((int64_t*) addr, (unsigned long) delta); -} + return opal_atomic_sub_fetch_64((int64_t*) addr, (unsigned long) delta); #else -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, - void* delta ) -{ abort(); return 0; -} #endif +} #endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */ @@ -493,21 +402,20 @@ opal_atomic_lock_init( opal_atomic_lock_t* lock, int32_t value ) static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) { - bool ret = opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), - OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED); - return (ret == 0) ? 1 : 0; + int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; + bool ret = opal_atomic_compare_exchange_strong_acq_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED); + return (ret == false) ?
1 : 0; } static inline void opal_atomic_lock(opal_atomic_lock_t *lock) { - while( !opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), - OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED) ) { - while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { - /* spin */ ; - } - } + while (opal_atomic_trylock (lock)) { + while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { + /* spin */ ; + } + } } diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 2425bbf2509..c6ef6eb9c30 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -33,7 +33,7 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1 @@ -41,7 +41,7 @@ #define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_64 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1 @@ -81,26 +81,20 @@ static inline void opal_atomic_wmb(void) #pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, 
int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) @@ -110,51 +104,45 @@ static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newva return oldval; } -static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta) { - return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) { - return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) { - return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) { - return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); + return 
__atomic_fetch_xor (addr, value, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta) { - return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) @@ -164,52 +152,55 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva return oldval; } -static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) +static inline int64_t 
opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta) { - return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) { - return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) { - return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) { - return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); + return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED); } -static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta) { - return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); + return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED); } #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __atomic_compare_exchange_n (addr, &oldval, newval, false, + return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } #elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && 
OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 /* __atomic version is not lock-free so use legacy __sync version */ -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __sync_bool_compare_and_swap (addr, oldval, newval); + opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } #endif diff --git a/opal/include/opal/sys/ia32/atomic.h b/opal/include/opal/sys/ia32/atomic.h index 35da400ef04..bb863dec14a 100644 --- a/opal/include/opal/sys/ia32/atomic.h +++ b/opal/include/opal/sys/ia32/atomic.h @@ -40,7 +40,7 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -84,15 +84,13 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, - int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgl %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "=qm" (ret), "+a" (*oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); @@ -101,8 +99,8 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 -#define opal_atomic_bool_cmpset_rel_32 
opal_atomic_bool_cmpset_32 +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 #if OPAL_GCC_INLINE_ASSEMBLY @@ -132,7 +130,7 @@ static inline int32_t opal_atomic_swap_32( volatile int32_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -141,7 +139,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret+i); + return ret; } @@ -152,7 +150,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. */ -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i) { int ret = -i; __asm__ __volatile__( @@ -161,7 +159,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret-i); + return ret; } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 34c3a689fab..bf6978aa852 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -40,7 +40,7 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 @@ -53,7 +53,7 @@ #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_MATH_64 1 @@ -144,24 +144,25 @@ void 
opal_atomic_isync(void) #define OPAL_ASM_VALUE64(x) x #endif - -static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - int32_t ret; - - __asm__ __volatile__ ( - "1: lwarx %0, 0, %2 \n\t" - " cmpw 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stwcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (ret), "=m" (*addr) - : "r" OPAL_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr) - : "cc", "memory"); + int32_t prev; + bool ret; + + __asm__ __volatile__ ( + "1: lwarx %0, 0, %2 \n\t" + " cmpw 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stwcx. %4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r" (prev), "=m" (*addr) + : "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr) + : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) @@ -195,23 +196,21 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) @@ -236,20 +235,20 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval #if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ -static inline int64_t opal_atomic_ ## type ## _64(volatile int64_t* v, int64_t val) \ +static inline int64_t opal_atomic_fetch_ ## type ## _64(volatile int64_t* v, int64_t val) \ { \ - int64_t t; \ + int64_t t, old; \ \ __asm__ __volatile__( \ - "1: ldarx %0, 0, %3 \n\t" \ - " " #instr " %0, %2, %0 \n\t" \ - " stdcx. %0, 0, %3 \n\t" \ + "1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. 
%0, 0, %4 \n\t" \ " bne- 1b \n\t" \ - : "=&r" (t), "=m" (*v) \ + : "=&r" (t), "=&r" (old), "=m" (*v) \ : "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \ : "cc"); \ \ - return t; \ + return old; \ } OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) @@ -258,23 +257,25 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) -static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int64_t ret; - - __asm__ __volatile__ ( - "1: ldarx %0, 0, %2 \n\t" - " cmpd 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stdcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) - : "cc", "memory"); + int64_t prev; + bool ret; + + __asm__ __volatile__ ( + "1: ldarx %0, 0, %2 \n\t" + " cmpd 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stdcx. %4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r" (prev), "=m" (*addr) + : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) + : "cc", "memory"); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) @@ -303,29 +304,6 @@ static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval) return ret; } -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). 
Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - bool rc; - - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); -} static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) { @@ -352,9 +330,9 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval #if OPAL_GCC_INLINE_ASSEMBLY -static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { + int64_t prev; int ret; /* @@ -369,67 +347,65 @@ static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, * is very similar to the pure 64 bit version. */ __asm__ __volatile__ ( - "ld r4,%2 \n\t" - "ld r5,%3 \n\t" - "1: ldarx r9, 0, %1 \n\t" - " cmpd 0, r9, r4 \n\t" + "ld r4,%3 \n\t" + "ld r5,%4 \n\t" + "1: ldarx %1, 0, %2 \n\t" + " cmpd 0, %1, r4 \n\t" " bne- 2f \n\t" - " stdcx. r5, 0, %1 \n\t" + " stdcx. 
r5, 0, %2 \n\t" " bne- 1b \n\t" "2: \n\t" - "xor r5,r4,r9 \n\t" + "xor r5,r4,%1 \n\t" "subfic r9,r5,0 \n\t" "adde %0,r9,r5 \n\t" - : "=&r" (ret) + : "=&r" (ret), "+r" (prev) : "r"OPAL_ASM_ADDR(addr), - "m"(oldval), "m"(newval) + "m"(*oldval), "m"(newval) : "r4", "r5", "r9", "cc", "memory"); - - return ret; + *oldval = prev; + return (bool) ret; } +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#endif /* OPAL_ASM_SUPPORT_64BIT */ + +#if OPAL_GCC_INLINE_ASSEMBLY + /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - int rc; + bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); opal_atomic_rmb(); return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* OPAL_ASM_SUPPORT_64BIT */ - - -#if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ -static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \ +static inline int32_t opal_atomic_fetch_ ## type ## _32(volatile int32_t* v, int val) \ { \ - int32_t t; \ + int32_t t, old; \ \ __asm__ __volatile__( \ - "1: lwarx %0, 0, %3 \n\t" \ - " " #instr " %0, %2, %0 
\n\t" \ - " stwcx. %0, 0, %3 \n\t" \ + "1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. %0, 0, %4 \n\t" \ " bne- 1b \n\t" \ - : "=&r" (t), "=m" (*v) \ + : "=&r" (t), "=&r" (old), "=m" (*v) \ : "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \ : "cc"); \ \ diff --git a/opal/include/opal/sys/sparcv9/atomic.h b/opal/include/opal/sys/sparcv9/atomic.h index 098cf875ce9..c79e32b1ebb 100644 --- a/opal/include/opal/sys/sparcv9/atomic.h +++ b/opal/include/opal/sys/sparcv9/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,9 +41,9 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** @@ -82,50 +85,49 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) - * - * if (*(reg(rs1)) == reg(rs2) ) - * swap reg(rd), *(reg(rs1)) - * else - * reg(rd) = *(reg(rs1)) - */ - - int32_t ret = newval; - - __asm__ __volatile__("casa [%1] " 
ASI_P ", %2, %0" - : "+r" (ret) - : "r" (addr), "r" (oldval)); - return (ret == oldval); + /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) + * + * if (*(reg(rs1)) == reg(rs2) ) + * swap reg(rd), *(reg(rs1)) + * else + * reg(rd) = *(reg(rs1)) + */ + + int32_t prev = newval; + bool ret; + + __asm__ __volatile__("casa [%1] " ASI_P ", %2, %0" + : "+r" (prev) + : "r" (addr), "r" (*oldval)); + ret = (prev == *oldval); + *oldval = prev; + return ret; } -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - bool rc; + bool rc; - rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); - opal_atomic_rmb(); + rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); + opal_atomic_rmb(); - return rc; + return rc; } -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_32(addr, oldval, newval); + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); } #if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -134,18 +136,20 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, * else * reg(rd) = *(reg(rs1)) */ - int64_t ret = newval; - - __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" - : "+r" (ret) - : "r" (addr), "r" (oldval)); - return (ret == oldval); + int64_t prev = newval; + bool ret; + + __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" + : "+r" 
(prev) + : "r" (addr), "r" (*oldval)); + ret = (prev == *oldval); + *oldval = prev; + return ret; } #else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -155,40 +159,41 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, * reg(rd) = *(reg(rs1)) * */ - long long ret = newval; + int64_t prev = newval; + bool ret; __asm__ __volatile__( "ldx %0, %%g1 \n\t" /* g1 = ret */ "ldx %2, %%g2 \n\t" /* g2 = oldval */ "casxa [%1] " ASI_P ", %%g2, %%g1 \n\t" "stx %%g1, %0 \n" - : "+m"(ret) - : "r"(addr), "m"(oldval) + : "+m"(prev) + : "r"(addr), "m"(*oldval) : "%g1", "%g2" ); - return (ret == oldval); + ret = (prev == *oldval); + *oldval = prev; + return ret; } #endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - bool rc; + bool rc; - rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); + rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); + opal_atomic_rmb(); - return rc; + return rc; } -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - opal_atomic_wmb(); - return opal_atomic_bool_cmpset_64(addr, oldval, newval); + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h 
index 0a95048079f..4a6cfbfbe06 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -53,119 +53,110 @@ static inline void opal_atomic_wmb(void) * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval);} - -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); + int32_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 + #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 -static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta) { - return __sync_add_and_fetch(addr, delta); + return __sync_fetch_and_add(addr, delta); } #define OPAL_HAVE_ATOMIC_AND_32 1 -static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) { - return __sync_and_and_fetch(addr, value); + return __sync_fetch_and_and(addr, value); } #define OPAL_HAVE_ATOMIC_OR_32 1 -static inline int32_t
opal_atomic_or_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) { - return __sync_or_and_fetch(addr, value); + return __sync_fetch_and_or(addr, value); } #define OPAL_HAVE_ATOMIC_XOR_32 1 -static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value) +static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) { - return __sync_xor_and_fetch(addr, value); + return __sync_fetch_and_xor(addr, value); } #define OPAL_HAVE_ATOMIC_SUB_32 1 -static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta) { - return __sync_sub_and_fetch(addr, delta); + return __sync_fetch_and_sub(addr, delta); } #if OPAL_ASM_SYNC_HAVE_64BIT -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - -static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval);} - +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval); + int64_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } +#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 +#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 + #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 -static inline int64_t
opal_atomic_add_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta) { - return __sync_add_and_fetch(addr, delta); + return __sync_fetch_and_add(addr, delta); } #define OPAL_HAVE_ATOMIC_AND_64 1 -static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) { - return __sync_and_and_fetch(addr, value); + return __sync_fetch_and_and(addr, value); } #define OPAL_HAVE_ATOMIC_OR_64 1 -static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) { - return __sync_or_and_fetch(addr, value); + return __sync_fetch_and_or(addr, value); } #define OPAL_HAVE_ATOMIC_XOR_64 1 -static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value) +static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) { - return __sync_xor_and_fetch(addr, value); + return __sync_fetch_and_xor(addr, value); } #define OPAL_HAVE_ATOMIC_SUB_64 1 -static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta) { - return __sync_sub_and_fetch(addr, delta); + return __sync_fetch_and_sub(addr, delta); } #endif #if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, + opal_int128_t *oldval, opal_int128_t newval) { - return __sync_bool_compare_and_swap(addr, oldval, newval); + opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; } -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define 
OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 #endif diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index b56dd939b49..9590ada0816 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -40,9 +40,9 @@ *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 /********************************************************************** * @@ -82,14 +82,13 @@ static inline void opal_atomic_isync(void) *********************************************************************/ #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgl %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "=qm" (ret), "+a" (*oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); @@ -98,19 +97,18 @@ static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 -#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 +#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 +#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 #if OPAL_GCC_INLINE_ASSEMBLY -static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgq 
%3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) + : "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr)) : "q"(newval) : "memory", "cc" ); @@ -120,13 +118,12 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, #endif /* OPAL_GCC_INLINE_ASSEMBLY */ -#define opal_atomic_bool_cmpset_acq_64 opal_atomic_bool_cmpset_64 -#define opal_atomic_bool_cmpset_rel_64 opal_atomic_bool_cmpset_64 +#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 +#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 #if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T -static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, - opal_int128_t newval) +static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval) { unsigned char ret; @@ -135,15 +132,14 @@ static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, op * at the address is returned in eax:edx. */ __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" "sete %0 \n\t" - : "=qm" (ret) - : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), - "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) - : "memory", "cc"); + : "=qm" (ret), "+a" (((int64_t *)oldval)[0]), "+d" (((int64_t *)oldval)[1]) + : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]) + : "memory", "cc"); return (bool) ret; } -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 #endif /* OPAL_GCC_INLINE_ASSEMBLY */ @@ -200,7 +196,7 @@ static inline int64_t opal_atomic_swap_64( volatile int64_t *addr, * * Atomically adds @i to @v. 
*/ -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -209,7 +205,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret+i); + return ret; } #define OPAL_HAVE_ATOMIC_ADD_64 1 @@ -221,7 +217,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) * * Atomically adds @i to @v. */ -static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) +static inline int64_t opal_atomic_fetch_add_64(volatile int64_t* v, int64_t i) { int64_t ret = i; __asm__ __volatile__( @@ -230,7 +226,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) : :"memory", "cc" ); - return (ret+i); + return ret; } #define OPAL_HAVE_ATOMIC_SUB_32 1 @@ -242,7 +238,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) * * Atomically subtracts @i from @v. */ -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) +static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i) { int ret = -i; __asm__ __volatile__( @@ -251,7 +247,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) : :"memory", "cc" ); - return (ret-i); + return ret; } #define OPAL_HAVE_ATOMIC_SUB_64 1 @@ -263,7 +259,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. 
*/ -static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) +static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t* v, int64_t i) { int64_t ret = -i; __asm__ __volatile__( @@ -272,7 +268,7 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) : :"memory", "cc" ); - return (ret-i); + return ret; } #endif /* OPAL_GCC_INLINE_ASSEMBLY */ diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 8ace1e9ad13..dc279df8347 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -1119,7 +1119,7 @@ int mca_btl_openib_add_procs( } if (nprocs_new) { - opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new); + opal_atomic_add_fetch_32 (&openib_btl->num_peers, nprocs_new); /* adjust cq sizes given the new procs */ rc = openib_btl_size_queues (openib_btl); @@ -1229,7 +1229,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul /* this is a new process to this openib btl * account this procs if need */ - opal_atomic_add_32 (&openib_btl->num_peers, 1); + opal_atomic_add_fetch_32 (&openib_btl->num_peers, 1); rc = openib_btl_size_queues(openib_btl); if (OPAL_SUCCESS != rc) { BTL_ERROR(("error creating cqs")); diff --git a/opal/mca/btl/openib/btl_openib_async.c b/opal/mca/btl/openib/btl_openib_async.c index 3662624292e..5c52f9566b1 100644 --- a/opal/mca/btl/openib/btl_openib_async.c +++ b/opal/mca/btl/openib/btl_openib_async.c @@ -237,7 +237,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg) /* Set the flag to fatal */ device->got_fatal_event = true; /* It is not critical to protect the counter */ - OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1); /* fall through */ case IBV_EVENT_CQ_ERR: case IBV_EVENT_QP_FATAL: @@ -280,7 +280,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg) openib_event_to_str((enum 
ibv_event_type)event_type)); /* Set the flag to indicate port error */ device->got_port_event = true; - OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1); break; case IBV_EVENT_COMM_EST: case IBV_EVENT_PORT_ACTIVE: @@ -470,7 +470,7 @@ void mca_btl_openib_async_fini (void) void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device) { if (mca_btl_openib_component.async_evbase) { - if (1 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, 1)) { + if (1 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, 1)) { mca_btl_openib_async_init (); } opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event, @@ -484,7 +484,7 @@ void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device) { if (mca_btl_openib_component.async_evbase) { opal_event_del (&device->async_event); - if (0 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, -1)) { + if (0 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, -1)) { mca_btl_openib_async_fini (); } } diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index f6d3643306e..a847ac7d18f 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -3203,7 +3203,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, credits = hdr->credits; if(hdr->cm_seen) - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); /* Now return fragment. Don't touch hdr after this point! 
*/ if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) { @@ -3215,7 +3215,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail); if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf)) break; - OPAL_THREAD_ADD32(&erl->credits, 1); + OPAL_THREAD_ADD_FETCH32(&erl->credits, 1); MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail); } OPAL_THREAD_UNLOCK(&erl->lock); @@ -3233,14 +3233,14 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, MCA_BTL_IB_FRAG_RETURN(frag); if (BTL_OPENIB_QP_TYPE_PP(rqp)) { if (OPAL_UNLIKELY(is_credit_msg)) { - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_received, 1); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_received, 1); } else { - OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); } mca_btl_openib_endpoint_post_rr(ep, cqp); } else { mca_btl_openib_module_t *btl = ep->endpoint_btl; - OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); mca_btl_openib_post_srr(btl, rqp); } } @@ -3251,10 +3251,10 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, /* If we got any credits (RDMA or send), then try to progress all the no_credits_pending_frags lists */ if (rcredits > 0) { - OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits); + OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits); } if (credits > 0) { - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); } if (rcredits + credits > 0) { int rc; @@ -3303,7 +3303,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, credits = hdr->credits; if(hdr->cm_seen) - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); /* We should not be here with eager, control, or 
credit messages */ assert(openib_frag_type(frag) != MCA_BTL_OPENIB_FRAG_EAGER_RDMA); @@ -3314,11 +3314,11 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, /* Otherwise, FRAG_RETURN it and repost if necessary */ MCA_BTL_IB_FRAG_RETURN(frag); if (BTL_OPENIB_QP_TYPE_PP(rqp)) { - OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); mca_btl_openib_endpoint_post_rr(ep, cqp); } else { mca_btl_openib_module_t *btl = ep->endpoint_btl; - OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); + OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); mca_btl_openib_post_srr(btl, rqp); } @@ -3327,10 +3327,10 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, /* If we got any credits (RDMA or send), then try to progress all the no_credits_pending_frags lists */ if (rcredits > 0) { - OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits); + OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits); } if (credits > 0) { - OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); + OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); } if (rcredits + credits > 0) { int rc; @@ -3523,7 +3523,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, case IBV_WC_FETCH_ADD: OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE")); - OPAL_THREAD_ADD32(&endpoint->get_tokens, 1); + OPAL_THREAD_ADD_FETCH32(&endpoint->get_tokens, 1); mca_btl_openib_get_frag_t *get_frag = to_get_frag(des); @@ -3575,7 +3575,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des)); if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) { - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n); + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n); /* new SRQ credit available. 
Try to progress pending frags*/ progress_pending_frags_srq(openib_btl, qp); @@ -3601,7 +3601,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, wc->byte_len < mca_btl_openib_component.eager_limit && openib_btl->eager_rdma_channels < mca_btl_openib_component.max_eager_rdma && - OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) == + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_recv_count, 1) == mca_btl_openib_component.eager_rdma_threshold) { mca_btl_openib_endpoint_connect_eager_rdma(endpoint); } @@ -3934,7 +3934,7 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp) if(OPAL_LIKELY(0 == rc)) { struct ibv_srq_attr srq_attr; - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post); + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post); if(true == openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag) { srq_attr.max_wr = openib_btl->qps[qp].u.srq_qp.rd_curr_num; diff --git a/opal/mca/btl/openib/btl_openib_eager_rdma.h b/opal/mca/btl/openib/btl_openib_eager_rdma.h index 0ba5a030d4c..5acb038177f 100644 --- a/opal/mca/btl/openib/btl_openib_eager_rdma.h +++ b/opal/mca/btl/openib/btl_openib_eager_rdma.h @@ -96,7 +96,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo #define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \ do { \ - (SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \ + (SEQ) = OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1; \ (OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \ } while(0) @@ -108,7 +108,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo #define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ do { \ - (OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ + (OLD_HEAD) = (OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ } while(0) #endif diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c 
b/opal/mca/btl/openib/btl_openib_endpoint.c index eaefb7e66b4..be01664b1c3 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -212,7 +212,7 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp) qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */ rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP); if (0 == rc) { - opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr); + opal_atomic_add_fetch_32 (&ep_qp->qp->sd_wqe, incr); } } else { ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe; @@ -373,11 +373,12 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) /* Release memory resources */ do { + void *_tmp_ptr = NULL; /* Make sure that mca_btl_openib_endpoint_connect_eager_rdma () * was not in "connect" or "bad" flow (failed to allocate memory) * and changed the pointer back to NULL */ - if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) { + if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) { if (NULL != endpoint->eager_rdma_local.reg) { endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache, &endpoint->eager_rdma_local.reg->base); @@ -766,9 +767,9 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint, if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) { do_rdma = true; } else { - if(OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) > + if(OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) > (mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) { - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); return; } @@ -781,7 +782,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint, if(cm_return > 255) { frag->hdr->cm_seen = 255; 
cm_return -= 255; - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); } else { frag->hdr->cm_seen = cm_return; } @@ -802,14 +803,14 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint, BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr); } BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits); - OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, credits_hdr->rdma_credits); if(do_rdma) - OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1); else - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); BTL_ERROR(("error posting send request errno %d says %s", rc, strerror(errno))); @@ -823,7 +824,7 @@ static void mca_btl_openib_endpoint_eager_rdma_connect_cb( int status) { mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; - OPAL_THREAD_ADD32(&device->non_eager_rdma_endpoints, -1); + OPAL_THREAD_ADD_FETCH32(&device->non_eager_rdma_endpoints, -1); assert(device->non_eager_rdma_endpoints >= 0); MCA_BTL_IB_FRAG_RETURN(descriptor); } @@ -894,12 +895,14 @@ void mca_btl_openib_endpoint_connect_eager_rdma( mca_btl_openib_recv_frag_t *headers_buf; int i, rc; uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS; + void *_tmp_ptr = NULL; /* Set local rdma pointer to 1 temporarily so other threads will not try * to enter the function */ - if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, - (void*)1)) + if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, + (void *) 1)) { return; + } headers_buf = (mca_btl_openib_recv_frag_t*) 
malloc(sizeof(mca_btl_openib_recv_frag_t) * @@ -975,22 +978,23 @@ void mca_btl_openib_endpoint_connect_eager_rdma( endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1; /* set local rdma pointer to real value */ - (void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, - (void*)1, buf); + endpoint->eager_rdma_local.base.pval = buf; endpoint->eager_rdma_local.alloc_base = alloc_base; if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) { mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; mca_btl_openib_endpoint_t **p; + void *_tmp_ptr; OBJ_RETAIN(endpoint); assert(((opal_object_t*)endpoint)->obj_reference_count == 2); do { + _tmp_ptr = NULL; p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count]; - } while(!opal_atomic_bool_cmpset_ptr(p, NULL, endpoint)); + } while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint)); - OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1); + OPAL_THREAD_ADD_FETCH32(&openib_btl->eager_rdma_channels, 1); /* from this point progress function starts to poll new buffer */ - OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1); + OPAL_THREAD_ADD_FETCH32(&device->eager_rdma_buffers_count, 1); return; } @@ -1001,8 +1005,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma( free(headers_buf); unlock_rdma_local: /* set local rdma pointer back to zero. 
Will retry later */ - (void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, - endpoint->eager_rdma_local.base.pval, NULL); + endpoint->eager_rdma_local.base.pval = NULL; endpoint->eager_rdma_local.frags = NULL; } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index f580476abdb..89c42c595e5 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -277,19 +277,19 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t); static inline int32_t qp_get_wqe(mca_btl_openib_endpoint_t *ep, const int qp) { - return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, -1); + return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, -1); } static inline int32_t qp_put_wqe(mca_btl_openib_endpoint_t *ep, const int qp) { - return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, 1); + return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, 1); } static inline int32_t qp_inc_inflight_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag) { frag->n_wqes_inflight = 0; - return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe_inflight, 1); + return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe_inflight, 1); } static inline void qp_inflight_wqe_to_frag(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag) @@ -303,7 +303,7 @@ static inline int qp_frag_to_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mc { int n; n = frag->n_wqes_inflight; - OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, n); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, n); frag->n_wqes_inflight = 0; return n; @@ -420,15 +420,15 @@ static inline int mca_btl_openib_endpoint_post_rr_nolock( if((rc = post_recvs(ep, qp, num_post)) != OPAL_SUCCESS) { return rc; } - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_posted, num_post); - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_credits, num_post); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_posted, num_post); + 
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_credits, num_post); /* post buffers for credit management on credit management qp */ if((rc = post_recvs(ep, cqp, cm_received)) != OPAL_SUCCESS) { return rc; } - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_return, cm_received); - OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_return, cm_received); + OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received); assert(ep->qps[qp].u.pp_qp.rd_credits <= rd_num && ep->qps[qp].u.pp_qp.rd_credits >= 0); @@ -446,14 +446,16 @@ static inline int mca_btl_openib_endpoint_post_rr( return ret; } -#define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \ - OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1) -#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ - OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0) -#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ - do { \ - TO = FROM; \ - } while(0 == OPAL_ATOMIC_BOOL_CMPSET_32(&FROM, TO, 0)) +static inline __opal_attribute_always_inline__ bool btl_openib_credits_send_trylock (mca_btl_openib_endpoint_t *ep, int qp) +{ + int32_t _tmp_value = 0; + return OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ep->qps[qp].rd_credit_send_lock, &_tmp_value, 1); +} + +#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ + OPAL_ATOMIC_SWAP_32 (&(E)->qps[(Q)].rd_credit_send_lock, 0) +#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ + TO = OPAL_ATOMIC_SWAP_32(&FROM, 0) static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep) @@ -486,7 +488,7 @@ static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp) return; try_send: - if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp)) + if(btl_openib_credits_send_trylock(ep, qp)) mca_btl_openib_endpoint_send_credits(ep, qp); } @@ -530,8 +532,8 @@ ib_send_flags(uint32_t size, mca_btl_openib_endpoint_qp_t *qp, int do_signal) static inline int acquire_eager_rdma_send_credit(mca_btl_openib_endpoint_t *endpoint) { - 
if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { - OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); + if(OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1); return OPAL_ERR_OUT_OF_RESOURCE; } @@ -636,8 +638,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en prio = !prio; if (BTL_OPENIB_QP_TYPE_PP(qp)) { - if (OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) { - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); + if (OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) { + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); if (queue_frag) { opal_list_append(&endpoint->qps[qp].no_credits_pending_frags[prio], (opal_list_item_t *)frag); @@ -646,8 +648,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en return OPAL_ERR_OUT_OF_RESOURCE; } } else { - if(OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) { - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); + if(OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) { + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); if (queue_frag) { OPAL_THREAD_LOCK(&openib_btl->ib_lock); opal_list_append(&openib_btl->qps[qp].u.srq_qp.pending_frags[prio], @@ -682,7 +684,7 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en if(cm_return > 255) { hdr->cm_seen = 255; cm_return -= 255; - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); } else { hdr->cm_seen = cm_return; } @@ -697,18 +699,18 @@ static inline void mca_btl_openib_endpoint_credit_release (struct mca_btl_base_e mca_btl_openib_header_t *hdr = frag->hdr; if (BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) { - 
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits)); + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits)); } if (do_rdma) { - OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); + OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1); } else { if(BTL_OPENIB_QP_TYPE_PP(qp)) { - OPAL_THREAD_ADD32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits); - OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); + OPAL_THREAD_ADD_FETCH32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits); + OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); } else if BTL_OPENIB_QP_TYPE_SRQ(qp){ mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl; - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); + OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); } } } diff --git a/opal/mca/btl/openib/btl_openib_get.c b/opal/mca/btl/openib/btl_openib_get.c index c8bc78105db..6dc73bc6e4c 100644 --- a/opal/mca/btl/openib/btl_openib_get.c +++ b/opal/mca/btl/openib/btl_openib_get.c @@ -148,9 +148,9 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base } /* check for a get token */ - if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) { + if (OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,-1) < 0) { qp_put_wqe(ep, qp); - OPAL_THREAD_ADD32(&ep->get_tokens,1); + OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1); return OPAL_ERR_OUT_OF_RESOURCE; } @@ -159,7 +159,7 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) { qp_put_wqe(ep, qp); - OPAL_THREAD_ADD32(&ep->get_tokens,1); + OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1); return OPAL_ERROR; } diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index b4504d502ce..74cc3af7c41 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ 
b/opal/mca/btl/portals4/btl_portals4.c @@ -423,7 +423,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, curr_proc, &btl_peer_data[i]); - OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, 1); /* and here we can reach */ opal_bitmap_set_bit(reachable, i); @@ -476,7 +476,7 @@ mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl, portals4 entry in proc_endpoints instead of the peer_data */ for (i = 0 ; i < nprocs ; ++i) { free(btl_peer_data[i]); - OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, -1); } if (0 == portals4_btl->portals_num_procs) @@ -537,7 +537,7 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base, if (frag->me_h != PTL_INVALID_HANDLE) { frag->me_h = PTL_INVALID_HANDLE; } - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag); @@ -622,7 +622,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, return NULL; } - handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1); + handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1); handle->remote_offset = 0; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, @@ -662,7 +662,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); return NULL; } OPAL_OUTPUT_VERBOSE((90, 
opal_btl_base_framework.framework_output, diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index eda9cd81f70..a56236d3e9f 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -609,7 +609,7 @@ mca_btl_portals4_component_progress(void) mca_btl_portals4_free(&portals4_btl->super, &frag->base); } if (0 != frag->size) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n", portals4_btl->portals_outstanding_ops)); @@ -646,7 +646,7 @@ mca_btl_portals4_component_progress(void) } if (0 != frag->size) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops)); } @@ -749,7 +749,7 @@ mca_btl_portals4_component_progress(void) OPAL_SUCCESS); OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); goto done; diff --git a/opal/mca/btl/portals4/btl_portals4_rdma.c b/opal/mca/btl/portals4/btl_portals4_rdma.c index 33fb9ab326e..9237b30fce2 100644 --- a/opal/mca/btl/portals4/btl_portals4_rdma.c +++ b/opal/mca/btl/portals4/btl_portals4_rdma.c @@ -53,16 +53,16 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, int ret; /* reserve space in the event queue for rdma operations immediately */ - 
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > + while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) > portals4_btl->portals_max_outstanding_ops) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n")); mca_btl_portals4_component_progress(); } OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag); if (NULL == frag){ - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); return OPAL_ERROR; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, diff --git a/opal/mca/btl/portals4/btl_portals4_send.c b/opal/mca/btl/portals4/btl_portals4_send.c index 1f50fb2ef58..218ed877803 100644 --- a/opal/mca/btl/portals4/btl_portals4_send.c +++ b/opal/mca/btl/portals4/btl_portals4_send.c @@ -49,9 +49,9 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type); /* reserve space in the event queue for rdma operations immediately */ - while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > + while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) > portals4_btl->portals_max_outstanding_ops) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (4)\n")); mca_btl_portals4_component_progress(); diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index 03d3a6a116a..561585ea4bf 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -636,7 +636,7 @@ int mca_btl_smcuda_add_procs( /* Sync with other 
local procs. Force the FIFO initialization to always * happens before the readers access it. */ - (void)opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1); + (void)opal_atomic_add_fetch_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1); while( n_local_procs > mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) { opal_progress(); @@ -976,7 +976,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl, * the return code indicates failure, the write has still "completed" from * our point of view: it has been posted to a "pending send" queue. */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); (void)rc; /* this is safe to ignore as the message is requeued till success */ @@ -1026,7 +1026,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl, * post the descriptor in the queue - post with the relative * address */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); if( OPAL_LIKELY(0 == rc) ) { @@ -1241,7 +1241,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b * the return code indicates failure, the write has still "completed" from * our point of view: it has been posted to a "pending send" queue. 
*/ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, "Sending CUDA IPC REQ (try=%d): myrank=%d, mydev=%d, peerrank=%d", endpoint->ipctries, diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index 8aedf9f1d7a..d77398a9965 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -658,7 +658,7 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl, * the return code indicates failure, the write has still "completed" from * our point of view: it has been posted to a "pending send" queue. */ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); @@ -980,7 +980,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep) if(NULL == si) return; /* Another thread got in before us. Thats ok. 
*/ - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, -1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, -1); MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data, true, false, rc); @@ -1093,7 +1093,7 @@ int mca_btl_smcuda_component_progress(void) if( btl_ownership ) { MCA_BTL_SMCUDA_FRAG_RETURN(frag); } - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, -1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, -1); if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) { btl_smcuda_process_pending_sends(endpoint); } diff --git a/opal/mca/btl/smcuda/btl_smcuda_fifo.h b/opal/mca/btl/smcuda/btl_smcuda_fifo.h index 7fcf2c1c98c..c4db00d10a8 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_fifo.h +++ b/opal/mca/btl/smcuda/btl_smcuda_fifo.h @@ -40,7 +40,7 @@ add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend) si = (btl_smcuda_pending_send_item_t*)i; si->data = data; - OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, +1); + OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1); /* if data was on pending send list then prepend it to the list to * minimize reordering */ diff --git a/opal/mca/btl/ugni/btl_ugni_add_procs.c b/opal/mca/btl/ugni/btl_ugni_add_procs.c index e96e12e6ba9..6a2fa9b81e2 100644 --- a/opal/mca/btl/ugni/btl_ugni_add_procs.c +++ b/opal/mca/btl/ugni/btl_ugni_add_procs.c @@ -272,7 +272,7 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size, rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags); if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { - opal_atomic_add_32(&ugni_module->reg_count,1); + opal_atomic_add_fetch_32(&ugni_module->reg_count,1); } return rc; @@ -286,7 +286,7 @@ ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg) rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg); if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { - 
opal_atomic_add_32(&ugni_module->reg_count,-1); + opal_atomic_add_fetch_32(&ugni_module->reg_count,-1); } return rc; diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 86eb252973d..cafcdabfc37 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -543,7 +543,7 @@ int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device) BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep)); ep->dg_posted = false; - (void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1); + (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, -1); } (void) mca_btl_ugni_ep_connect_progress (ep); diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index 04d99349322..2f792839982 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -181,7 +181,7 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec } } while (device->dev_smsg_local_cq.active_operations); - (void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1); + (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, -1); } mca_btl_ugni_device_lock (device); @@ -278,7 +278,7 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) { ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl; ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED; - (void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1); + (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, 1); /* send all pending messages */ BTL_VERBOSE(("endpoint connected. 
posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list))); @@ -312,7 +312,7 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) ep->remote_attr, sizeof (*ep->remote_attr), MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index); if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) { - (void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1); + (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1); } return mca_btl_rc_ugni_to_opal (rc); diff --git a/opal/mca/btl/ugni/btl_ugni_frag.h b/opal/mca/btl/ugni/btl_ugni_frag.h index bb8a58cbc8b..ac9c8bc6ec8 100644 --- a/opal/mca/btl/ugni/btl_ugni_frag.h +++ b/opal/mca/btl/ugni/btl_ugni_frag.h @@ -192,7 +192,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in opal_atomic_mb (); - ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1); + ref_cnt = OPAL_THREAD_ADD_FETCH32(&frag->ref_cnt, -1); if (ref_cnt) { assert (ref_cnt > 0); return false; diff --git a/opal/mca/btl/ugni/btl_ugni_smsg.c b/opal/mca/btl/ugni/btl_ugni_smsg.c index bc8858baec5..b90c95a6a9e 100644 --- a/opal/mca/btl/ugni/btl_ugni_smsg.c +++ b/opal/mca/btl/ugni/btl_ugni_smsg.c @@ -59,12 +59,13 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep) mca_btl_ugni_base_frag_t frag; mca_btl_base_segment_t seg; bool disconnect = false; + int32_t _tmp_value = 0; uintptr_t data_ptr; gni_return_t rc; uint32_t len; int count = 0; - if (!opal_atomic_bool_cmpset_32 (&ep->smsg_progressing, 0, 1)) { + if (!opal_atomic_compare_exchange_strong_32 (&ep->smsg_progressing, &_tmp_value, 1)) { /* already progressing (we can't support reentry here) */ return 0; } diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index 6f09cb6c513..3ad53b0746c 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -261,14 +261,14 @@ static inline bool mca_btl_vader_check_fboxes (void) static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t 
*ep, mca_btl_vader_hdr_t *hdr) { - if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_SIZE_T (&ep->send_count, 1))) { + if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) { /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size && mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) { /* verify the remote side will accept another fbox */ - if (0 <= opal_atomic_add_32 (&ep->fifo->fbox_available, -1)) { + if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size; @@ -280,7 +280,7 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer); ++mca_btl_vader_component.fbox_count; } else { - opal_atomic_add_32 (&ep->fifo->fbox_available, 1); + opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1); } opal_atomic_wmb (); diff --git a/opal/mca/btl/vader/btl_vader_fifo.h b/opal/mca/btl/vader/btl_vader_fifo.h index 8304841cf84..0dc70bc8a13 100644 --- a/opal/mca/btl/vader/btl_vader_fifo.h +++ b/opal/mca/btl/vader/btl_vader_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2014 Los Alamos National Security, LLC. + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. * All rights reserved. 
* $COPYRIGHT$ * @@ -30,8 +30,9 @@ #include "btl_vader_endpoint.h" #include "btl_vader_frag.h" +#define vader_item_compare_exchange(x, y, z) opal_atomic_compare_exchange_strong_ptr ((volatile void **) (x), (void **) (y), (void *) (z)) + #if SIZEOF_VOID_P == 8 - #define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z)) #define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y)) #define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll @@ -40,7 +41,6 @@ typedef int64_t fifo_value_t; #else - #define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z)) #define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y)) #define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl @@ -138,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) { opal_atomic_rmb(); - if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) { + if (!vader_item_compare_exchange (&fifo->fifo_tail, &value, VADER_FIFO_FREE)) { while (VADER_FIFO_FREE == hdr->next) { opal_atomic_rmb (); } diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index f635b2c6cdf..00275df48cb 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -54,7 +54,7 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx) vader_ctx->reg[0] = reg; if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) { - (void)opal_atomic_add (®->ref_count, 1); + opal_atomic_add (®->ref_count, 1); return 1; } @@ -93,7 +93,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo /* start the new segment from the lower of the two bases */ base = (uintptr_t) reg->base < base ? 
(uintptr_t) reg->base : base; - if (OPAL_LIKELY(0 == opal_atomic_add_32 (®->ref_count, -1))) { + if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (®->ref_count, -1))) { /* this pointer is not in use */ (void) xpmem_detach (reg->rcache_context); OBJ_RELEASE(reg); @@ -143,7 +143,7 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_ mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; int32_t ref_count; - ref_count = opal_atomic_add_32 (®->ref_count, -1); + ref_count = opal_atomic_add_fetch_32 (®->ref_count, -1); if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { /* protect rcache access */ mca_rcache_base_vma_delete (vma_module, reg); diff --git a/opal/mca/common/sm/common_sm.c b/opal/mca/common/sm/common_sm.c index 52fc4092dea..c6e2a0fdaf8 100644 --- a/opal/mca/common/sm/common_sm.c +++ b/opal/mca/common/sm/common_sm.c @@ -131,7 +131,7 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp, } /* increment the number of processes that are attached to the segment. 
*/ - (void)opal_atomic_add_size_t(&map->module_seg->seg_num_procs_inited, 1); + (void)opal_atomic_add_fetch_size_t(&map->module_seg->seg_num_procs_inited, 1); /* commit the changes before we return */ opal_atomic_wmb(); diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_module.c b/opal/mca/mpool/hugepage/mpool_hugepage_module.c index 5a1f1fa1fc1..89a8b7eb6d3 100644 --- a/opal/mca/mpool/hugepage/mpool_hugepage_module.c +++ b/opal/mca/mpool/hugepage/mpool_hugepage_module.c @@ -131,7 +131,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep) if (huge_page->path) { int32_t count; - count = opal_atomic_add_32 (&huge_page->count, 1); + count = opal_atomic_add_fetch_32 (&huge_page->count, 1); rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path, getpid (), count); diff --git a/opal/mca/rcache/grdma/rcache_grdma_module.c b/opal/mca/rcache/grdma/rcache_grdma_module.c index 327c2845a02..d2646c3c4a8 100644 --- a/opal/mca/rcache/grdma/rcache_grdma_module.c +++ b/opal/mca/rcache/grdma/rcache_grdma_module.c @@ -232,7 +232,7 @@ static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_ /* This segment fits fully within an existing segment. */ rcache_grdma->stat_cache_hit++; - int32_t ref_cnt = opal_atomic_add_32 (&grdma_reg->ref_count, 1); + int32_t ref_cnt = opal_atomic_add_fetch_32 (&grdma_reg->ref_count, 1); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output, "returning existing registration %p. 
references %d", (void *) grdma_reg, ref_cnt)); (void)ref_cnt; @@ -296,7 +296,7 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad /* get updated access flags */ access_flags = find_args.access_flags; - OPAL_THREAD_ADD32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1); + OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1); } item = opal_free_list_get_mt (&rcache_grdma->reg_list); @@ -380,7 +380,7 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr, (opal_list_item_t*)(*reg)); } rcache_grdma->stat_cache_found++; - opal_atomic_add_32 (&(*reg)->ref_count, 1); + opal_atomic_add_fetch_32 (&(*reg)->ref_count, 1); } else { rcache_grdma->stat_cache_notfound++; } @@ -398,7 +398,7 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache, int rc; opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock); - ref_count = opal_atomic_add_32 (®->ref_count, -1); + ref_count = opal_atomic_add_fetch_32 (®->ref_count, -1); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output, "returning registration %p, remaining references %d", (void *) reg, ref_count)); diff --git a/opal/mca/rcache/udreg/rcache_udreg_module.c b/opal/mca/rcache/udreg/rcache_udreg_module.c index 51c24676522..d3045247f17 100644 --- a/opal/mca/rcache/udreg/rcache_udreg_module.c +++ b/opal/mca/rcache/udreg/rcache_udreg_module.c @@ -301,7 +301,7 @@ static int mca_rcache_udreg_register(mca_rcache_base_module_t *rcache, void *add OPAL_THREAD_UNLOCK(&rcache_udreg->lock); *reg = udreg_reg; - (void) OPAL_THREAD_ADD32(&udreg_reg->ref_count, 1); + (void) OPAL_THREAD_ADD_FETCH32(&udreg_reg->ref_count, 1); udreg_reg->rcache_context = udreg_entry; return OPAL_SUCCESS; @@ -318,7 +318,7 @@ static int mca_rcache_udreg_deregister(mca_rcache_base_module_t *rcache, mca_rcache_base_registration_t *reg) { mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) rcache; - 
int32_t ref_count = OPAL_THREAD_ADD32 (®->ref_count, -1); + int32_t ref_count = OPAL_THREAD_ADD_FETCH32 (®->ref_count, -1); assert(ref_count >= 0); diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index ef9a73d3ceb..27e32c5fdd2 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. @@ -159,31 +159,32 @@ static const uint32_t ProcInc = 0x2; #define OPAL_CR_LOCK() \ { \ opal_cr_thread_in_library = true; \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, ProcInc); \ + OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, ProcInc); \ while( (opal_cr_thread_num_in_library & ThreadFlag ) != 0 ) { \ sched_yield(); \ } \ } #define OPAL_CR_UNLOCK() \ { \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ProcInc); \ + OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ProcInc); \ if( opal_cr_thread_num_in_library <= 0 ) { \ opal_cr_thread_in_library = false; \ } \ } -#define OPAL_CR_THREAD_LOCK() \ - { \ - while(!OPAL_ATOMIC_BOOL_CMPSET_32(&opal_cr_thread_num_in_library, 0, ThreadFlag)) { \ - if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ - break; \ - } \ - sched_yield(); \ - usleep(opal_cr_thread_sleep_check); \ - } \ +#define OPAL_CR_THREAD_LOCK() \ + { \ + int32_t _tmp_value = 0; \ + while(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&opal_cr_thread_num_in_library, &_tmp_value, ThreadFlag)) { \ + if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ + break; \ + } \ + sched_yield(); \ + usleep(opal_cr_thread_sleep_check); \ + } \ } #define OPAL_CR_THREAD_UNLOCK() \ { \ - 
OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ThreadFlag); \ + OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ThreadFlag); \ } #endif diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index 30ddcc6ac9a..541b8b8ab9d 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -207,7 +207,7 @@ opal_progress(void) #else /* OPAL_PROGRESS_USE_TIMERS */ /* trip the event library if we've reached our tick rate and we are enabled */ - if (OPAL_THREAD_ADD32(&event_progress_counter, -1) <= 0 ) { + if (OPAL_THREAD_ADD_FETCH32(&event_progress_counter, -1) <= 0 ) { event_progress_counter = (num_event_users > 0) ? 0 : event_progress_delta; events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag); @@ -222,7 +222,7 @@ opal_progress(void) events += (callbacks[i])(); } - if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD32((volatile int32_t *) &num_calls, 1) & 0x7) == 0) { + if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &num_calls, 1) & 0x7) == 0) { /* run low priority callbacks once every 8 calls to opal_progress() */ for (i = 0 ; i < callbacks_lp_len ; ++i) { events += (callbacks_lp[i])(); @@ -259,11 +259,11 @@ opal_progress_event_users_increment(void) { #if OPAL_ENABLE_DEBUG int32_t val; - val = opal_atomic_add_32(&num_event_users, 1); + val = opal_atomic_add_fetch_32(&num_event_users, 1); OPAL_OUTPUT((debug_output, "progress: event_users_increment setting count to %d", val)); #else - (void)opal_atomic_add_32(&num_event_users, 1); + (void)opal_atomic_add_fetch_32(&num_event_users, 1); #endif #if OPAL_PROGRESS_USE_TIMERS @@ -281,11 +281,11 @@ opal_progress_event_users_decrement(void) { #if OPAL_ENABLE_DEBUG || ! 
OPAL_PROGRESS_USE_TIMERS int32_t val; - val = opal_atomic_sub_32(&num_event_users, 1); + val = opal_atomic_sub_fetch_32(&num_event_users, 1); OPAL_OUTPUT((debug_output, "progress: event_users_decrement setting count to %d", val)); #else - (void)opal_atomic_sub_32(&num_event_users, 1); + (void)opal_atomic_sub_fetch_32(&num_event_users, 1); #endif #if !OPAL_PROGRESS_USE_TIMERS diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 6976d0b555f..85492d5f891 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -93,68 +93,42 @@ static inline bool opal_set_using_threads(bool have) * indicates that threads are in use by the application or library. 
*/ -#define OPAL_THREAD_DEFINE_ATOMIC_ADD(type, suffix) \ -static inline type opal_thread_add_ ## suffix (volatile type *addr, type delta) \ +#define OPAL_THREAD_DEFINE_ATOMIC_OP(type, name, operator, suffix) \ +static inline type opal_thread_ ## name ## _fetch_ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_add_ ## suffix (addr, delta); \ + return opal_atomic_ ## name ## _fetch_ ## suffix (addr, delta); \ } \ \ - return (*addr += delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_AND(type, suffix) \ -static inline type opal_thread_and_ ## suffix (volatile type *addr, type delta) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_and_ ## suffix (addr, delta); \ - } \ - \ - return (*addr &= delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_OR(type, suffix) \ -static inline type opal_thread_or_ ## suffix (volatile type *addr, type delta) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_or_ ## suffix (addr, delta); \ - } \ - \ - return (*addr |= delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_XOR(type, suffix) \ -static inline type opal_thread_xor_ ## suffix (volatile type *addr, type delta) \ -{ \ - if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_xor_ ## suffix (addr, delta); \ - } \ + *addr = *addr operator delta; \ + return *addr; \ +} \ \ - return (*addr ^= delta); \ -} - -#define OPAL_THREAD_DEFINE_ATOMIC_SUB(type, suffix) \ -static inline type opal_thread_sub_ ## suffix (volatile type *addr, type delta) \ +static inline type opal_thread_fetch_ ## name ## _ ## suffix (volatile type *addr, type delta) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_sub_ ## suffix (addr, delta); \ + return opal_atomic_fetch_ ## name ## _ ## suffix (addr, delta); \ } \ \ - return (*addr -= delta); \ + type old = *addr; \ + *addr = old operator delta; \ + return old; \ } -#define OPAL_THREAD_DEFINE_ATOMIC_CMPSET(type, 
addr_type, suffix) \ -static inline bool opal_thread_cmpset_bool_ ## suffix (volatile addr_type *addr, type compare, type value) \ +#define OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \ +static inline bool opal_thread_compare_exchange_strong_ ## suffix (volatile addr_type *addr, type *compare, type value) \ { \ if (OPAL_UNLIKELY(opal_using_threads())) { \ - return opal_atomic_bool_cmpset_ ## suffix ((volatile type *) addr, compare, value); \ + return opal_atomic_compare_exchange_strong_ ## suffix ((volatile type *) addr, compare, value); \ } \ \ - if ((type) *addr == compare) { \ + if ((type) *addr == *compare) { \ ((type *) addr)[0] = value; \ return true; \ } \ \ + *compare = ((type *) addr)[0]; \ + \ return false; \ } @@ -171,41 +145,60 @@ static inline type opal_thread_swap_ ## suffix (volatile addr_type *ptr, type ne return old; \ } -OPAL_THREAD_DEFINE_ATOMIC_ADD(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_ADD(size_t, size_t) -OPAL_THREAD_DEFINE_ATOMIC_AND(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_OR(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_XOR(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_SUB(int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_SUB(size_t, size_t) -OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int32_t, int32_t, 32) -OPAL_THREAD_DEFINE_ATOMIC_CMPSET(void *, intptr_t, ptr) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, add, +, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(size_t, add, +, size_t) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, and, &, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, or, |, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, xor, ^, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, sub, -, 32) +OPAL_THREAD_DEFINE_ATOMIC_OP(size_t, sub, -, size_t) + +OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int32_t, int32_t, 32) +OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(void *, intptr_t, ptr) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32) OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) -#define OPAL_THREAD_ADD32 opal_thread_add_32 -#define OPAL_ATOMIC_ADD32 opal_thread_add_32 
+#define OPAL_THREAD_ADD_FETCH32 opal_thread_add_fetch_32 +#define OPAL_ATOMIC_ADD_FETCH32 opal_thread_add_fetch_32 + +#define OPAL_THREAD_AND_FETCH32 opal_thread_and_fetch_32 +#define OPAL_ATOMIC_AND_FETCH32 opal_thread_and_fetch_32 + +#define OPAL_THREAD_OR_FETCH32 opal_thread_or_fetch_32 +#define OPAL_ATOMIC_OR_FETCH32 opal_thread_or_fetch_32 -#define OPAL_THREAD_AND32 opal_thread_and_32 -#define OPAL_ATOMIC_AND32 opal_thread_and_32 +#define OPAL_THREAD_XOR_FETCH32 opal_thread_xor_fetch_32 +#define OPAL_ATOMIC_XOR_FETCH32 opal_thread_xor_fetch_32 -#define OPAL_THREAD_OR32 opal_thread_or_32 -#define OPAL_ATOMIC_OR32 opal_thread_or_32 +#define OPAL_THREAD_ADD_FETCH_SIZE_T opal_thread_add_fetch_size_t +#define OPAL_ATOMIC_ADD_FETCH_SIZE_T opal_thread_add_fetch_size_t -#define OPAL_THREAD_XOR32 opal_thread_xor_32 -#define OPAL_ATOMIC_XOR32 opal_thread_xor_32 +#define OPAL_THREAD_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t +#define OPAL_ATOMIC_SUB_FETCH_SIZE_T opal_thread_sub_fetch_size_t -#define OPAL_THREAD_ADD_SIZE_T opal_thread_add_size_t -#define OPAL_ATOMIC_ADD_SIZE_T opal_thread_add_size_t +#define OPAL_THREAD_FETCH_ADD32 opal_thread_fetch_add_32 +#define OPAL_ATOMIC_FETCH_ADD32 opal_thread_fetch_add_32 -#define OPAL_THREAD_SUB_SIZE_T opal_thread_sub_size_t -#define OPAL_ATOMIC_SUB_SIZE_T opal_thread_sub_size_t +#define OPAL_THREAD_FETCH_AND32 opal_thread_fetch_and_32 +#define OPAL_ATOMIC_FETCH_AND32 opal_thread_fetch_and_32 -#define OPAL_THREAD_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 -#define OPAL_ATOMIC_BOOL_CMPSET_32 opal_thread_cmpset_bool_32 +#define OPAL_THREAD_FETCH_OR32 opal_thread_fetch_or_32 +#define OPAL_ATOMIC_FETCH_OR32 opal_thread_fetch_or_32 -#define OPAL_THREAD_BOOL_CMPSET_PTR(x, y, z) opal_thread_cmpset_bool_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) -#define OPAL_ATOMIC_BOOL_CMPSET_PTR OPAL_THREAD_BOOL_CMPSET_PTR +#define OPAL_THREAD_FETCH_XOR32 opal_thread_fetch_xor_32 +#define OPAL_ATOMIC_FETCH_XOR32 opal_thread_fetch_xor_32 
+ +#define OPAL_THREAD_FETCH_ADD_SIZE_T opal_thread_fetch_add_size_t +#define OPAL_ATOMIC_FETCH_ADD_SIZE_T opal_thread_fetch_add_size_t + +#define OPAL_THREAD_FETCH_SUB_SIZE_T opal_thread_fetch_sub_size_t +#define OPAL_ATOMIC_FETCH_SUB_SIZE_T opal_thread_fetch_sub_size_t + +#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 +#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 opal_thread_compare_exchange_strong_32 + +#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR(x, y, z) opal_thread_compare_exchange_strong_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) +#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR OPAL_THREAD_COMPARE_EXCHANGE_STRONG_PTR #define OPAL_THREAD_SWAP_32 opal_thread_swap_32 #define OPAL_ATOMIC_SWAP_32 opal_thread_swap_32 @@ -216,27 +209,40 @@ OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) /* define 64-bit macros is 64-bit atomic math is available */ #if OPAL_HAVE_ATOMIC_MATH_64 -OPAL_THREAD_DEFINE_ATOMIC_ADD(int64_t, 64) -OPAL_THREAD_DEFINE_ATOMIC_AND(int64_t, 64) -OPAL_THREAD_DEFINE_ATOMIC_OR(int64_t, 64) -OPAL_THREAD_DEFINE_ATOMIC_XOR(int64_t, 64) -OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int64_t, int64_t, 64) +OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, add, +, 64) +OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, and, &, 64) +OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, or, |, 64) +OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, xor, ^, 64) +OPAL_THREAD_DEFINE_ATOMIC_OP(int64_t, sub, -, 64) +OPAL_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(int64_t, int64_t, 64) OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) -#define OPAL_THREAD_ADD64 opal_thread_add_64 -#define OPAL_ATOMIC_ADD64 opal_thread_add_64 +#define OPAL_THREAD_ADD_FETCH64 opal_thread_add_fetch_64 +#define OPAL_ATOMIC_ADD_FETCH64 opal_thread_add_fetch_64 + +#define OPAL_THREAD_AND_FETCH64 opal_thread_and_fetch_64 +#define OPAL_ATOMIC_AND_FETCH64 opal_thread_and_fetch_64 + +#define OPAL_THREAD_OR_FETCH64 opal_thread_or_fetch_64 +#define OPAL_ATOMIC_OR_FETCH64 opal_thread_or_fetch_64 + 
+#define OPAL_THREAD_XOR_FETCH64 opal_thread_xor_fetch_64 +#define OPAL_ATOMIC_XOR_FETCH64 opal_thread_xor_fetch_64 + +#define OPAL_THREAD_FETCH_ADD64 opal_thread_fetch_add_64 +#define OPAL_ATOMIC_FETCH_ADD64 opal_thread_fetch_add_64 -#define OPAL_THREAD_AND64 opal_thread_and_64 -#define OPAL_ATOMIC_AND64 opal_thread_and_64 +#define OPAL_THREAD_FETCH_AND64 opal_thread_fetch_and_64 +#define OPAL_ATOMIC_FETCH_AND64 opal_thread_fetch_and_64 -#define OPAL_THREAD_OR64 opal_thread_or_64 -#define OPAL_ATOMIC_OR64 opal_thread_or_64 +#define OPAL_THREAD_FETCH_OR64 opal_thread_fetch_or_64 +#define OPAL_ATOMIC_FETCH_OR64 opal_thread_fetch_or_64 -#define OPAL_THREAD_XOR64 opal_thread_xor_64 -#define OPAL_ATOMIC_XOR64 opal_thread_xor_64 +#define OPAL_THREAD_FETCH_XOR64 opal_thread_fetch_xor_64 +#define OPAL_ATOMIC_FETCH_XOR64 opal_thread_fetch_xor_64 -#define OPAL_THREAD_BOOL_CMPSET_64 opal_thread_cmpset_bool_64 -#define OPAL_ATOMIC_BOOL_CMPSET_64 opal_thread_cmpset_bool_64 +#define OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 +#define OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_64 opal_thread_compare_exchange_strong_64 #define OPAL_THREAD_SWAP_64 opal_thread_swap_64 #define OPAL_ATOMIC_SWAP_64 opal_thread_swap_64 diff --git a/opal/threads/wait_sync.h b/opal/threads/wait_sync.h index 9a582884373..1e594670354 100644 --- a/opal/threads/wait_sync.h +++ b/opal/threads/wait_sync.h @@ -109,7 +109,7 @@ static inline int sync_wait_st (ompi_wait_sync_t *sync) static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int status) { if( OPAL_LIKELY(OPAL_SUCCESS == status) ) { - if( 0 != (OPAL_THREAD_ADD32(&sync->count, -updates)) ) { + if( 0 != (OPAL_THREAD_ADD_FETCH32(&sync->count, -updates)) ) { return; } } else { diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index d90099caf46..c1500edeebb 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -673,7 +673,7 @@ static inline 
void get_completion_cb(void *ctx) { mca_spml_ikrit_get_request_t *get_req = (mca_spml_ikrit_get_request_t *) ctx; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_gets, -1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_gets, -1); free_get_req(get_req); } @@ -701,7 +701,7 @@ static inline int mca_spml_ikrit_get_async(void *src_addr, get_req->mxm_req.flags = 0; get_req->mxm_req.base.completed_cb = get_completion_cb; get_req->mxm_req.base.context = get_req; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_gets, 1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_gets, 1); SPML_IKRIT_MXM_POST_SEND(get_req->mxm_req); @@ -713,7 +713,7 @@ static inline void fence_completion_cb(void *ctx) mca_spml_ikrit_get_request_t *fence_req = (mca_spml_ikrit_get_request_t *) ctx; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_mxm_fences, -1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_mxm_fences, -1); free_get_req(fence_req); } @@ -735,7 +735,7 @@ static int mca_spml_ikrit_mxm_fence(int dst) fence_req->mxm_req.base.state = MXM_REQ_NEW; fence_req->mxm_req.base.completed_cb = fence_completion_cb; fence_req->mxm_req.base.context = fence_req; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_mxm_fences, 1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_mxm_fences, 1); SPML_IKRIT_MXM_POST_SEND(fence_req->mxm_req); return OSHMEM_SUCCESS; @@ -746,7 +746,7 @@ static inline void put_completion_cb(void *ctx) mca_spml_ikrit_put_request_t *put_req = (mca_spml_ikrit_put_request_t *) ctx; mxm_peer_t *peer; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, -1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_puts, -1); /* TODO: keep pointer to peer in the request */ peer = &mca_spml_ikrit.mxm_peers[put_req->pe]; @@ -848,7 +848,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, put_req->mxm_req.op.mem.remote_mkey = mkey; - OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, 1); + OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_puts, 1); if (mca_spml_ikrit.mxm_peers[dst].need_fence == 0) { 
opal_list_append(&mca_spml_ikrit.active_peers, &mca_spml_ikrit.mxm_peers[dst].link); diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c index 6f8601da753..b05774278ab 100644 --- a/oshmem/runtime/oshmem_shmem_finalize.c +++ b/oshmem/runtime/oshmem_shmem_finalize.c @@ -64,8 +64,9 @@ int oshmem_shmem_finalize(void) { int ret = OSHMEM_SUCCESS; static int32_t finalize_has_already_started = 0; + int32_t _tmp = 0; - if (opal_atomic_bool_cmpset_32(&finalize_has_already_started, 0, 1) + if (opal_atomic_compare_exchange_strong_32 (&finalize_has_already_started, &_tmp, 1) && oshmem_shmem_initialized && !oshmem_shmem_aborted) { /* Should be called first because ompi_mpi_finalize makes orte and opal finalization */ ret = _shmem_finalize(); diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c index 3e467ff7412..4a06847703f 100644 --- a/test/asm/atomic_cmpset.c +++ b/test/asm/atomic_cmpset.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +57,13 @@ int64_t old64 = 0; int64_t new64 = 0; #endif +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +volatile opal_int128_t vol128; +opal_int128_t val128; +opal_int128_t old128; +opal_int128_t new128; +#endif + volatile int volint = 0; int valint = 0; int oldint = 0; @@ -72,11 +82,11 @@ static void *thread_main(void *arg) /* thread tests */ for (i = 0; i < nreps; i++) { - opal_atomic_add_32(&val32, 5); + opal_atomic_add_fetch_32(&val32, 5); #if OPAL_HAVE_ATOMIC_MATH_64 - opal_atomic_add_64(&val64, 5); + opal_atomic_add_fetch_64(&val64, 5); #endif - opal_atomic_add(&valint, 5); + opal_atomic_add (&valint, 5); } return (void *) (unsigned long) (rank + 1000); @@ -99,143 +109,184 @@ int main(int argc, char *argv[]) /* -- cmpset 32-bit tests -- */ vol32 = 42, old32 = 42, new32 = 50; - assert(opal_atomic_bool_cmpset_32(&vol32, old32, new32) == 1); + assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == true); opal_atomic_rmb(); assert(vol32 == new32); + assert(old32 == 42); vol32 = 42, old32 = 420, new32 = 50; - assert(opal_atomic_bool_cmpset_32(&vol32, old32, new32) == 0); + assert(opal_atomic_compare_exchange_strong_32 (&vol32, &old32, new32) == false); opal_atomic_rmb(); assert(vol32 == 42); + assert(old32 == 42); vol32 = 42, old32 = 42, new32 = 50; - assert(opal_atomic_bool_cmpset_acq_32(&vol32, old32, new32) == 1); + assert(opal_atomic_compare_exchange_strong_acq_32 (&vol32, &old32, new32) == true); assert(vol32 == new32); + assert(old32 == 42); vol32 = 42, old32 = 420, new32 = 50; - assert(opal_atomic_bool_cmpset_acq_32(&vol32, old32, new32) == 0); + assert(opal_atomic_compare_exchange_strong_acq_32 (&vol32, &old32, new32) == false); assert(vol32 == 42); + assert(old32 == 42); vol32 = 42, old32 = 42, new32 = 50; - assert(opal_atomic_bool_cmpset_rel_32(&vol32, old32, new32) == 1); + assert(opal_atomic_compare_exchange_strong_rel_32 (&vol32, &old32, new32) == true); opal_atomic_rmb();
assert(vol32 == new32); + assert(old32 == 42); vol32 = 42, old32 = 420, new32 = 50; - assert(opal_atomic_bool_cmpset_rel_32(&vol32, old32, new32) == 0); + assert(opal_atomic_compare_exchange_strong_rel_32 (&vol32, &old32, new32) == false); opal_atomic_rmb(); assert(vol32 == 42); + assert(old32 == 42); /* -- cmpset 64-bit tests -- */ #if OPAL_HAVE_ATOMIC_MATH_64 vol64 = 42, old64 = 42, new64 = 50; - assert(1 == opal_atomic_bool_cmpset_64(&vol64, old64, new64)); + assert(opal_atomic_compare_exchange_strong_64 (&vol64, &old64, new64) == true); opal_atomic_rmb(); assert(new64 == vol64); + assert(old64 == 42); vol64 = 42, old64 = 420, new64 = 50; - assert(opal_atomic_bool_cmpset_64(&vol64, old64, new64) == 0); + assert(opal_atomic_compare_exchange_strong_64 (&vol64, &old64, new64) == false); opal_atomic_rmb(); assert(vol64 == 42); + assert(old64 == 42); vol64 = 42, old64 = 42, new64 = 50; - assert(opal_atomic_bool_cmpset_acq_64(&vol64, old64, new64) == 1); + assert(opal_atomic_compare_exchange_strong_acq_64 (&vol64, &old64, new64) == true); assert(vol64 == new64); + assert(old64 == 42); vol64 = 42, old64 = 420, new64 = 50; - assert(opal_atomic_bool_cmpset_acq_64(&vol64, old64, new64) == 0); + assert(opal_atomic_compare_exchange_strong_acq_64 (&vol64, &old64, new64) == false); assert(vol64 == 42); + assert(old64 == 42); vol64 = 42, old64 = 42, new64 = 50; - assert(opal_atomic_bool_cmpset_rel_64(&vol64, old64, new64) == 1); + assert(opal_atomic_compare_exchange_strong_rel_64 (&vol64, &old64, new64) == true); opal_atomic_rmb(); assert(vol64 == new64); + assert(old64 == 42); vol64 = 42, old64 = 420, new64 = 50; - assert(opal_atomic_bool_cmpset_rel_64(&vol64, old64, new64) == 0); + assert(opal_atomic_compare_exchange_strong_rel_64 (&vol64, &old64, new64) == false); opal_atomic_rmb(); assert(vol64 == 42); + assert(old64 == 42); #endif + + /* -- cmpset 128-bit tests -- */ + +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 + vol128 = 42, old128 = 42, new128 = 50; + 
assert(opal_atomic_compare_exchange_strong_128 (&vol128, &old128, new128) == true); + opal_atomic_rmb(); + assert(new128 == vol128); + assert(old128 == 42); + + vol128 = 42, old128 = 420, new128 = 50; + assert(opal_atomic_compare_exchange_strong_128 (&vol128, &old128, new128) == false); + opal_atomic_rmb(); + assert(vol128 == 42); + assert(old128 == 42); +#endif + /* -- cmpset int tests -- */ volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_bool_cmpset(&volint, oldint, newint) == 1); + assert(opal_atomic_compare_exchange_strong (&volint, &oldint, newint) == true); opal_atomic_rmb(); - assert(volint ==newint); + assert(volint == newint); + assert(oldint == 42); volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_bool_cmpset(&volint, oldint, newint) == 0); + assert(opal_atomic_compare_exchange_strong (&volint, &oldint, newint) == false); opal_atomic_rmb(); assert(volint == 42); + assert(oldint == 42); volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_bool_cmpset_acq(&volint, oldint, newint) == 1); + assert(opal_atomic_compare_exchange_strong_acq (&volint, &oldint, newint) == true); assert(volint == newint); + assert(oldint == 42); volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_bool_cmpset_acq(&volint, oldint, newint) == 0); + assert(opal_atomic_compare_exchange_strong_acq (&volint, &oldint, newint) == false); assert(volint == 42); + assert(oldint == 42); volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_bool_cmpset_rel(&volint, oldint, newint) == 1); + assert(opal_atomic_compare_exchange_strong_rel (&volint, &oldint, newint) == true); opal_atomic_rmb(); assert(volint == newint); + assert(oldint == 42); volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_bool_cmpset_rel(&volint, oldint, newint) == 0); + assert(opal_atomic_compare_exchange_strong_rel (&volint, &oldint, newint) == false); opal_atomic_rmb(); assert(volint == 42); + assert(oldint == 42); /* -- cmpset ptr tests -- */ volptr = (void *) 42, 
oldptr = (void *) 42, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_ptr(&volptr, oldptr, newptr) == 1); + assert(opal_atomic_compare_exchange_strong_ptr (&volptr, &oldptr, newptr) == true); opal_atomic_rmb(); assert(volptr == newptr); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_ptr(&volptr, oldptr, newptr) == 0); + assert(opal_atomic_compare_exchange_strong_ptr (&volptr, &oldptr, newptr) == false); opal_atomic_rmb(); assert(volptr == (void *) 42); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_acq_ptr(&volptr, oldptr, newptr) == 1); + assert(opal_atomic_compare_exchange_strong_acq_ptr (&volptr, &oldptr, newptr) == true); assert(volptr == newptr); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_acq_ptr(&volptr, oldptr, newptr) == 0); + assert(opal_atomic_compare_exchange_strong_acq_ptr (&volptr, &oldptr, newptr) == false); assert(volptr == (void *) 42); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_rel_ptr(&volptr, oldptr, newptr) == 1); + assert(opal_atomic_compare_exchange_strong_rel_ptr (&volptr, &oldptr, newptr) == true); opal_atomic_rmb(); assert(volptr == newptr); + assert(oldptr == (void *) 42); volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; - assert(opal_atomic_bool_cmpset_rel_ptr(&volptr, oldptr, newptr) == 0); + assert(opal_atomic_compare_exchange_strong_rel_ptr (&volptr, &oldptr, newptr) == false); opal_atomic_rmb(); assert(volptr == (void *) 42); + assert(oldptr == (void *) 42); /* -- add_32 tests -- */ val32 = 42; - assert(opal_atomic_add_32(&val32, 5) == (42 + 5)); + assert(opal_atomic_add_fetch_32(&val32, 5) == (42 + 5)); opal_atomic_rmb(); assert((42 + 5) == val32); /* -- add_64 
tests -- */ #if OPAL_HAVE_ATOMIC_MATH_64 val64 = 42; - assert(opal_atomic_add_64(&val64, 5) == (42 + 5)); + assert(opal_atomic_add_fetch_64(&val64, 5) == (42 + 5)); opal_atomic_rmb(); assert((42 + 5) == val64); #endif /* -- add_int tests -- */ valint = 42; - opal_atomic_add(&valint, 5); + opal_atomic_add (&valint, 5); opal_atomic_rmb(); assert((42 + 5) == valint); diff --git a/test/asm/atomic_math.c b/test/asm/atomic_math.c index f94299e8185..54f771cc26b 100644 --- a/test/asm/atomic_math.c +++ b/test/asm/atomic_math.c @@ -44,11 +44,11 @@ static void* atomic_math_test(void* arg) int i; for (i = 0 ; i < count ; ++i) { - (void)opal_atomic_add_32(&val32, 5); + (void)opal_atomic_add_fetch_32(&val32, 5); #if OPAL_HAVE_ATOMIC_MATH_64 - (void)opal_atomic_add_64(&val64, 6); + (void)opal_atomic_add_fetch_64(&val64, 6); #endif - (void)opal_atomic_add(&valint, 4); + opal_atomic_add (&valint, 4); } return NULL; @@ -100,6 +100,10 @@ atomic_math_test_th(int count, int thr_count) int main(int argc, char *argv[]) { + int32_t test32; +#if OPAL_HAVE_ATOMIC_MATH_64 + int64_t test64; +#endif int ret = 77; int num_threads = 1; @@ -109,11 +113,147 @@ main(int argc, char *argv[]) } num_threads = atoi(argv[1]); + test32 = opal_atomic_add_fetch_32 (&val32, 17); + if (test32 != 17 || val32 != 17) { + fprintf (stderr, "error in opal_atomic_add_fetch_32. expected (17, 17), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + test32 = opal_atomic_fetch_add_32 (&val32, 13); + if (test32 != 17 || val32 != 30) { + fprintf (stderr, "error in opal_atomic_fetch_add_32. expected (17, 30), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + + + test32 = opal_atomic_and_fetch_32 (&val32, 0x18); + if (test32 != 24 || val32 != 24) { + fprintf (stderr, "error in opal_atomic_and_fetch_32. 
expected (24, 24), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + test32 = opal_atomic_fetch_and_32 (&val32, 0x10); + if (test32 != 24 || val32 != 16) { + fprintf (stderr, "error in opal_atomic_fetch_and_32. expected (24, 16), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + + + test32 = opal_atomic_or_fetch_32 (&val32, 0x03); + if (test32 != 19 || val32 != 19) { + fprintf (stderr, "error in opal_atomic_or_fetch_32. expected (19, 19), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + test32 = opal_atomic_fetch_or_32 (&val32, 0x04); + if (test32 != 19 || val32 != 23) { + fprintf (stderr, "error in opal_atomic_fetch_or_32. expected (19, 23), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + + test32 = opal_atomic_xor_fetch_32 (&val32, 0x03); + if (test32 != 20 || val32 != 20) { + fprintf (stderr, "error in opal_atomic_xor_fetch_32. expected (20, 20), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + test32 = opal_atomic_fetch_xor_32 (&val32, 0x05); + if (test32 != 20 || val32 != 17) { + fprintf (stderr, "error in opal_atomic_fetch_xor_32. expected (20, 17), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + + + test32 = opal_atomic_sub_fetch_32 (&val32, 14); + if (test32 != 3 || val32 != 3) { + fprintf (stderr, "error in opal_atomic_sub_fetch_32. expected (3, 3), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + + test32 = opal_atomic_fetch_sub_32 (&val32, 3); + if (test32 != 3 || val32 != 0) { + fprintf (stderr, "error in opal_atomic_fetch_sub_32. expected (3, 0), got (%d, %d)\n", test32, val32); + exit(EXIT_FAILURE); + } + +#if OPAL_HAVE_ATOMIC_MATH_64 + test64 = opal_atomic_add_fetch_64 (&val64, 17); + if (test64 != 17 || val64 != 17) { + fprintf (stderr, "error in opal_atomic_add_fetch_64. 
expected (17, 17), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + test64 = opal_atomic_fetch_add_64 (&val64, 13); + if (test64 != 17 || val64 != 30) { + fprintf (stderr, "error in opal_atomic_fetch_add_64. expected (17, 30), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + + + test64 = opal_atomic_and_fetch_64 (&val64, 0x18); + if (test64 != 24 || val64 != 24) { + fprintf (stderr, "error in opal_atomic_and_fetch_64. expected (24, 24), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + test64 = opal_atomic_fetch_and_64 (&val64, 0x10); + if (test64 != 24 || val64 != 16) { + fprintf (stderr, "error in opal_atomic_fetch_and_64. expected (24, 16), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + + + test64 = opal_atomic_or_fetch_64 (&val64, 0x03); + if (test64 != 19 || val64 != 19) { + fprintf (stderr, "error in opal_atomic_or_fetch_64. expected (19, 19), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + test64 = opal_atomic_fetch_or_64 (&val64, 0x04); + if (test64 != 19 || val64 != 23) { + fprintf (stderr, "error in opal_atomic_fetch_or_64. expected (19, 23), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + + test64 = opal_atomic_xor_fetch_64 (&val64, 0x03); + if (test64 != 20 || val64 != 20) { + fprintf (stderr, "error in opal_atomic_xor_fetch_64. expected (20, 20), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + test64 = opal_atomic_fetch_xor_64 (&val64, 0x05); + if (test64 != 20 || val64 != 17) { + fprintf (stderr, "error in opal_atomic_fetch_xor_64. expected (20, 17), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + + + test64 = opal_atomic_sub_fetch_64 (&val64, 14); + if (test64 != 3 || val64 != 3) { + fprintf (stderr, "error in opal_atomic_sub_fetch_64. 
expected (3, 3), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } + + test64 = opal_atomic_fetch_sub_64 (&val64, 3); + if (test64 != 3 || val64 != 0) { + fprintf (stderr, "error in opal_atomic_fetch_sub_64. expected (3, 0), got (%" PRId64 ", %" PRId64 ")\n", test64, val64); + exit(EXIT_FAILURE); + } +#endif + ret = atomic_math_test_th(TEST_REPS, num_threads); if (ret == 77) return ret; opal_atomic_mb(); if (val32 != TEST_REPS * num_threads * 5) { - printf("opal_atomic_add32 failed. Expected %d, got %d.\n", + printf("opal_atomic_add_fetch_32 failed. Expected %d, got %d.\n", TEST_REPS * num_threads * 5, val32); ret = 1; } @@ -121,7 +261,7 @@ main(int argc, char *argv[]) if (val64 != TEST_REPS * num_threads * 6) { /* Safe to case to (int) here because we know it's going to be a small value */ - printf("opal_atomic_add32 failed. Expected %d, got %d.\n", + printf("opal_atomic_add_fetch_64 failed. Expected %d, got %d.\n", TEST_REPS * num_threads * 6, (int) val64); ret = 1; } @@ -129,7 +269,7 @@ main(int argc, char *argv[]) printf(" * skipping 64 bit tests\n"); #endif if (valint != TEST_REPS * num_threads * 4) { - printf("opal_atomic_add32 failed. Expected %d, got %d.\n", + printf("opal_atomic_add failed. Expected %d, got %d.\n", TEST_REPS * num_threads * 4, valint); ret = 1; } diff --git a/test/threads/opal_thread.c b/test/threads/opal_thread.c index 7fb11c6f880..169c8b5984c 100644 --- a/test/threads/opal_thread.c +++ b/test/threads/opal_thread.c @@ -36,13 +36,13 @@ static volatile int count = 0; static void* thr1_run(opal_object_t* obj) { - (void)opal_atomic_add(&count, 1); + opal_atomic_add (&count, 1); return NULL; } static void* thr2_run(opal_object_t* obj) { - (void)opal_atomic_add(&count, 2); + opal_atomic_add (&count, 2); return NULL; }