From c5cf41802f69e102d16238c87c0b330cb9de14ac Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 18 Jun 2022 10:13:05 -0400 Subject: [PATCH] Atomics: add relaxed load and store The _Atomic qualifier requires accesses to such variables to ensure sequential consistency, leading to unnecessary atomic operations. OPAL_ATOMIC_RELAXED_STORE and OPAL_ATOMIC_RELAXED_LOAD avoid atomic operations in paths where atomicity and memory ordering is not required. Signed-off-by: Joseph Schuchart --- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 2 +- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 6 ++--- opal/class/opal_lifo.h | 6 ++--- opal/class/opal_object.h | 2 +- opal/include/opal/sys/atomic.h | 6 +++++ opal/include/opal/sys/atomic_stdc.h | 4 +++ opal/mca/threads/thread_usage.h | 39 +++++++++++++++-------------- 7 files changed, 38 insertions(+), 27 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index f9c5aeb18d1..83cbf04379d 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -594,7 +594,7 @@ void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t *btl, iov, &iov_count, &bytes_received ); - match->req_bytes_received = bytes_received; + OPAL_ATOMIC_RELAXED_STORE(&match->req_bytes_received, bytes_received); SPC_USER_OR_MPI(match->req_recv.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)bytes_received, OMPI_SPC_BYTES_RECEIVED_USER, OMPI_SPC_BYTES_RECEIVED_MPI); /* diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 016addfb135..68f2714d621 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -1280,9 +1280,9 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) #endif /* init/re-init the request */ - req->req_lock = 0; - req->req_pipeline_depth = 0; - req->req_bytes_received = 0; + OPAL_ATOMIC_RELAXED_STORE(&req->req_lock, 0); + OPAL_ATOMIC_RELAXED_STORE(&req->req_pipeline_depth, 0); + OPAL_ATOMIC_RELAXED_STORE(&req->req_bytes_received, 0); req->req_bytes_expected = 0; /* What about req_rdma_cnt ? */ req->req_rdma_idx = 0; diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index ba973417dff..604fcdc976c 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -295,15 +295,15 @@ static inline opal_list_item_t *opal_lifo_push_st(opal_lifo_t *lifo, opal_list_i { item->opal_list_next = (opal_list_item_t *) lifo->opal_lifo_head.data.item; item->item_free = 0; - lifo->opal_lifo_head.data.item = (intptr_t) item; + OPAL_ATOMIC_RELAXED_STORE(&lifo->opal_lifo_head.data.item, (intptr_t) item); return (opal_list_item_t *) item->opal_list_next; } static inline opal_list_item_t *opal_lifo_pop_st(opal_lifo_t *lifo) { opal_list_item_t *item; - item = (opal_list_item_t *) lifo->opal_lifo_head.data.item; - lifo->opal_lifo_head.data.item = (intptr_t) item->opal_list_next; + item = (opal_list_item_t *) OPAL_ATOMIC_RELAXED_LOAD(&lifo->opal_lifo_head.data.item); + OPAL_ATOMIC_RELAXED_STORE(&lifo->opal_lifo_head.data.item, (intptr_t) item->opal_list_next); if (item == &lifo->opal_lifo_ghost) { return NULL; } diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 6b60726f1c3..5a4f2ec6c2a 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -377,7 +377,7 @@ static inline opal_object_t *opal_obj_new_debug(opal_class_t *type, const char * opal_class_initialize((type)); \ } \ ((opal_object_t *) (object))->obj_class = (type); \ - ((opal_object_t *) (object))->obj_reference_count = 1; \ + OPAL_ATOMIC_RELAXED_STORE(&((opal_object_t *) (object))->obj_reference_count, 1); \ opal_obj_run_constructors((opal_object_t *) (object)); \ OBJ_REMEMBER_FILE_AND_LINENO(object, __FILE__, __LINE__); \ } while (0) diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index be647260b73..57fe6a371d7 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -463,6 +463,12 @@ static inline void opal_atomic_sc_ptr(opal_atomic_intptr_t *addr, intptr_t newva # define OPAL_HAVE_ATOMIC_LLSC_PTR 0 #endif +/****** Relaxed load and store ********/ + +#if !defined(OPAL_ATOMIC_HAVE_RELAXED_LOAD_STORE) +#define OPAL_ATOMIC_RELAXED_LOAD(a) *(a) +#define OPAL_ATOMIC_RELAXED_STORE(a, v) *(a) = (v); +#endif // !OPAL_ATOMIC_HAVE_RELAXED_LOAD_STORE END_C_DECLS #endif /* OPAL_SYS_ATOMIC_H */ diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 5eca86c9edb..f4ccd628178 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -212,4 +212,8 @@ OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, size_t, size_t, -) #include "opal/sys/atomic_impl_minmax_math.h" +#define OPAL_ATOMIC_HAVE_RELAXED_LOAD_STORE 1 +#define OPAL_ATOMIC_RELAXED_LOAD(a) atomic_load_explicit((a), memory_order_relaxed) +#define OPAL_ATOMIC_RELAXED_STORE(a, v) atomic_store_explicit((a), (v), memory_order_relaxed) + #endif /* !defined(OPAL_ATOMIC_STDC_H) */ diff --git a/opal/mca/threads/thread_usage.h b/opal/mca/threads/thread_usage.h index 3eeca968c06..062d2dfb2f0 100644 --- a/opal/mca/threads/thread_usage.h +++ b/opal/mca/threads/thread_usage.h @@ -101,8 +101,9 @@ static inline bool opal_set_using_threads(bool have) return opal_atomic_##name##_fetch_##suffix(addr, delta); \ } \ \ - *addr = *addr operator delta; \ - return *addr; \ + type value = OPAL_ATOMIC_RELAXED_LOAD(addr) operator delta; \ + OPAL_ATOMIC_RELAXED_STORE(addr, value); \ + return value; \ } \ \ static inline type opal_thread_fetch_##name##_##suffix(opal_atomic_##type *addr, type delta) \ @@ -111,8 +112,8 @@ static inline bool opal_set_using_threads(bool have) return opal_atomic_fetch_##name##_##suffix(addr, delta); \ } \ \ - type old = *addr; \ - *addr = old operator delta; \ + type old = OPAL_ATOMIC_RELAXED_LOAD(addr); \ + OPAL_ATOMIC_RELAXED_STORE(addr, old operator delta); \ return old; \ } @@ -124,28 +125,28 @@ static inline bool opal_set_using_threads(bool have) return opal_atomic_compare_exchange_strong_##suffix(addr, (addr_type *) compare, \ (addr_type) value); \ } \ - \ - if ((type) *addr == *compare) { \ - ((type *) addr)[0] = value; \ + type old = OPAL_ATOMIC_RELAXED_LOAD(addr); \ + if (old == *compare) { \ + OPAL_ATOMIC_RELAXED_STORE(addr, value); \ return true; \ } \ \ - *compare = ((type *) addr)[0]; \ + *compare = old; \ \ return false; \ } -#define OPAL_THREAD_DEFINE_ATOMIC_SWAP(type, addr_type, suffix) \ - static inline type opal_thread_swap_##suffix(opal_atomic_##addr_type *ptr, type newvalue) \ - { \ - if (opal_using_threads()) { \ - return (type) opal_atomic_swap_##suffix(ptr, (addr_type) newvalue); \ - } \ - \ - type old = ((type *) ptr)[0]; \ - ((type *) ptr)[0] = newvalue; \ - \ - return old; \ +#define OPAL_THREAD_DEFINE_ATOMIC_SWAP(type, addr_type, suffix) \ + static inline type opal_thread_swap_##suffix(opal_atomic_##addr_type *addr, type newvalue) \ + { \ + if (opal_using_threads()) { \ + return (type) opal_atomic_swap_##suffix(addr, (addr_type) newvalue); \ + } \ + \ + type old = OPAL_ATOMIC_RELAXED_LOAD(addr); \ + OPAL_ATOMIC_RELAXED_STORE(addr, newvalue); \ + \ + return old; \ } OPAL_THREAD_DEFINE_ATOMIC_OP(int32_t, add, +, 32)