diff --git a/opal/mca/pmix/pmix-internal.h b/opal/mca/pmix/pmix-internal.h index a02a6c39d0b..277a46ec751 100644 --- a/opal/mca/pmix/pmix-internal.h +++ b/opal/mca/pmix/pmix-internal.h @@ -10,6 +10,8 @@ * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021 Argonne National Laboratory. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,6 +39,7 @@ #include "opal/hash_string.h" #include "opal/mca/mca.h" +#include "opal/class/opal_list.h" #include "opal/mca/threads/threads.h" #include "opal/util/error.h" #include "opal/util/event.h" diff --git a/opal/mca/threads/argobots/configure.m4 b/opal/mca/threads/argobots/configure.m4 index aeeba2b517d..6a02735a8a1 100644 --- a/opal/mca/threads/argobots/configure.m4 +++ b/opal/mca/threads/argobots/configure.m4 @@ -16,6 +16,7 @@ # Copyright (c) 2019 Sandia National Laboratories. All rights reserved. # Copyright (c) 2019 Triad National Security, LLC. All rights # Reserved. +# Copyright (c) 2021 Argonne National Laboratory. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -38,6 +39,7 @@ AC_DEFUN([OPAL_CONFIG_ARGOBOTS_THREADS],[ opal_check_argo_save_LIBS=$LIBS opal_argo_happy=yes + opal_argo11_happy=yes AS_IF([test "$with_argo" = "no"], [opal_argo_happy=no]) @@ -63,7 +65,15 @@ AC_DEFUN([OPAL_CONFIG_ARGOBOTS_THREADS],[ [], [opal_argo_happy=no])]) - AS_IF([test $opal_argo_happy = yes && test -n "$opal_argo_dir"], + + # ABT_unit_get_thread() is a new Argobots 1.1 API. + # It was introduced after static mutex/cond initializers. 
+ AS_IF([test $opal_argo_happy = yes], + [AC_CHECK_FUNCS([ABT_unit_get_thread], [], [opal_argo11_happy="no"])]) + + AS_IF([test $opal_argo_happy = yes && test $opal_argo11_happy = no], + [AC_MSG_ERROR([Open MPI requires Argobots 1.1 or newer.])]) + AS_IF([test $opal_argo_happy = yes && test $opal_argo11_happy = yes && test -n "$opal_argo_dir"], [OPAL_ARGO_INCLUDE_PATH="$opal_argo_dir/include/"], [OPAL_ARGO_INCLUDE_PATH=""]) diff --git a/opal/mca/threads/argobots/threads_argobots_module.c b/opal/mca/threads/argobots/threads_argobots_module.c index 3d58de041eb..708a7da645a 100644 --- a/opal/mca/threads/argobots/threads_argobots_module.c +++ b/opal/mca/threads/argobots/threads_argobots_module.c @@ -11,8 +11,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. - * * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,16 +33,6 @@ #include "opal/util/output.h" #include "opal/util/sys_limits.h" -struct opal_tsd_key_value { - opal_tsd_key_t key; - opal_tsd_destructor_t destructor; -}; - -static opal_mutex_t opal_tsd_lock = OPAL_MUTEX_STATIC_INIT; -static struct opal_tsd_key_value *opal_tsd_key_values = NULL; -static int opal_tsd_key_values_count = 0; -static int opal_tsd_key_values_size = 0; - /* * Constructor */ diff --git a/opal/mca/threads/argobots/threads_argobots_mutex.c b/opal/mca/threads/argobots/threads_argobots_mutex.c index 330d7120439..c4cf2820875 100644 --- a/opal/mca/threads/argobots/threads_argobots_mutex.c +++ b/opal/mca/threads/argobots/threads_argobots_mutex.c @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. 
+ * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. * * $COPYRIGHT$ * @@ -27,6 +28,7 @@ #include "opal/mca/threads/argobots/threads_argobots.h" #include +#include #include "opal/constants.h" #include "opal/mca/threads/argobots/threads_argobots_mutex.h" @@ -41,7 +43,8 @@ bool opal_uses_threads = false; static void mca_threads_argobots_mutex_constructor(opal_mutex_t *p_mutex) { opal_threads_argobots_ensure_init(); - p_mutex->m_lock_argobots = OPAL_ABT_MUTEX_NULL; + const ABT_mutex_memory init_mutex = ABT_MUTEX_INITIALIZER; + memcpy(&p_mutex->m_lock_argobots, &init_mutex, sizeof(ABT_mutex_memory)); p_mutex->m_recursive = 0; #if OPAL_ENABLE_DEBUG p_mutex->m_lock_debug = 0; @@ -51,17 +54,11 @@ static void mca_threads_argobots_mutex_constructor(opal_mutex_t *p_mutex) opal_atomic_lock_init(&p_mutex->m_lock_atomic, 0); } -static void mca_threads_argobots_mutex_destructor(opal_mutex_t *p_mutex) -{ - if (OPAL_ABT_MUTEX_NULL != p_mutex->m_lock_argobots) { - ABT_mutex_free(&p_mutex->m_lock_argobots); - } -} - static void mca_threads_argobots_recursive_mutex_constructor(opal_recursive_mutex_t *p_mutex) { opal_threads_argobots_ensure_init(); - p_mutex->m_lock_argobots = OPAL_ABT_MUTEX_NULL; + const ABT_mutex_memory init_mutex = ABT_RECURSIVE_MUTEX_INITIALIZER; + memcpy(&p_mutex->m_lock_argobots, &init_mutex, sizeof(ABT_mutex_memory)); p_mutex->m_recursive = 1; #if OPAL_ENABLE_DEBUG p_mutex->m_lock_debug = 0; @@ -71,97 +68,40 @@ static void mca_threads_argobots_recursive_mutex_constructor(opal_recursive_mute opal_atomic_lock_init(&p_mutex->m_lock_atomic, 0); } -static void mca_threads_argobots_recursive_mutex_destructor(opal_recursive_mutex_t *p_mutex) -{ - if (OPAL_ABT_MUTEX_NULL != p_mutex->m_lock_argobots) { - ABT_mutex_free(&p_mutex->m_lock_argobots); - } -} - -OBJ_CLASS_INSTANCE(opal_mutex_t, opal_object_t, mca_threads_argobots_mutex_constructor, - mca_threads_argobots_mutex_destructor); +OBJ_CLASS_INSTANCE(opal_mutex_t, opal_object_t, 
mca_threads_argobots_mutex_constructor, NULL); OBJ_CLASS_INSTANCE(opal_recursive_mutex_t, opal_object_t, - mca_threads_argobots_recursive_mutex_constructor, - mca_threads_argobots_recursive_mutex_destructor); - -void opal_mutex_create(struct opal_mutex_t *m) -{ - opal_threads_argobots_ensure_init(); - while (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) { - ABT_mutex abt_mutex; - if (m->m_recursive) { - ABT_mutex_attr abt_mutex_attr; - ABT_mutex_attr_create(&abt_mutex_attr); - ABT_mutex_attr_set_recursive(abt_mutex_attr, ABT_TRUE); - ABT_mutex_create_with_attr(abt_mutex_attr, &abt_mutex); - ABT_mutex_attr_free(&abt_mutex_attr); - } else { - ABT_mutex_create(&abt_mutex); - } - void *null_ptr = OPAL_ABT_MUTEX_NULL; - if (opal_atomic_compare_exchange_strong_ptr((opal_atomic_intptr_t *) &m->m_lock_argobots, - (intptr_t *) &null_ptr, (intptr_t) abt_mutex)) { - /* mutex is successfully created and substituted. */ - return; - } - ABT_mutex_free(&abt_mutex); - } -} - -static void opal_cond_create(opal_cond_t *cond) -{ - opal_threads_argobots_ensure_init(); - while (OPAL_ABT_COND_NULL == *cond) { - ABT_cond new_cond; - ABT_cond_create(&new_cond); - void *null_ptr = OPAL_ABT_COND_NULL; - if (opal_atomic_compare_exchange_strong_ptr((opal_atomic_intptr_t *) cond, - (intptr_t *) &null_ptr, (intptr_t) new_cond)) { - /* cond is successfully created and substituted. 
*/ - return; - } - ABT_cond_free(&new_cond); - } -} + mca_threads_argobots_recursive_mutex_constructor, NULL); int opal_cond_init(opal_cond_t *cond) { - *cond = OPAL_ABT_COND_NULL; + const ABT_cond_memory init_cond = ABT_COND_INITIALIZER; + memcpy(cond, &init_cond, sizeof(ABT_cond_memory)); return OPAL_SUCCESS; } int opal_cond_wait(opal_cond_t *cond, opal_mutex_t *lock) { - if (OPAL_ABT_COND_NULL == *cond) { - opal_cond_create(cond); - } - int ret = ABT_cond_wait(*cond, lock->m_lock_argobots); + ABT_mutex abt_mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&lock->m_lock_argobots); + ABT_cond abt_cond = ABT_COND_MEMORY_GET_HANDLE(cond); + int ret = ABT_cond_wait(abt_cond, abt_mutex); return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR; } int opal_cond_broadcast(opal_cond_t *cond) { - if (OPAL_ABT_COND_NULL == *cond) { - opal_cond_create(cond); - } - int ret = ABT_cond_broadcast(*cond); + ABT_cond abt_cond = ABT_COND_MEMORY_GET_HANDLE(cond); + int ret = ABT_cond_broadcast(abt_cond); return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR; } int opal_cond_signal(opal_cond_t *cond) { - if (OPAL_ABT_COND_NULL == *cond) { - opal_cond_create(cond); - } - int ret = ABT_cond_signal(*cond); + ABT_cond abt_cond = ABT_COND_MEMORY_GET_HANDLE(cond); + int ret = ABT_cond_signal(abt_cond); return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR; } int opal_cond_destroy(opal_cond_t *cond) { - int ret = ABT_SUCCESS; - if (OPAL_ABT_COND_NULL != *cond) { - ret = ABT_cond_free(cond); - } - return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR; + return OPAL_SUCCESS; } diff --git a/opal/mca/threads/argobots/threads_argobots_mutex.h b/opal/mca/threads/argobots/threads_argobots_mutex.h index a60c229002c..adf2b2c8230 100644 --- a/opal/mca/threads/argobots/threads_argobots_mutex.h +++ b/opal/mca/threads/argobots/threads_argobots_mutex.h @@ -17,8 +17,8 @@ * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. 
- * * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,21 +34,18 @@ #include #include -#include "opal/mca/threads/argobots/threads_argobots.h" - #include "opal/class/opal_object.h" +#include "opal/mca/threads/argobots/threads_argobots.h" +#include "opal/mca/threads/mutex.h" #include "opal/sys/atomic.h" #include "opal/util/output.h" BEGIN_C_DECLS -/* Don't use ABT_MUTEX_NULL, since it might be not NULL. */ -#define OPAL_ABT_MUTEX_NULL 0 - struct opal_mutex_t { opal_object_t super; - ABT_mutex m_lock_argobots; + ABT_mutex_memory m_lock_argobots; int m_recursive; #if OPAL_ENABLE_DEBUG @@ -64,32 +61,34 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_mutex_t); OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_recursive_mutex_t); #if OPAL_ENABLE_DEBUG -# define OPAL_MUTEX_STATIC_INIT \ - { \ - .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \ - .m_recursive = 0, .m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \ - .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ +# define OPAL_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = ABT_MUTEX_INITIALIZER, \ + .m_recursive = 0, .m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \ + .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ } #else -# define OPAL_MUTEX_STATIC_INIT \ - { \ - .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \ - .m_recursive = 0, .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ +# define OPAL_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = ABT_MUTEX_INITIALIZER, \ + .m_recursive = 0, .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ } #endif #if OPAL_ENABLE_DEBUG -# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \ - { \ - .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \ - .m_recursive = 1, .m_lock_debug = 0, 
.m_lock_file = NULL, .m_lock_line = 0, \ - .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ +# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \ + .m_lock_argobots = ABT_RECURSIVE_MUTEX_INITIALIZER, .m_recursive = 1, \ + .m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \ + .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ } #else -# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \ - { \ - .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \ - .m_recursive = 1, .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ +# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \ + .m_lock_argobots = ABT_RECURSIVE_MUTEX_INITIALIZER, .m_recursive = 1, \ + .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \ } #endif @@ -99,14 +98,10 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_recursive_mutex_t); * ************************************************************************/ -void opal_mutex_create(struct opal_mutex_t *m); - static inline int opal_mutex_trylock(opal_mutex_t *m) { - if (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) { - opal_mutex_create(m); - } - int ret = ABT_mutex_trylock(m->m_lock_argobots); + ABT_mutex mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&m->m_lock_argobots); + int ret = ABT_mutex_trylock(mutex); if (ABT_ERR_MUTEX_LOCKED == ret) { return 1; } else if (ABT_SUCCESS != ret) { @@ -120,31 +115,27 @@ static inline int opal_mutex_trylock(opal_mutex_t *m) static inline void opal_mutex_lock(opal_mutex_t *m) { - if (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) { - opal_mutex_create(m); - } + ABT_mutex mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&m->m_lock_argobots); #if OPAL_ENABLE_DEBUG - int ret = ABT_mutex_lock(m->m_lock_argobots); + int ret = ABT_mutex_lock(mutex); if (ABT_SUCCESS != ret) { opal_output(0, "opal_mutex_lock()"); } #else - ABT_mutex_lock(m->m_lock_argobots); + ABT_mutex_lock(mutex); #endif } static inline void opal_mutex_unlock(opal_mutex_t *m) { - if (OPAL_ABT_MUTEX_NULL == 
m->m_lock_argobots) { - opal_mutex_create(m); - } + ABT_mutex mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&m->m_lock_argobots); #if OPAL_ENABLE_DEBUG - int ret = ABT_mutex_unlock(m->m_lock_argobots); + int ret = ABT_mutex_unlock(mutex); if (ABT_SUCCESS != ret) { opal_output(0, "opal_mutex_unlock()"); } #else - ABT_mutex_unlock(m->m_lock_argobots); + ABT_mutex_unlock(mutex); #endif /* For fairness of locking. */ ABT_thread_yield(); @@ -200,9 +191,8 @@ static inline void opal_mutex_atomic_unlock(opal_mutex_t *m) #endif -#define OPAL_ABT_COND_NULL NULL -typedef ABT_cond opal_cond_t; -#define OPAL_CONDITION_STATIC_INIT OPAL_ABT_COND_NULL +typedef ABT_cond_memory opal_cond_t; +#define OPAL_CONDITION_STATIC_INIT ABT_COND_INITIALIZER int opal_cond_init(opal_cond_t *cond); int opal_cond_wait(opal_cond_t *cond, opal_mutex_t *lock); diff --git a/opal/mca/threads/argobots/threads_argobots_wait_sync.c b/opal/mca/threads/argobots/threads_argobots_wait_sync.c index 5735fd128be..ceb5eef0ce9 100644 --- a/opal/mca/threads/argobots/threads_argobots_wait_sync.c +++ b/opal/mca/threads/argobots/threads_argobots_wait_sync.c @@ -7,6 +7,7 @@ * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. * * $COPYRIGHT$ * @@ -21,6 +22,32 @@ static opal_mutex_t wait_sync_lock = OPAL_MUTEX_STATIC_INIT; static ompi_wait_sync_t *wait_sync_list = NULL; +void wait_sync_global_wakeup_st(int status) +{ + ompi_wait_sync_t *sync; + for (sync = wait_sync_list; sync != NULL; sync = sync->next) { + wait_sync_update(sync, 0, status); + } +} + +void wait_sync_global_wakeup_mt(int status) +{ + ompi_wait_sync_t *sync; + opal_mutex_lock(&wait_sync_lock); + for (sync = wait_sync_list; sync != NULL; sync = sync->next) { + /* sync_update is going to take the sync->lock from within + * the wait_sync_lock. 
 Tread lightly here: Ideally we should + * find a way to not take a lock in a lock as this is deadlock prone, + * but as of today we are the only place doing this so it is safe. + */ + wait_sync_update(sync, 0, status); + if (sync->next == wait_sync_list) { + break; /* special case for rings */ + } + } + opal_mutex_unlock(&wait_sync_lock); +} + static opal_atomic_int32_t num_thread_in_progress = 0; #define WAIT_SYNC_PASS_OWNERSHIP(who) \ diff --git a/opal/mca/threads/argobots/threads_argobots_wait_sync.h b/opal/mca/threads/argobots/threads_argobots_wait_sync.h index 4117e92e5c1..375264597d3 100644 --- a/opal/mca/threads/argobots/threads_argobots_wait_sync.h +++ b/opal/mca/threads/argobots/threads_argobots_wait_sync.h @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. * * $COPYRIGHT$ * @@ -100,4 +101,14 @@ static inline int sync_wait_st(ompi_wait_sync_t *sync) } \ } while (0) +/** + * Wake up all syncs with a particular status. If status is OMPI_SUCCESS this + * operation is a NO-OP. Otherwise it will trigger the "error condition" from + * all registered sync. + */ +OPAL_DECLSPEC void wait_sync_global_wakeup_st(int status); +OPAL_DECLSPEC void wait_sync_global_wakeup_mt(int status); +#define wait_sync_global_wakeup(st) \ + (opal_using_threads() ? wait_sync_global_wakeup_mt(st) : wait_sync_global_wakeup_st(st)) + #endif /* OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_WAIT_SYNC_H */