Skip to content

opal/mca/threads: fix Argobots support [5.0.x] #9375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions opal/mca/pmix/pmix-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates.
* All Rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021 Argonne National Laboratory. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -37,6 +39,7 @@

#include "opal/hash_string.h"
#include "opal/mca/mca.h"
#include "opal/class/opal_list.h"
#include "opal/mca/threads/threads.h"
#include "opal/util/error.h"
#include "opal/util/event.h"
Expand Down
12 changes: 11 additions & 1 deletion opal/mca/threads/argobots/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# Copyright (c) 2019 Sandia National Laboratories. All rights reserved.
# Copyright (c) 2019 Triad National Security, LLC. All rights
# Reserved.
# Copyright (c) 2021 Argonne National Laboratory. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand All @@ -38,6 +39,7 @@ AC_DEFUN([OPAL_CONFIG_ARGOBOTS_THREADS],[
opal_check_argo_save_LIBS=$LIBS

opal_argo_happy=yes
opal_argo11_happy=yes
AS_IF([test "$with_argo" = "no"],
[opal_argo_happy=no])

Expand All @@ -63,7 +65,15 @@ AC_DEFUN([OPAL_CONFIG_ARGOBOTS_THREADS],[
[],
[opal_argo_happy=no])])

AS_IF([test $opal_argo_happy = yes && test -n "$opal_argo_dir"],

# ABT_unit_get_thread() is a new Argobots 1.1 API.
# It was introduced after static mutex/cond initializers.
# opal_argo11_happy starts out as "yes"; clear it when the symbol is
# missing so that the Argobots 1.1 requirement check below can fail.
AS_IF([test $opal_argo_happy = yes],
[AC_CHECK_FUNCS([ABT_unit_get_thread], [], [opal_argo11_happy="no"])])

AS_IF([test $opal_argo_happy = yes && test $opal_argo11_happy = no],
[AC_MSG_ERROR([Open MPI requires Argobots 1.1 or newer.])])
AS_IF([test $opal_argo_happy = yes && test $opal_argo11_happy = yes && test -n "$opal_argo_dir"],
[OPAL_ARGO_INCLUDE_PATH="$opal_argo_dir/include/"],
[OPAL_ARGO_INCLUDE_PATH=""])

Expand Down
13 changes: 2 additions & 11 deletions opal/mca/threads/argobots/threads_argobots_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2019 Sandia National Laboratories. All rights reserved.
*
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2021 Argonne National Laboratory. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -32,16 +33,6 @@
#include "opal/util/output.h"
#include "opal/util/sys_limits.h"

struct opal_tsd_key_value {
opal_tsd_key_t key;
opal_tsd_destructor_t destructor;
};

static opal_mutex_t opal_tsd_lock = OPAL_MUTEX_STATIC_INIT;
static struct opal_tsd_key_value *opal_tsd_key_values = NULL;
static int opal_tsd_key_values_count = 0;
static int opal_tsd_key_values_size = 0;

/*
* Constructor
*/
Expand Down
96 changes: 18 additions & 78 deletions opal/mca/threads/argobots/threads_argobots_mutex.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2019 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2021 Argonne National Laboratory. All rights reserved.
*
* $COPYRIGHT$
*
Expand All @@ -27,6 +28,7 @@
#include "opal/mca/threads/argobots/threads_argobots.h"

#include <errno.h>
#include <string.h>

#include "opal/constants.h"
#include "opal/mca/threads/argobots/threads_argobots_mutex.h"
Expand All @@ -41,7 +43,8 @@ bool opal_uses_threads = false;
static void mca_threads_argobots_mutex_constructor(opal_mutex_t *p_mutex)
{
opal_threads_argobots_ensure_init();
p_mutex->m_lock_argobots = OPAL_ABT_MUTEX_NULL;
const ABT_mutex_memory init_mutex = ABT_MUTEX_INITIALIZER;
memcpy(&p_mutex->m_lock_argobots, &init_mutex, sizeof(ABT_mutex_memory));
p_mutex->m_recursive = 0;
#if OPAL_ENABLE_DEBUG
p_mutex->m_lock_debug = 0;
Expand All @@ -51,17 +54,11 @@ static void mca_threads_argobots_mutex_constructor(opal_mutex_t *p_mutex)
opal_atomic_lock_init(&p_mutex->m_lock_atomic, 0);
}

static void mca_threads_argobots_mutex_destructor(opal_mutex_t *p_mutex)
{
if (OPAL_ABT_MUTEX_NULL != p_mutex->m_lock_argobots) {
ABT_mutex_free(&p_mutex->m_lock_argobots);
}
}

static void mca_threads_argobots_recursive_mutex_constructor(opal_recursive_mutex_t *p_mutex)
{
opal_threads_argobots_ensure_init();
p_mutex->m_lock_argobots = OPAL_ABT_MUTEX_NULL;
const ABT_mutex_memory init_mutex = ABT_RECURSIVE_MUTEX_INITIALIZER;
memcpy(&p_mutex->m_lock_argobots, &init_mutex, sizeof(ABT_mutex_memory));
p_mutex->m_recursive = 1;
#if OPAL_ENABLE_DEBUG
p_mutex->m_lock_debug = 0;
Expand All @@ -71,97 +68,40 @@ static void mca_threads_argobots_recursive_mutex_constructor(opal_recursive_mute
opal_atomic_lock_init(&p_mutex->m_lock_atomic, 0);
}

static void mca_threads_argobots_recursive_mutex_destructor(opal_recursive_mutex_t *p_mutex)
{
if (OPAL_ABT_MUTEX_NULL != p_mutex->m_lock_argobots) {
ABT_mutex_free(&p_mutex->m_lock_argobots);
}
}

OBJ_CLASS_INSTANCE(opal_mutex_t, opal_object_t, mca_threads_argobots_mutex_constructor,
mca_threads_argobots_mutex_destructor);
OBJ_CLASS_INSTANCE(opal_mutex_t, opal_object_t, mca_threads_argobots_mutex_constructor, NULL);
OBJ_CLASS_INSTANCE(opal_recursive_mutex_t, opal_object_t,
mca_threads_argobots_recursive_mutex_constructor,
mca_threads_argobots_recursive_mutex_destructor);

void opal_mutex_create(struct opal_mutex_t *m)
{
opal_threads_argobots_ensure_init();
while (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) {
ABT_mutex abt_mutex;
if (m->m_recursive) {
ABT_mutex_attr abt_mutex_attr;
ABT_mutex_attr_create(&abt_mutex_attr);
ABT_mutex_attr_set_recursive(abt_mutex_attr, ABT_TRUE);
ABT_mutex_create_with_attr(abt_mutex_attr, &abt_mutex);
ABT_mutex_attr_free(&abt_mutex_attr);
} else {
ABT_mutex_create(&abt_mutex);
}
void *null_ptr = OPAL_ABT_MUTEX_NULL;
if (opal_atomic_compare_exchange_strong_ptr((opal_atomic_intptr_t *) &m->m_lock_argobots,
(intptr_t *) &null_ptr, (intptr_t) abt_mutex)) {
/* mutex is successfully created and substituted. */
return;
}
ABT_mutex_free(&abt_mutex);
}
}

static void opal_cond_create(opal_cond_t *cond)
{
opal_threads_argobots_ensure_init();
while (OPAL_ABT_COND_NULL == *cond) {
ABT_cond new_cond;
ABT_cond_create(&new_cond);
void *null_ptr = OPAL_ABT_COND_NULL;
if (opal_atomic_compare_exchange_strong_ptr((opal_atomic_intptr_t *) cond,
(intptr_t *) &null_ptr, (intptr_t) new_cond)) {
/* cond is successfully created and substituted. */
return;
}
ABT_cond_free(&new_cond);
}
}
mca_threads_argobots_recursive_mutex_constructor, NULL);

int opal_cond_init(opal_cond_t *cond)
{
*cond = OPAL_ABT_COND_NULL;
const ABT_cond_memory init_cond = ABT_COND_INITIALIZER;
memcpy(cond, &init_cond, sizeof(ABT_cond_memory));
return OPAL_SUCCESS;
}

int opal_cond_wait(opal_cond_t *cond, opal_mutex_t *lock)
{
if (OPAL_ABT_COND_NULL == *cond) {
opal_cond_create(cond);
}
int ret = ABT_cond_wait(*cond, lock->m_lock_argobots);
ABT_mutex abt_mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&lock->m_lock_argobots);
ABT_cond abt_cond = ABT_COND_MEMORY_GET_HANDLE(cond);
int ret = ABT_cond_wait(abt_cond, abt_mutex);
return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR;
}

int opal_cond_broadcast(opal_cond_t *cond)
{
if (OPAL_ABT_COND_NULL == *cond) {
opal_cond_create(cond);
}
int ret = ABT_cond_broadcast(*cond);
ABT_cond abt_cond = ABT_COND_MEMORY_GET_HANDLE(cond);
int ret = ABT_cond_broadcast(abt_cond);
return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR;
}

int opal_cond_signal(opal_cond_t *cond)
{
if (OPAL_ABT_COND_NULL == *cond) {
opal_cond_create(cond);
}
int ret = ABT_cond_signal(*cond);
ABT_cond abt_cond = ABT_COND_MEMORY_GET_HANDLE(cond);
int ret = ABT_cond_signal(abt_cond);
return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR;
}

int opal_cond_destroy(opal_cond_t *cond)
{
int ret = ABT_SUCCESS;
if (OPAL_ABT_COND_NULL != *cond) {
ret = ABT_cond_free(cond);
}
return ABT_SUCCESS == ret ? OPAL_SUCCESS : OPAL_ERROR;
return OPAL_SUCCESS;
}
78 changes: 34 additions & 44 deletions opal/mca/threads/argobots/threads_argobots_mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
* Copyright (c) 2019 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2020 Triad National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2021 Argonne National Laboratory. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -34,21 +34,18 @@
#include <errno.h>
#include <stdio.h>

#include "opal/mca/threads/argobots/threads_argobots.h"

#include "opal/class/opal_object.h"
#include "opal/mca/threads/argobots/threads_argobots.h"
#include "opal/mca/threads/mutex.h"
#include "opal/sys/atomic.h"
#include "opal/util/output.h"

BEGIN_C_DECLS

/* Don't use ABT_MUTEX_NULL, since it might be not NULL. */
#define OPAL_ABT_MUTEX_NULL 0

struct opal_mutex_t {
opal_object_t super;

ABT_mutex m_lock_argobots;
ABT_mutex_memory m_lock_argobots;
int m_recursive;

#if OPAL_ENABLE_DEBUG
Expand All @@ -64,32 +61,34 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_mutex_t);
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_recursive_mutex_t);

#if OPAL_ENABLE_DEBUG
# define OPAL_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \
.m_recursive = 0, .m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \
.m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
# define OPAL_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = ABT_MUTEX_INITIALIZER, \
.m_recursive = 0, .m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \
.m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
}
#else
# define OPAL_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \
.m_recursive = 0, .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
# define OPAL_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = ABT_MUTEX_INITIALIZER, \
.m_recursive = 0, .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
}
#endif

#if OPAL_ENABLE_DEBUG
# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \
.m_recursive = 1, .m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \
.m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \
.m_lock_argobots = ABT_RECURSIVE_MUTEX_INITIALIZER, .m_recursive = 1, \
.m_lock_debug = 0, .m_lock_file = NULL, .m_lock_line = 0, \
.m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
}
#else
# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), .m_lock_argobots = OPAL_ABT_MUTEX_NULL, \
.m_recursive = 1, .m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
# define OPAL_RECURSIVE_MUTEX_STATIC_INIT \
{ \
.super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \
.m_lock_argobots = ABT_RECURSIVE_MUTEX_INITIALIZER, .m_recursive = 1, \
.m_lock_atomic = OPAL_ATOMIC_LOCK_INIT, \
}
#endif

Expand All @@ -99,14 +98,10 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_recursive_mutex_t);
*
************************************************************************/

void opal_mutex_create(struct opal_mutex_t *m);

static inline int opal_mutex_trylock(opal_mutex_t *m)
{
if (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) {
opal_mutex_create(m);
}
int ret = ABT_mutex_trylock(m->m_lock_argobots);
ABT_mutex mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&m->m_lock_argobots);
int ret = ABT_mutex_trylock(mutex);
if (ABT_ERR_MUTEX_LOCKED == ret) {
return 1;
} else if (ABT_SUCCESS != ret) {
Expand All @@ -120,31 +115,27 @@ static inline int opal_mutex_trylock(opal_mutex_t *m)

static inline void opal_mutex_lock(opal_mutex_t *m)
{
if (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) {
opal_mutex_create(m);
}
ABT_mutex mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&m->m_lock_argobots);
#if OPAL_ENABLE_DEBUG
int ret = ABT_mutex_lock(m->m_lock_argobots);
int ret = ABT_mutex_lock(mutex);
if (ABT_SUCCESS != ret) {
opal_output(0, "opal_mutex_lock()");
}
#else
ABT_mutex_lock(m->m_lock_argobots);
ABT_mutex_lock(mutex);
#endif
}

static inline void opal_mutex_unlock(opal_mutex_t *m)
{
if (OPAL_ABT_MUTEX_NULL == m->m_lock_argobots) {
opal_mutex_create(m);
}
ABT_mutex mutex = ABT_MUTEX_MEMORY_GET_HANDLE(&m->m_lock_argobots);
#if OPAL_ENABLE_DEBUG
int ret = ABT_mutex_unlock(m->m_lock_argobots);
int ret = ABT_mutex_unlock(mutex);
if (ABT_SUCCESS != ret) {
opal_output(0, "opal_mutex_unlock()");
}
#else
ABT_mutex_unlock(m->m_lock_argobots);
ABT_mutex_unlock(mutex);
#endif
/* For fairness of locking. */
ABT_thread_yield();
Expand Down Expand Up @@ -200,9 +191,8 @@ static inline void opal_mutex_atomic_unlock(opal_mutex_t *m)

#endif

#define OPAL_ABT_COND_NULL NULL
typedef ABT_cond opal_cond_t;
#define OPAL_CONDITION_STATIC_INIT OPAL_ABT_COND_NULL
typedef ABT_cond_memory opal_cond_t;
#define OPAL_CONDITION_STATIC_INIT ABT_COND_INITIALIZER

int opal_cond_init(opal_cond_t *cond);
int opal_cond_wait(opal_cond_t *cond, opal_mutex_t *lock);
Expand Down
Loading