Skip to content

Move yield capability to opal thread component #8037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ompi/runtime/ompi_mpi_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#include "opal/util/show_help.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/threads/threads.h"

/*
* Global variables
*
Expand All @@ -62,7 +64,8 @@ bool ompi_mpi_keep_fqdn_hostnames = false;
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);

bool ompi_mpi_yield_when_idle = false;
/* if the threads module requires yielding we use that as default but allow it to be overridden */
bool ompi_mpi_yield_when_idle = OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT;
int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
Expand Down Expand Up @@ -118,7 +121,9 @@ int ompi_mpi_register_params(void)
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mpi_oversubscribe);
ompi_mpi_yield_when_idle = ompi_mpi_oversubscribe;

/* yield if the node is oversubscribed and allow users to override */
ompi_mpi_yield_when_idle |= ompi_mpi_oversubscribe;
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
Expand Down
12 changes: 11 additions & 1 deletion opal/mca/threads/argobots/threads_argobots_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
Expand Down Expand Up @@ -37,4 +37,14 @@ struct opal_thread_t {
void *t_ret;
};


/* Argobots are cooperatively scheduled so yield when idle */
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true

/*
 * Yield on behalf of the Argobots threads component by delegating to
 * ABT_thread_yield(). The status code returned by Argobots is
 * deliberately discarded because this function has no way to report it.
 */
static inline void opal_thread_yield(void)
{
    (void) ABT_thread_yield();
}

#endif /* OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H */
4 changes: 3 additions & 1 deletion opal/mca/threads/pthreads/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ libmca_threads_pthreads_la_SOURCES = \
threads_pthreads_threads.h \
threads_pthreads_tsd.h \
threads_pthreads_wait_sync.c \
threads_pthreads_wait_sync.h
threads_pthreads_wait_sync.h \
threads_pthreads_yield.c \
threads_pthreads.h
27 changes: 27 additions & 0 deletions opal/mca/threads/pthreads/threads_pthreads.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/


#ifndef OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
#define OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H

#include "opal_config.h"
#include <stdint.h>
#include <time.h>

/*
 * Function type of a pthreads yield implementation: takes no arguments
 * and returns nothing. Note that this names the function type itself;
 * users declare pointers to it (see opal_threads_pthreads_yield_fn).
 */
typedef void (opal_threads_pthreads_yield_fn_t)(void);

/*
 * Set up the pthreads yield machinery for the given component.
 *
 * @param component  the MCA component descriptor passed in by the caller
 * @return an OPAL status code (int)
 *
 * NOTE(review): mca_base_component_t is not declared by any header included
 * above; this header appears to rely on the includer having pulled in the
 * MCA base definitions first -- confirm.
 */
OPAL_DECLSPEC int opal_threads_pthreads_yield_init(const mca_base_component_t *component);

/*
 * Pointer to the yield implementation currently in use by the pthreads
 * component. Defined in another translation unit (hence extern).
 */
OPAL_DECLSPEC extern opal_threads_pthreads_yield_fn_t *opal_threads_pthreads_yield_fn;

#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H */
11 changes: 10 additions & 1 deletion opal/mca/threads/pthreads/threads_pthreads_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
Expand All @@ -26,8 +26,11 @@
#include "opal/mca/threads/thread.h"
#include "opal/mca/threads/threads.h"
#include "opal/constants.h"
#include "opal/mca/threads/pthreads/threads_pthreads.h"


static int opal_threads_pthreads_open(void);
static int opal_threads_pthreads_register(void);

const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
/* First, the mca_component_t struct containing meta information
Expand All @@ -41,13 +44,19 @@ const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
OPAL_RELEASE_VERSION),

.mca_open_component = opal_threads_pthreads_open,
.mca_register_component_params = opal_threads_pthreads_register
},
.threadsc_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};

/*
 * Parameter-registration hook of the pthreads threads component (installed
 * as .mca_register_component_params in mca_threads_pthreads_component).
 * It forwards this component's version descriptor to
 * opal_threads_pthreads_yield_init() and returns that call's status
 * unchanged.
 */
int opal_threads_pthreads_register(void)
{
    int rc = opal_threads_pthreads_yield_init(&mca_threads_pthreads_component.threadsc_version);

    return rc;
}

int opal_threads_pthreads_open(void)
{
return OPAL_SUCCESS;
Expand Down
14 changes: 13 additions & 1 deletion opal/mca/threads/pthreads/threads_pthreads_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
Expand All @@ -30,11 +30,23 @@
#include <pthread.h>
#include <signal.h>

#include "opal/mca/threads/threads.h"
#include "opal/mca/threads/pthreads/threads_pthreads.h"

/* Thread descriptor used by the pthreads threads component. */
struct opal_thread_t {
/* embedded opal_object_t; presumably the OPAL object base -- TODO confirm */
opal_object_t super;
/* function of type opal_thread_fn_t; presumably the thread's entry point -- TODO confirm */
opal_thread_fn_t t_run;
/* opaque pointer; presumably the argument handed to t_run -- TODO confirm */
void *t_arg;
/* pthread handle associated with this descriptor */
pthread_t t_handle;
};

/* Pthreads do not need to yield when idle */
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false

static inline
void opal_thread_yield(void)
{
opal_threads_pthreads_yield_fn();
}

#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_THREADS_H */
89 changes: 89 additions & 0 deletions opal/mca/threads/pthreads/threads_pthreads_yield.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#include "opal_config.h"
#include <time.h>
#ifdef HAVE_SCHED_H
#include <sched.h>
#endif

#include "opal/constants.h"
#include "opal/mca/threads/thread.h"
#include "opal/mca/threads/pthreads/threads_pthreads.h"

/* The two yield implementations; both are defined at the end of this file. */
static void opal_thread_pthreads_yield_sched_yield(void);
static void opal_thread_pthreads_yield_nanosleep(void);

/* Yield strategies selectable through the "yield_strategy" MCA variable. */
typedef enum {
OPAL_PTHREADS_YIELD_SCHED_YIELD = 0,
OPAL_PTHREADS_YIELD_NANOSLEEP
} opal_threads_pthreads_yield_strategy_t;

/*
 * Value/name pairs handed to mca_base_var_enum_create(); the {0, NULL}
 * entry terminates the table.
 */
static mca_base_var_enum_value_t yield_strategy_values[] = {
{OPAL_PTHREADS_YIELD_SCHED_YIELD, "sched_yield"},
{OPAL_PTHREADS_YIELD_NANOSLEEP, "nanosleep"},
{0, NULL}};



/* Number of nanoseconds to nanosleep, if enabled (storage of the "nanosleep_time" MCA variable) */
static uint64_t yield_nsleep_nanosecs;
/* The time to nanosleep, if enabled; defaults to 1 nanosecond */
static struct timespec yield_nsleep_time = {.tv_sec = 0, .tv_nsec = 1};
/*
 * Selected strategy (storage of the "yield_strategy" MCA variable).
 * NOTE(review): the variable is registered as MCA_BASE_VAR_TYPE_INT while
 * this storage has an enum type; that is only safe if the enum has the
 * size of int on the target compiler -- confirm, or declare this as int.
 */
static opal_threads_pthreads_yield_strategy_t yield_strategy = OPAL_PTHREADS_YIELD_SCHED_YIELD;

/* Yield implementation in use; sched_yield until opal_threads_pthreads_yield_init() selects otherwise. */
opal_threads_pthreads_yield_fn_t *opal_threads_pthreads_yield_fn = &opal_thread_pthreads_yield_sched_yield;

/*
 * Register the pthreads yield MCA variables under the given component and
 * apply their values.
 *
 * Two variables are registered:
 *   - "yield_strategy": chooses the implementation installed in
 *     opal_threads_pthreads_yield_fn ("sched_yield", the initial value, or
 *     "nanosleep").
 *   - "nanosleep_time": number of nanoseconds to sleep when the nanosleep
 *     strategy is used; the value is split into yield_nsleep_time.
 *
 * @param component  component the variables are registered under
 * @return OPAL_SUCCESS on success, otherwise the error code returned by
 *         mca_base_var_enum_create() when the strategy enumerator could
 *         not be created (nothing is registered in that case).
 */
int opal_threads_pthreads_yield_init(const mca_base_component_t *component)
{
    const uint64_t nsec_per_sec = UINT64_C(1000000000);
    mca_base_var_enum_t *yield_strategy_enumerator = NULL;
    int rc;

    /* Without this check a failed creation would leave the enumerator
     * pointer invalid for the registration and OBJ_RELEASE below. */
    rc = mca_base_var_enum_create("pthread_yield_strategies", yield_strategy_values,
                                  &yield_strategy_enumerator);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    (void) mca_base_component_var_register(component, "yield_strategy",
                                           "Pthread yield strategy to use",
                                           MCA_BASE_VAR_TYPE_INT, yield_strategy_enumerator, 0, 0,
                                           OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &yield_strategy);
    switch (yield_strategy) {
    case OPAL_PTHREADS_YIELD_NANOSLEEP:
        opal_threads_pthreads_yield_fn = &opal_thread_pthreads_yield_nanosleep;
        break;
    default:
        /* use initial value */
        break;
    }

    /* this function's reference to the enumerator is no longer needed */
    OBJ_RELEASE(yield_strategy_enumerator);

    /* Seed the variable's storage with the compiled-in default. Integer
     * arithmetic keeps the conversion exact over the whole uint64_t range,
     * which a round trip through double would not. */
    yield_nsleep_nanosecs = (uint64_t) yield_nsleep_time.tv_sec * nsec_per_sec
                            + (uint64_t) yield_nsleep_time.tv_nsec;
    (void) mca_base_component_var_register(component, "nanosleep_time",
                                           "Number of nanoseconds to sleep when using nanosleep as the pthread yield strategy",
                                           MCA_BASE_VAR_TYPE_UINT64_T, NULL, 0, 0,
                                           OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                           &yield_nsleep_nanosecs);

    /* Split into whole seconds plus remainder so tv_nsec always stays
     * within [0, 999999999], as nanosleep() requires. */
    yield_nsleep_time.tv_sec = (time_t) (yield_nsleep_nanosecs / nsec_per_sec);
    yield_nsleep_time.tv_nsec = (long) (yield_nsleep_nanosecs % nsec_per_sec);

    return OPAL_SUCCESS;
}

/*
 * "sched_yield" strategy: hand the processor back via sched_yield().
 * When HAVE_SCHED_H is not defined the call is compiled out and this
 * function does nothing. The return value of sched_yield() is ignored.
 */
void opal_thread_pthreads_yield_sched_yield(void)
{
#if defined(HAVE_SCHED_H)
    (void) sched_yield();
#endif
}

/*
 * "nanosleep" strategy: sleep for the duration held in yield_nsleep_time.
 * No remaining-time buffer is supplied and the return value is ignored,
 * so a sleep that ends early is not resumed.
 */
void opal_thread_pthreads_yield_nanosleep(void)
{
    const struct timespec *duration = &yield_nsleep_time;

    (void) nanosleep(duration, NULL);
}

11 changes: 10 additions & 1 deletion opal/mca/threads/qthreads/threads_qthreads_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
Expand Down Expand Up @@ -38,4 +38,13 @@ struct opal_thread_t {
aligned_t *t_thread_ret_ptr;
};

/* Qthreads are cooperatively scheduled so yield when idle */
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true

/*
 * Yield on behalf of the Qthreads threads component by delegating to
 * qthread_yield().
 */
static inline void opal_thread_yield(void)
{
    qthread_yield();
}

#endif /* OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H */
2 changes: 2 additions & 0 deletions opal/mca/threads/threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ OPAL_DECLSPEC opal_thread_t *opal_thread_get_self(void);
OPAL_DECLSPEC void opal_thread_kill(opal_thread_t *, int sig);
OPAL_DECLSPEC void opal_thread_set_main(void);

/*
 * Yield the calling thread. Only declared here: as a static inline it has
 * to be defined in every translation unit that uses it, and the definition
 * is expected to come from the header of the threads component in use.
 */
static inline void opal_thread_yield(void);

END_C_DECLS

#endif /* OPAL_MCA_THREADS_THREADS_H */
17 changes: 7 additions & 10 deletions opal/runtime/opal_progress.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
Expand All @@ -27,10 +27,6 @@

#include "opal_config.h"

#ifdef HAVE_SCHED_H
#include <sched.h>
#endif

#include "opal/runtime/opal_progress.h"
#include "opal/util/event.h"
#include "opal/mca/base/mca_base_var.h"
Expand All @@ -39,6 +35,7 @@
#include "opal/util/output.h"
#include "opal/runtime/opal_params.h"
#include "opal/runtime/opal.h"
#include "opal/mca/threads/threads.h"

#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)
Expand Down Expand Up @@ -68,7 +65,7 @@ static volatile opal_progress_callback_t *callbacks_lp = NULL;
static size_t callbacks_lp_len = 0;
static size_t callbacks_lp_size = 0;

/* do we want to call sched_yield() if nothing happened */
/* do we want to yield() if nothing happened */
bool opal_progress_yield_when_idle = false;

#if OPAL_PROGRESS_USE_TIMERS
Expand Down Expand Up @@ -212,7 +209,7 @@ static int opal_progress_events(void)
* be called. We don't propagate errors from the progress functions,
* so no action is taken if they return failures. The functions are
* expected to return the number of events progressed, to determine
* whether or not we should call sched_yield() during MPI progress.
* whether or not we should yield the CPU during MPI progress.
* This is only loosely tracked, as an error return can cause the number
* of progressed events to appear lower than it actually is. We don't
* care, as the cost of that happening is far outweighed by the cost
Expand Down Expand Up @@ -246,16 +243,16 @@ opal_progress(void)
opal_progress_events();
}

#if OPAL_HAVE_SCHED_YIELD
if (opal_progress_yield_when_idle && events <= 0) {
/* If there is nothing to do - yield the processor - otherwise
* we could consume the processor for the entire time slice. If
* the processor is oversubscribed - this will result in a best-case
* latency equivalent to the time-slice.
* With some thread implementations, yielding might be required
* to ensure correct scheduling of all communicating threads.
*/
sched_yield();
opal_thread_yield();
}
#endif /* defined(HAVE_SCHED_YIELD) */
}


Expand Down