diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c
index ad8ed8ac91e..91e0244646c 100644
--- a/ompi/runtime/ompi_mpi_params.c
+++ b/ompi/runtime/ompi_mpi_params.c
@@ -43,6 +43,8 @@
 #include "opal/util/show_help.h"
 #include "opal/runtime/opal.h"
 #include "opal/runtime/opal_params.h"
+#include "opal/mca/threads/threads.h"
+
 /*
  * Global variables
  *
@@ -62,7 +64,8 @@ bool ompi_mpi_keep_fqdn_hostnames = false;
 bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
 bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
 
-bool ompi_mpi_yield_when_idle = false;
+/* start from the threads module's default (OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT); the yield_when_idle MCA parameter can still override it */
+bool ompi_mpi_yield_when_idle = OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT;
 int ompi_mpi_event_tick_rate = -1;
 char *ompi_mpi_show_mca_params_string = NULL;
 bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
@@ -118,7 +121,9 @@ int ompi_mpi_register_params(void)
                                  OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &ompi_mpi_oversubscribe);
-    ompi_mpi_yield_when_idle = ompi_mpi_oversubscribe;
+
+    /* oversubscribed nodes yield as well; |= keeps a threads-module default of true, and the MCA parameter registered next still lets users override */
+    ompi_mpi_yield_when_idle |= ompi_mpi_oversubscribe;
     (void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
                                  "Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
diff --git a/opal/mca/threads/argobots/threads_argobots_threads.h b/opal/mca/threads/argobots/threads_argobots_threads.h
index fe513532530..3992b32f274 100644
--- a/opal/mca/threads/argobots/threads_argobots_threads.h
+++ b/opal/mca/threads/argobots/threads_argobots_threads.h
@@ -6,7 +6,7 @@
  * Copyright (c) 2004-2005 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
  *                         University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
@@ -37,4 +37,14 @@ struct opal_thread_t {
     void *t_ret;
 };
 
+
+/* Argobots are cooperatively scheduled so yield when idle */
+#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
+
+static inline
+void opal_thread_yield(void)
+{
+    ABT_thread_yield();
+}
+
 #endif /* OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H */
diff --git a/opal/mca/threads/pthreads/Makefile.am b/opal/mca/threads/pthreads/Makefile.am
index 833950d5e17..0f5aa1d45de 100644
--- a/opal/mca/threads/pthreads/Makefile.am
+++ b/opal/mca/threads/pthreads/Makefile.am
@@ -29,4 +29,6 @@ libmca_threads_pthreads_la_SOURCES = \
     threads_pthreads_threads.h \
     threads_pthreads_tsd.h \
     threads_pthreads_wait_sync.c \
-    threads_pthreads_wait_sync.h
+    threads_pthreads_wait_sync.h \
+    threads_pthreads_yield.c \
+    threads_pthreads.h
diff --git a/opal/mca/threads/pthreads/threads_pthreads.h b/opal/mca/threads/pthreads/threads_pthreads.h
new file mode 100644
index 00000000000..e382abde635
--- /dev/null
+++ b/opal/mca/threads/pthreads/threads_pthreads.h
@@ -0,0 +1,32 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2020 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#ifndef OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
+#define OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
+
+#include "opal_config.h"
+#include <stdint.h>
+#include <time.h>
+
+#include "opal/mca/mca.h"
+
+/* Signature of a function that yields the calling thread */
+typedef void (opal_threads_pthreads_yield_fn_t)(void);
+
+/* Register the yield-related MCA variables of the given component */
+OPAL_DECLSPEC int opal_threads_pthreads_yield_init(const mca_base_component_t *component);
+
+/* Yield function currently in use; the pthreads opal_thread_yield() calls through it */
+OPAL_DECLSPEC extern opal_threads_pthreads_yield_fn_t *opal_threads_pthreads_yield_fn;
+
+#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H */
diff --git a/opal/mca/threads/pthreads/threads_pthreads_component.c b/opal/mca/threads/pthreads/threads_pthreads_component.c
index fcd00368831..30c6ca70336 100644
--- a/opal/mca/threads/pthreads/threads_pthreads_component.c
+++ b/opal/mca/threads/pthreads/threads_pthreads_component.c
@@ -6,7 +6,7 @@
  * Copyright (c) 2004-2014 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
  *                         University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
@@ -26,8 +26,11 @@
 #include "opal/mca/threads/thread.h"
 #include "opal/mca/threads/threads.h"
 #include "opal/constants.h"
+#include "opal/mca/threads/pthreads/threads_pthreads.h"
+
 
 static int opal_threads_pthreads_open(void);
+static int opal_threads_pthreads_register(void);
 
 const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
     /* First, the mca_component_t struct containing meta information
@@ -41,6 +44,7 @@ const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
                               OPAL_RELEASE_VERSION),
 
         .mca_open_component = opal_threads_pthreads_open,
+        .mca_register_component_params = opal_threads_pthreads_register
     },
     .threadsc_data = {
         /* The component is checkpoint ready */
@@ -48,6 +52,15 @@ const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
     },
 };
 
+/*
+ * Parameter registration hook of the pthreads component: forwards to
+ * opal_threads_pthreads_yield_init() and returns its result.
+ */
+static int opal_threads_pthreads_register(void)
+{
+    return opal_threads_pthreads_yield_init(&mca_threads_pthreads_component.threadsc_version);
+}
+
 int opal_threads_pthreads_open(void)
 {
     return OPAL_SUCCESS;
diff --git a/opal/mca/threads/pthreads/threads_pthreads_threads.h b/opal/mca/threads/pthreads/threads_pthreads_threads.h
index 27ad13e8e1d..2d5d062f1d0 100644
--- a/opal/mca/threads/pthreads/threads_pthreads_threads.h
+++ b/opal/mca/threads/pthreads/threads_pthreads_threads.h
@@ -6,7 +6,7 @@
  * Copyright (c) 2004-2006 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
  *                         University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
@@ -30,6 +30,9 @@
 #include <pthread.h>
 #include <signal.h>
 
+#include "opal/mca/threads/threads.h"
+#include "opal/mca/threads/pthreads/threads_pthreads.h"
+
 struct opal_thread_t {
     opal_object_t super;
     opal_thread_fn_t t_run;
@@ -37,4 +40,13 @@ struct opal_thread_t {
     pthread_t t_handle;
 };
 
+/* Pthreads do not need to yield when idle */
+#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false
+
+static inline
+void opal_thread_yield(void)
+{
+    opal_threads_pthreads_yield_fn();
+}
+
 #endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_THREADS_H */
diff --git a/opal/mca/threads/pthreads/threads_pthreads_yield.c b/opal/mca/threads/pthreads/threads_pthreads_yield.c
new file mode 100644
index 00000000000..d68126fea4a
--- /dev/null
+++ b/opal/mca/threads/pthreads/threads_pthreads_yield.c
@@ -0,0 +1,111 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2020 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include <time.h>
+#ifdef HAVE_SCHED_H
+#include <sched.h>
+#endif
+
+#include "opal/constants.h"
+#include "opal/mca/threads/thread.h"
+#include "opal/mca/threads/pthreads/threads_pthreads.h"
+
+/* Nanoseconds per second, used to convert between the MCA value and a timespec */
+#define OPAL_PTHREADS_YIELD_NSEC_PER_SEC ((uint64_t) 1000000000)
+
+static void opal_thread_pthreads_yield_sched_yield(void);
+static void opal_thread_pthreads_yield_nanosleep(void);
+
+typedef enum {
+    OPAL_PTHREADS_YIELD_SCHED_YIELD = 0,
+    OPAL_PTHREADS_YIELD_NANOSLEEP
+} opal_threads_pthreads_yield_strategy_t;
+
+static mca_base_var_enum_value_t yield_strategy_values[] = {
+    {OPAL_PTHREADS_YIELD_SCHED_YIELD, "sched_yield"},
+    {OPAL_PTHREADS_YIELD_NANOSLEEP, "nanosleep"},
+    {0, NULL}};
+
+/* Number of nanoseconds to nanosleep, if enabled */
+static uint64_t yield_nsleep_nanosecs;
+/* The time to nanosleep, if enabled */
+static struct timespec yield_nsleep_time = {.tv_sec = 0, .tv_nsec = 1};
+/* Selected strategy; an int because it is registered as MCA_BASE_VAR_TYPE_INT */
+static int yield_strategy = OPAL_PTHREADS_YIELD_SCHED_YIELD;
+
+/* Yield function in use; sched_yield-based unless the nanosleep strategy is selected */
+opal_threads_pthreads_yield_fn_t *opal_threads_pthreads_yield_fn = &opal_thread_pthreads_yield_sched_yield;
+
+/**
+ * Register the pthreads yield MCA variables and apply their values.
+ *
+ * Registers "yield_strategy" and "nanosleep_time" for the given component,
+ * selects the yield function matching the strategy and converts the sleep
+ * time into the timespec handed to nanosleep().
+ *
+ * @param component  component the variables are registered for
+ * @return OPAL_SUCCESS, or the error returned by mca_base_var_enum_create()
+ */
+int opal_threads_pthreads_yield_init(const mca_base_component_t *component)
+{
+    mca_base_var_enum_t *yield_strategy_enumerator = NULL;
+    int rc;
+
+    rc = mca_base_var_enum_create("pthread_yield_strategies", yield_strategy_values,
+                                  &yield_strategy_enumerator);
+    if (OPAL_SUCCESS != rc) {
+        /* without an enumerator there is nothing valid to register or release */
+        return rc;
+    }
+
+    (void) mca_base_component_var_register(component, "yield_strategy",
+            "Pthread yield strategy to use",
+            MCA_BASE_VAR_TYPE_INT, yield_strategy_enumerator, 0, 0, OPAL_INFO_LVL_3,
+            MCA_BASE_VAR_SCOPE_LOCAL, &yield_strategy);
+    switch (yield_strategy) {
+    case OPAL_PTHREADS_YIELD_NANOSLEEP:
+        opal_threads_pthreads_yield_fn = &opal_thread_pthreads_yield_nanosleep;
+        break;
+    default:
+        /* use initial value */
+        break;
+    }
+
+    OBJ_RELEASE(yield_strategy_enumerator);
+
+    /* integer arithmetic only: going through double (1E9) loses precision for large values */
+    yield_nsleep_nanosecs = ((uint64_t) yield_nsleep_time.tv_sec * OPAL_PTHREADS_YIELD_NSEC_PER_SEC)
+                            + (uint64_t) yield_nsleep_time.tv_nsec;
+    (void) mca_base_component_var_register(component, "nanosleep_time",
+            "Number of nanoseconds to sleep when using nanosleep as the pthread yield strategy",
+            MCA_BASE_VAR_TYPE_UINT64_T, NULL, 0, 0, OPAL_INFO_LVL_3,
+            MCA_BASE_VAR_SCOPE_LOCAL, &yield_nsleep_nanosecs);
+    yield_nsleep_time.tv_sec = (time_t) (yield_nsleep_nanosecs / OPAL_PTHREADS_YIELD_NSEC_PER_SEC);
+    yield_nsleep_time.tv_nsec = (long) (yield_nsleep_nanosecs % OPAL_PTHREADS_YIELD_NSEC_PER_SEC);
+
+    return OPAL_SUCCESS;
+}
+
+/* Yield by calling sched_yield(); does nothing when HAVE_SCHED_H is not defined */
+static void opal_thread_pthreads_yield_sched_yield(void)
+{
+#ifdef HAVE_SCHED_H
+    sched_yield();
+#endif
+}
+
+/* Yield by sleeping for the configured nanosleep_time */
+static void opal_thread_pthreads_yield_nanosleep(void)
+{
+    nanosleep(&yield_nsleep_time, NULL);
+}
diff --git a/opal/mca/threads/qthreads/threads_qthreads_threads.h b/opal/mca/threads/qthreads/threads_qthreads_threads.h
index e00553078f5..c5eebe10fc8 100644
--- a/opal/mca/threads/qthreads/threads_qthreads_threads.h
+++ b/opal/mca/threads/qthreads/threads_qthreads_threads.h
@@ -6,7 +6,7 @@
  * Copyright (c) 2004-2005 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
  *                         University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
@@ -38,4 +38,13 @@ struct opal_thread_t {
     aligned_t *t_thread_ret_ptr;
 };
 
+/* Qthreads are scheduled cooperatively, so yielding when idle defaults to true */
+#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
+
+static inline
+void opal_thread_yield(void)
+{
+    qthread_yield();
+}
+
 #endif /* OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H */
diff --git a/opal/mca/threads/threads.h b/opal/mca/threads/threads.h
index 21a25db6ac8..7d51a9c6a12 100644
--- a/opal/mca/threads/threads.h
+++ b/opal/mca/threads/threads.h
@@ -133,6 +133,8 @@ OPAL_DECLSPEC opal_thread_t *opal_thread_get_self(void);
 OPAL_DECLSPEC void opal_thread_kill(opal_thread_t *, int sig);
 OPAL_DECLSPEC void opal_thread_set_main(void);
 
+static inline void opal_thread_yield(void); /* defined in each threads module header (argobots, pthreads, qthreads) */
+
 END_C_DECLS
 
 #endif /* OPAL_MCA_THREADS_THREADS_H */
diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c
index 6f1032ee6a6..0ba1e1ff007 100644
--- a/opal/runtime/opal_progress.c
+++ b/opal/runtime/opal_progress.c
@@ -6,7 +6,7 @@
  * Copyright (c) 2004-2005 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
  *                         University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
@@ -27,10 +27,6 @@
 
 #include "opal_config.h"
 
-#ifdef HAVE_SCHED_H
-#include <sched.h>
-#endif
-
 #include "opal/runtime/opal_progress.h"
 #include "opal/util/event.h"
 #include "opal/mca/base/mca_base_var.h"
@@ -39,6 +35,7 @@
 #include "opal/util/output.h"
 #include "opal/runtime/opal_params.h"
 #include "opal/runtime/opal.h"
+#include "opal/mca/threads/threads.h"
 
 #define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
 #define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)
@@ -68,7 +65,7 @@ static volatile opal_progress_callback_t *callbacks_lp = NULL;
 static size_t callbacks_lp_len = 0;
 static size_t callbacks_lp_size = 0;
 
-/* do we want to call sched_yield() if nothing happened */
+/* should opal_progress() call opal_thread_yield() when no events were progressed */
 bool opal_progress_yield_when_idle = false;
 
 #if OPAL_PROGRESS_USE_TIMERS
@@ -212,7 +209,7 @@ static int opal_progress_events(void)
  * be called. We don't propogate errors from the progress functions,
  * so no action is taken if they return failures. The functions are
  * expected to return the number of events progressed, to determine
- * whether or not we should call sched_yield() during MPI progress.
+ * whether or not we should yield the CPU during MPI progress.
  * This is only losely tracked, as an error return can cause the number
  * of progressed events to appear lower than it actually is. We don't
  * care, as the cost of that happening is far outweighed by the cost
@@ -246,16 +243,16 @@ opal_progress(void)
         opal_progress_events();
     }
 
-#if OPAL_HAVE_SCHED_YIELD
     if (opal_progress_yield_when_idle && events <= 0) {
         /* If there is nothing to do - yield the processor - otherwise
          * we could consume the processor for the entire time slice. If
          * the processor is oversubscribed - this will result in a best-case
          * latency equivalent to the time-slice.
+         * Cooperatively scheduled thread modules (Argobots, Qthreads) default
+         * to yielding here so that other communicating threads get to run.
          */
-        sched_yield();
+        opal_thread_yield();
     }
-#endif /* defined(HAVE_SCHED_YIELD) */
 }
 
 