Skip to content

Commit 45bfe03

Browse files
committed
Move yield capability to opal thread component
This adds two new MCA parameters for the pthreads component: threads_pthreads_yield_strategy to choose the strategy (valid values: sched_yield or nanosleep) and threads_pthreads_nanosleep_time (the time passed to nanosleep). A thread component may also signal that yield-when-idle should be the default (used for Argobots and Qthreads). Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 282be20 commit 45bfe03

10 files changed

+179
-17
lines changed

ompi/runtime/ompi_mpi_params.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include "opal/util/show_help.h"
4444
#include "opal/runtime/opal.h"
4545
#include "opal/runtime/opal_params.h"
46+
#include "opal/mca/threads/threads.h"
47+
4648
/*
4749
* Global variables
4850
*
@@ -62,7 +64,8 @@ bool ompi_mpi_keep_fqdn_hostnames = false;
6264
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
6365
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
6466

65-
bool ompi_mpi_yield_when_idle = false;
67+
/* if the threads module requires yielding we use that as default but allow it to be overridden */
68+
bool ompi_mpi_yield_when_idle = OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT;
6669
int ompi_mpi_event_tick_rate = -1;
6770
char *ompi_mpi_show_mca_params_string = NULL;
6871
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
@@ -118,7 +121,9 @@ int ompi_mpi_register_params(void)
118121
OPAL_INFO_LVL_9,
119122
MCA_BASE_VAR_SCOPE_READONLY,
120123
&ompi_mpi_oversubscribe);
121-
ompi_mpi_yield_when_idle = ompi_mpi_oversubscribe;
124+
125+
/* yield if the node is oversubscribed and allow users to override */
126+
ompi_mpi_yield_when_idle |= ompi_mpi_oversubscribe;
122127
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
123128
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
124129
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,

opal/mca/threads/argobots/threads_argobots_threads.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2005 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -37,4 +37,14 @@ struct opal_thread_t {
3737
void *t_ret;
3838
};
3939

40+
41+
/* Argobots are cooperatively scheduled so yield when idle */
42+
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
43+
44+
/* Argobots implementation of opal_thread_yield(): forwards to ABT_thread_yield(). */
static inline
45+
void opal_thread_yield(void)
46+
{
47+
ABT_thread_yield();
48+
}
49+
4050
#endif /* OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H */

opal/mca/threads/pthreads/Makefile.am

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,6 @@ libmca_threads_pthreads_la_SOURCES = \
2929
threads_pthreads_threads.h \
3030
threads_pthreads_tsd.h \
3131
threads_pthreads_wait_sync.c \
32-
threads_pthreads_wait_sync.h
32+
threads_pthreads_wait_sync.h \
33+
threads_pthreads_yield.c \
34+
threads_pthreads.h
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
4+
* University of Stuttgart. All rights reserved.
5+
*
6+
* $COPYRIGHT$
7+
*
8+
* Additional copyrights may follow
9+
*
10+
* $HEADER$
11+
*/
12+
13+
14+
#ifndef OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
15+
#define OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
16+
17+
#include "opal_config.h"
18+
#include <stdint.h>
19+
#include <time.h>
20+
21+
/* Signature shared by all pthreads yield implementations: no arguments, no result. */
typedef void (opal_threads_pthreads_yield_fn_t)(void);
22+
23+
/* Registers the yield-related MCA variables on the given component and returns an int status code. */
OPAL_DECLSPEC int opal_threads_pthreads_yield_init(const mca_base_component_t *component);
24+
25+
/* Pointer to the currently selected yield implementation; defined in threads_pthreads_yield.c. */
OPAL_DECLSPEC extern opal_threads_pthreads_yield_fn_t *opal_threads_pthreads_yield_fn;
26+
27+
#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H */

opal/mca/threads/pthreads/threads_pthreads_component.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2014 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -26,8 +26,11 @@
2626
#include "opal/mca/threads/thread.h"
2727
#include "opal/mca/threads/threads.h"
2828
#include "opal/constants.h"
29+
#include "opal/mca/threads/pthreads/threads_pthreads.h"
30+
2931

3032
static int opal_threads_pthreads_open(void);
33+
static int opal_threads_pthreads_register(void);
3134

3235
const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
3336
/* First, the mca_component_t struct containing meta information
@@ -41,13 +44,19 @@ const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
4144
OPAL_RELEASE_VERSION),
4245

4346
.mca_open_component = opal_threads_pthreads_open,
47+
.mca_register_component_params = opal_threads_pthreads_register
4448
},
4549
.threadsc_data = {
4650
/* The component is checkpoint ready */
4751
MCA_BASE_METADATA_PARAM_CHECKPOINT
4852
},
4953
};
5054

55+
/*
 * Parameter-registration hook of the pthreads component (installed as
 * .mca_register_component_params). Forwards to
 * opal_threads_pthreads_yield_init() and returns its result unchanged.
 */
int opal_threads_pthreads_register(void)
56+
{
57+
return opal_threads_pthreads_yield_init(&mca_threads_pthreads_component.threadsc_version);
58+
}
59+
5160
int opal_threads_pthreads_open(void)
5261
{
5362
return OPAL_SUCCESS;

opal/mca/threads/pthreads/threads_pthreads_threads.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2006 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -30,11 +30,23 @@
3030
#include <pthread.h>
3131
#include <signal.h>
3232

33+
#include "opal/mca/threads/threads.h"
34+
#include "opal/mca/threads/pthreads/threads_pthreads.h"
35+
3336
struct opal_thread_t {
3437
opal_object_t super;
3538
opal_thread_fn_t t_run;
3639
void *t_arg;
3740
pthread_t t_handle;
3841
};
3942

43+
/* Pthreads do not need to yield when idle */
44+
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false
45+
46+
/*
 * Pthreads implementation of opal_thread_yield(): calls through the
 * opal_threads_pthreads_yield_fn function pointer.
 */
static inline
47+
void opal_thread_yield(void)
48+
{
49+
opal_threads_pthreads_yield_fn();
50+
}
51+
4052
#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_THREADS_H */
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
4+
* University of Stuttgart. All rights reserved.
5+
*
6+
* $COPYRIGHT$
7+
*
8+
* Additional copyrights may follow
9+
*
10+
* $HEADER$
11+
*/
12+
13+
#include "opal_config.h"
14+
#include <time.h>
15+
#ifdef HAVE_SCHED_H
16+
#include <sched.h>
17+
#endif
18+
19+
#include "opal/constants.h"
20+
#include "opal/mca/threads/thread.h"
21+
#include "opal/mca/threads/pthreads/threads_pthreads.h"
22+
23+
static void opal_thread_pthreads_yield_sched_yield(void);
24+
static void opal_thread_pthreads_yield_nanosleep(void);
25+
26+
typedef enum {
27+
OPAL_PTHREADS_YIELD_SCHED_YIELD = 0,
28+
OPAL_PTHREADS_YIELD_NANOSLEEP
29+
} opal_threads_pthreads_yield_strategy_t;
30+
31+
static mca_base_var_enum_value_t yield_strategy_values[] = {
32+
{OPAL_PTHREADS_YIELD_SCHED_YIELD, "sched_yield"},
33+
{OPAL_PTHREADS_YIELD_NANOSLEEP, "nanosleep"},
34+
{0, NULL}};
35+
36+
37+
38+
/* Number of nanoseconds to nanosleep, if enabled */
39+
static uint64_t yield_nsleep_nanosecs;
40+
/* The time to nanosleep, if enabled */
41+
static struct timespec yield_nsleep_time = {.tv_sec = 0, .tv_nsec = 1};
42+
static opal_threads_pthreads_yield_strategy_t yield_strategy = OPAL_PTHREADS_YIELD_SCHED_YIELD;
43+
44+
opal_threads_pthreads_yield_fn_t *opal_threads_pthreads_yield_fn = &opal_thread_pthreads_yield_sched_yield;
45+
46+
int opal_threads_pthreads_yield_init(const mca_base_component_t *component)
47+
{
48+
mca_base_var_enum_t *yield_strategy_enumerator;
49+
mca_base_var_enum_create("pthread_yield_strategies", yield_strategy_values, &yield_strategy_enumerator);
50+
51+
(void) mca_base_component_var_register(component, "yield_strategy",
52+
"Pthread yield strategy to use",
53+
MCA_BASE_VAR_TYPE_INT, yield_strategy_enumerator, 0, 0, OPAL_INFO_LVL_3,
54+
MCA_BASE_VAR_SCOPE_LOCAL, &yield_strategy);
55+
switch(yield_strategy) {
56+
case OPAL_PTHREADS_YIELD_NANOSLEEP:
57+
opal_threads_pthreads_yield_fn = &opal_thread_pthreads_yield_nanosleep;
58+
break;
59+
default:
60+
/* use initial value */
61+
break;
62+
}
63+
64+
OBJ_RELEASE(yield_strategy_enumerator);
65+
66+
yield_nsleep_nanosecs = (yield_nsleep_time.tv_sec * 1E9) + yield_nsleep_time.tv_nsec;
67+
(void) mca_base_component_var_register(component, "nanosleep_time",
68+
"Number of nanoseconds to sleep when using nanosleep as the pthread yield strategy",
69+
MCA_BASE_VAR_TYPE_UINT64_T, NULL, 0, 0, OPAL_INFO_LVL_3,
70+
MCA_BASE_VAR_SCOPE_LOCAL, &yield_nsleep_nanosecs);
71+
yield_nsleep_time.tv_sec = yield_nsleep_nanosecs / 1E9;
72+
yield_nsleep_time.tv_nsec = yield_nsleep_nanosecs - (uint64_t)(yield_nsleep_time.tv_sec * 1E9);
73+
74+
return OPAL_SUCCESS;
75+
76+
}
77+
78+
/*
 * "sched_yield" strategy: calls sched_yield() when HAVE_SCHED_H is defined;
 * otherwise the function body is empty and the call is a no-op.
 */
void opal_thread_pthreads_yield_sched_yield(void)
79+
{
80+
#ifdef HAVE_SCHED_H
81+
sched_yield();
82+
#endif
83+
}
84+
85+
/*
 * "nanosleep" strategy: calls nanosleep() with yield_nsleep_time. The
 * remaining-time argument is NULL and the return value is ignored.
 */
void opal_thread_pthreads_yield_nanosleep(void)
86+
{
87+
nanosleep(&yield_nsleep_time, NULL);
88+
}
89+

opal/mca/threads/qthreads/threads_qthreads_threads.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2005 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -38,4 +38,13 @@ struct opal_thread_t {
3838
aligned_t *t_thread_ret_ptr;
3939
};
4040

41+
/* Qthreads are cooperatively scheduled so yield when idle */
42+
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
43+
44+
/* Qthreads implementation of opal_thread_yield(): forwards to qthread_yield(). */
static inline
45+
void opal_thread_yield(void)
46+
{
47+
qthread_yield();
48+
}
49+
4150
#endif /* OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H */

opal/mca/threads/threads.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ OPAL_DECLSPEC opal_thread_t *opal_thread_get_self(void);
133133
OPAL_DECLSPEC void opal_thread_kill(opal_thread_t *, int sig);
134134
OPAL_DECLSPEC void opal_thread_set_main(void);
135135

136+
static inline void opal_thread_yield(void);
137+
136138
END_C_DECLS
137139

138140
#endif /* OPAL_MCA_THREADS_THREADS_H */

opal/runtime/opal_progress.c

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2005 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -27,10 +27,6 @@
2727

2828
#include "opal_config.h"
2929

30-
#ifdef HAVE_SCHED_H
31-
#include <sched.h>
32-
#endif
33-
3430
#include "opal/runtime/opal_progress.h"
3531
#include "opal/util/event.h"
3632
#include "opal/mca/base/mca_base_var.h"
@@ -39,6 +35,7 @@
3935
#include "opal/util/output.h"
4036
#include "opal/runtime/opal_params.h"
4137
#include "opal/runtime/opal.h"
38+
#include "opal/mca/threads/threads.h"
4239

4340
#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
4441
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)
@@ -68,7 +65,7 @@ static volatile opal_progress_callback_t *callbacks_lp = NULL;
6865
static size_t callbacks_lp_len = 0;
6966
static size_t callbacks_lp_size = 0;
7067

71-
/* do we want to call sched_yield() if nothing happened */
68+
/* do we want to yield() if nothing happened */
7269
bool opal_progress_yield_when_idle = false;
7370

7471
#if OPAL_PROGRESS_USE_TIMERS
@@ -212,7 +209,7 @@ static int opal_progress_events(void)
212209
* be called. We don't propogate errors from the progress functions,
213210
* so no action is taken if they return failures. The functions are
214211
* expected to return the number of events progressed, to determine
215-
* whether or not we should call sched_yield() during MPI progress.
212+
* whether or not we should yield the CPU during MPI progress.
216213
* This is only losely tracked, as an error return can cause the number
217214
* of progressed events to appear lower than it actually is. We don't
218215
* care, as the cost of that happening is far outweighed by the cost
@@ -246,16 +243,16 @@ opal_progress(void)
246243
opal_progress_events();
247244
}
248245

249-
#if OPAL_HAVE_SCHED_YIELD
250246
if (opal_progress_yield_when_idle && events <= 0) {
251247
/* If there is nothing to do - yield the processor - otherwise
252248
* we could consume the processor for the entire time slice. If
253249
* the processor is oversubscribed - this will result in a best-case
254250
* latency equivalent to the time-slice.
251+
* With some thread implementations, yielding might be required
252+
* to ensure correct scheduling of all communicating threads.
255253
*/
256-
sched_yield();
254+
opal_thread_yield();
257255
}
258-
#endif /* defined(HAVE_SCHED_YIELD) */
259256
}
260257

261258

0 commit comments

Comments
 (0)