-
-
Notifications
You must be signed in to change notification settings - Fork 32k
bpo-30703: Improve signal delivery #2415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0a68ec4
161cd38
b577db5
e0ce64b
e9cec43
ea34f8c
542465c
a2851eb
a71a8fe
c37e5d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import os | ||
import random | ||
import signal | ||
import socket | ||
import subprocess | ||
|
@@ -941,6 +942,101 @@ def handler(signum, frame): | |
(exitcode, stdout)) | ||
|
||
|
||
class StressTest(unittest.TestCase): | ||
""" | ||
Stress signal delivery, especially when a signal arrives in | ||
the middle of recomputing the signal state or executing | ||
previously tripped signal handlers. | ||
""" | ||
|
||
@unittest.skipUnless(hasattr(signal, "setitimer"), | ||
"test needs setitimer()") | ||
def test_stress_delivery_dependent(self): | ||
""" | ||
This test uses dependent signal handlers. | ||
""" | ||
N = 10000 | ||
sigs = [] | ||
|
||
def first_handler(signum, frame): | ||
# 1e-6 is the minimum non-zero value for `setitimer()`. | ||
# Choose a random delay so as to improve chances of | ||
# triggering a race condition. Ideally the signal is received | ||
# when inside critical signal-handling routines such as | ||
# Py_MakePendingCalls(). | ||
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5) | ||
|
||
def second_handler(signum=None, frame=None): | ||
sigs.append(signum) | ||
|
||
def setsig(signum, handler): | ||
old_handler = signal.signal(signum, handler) | ||
self.addCleanup(signal.signal, signum, old_handler) | ||
|
||
# Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both | ||
# ascending and descending sequences (SIGUSR1 then SIGALRM, | ||
# SIGPROF then SIGALRM), we maximize chances of hitting a bug. | ||
setsig(signal.SIGPROF, first_handler) | ||
setsig(signal.SIGUSR1, first_handler) | ||
setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL | ||
|
||
expected_sigs = 0 | ||
deadline = time.time() + 15.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 10 000 and 15 seconds? Oh, this test seems expensive :-/ Maybe tag it using the "cpu" resource? Or reduce these parameters? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The test takes 0.5 second on my machine. The large 15 seconds timeout was just there to make you (and the buildbots) happy :-) |
||
|
||
while expected_sigs < N: | ||
os.kill(os.getpid(), signal.SIGPROF) | ||
expected_sigs += 1 | ||
# Wait for handlers to run to avoid signal coalescing | ||
while len(sigs) < expected_sigs and time.time() < deadline: | ||
time.sleep(1e-5) | ||
|
||
os.kill(os.getpid(), signal.SIGUSR1) | ||
expected_sigs += 1 | ||
while len(sigs) < expected_sigs and time.time() < deadline: | ||
time.sleep(1e-5) | ||
|
||
# All ITIMER_REAL signals should have been delivered to the | ||
# Python handler | ||
self.assertEqual(len(sigs), N, "Some signals were lost") | ||
|
||
@unittest.skipUnless(hasattr(signal, "setitimer"), | ||
"test needs setitimer()") | ||
def test_stress_delivery_simultaneous(self): | ||
""" | ||
This test uses simultaneous signal handlers. | ||
""" | ||
N = 10000 | ||
sigs = [] | ||
|
||
def handler(signum, frame): | ||
sigs.append(signum) | ||
|
||
def setsig(signum, handler): | ||
old_handler = signal.signal(signum, handler) | ||
self.addCleanup(signal.signal, signum, old_handler) | ||
|
||
setsig(signal.SIGUSR1, handler) | ||
setsig(signal.SIGALRM, handler) # for ITIMER_REAL | ||
|
||
expected_sigs = 0 | ||
deadline = time.time() + 15.0 | ||
|
||
while expected_sigs < N: | ||
# Hopefully the SIGALRM will be received somewhere during | ||
# initial processing of SIGUSR1. | ||
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5) | ||
os.kill(os.getpid(), signal.SIGUSR1) | ||
|
||
expected_sigs += 2 | ||
# Wait for handlers to run to avoid signal coalescing | ||
while len(sigs) < expected_sigs and time.time() < deadline: | ||
time.sleep(1e-5) | ||
|
||
# All ITIMER_REAL signals should have been delivered to the | ||
# Python handler | ||
self.assertEqual(len(sigs), N, "Some signals were lost") | ||
|
||
|
||
def tearDownModule(): | ||
support.reap_children() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
Improve signal delivery. | ||
|
||
Avoid using Py_AddPendingCall from signal handler, to avoid calling signal- | ||
unsafe functions. The tests I'm adding here fail without the rest of the | ||
patch, on Linux and OS X. This means our signal delivery logic had defects | ||
(some signals could be lost). |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -188,12 +188,6 @@ The default handler for SIGINT installed by Python.\n\ | |
It raises KeyboardInterrupt."); | ||
|
||
|
||
static int | ||
checksignals_witharg(void * unused) | ||
{ | ||
return PyErr_CheckSignals(); | ||
} | ||
|
||
static int | ||
report_wakeup_write_error(void *data) | ||
{ | ||
|
@@ -244,17 +238,15 @@ trip_signal(int sig_num) | |
|
||
Handlers[sig_num].tripped = 1; | ||
|
||
if (!is_tripped) { | ||
/* Set is_tripped after setting .tripped, as it gets | ||
cleared in PyErr_CheckSignals() before .tripped. */ | ||
is_tripped = 1; | ||
Py_AddPendingCall(checksignals_witharg, NULL); | ||
} | ||
/* Set is_tripped after setting .tripped, as it gets | ||
cleared in PyErr_CheckSignals() before .tripped. */ | ||
is_tripped = 1; | ||
_PyEval_SignalReceived(); | ||
|
||
/* And then write to the wakeup fd *after* setting all the globals and | ||
doing the Py_AddPendingCall. We used to write to the wakeup fd and then | ||
set the flag, but this allowed the following sequence of events | ||
(especially on windows, where trip_signal runs in a new thread): | ||
doing the _PyEval_SignalReceived. We used to write to the wakeup fd | ||
and then set the flag, but this allowed the following sequence of events | ||
(especially on windows, where trip_signal may run in a new thread): | ||
|
||
- main thread blocks on select([wakeup_fd], ...) | ||
- signal arrives | ||
|
@@ -289,6 +281,8 @@ trip_signal(int sig_num) | |
wakeup.send_err_set = 1; | ||
wakeup.send_errno = errno; | ||
wakeup.send_win_error = GetLastError(); | ||
/* Py_AddPendingCall() isn't signal-safe, but we | ||
still use it for this exceptional case. */ | ||
Py_AddPendingCall(report_wakeup_send_error, NULL); | ||
} | ||
} | ||
|
@@ -302,6 +296,8 @@ trip_signal(int sig_num) | |
rc = _Py_write_noraise(fd, &byte, 1); | ||
|
||
if (rc < 0) { | ||
/* Py_AddPendingCall() isn't signal-safe, but we | ||
still use it for this exceptional case. */ | ||
Py_AddPendingCall(report_wakeup_write_error, | ||
(void *)(intptr_t)errno); | ||
} | ||
|
@@ -1556,8 +1552,10 @@ PyErr_CheckSignals(void) | |
arglist); | ||
Py_DECREF(arglist); | ||
} | ||
if (!result) | ||
if (!result) { | ||
is_tripped = 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IHMO it would be more explicit to have a "error:" label and use goto here, and if arglist creation failed. But well, it's just a coding style suggestion, it's up to you ;-) |
||
return -1; | ||
} | ||
|
||
Py_DECREF(result); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -140,6 +140,15 @@ static long dxp[256]; | |
do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) | ||
|
||
|
||
/* This single variable consolidates all requests to break out of the fast path | ||
in the eval loop. */ | ||
static _Py_atomic_int eval_breaker = {0}; | ||
/* Request for running pending calls. */ | ||
static _Py_atomic_int pendingcalls_to_do = {0}; | ||
/* Request for looking at the `async_exc` field of the current thread state. | ||
Guarded by the GIL. */ | ||
static int pending_async_exc = 0; | ||
|
||
#ifdef WITH_THREAD | ||
|
||
#ifdef HAVE_ERRNO_H | ||
|
@@ -149,16 +158,8 @@ static long dxp[256]; | |
|
||
static PyThread_type_lock pending_lock = 0; /* for pending calls */ | ||
static unsigned long main_thread = 0; | ||
/* This single variable consolidates all requests to break out of the fast path | ||
in the eval loop. */ | ||
static _Py_atomic_int eval_breaker = {0}; | ||
/* Request for dropping the GIL */ | ||
static _Py_atomic_int gil_drop_request = {0}; | ||
/* Request for running pending calls. */ | ||
static _Py_atomic_int pendingcalls_to_do = {0}; | ||
/* Request for looking at the `async_exc` field of the current thread state. | ||
Guarded by the GIL. */ | ||
static int pending_async_exc = 0; | ||
|
||
#include "ceval_gil.h" | ||
|
||
|
@@ -253,9 +254,6 @@ PyEval_ReInitThreads(void) | |
_PyThreadState_DeleteExcept(current_tstate); | ||
} | ||
|
||
#else | ||
static _Py_atomic_int eval_breaker = {0}; | ||
static int pending_async_exc = 0; | ||
#endif /* WITH_THREAD */ | ||
|
||
/* This function is used to signal that async exceptions are waiting to be | ||
|
@@ -330,6 +328,15 @@ PyEval_RestoreThread(PyThreadState *tstate) | |
#endif | ||
*/ | ||
|
||
void | ||
_PyEval_SignalReceived(void) | ||
{ | ||
/* bpo-30703: Function called when the C signal handler of Python gets a | ||
signal. We cannot queue a callback using Py_AddPendingCall() since | ||
that function is not async-signal-safe. */ | ||
SIGNAL_PENDING_CALLS(); | ||
} | ||
|
||
#ifdef WITH_THREAD | ||
|
||
/* The WITH_THREAD implementation is thread-safe. It allows | ||
|
@@ -394,6 +401,8 @@ Py_MakePendingCalls(void) | |
int i; | ||
int r = 0; | ||
|
||
assert(PyGILState_Check()); | ||
|
||
if (!pending_lock) { | ||
/* initial allocation of the lock */ | ||
pending_lock = PyThread_allocate_lock(); | ||
|
@@ -408,6 +417,16 @@ Py_MakePendingCalls(void) | |
if (busy) | ||
return 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you mind to add assert(PyGILState_Check()); at the function entry to make it more explicit that this function requires to hold the GIL? (Right now, it's not obvious for me if the pending_lock initialization is safe.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, will do. |
||
busy = 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand well the purpose of busy... is it a flag to catch reentrant call? If no, maybe it should use volalite? If yes, please add a comment on the variable declaration to document this flag. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "don't perform recursive pending calls" as written above :-) |
||
/* unsignal before starting to call callbacks, so that any callback | ||
added in-between re-signals */ | ||
UNSIGNAL_PENDING_CALLS(); | ||
|
||
/* Python signal handler doesn't really queue a callback: it only signals | ||
that a signal was received, see _PyEval_SignalReceived(). */ | ||
if (PyErr_CheckSignals() < 0) { | ||
goto error; | ||
} | ||
|
||
/* perform a bounded number of calls, in case of recursion */ | ||
for (i=0; i<NPENDINGCALLS; i++) { | ||
int j; | ||
|
@@ -424,20 +443,23 @@ Py_MakePendingCalls(void) | |
arg = pendingcalls[j].arg; | ||
pendingfirst = (j + 1) % NPENDINGCALLS; | ||
} | ||
if (pendingfirst != pendinglast) | ||
SIGNAL_PENDING_CALLS(); | ||
else | ||
UNSIGNAL_PENDING_CALLS(); | ||
PyThread_release_lock(pending_lock); | ||
/* having released the lock, perform the callback */ | ||
if (func == NULL) | ||
break; | ||
r = func(arg); | ||
if (r) | ||
break; | ||
if (r) { | ||
goto error; | ||
} | ||
} | ||
|
||
busy = 0; | ||
return r; | ||
|
||
error: | ||
busy = 0; | ||
SIGNAL_PENDING_CALLS(); /* We're not done yet */ | ||
return -1; | ||
} | ||
|
||
#else /* if ! defined WITH_THREAD */ | ||
|
@@ -472,7 +494,6 @@ static struct { | |
} pendingcalls[NPENDINGCALLS]; | ||
static volatile int pendingfirst = 0; | ||
static volatile int pendinglast = 0; | ||
static _Py_atomic_int pendingcalls_to_do = {0}; | ||
|
||
int | ||
Py_AddPendingCall(int (*func)(void *), void *arg) | ||
|
@@ -506,7 +527,16 @@ Py_MakePendingCalls(void) | |
if (busy) | ||
return 0; | ||
busy = 1; | ||
|
||
/* unsignal before starting to call callbacks, so that any callback | ||
added in-between re-signals */ | ||
UNSIGNAL_PENDING_CALLS(); | ||
/* Python signal handler doesn't really queue a callback: it only signals | ||
that a signal was received, see _PyEval_SignalReceived(). */ | ||
if (PyErr_CheckSignals() < 0) { | ||
goto error; | ||
} | ||
|
||
for (;;) { | ||
int i; | ||
int (*func)(void *); | ||
|
@@ -518,13 +548,16 @@ Py_MakePendingCalls(void) | |
arg = pendingcalls[i].arg; | ||
pendingfirst = (i + 1) % NPENDINGCALLS; | ||
if (func(arg) < 0) { | ||
busy = 0; | ||
SIGNAL_PENDING_CALLS(); /* We're not done yet */ | ||
return -1; | ||
goto error: | ||
} | ||
} | ||
busy = 0; | ||
return 0; | ||
|
||
error: | ||
busy = 0; | ||
SIGNAL_PENDING_CALLS(); /* We're not done yet */ | ||
return -1; | ||
} | ||
|
||
#endif /* WITH_THREAD */ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh... I dislike such tests, I expect that I will have to fix corner cases in the test itself on BSD, Solaris, etc. I understand why you wrote the test, but I also expect that it will be a pain to "maintain" it :-(