Skip to content

bpo-30703: Improve signal delivery #2415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
#endif

PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
PyAPI_FUNC(void) _PyEval_SignalReceived(void);
PyAPI_FUNC(int) Py_MakePendingCalls(void);

/* Protection against deeply nested recursive calls
Expand Down
96 changes: 96 additions & 0 deletions Lib/test/test_signal.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import random
import signal
import socket
import subprocess
Expand Down Expand Up @@ -941,6 +942,101 @@ def handler(signum, frame):
(exitcode, stdout))


class StressTest(unittest.TestCase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh... I dislike such tests, I expect that I will have to fix corner cases in the test itself on BSD, Solaris, etc. I understand why you wrote the test, but I also expect that it will be a pain to "maintain" it :-(

"""
Stress signal delivery, especially when a signal arrives in
the middle of recomputing the signal state or executing
previously tripped signal handlers.
"""

@unittest.skipUnless(hasattr(signal, "setitimer"),
"test needs setitimer()")
def test_stress_delivery_dependent(self):
"""
This test uses dependent signal handlers.
"""
N = 10000
sigs = []

def first_handler(signum, frame):
# 1e-6 is the minimum non-zero value for `setitimer()`.
# Choose a random delay so as to improve chances of
# triggering a race condition. Ideally the signal is received
# when inside critical signal-handling routines such as
# Py_MakePendingCalls().
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)

def second_handler(signum=None, frame=None):
sigs.append(signum)

def setsig(signum, handler):
old_handler = signal.signal(signum, handler)
self.addCleanup(signal.signal, signum, old_handler)

# Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both
# ascending and descending sequences (SIGUSR1 then SIGALRM,
# SIGPROF then SIGALRM), we maximize chances of hitting a bug.
setsig(signal.SIGPROF, first_handler)
setsig(signal.SIGUSR1, first_handler)
setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL

expected_sigs = 0
deadline = time.time() + 15.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

10 000 and 15 seconds? Oh, this test seems expensive :-/ Maybe tag it using the "cpu" resource? Or reduce these parameters?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test takes 0.5 second on my machine. The large 15 seconds timeout was just there to make you (and the buildbots) happy :-)


while expected_sigs < N:
os.kill(os.getpid(), signal.SIGPROF)
expected_sigs += 1
# Wait for handlers to run to avoid signal coalescing
while len(sigs) < expected_sigs and time.time() < deadline:
time.sleep(1e-5)

os.kill(os.getpid(), signal.SIGUSR1)
expected_sigs += 1
while len(sigs) < expected_sigs and time.time() < deadline:
time.sleep(1e-5)

# All ITIMER_REAL signals should have been delivered to the
# Python handler
self.assertEqual(len(sigs), N, "Some signals were lost")

@unittest.skipUnless(hasattr(signal, "setitimer"),
"test needs setitimer()")
def test_stress_delivery_simultaneous(self):
"""
This test uses simultaneous signal handlers.
"""
N = 10000
sigs = []

def handler(signum, frame):
sigs.append(signum)

def setsig(signum, handler):
old_handler = signal.signal(signum, handler)
self.addCleanup(signal.signal, signum, old_handler)

setsig(signal.SIGUSR1, handler)
setsig(signal.SIGALRM, handler) # for ITIMER_REAL

expected_sigs = 0
deadline = time.time() + 15.0

while expected_sigs < N:
# Hopefully the SIGALRM will be received somewhere during
# initial processing of SIGUSR1.
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
os.kill(os.getpid(), signal.SIGUSR1)

expected_sigs += 2
# Wait for handlers to run to avoid signal coalescing
while len(sigs) < expected_sigs and time.time() < deadline:
time.sleep(1e-5)

# All ITIMER_REAL signals should have been delivered to the
# Python handler
self.assertEqual(len(sigs), N, "Some signals were lost")


def tearDownModule():
support.reap_children()

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Improve signal delivery.

Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
unsafe functions. The tests I'm adding here fail without the rest of the
patch, on Linux and OS X. This means our signal delivery logic had defects
(some signals could be lost).
30 changes: 14 additions & 16 deletions Modules/signalmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,6 @@ The default handler for SIGINT installed by Python.\n\
It raises KeyboardInterrupt.");


static int
checksignals_witharg(void * unused)
{
return PyErr_CheckSignals();
}

static int
report_wakeup_write_error(void *data)
{
Expand Down Expand Up @@ -244,17 +238,15 @@ trip_signal(int sig_num)

Handlers[sig_num].tripped = 1;

if (!is_tripped) {
/* Set is_tripped after setting .tripped, as it gets
cleared in PyErr_CheckSignals() before .tripped. */
is_tripped = 1;
Py_AddPendingCall(checksignals_witharg, NULL);
}
/* Set is_tripped after setting .tripped, as it gets
cleared in PyErr_CheckSignals() before .tripped. */
is_tripped = 1;
_PyEval_SignalReceived();

/* And then write to the wakeup fd *after* setting all the globals and
doing the Py_AddPendingCall. We used to write to the wakeup fd and then
set the flag, but this allowed the following sequence of events
(especially on windows, where trip_signal runs in a new thread):
doing the _PyEval_SignalReceived. We used to write to the wakeup fd
and then set the flag, but this allowed the following sequence of events
(especially on windows, where trip_signal may run in a new thread):

- main thread blocks on select([wakeup_fd], ...)
- signal arrives
Expand Down Expand Up @@ -289,6 +281,8 @@ trip_signal(int sig_num)
wakeup.send_err_set = 1;
wakeup.send_errno = errno;
wakeup.send_win_error = GetLastError();
/* Py_AddPendingCall() isn't signal-safe, but we
still use it for this exceptional case. */
Py_AddPendingCall(report_wakeup_send_error, NULL);
}
}
Expand All @@ -302,6 +296,8 @@ trip_signal(int sig_num)
rc = _Py_write_noraise(fd, &byte, 1);

if (rc < 0) {
/* Py_AddPendingCall() isn't signal-safe, but we
still use it for this exceptional case. */
Py_AddPendingCall(report_wakeup_write_error,
(void *)(intptr_t)errno);
}
Expand Down Expand Up @@ -1556,8 +1552,10 @@ PyErr_CheckSignals(void)
arglist);
Py_DECREF(arglist);
}
if (!result)
if (!result) {
is_tripped = 1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IHMO it would be more explicit to have a "error:" label and use goto here, and if arglist creation failed. But well, it's just a coding style suggestion, it's up to you ;-)

return -1;
}

Py_DECREF(result);
}
Expand Down
75 changes: 54 additions & 21 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ static long dxp[256];
do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)


/* This single variable consolidates all requests to break out of the fast path
in the eval loop. */
static _Py_atomic_int eval_breaker = {0};
/* Request for running pending calls. */
static _Py_atomic_int pendingcalls_to_do = {0};
/* Request for looking at the `async_exc` field of the current thread state.
Guarded by the GIL. */
static int pending_async_exc = 0;

#ifdef WITH_THREAD

#ifdef HAVE_ERRNO_H
Expand All @@ -149,16 +158,8 @@ static long dxp[256];

static PyThread_type_lock pending_lock = 0; /* for pending calls */
static unsigned long main_thread = 0;
/* This single variable consolidates all requests to break out of the fast path
in the eval loop. */
static _Py_atomic_int eval_breaker = {0};
/* Request for dropping the GIL */
static _Py_atomic_int gil_drop_request = {0};
/* Request for running pending calls. */
static _Py_atomic_int pendingcalls_to_do = {0};
/* Request for looking at the `async_exc` field of the current thread state.
Guarded by the GIL. */
static int pending_async_exc = 0;

#include "ceval_gil.h"

Expand Down Expand Up @@ -253,9 +254,6 @@ PyEval_ReInitThreads(void)
_PyThreadState_DeleteExcept(current_tstate);
}

#else
static _Py_atomic_int eval_breaker = {0};
static int pending_async_exc = 0;
#endif /* WITH_THREAD */

/* This function is used to signal that async exceptions are waiting to be
Expand Down Expand Up @@ -330,6 +328,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
#endif
*/

void
_PyEval_SignalReceived(void)
{
/* bpo-30703: Function called when the C signal handler of Python gets a
signal. We cannot queue a callback using Py_AddPendingCall() since
that function is not async-signal-safe. */
SIGNAL_PENDING_CALLS();
}

#ifdef WITH_THREAD

/* The WITH_THREAD implementation is thread-safe. It allows
Expand Down Expand Up @@ -394,6 +401,8 @@ Py_MakePendingCalls(void)
int i;
int r = 0;

assert(PyGILState_Check());

if (!pending_lock) {
/* initial allocation of the lock */
pending_lock = PyThread_allocate_lock();
Expand All @@ -408,6 +417,16 @@ Py_MakePendingCalls(void)
if (busy)
return 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind to add assert(PyGILState_Check()); at the function entry to make it more explicit that this function requires to hold the GIL?

(Right now, it's not obvious for me if the pending_lock initialization is safe.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, will do.

busy = 1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand well the purpose of busy... is it a flag to catch reentrant call? If no, maybe it should use volalite? If yes, please add a comment on the variable declaration to document this flag.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"don't perform recursive pending calls" as written above :-)

/* unsignal before starting to call callbacks, so that any callback
added in-between re-signals */
UNSIGNAL_PENDING_CALLS();

/* Python signal handler doesn't really queue a callback: it only signals
that a signal was received, see _PyEval_SignalReceived(). */
if (PyErr_CheckSignals() < 0) {
goto error;
}

/* perform a bounded number of calls, in case of recursion */
for (i=0; i<NPENDINGCALLS; i++) {
int j;
Expand All @@ -424,20 +443,23 @@ Py_MakePendingCalls(void)
arg = pendingcalls[j].arg;
pendingfirst = (j + 1) % NPENDINGCALLS;
}
if (pendingfirst != pendinglast)
SIGNAL_PENDING_CALLS();
else
UNSIGNAL_PENDING_CALLS();
PyThread_release_lock(pending_lock);
/* having released the lock, perform the callback */
if (func == NULL)
break;
r = func(arg);
if (r)
break;
if (r) {
goto error;
}
}

busy = 0;
return r;

error:
busy = 0;
SIGNAL_PENDING_CALLS(); /* We're not done yet */
return -1;
}

#else /* if ! defined WITH_THREAD */
Expand Down Expand Up @@ -472,7 +494,6 @@ static struct {
} pendingcalls[NPENDINGCALLS];
static volatile int pendingfirst = 0;
static volatile int pendinglast = 0;
static _Py_atomic_int pendingcalls_to_do = {0};

int
Py_AddPendingCall(int (*func)(void *), void *arg)
Expand Down Expand Up @@ -506,7 +527,16 @@ Py_MakePendingCalls(void)
if (busy)
return 0;
busy = 1;

/* unsignal before starting to call callbacks, so that any callback
added in-between re-signals */
UNSIGNAL_PENDING_CALLS();
/* Python signal handler doesn't really queue a callback: it only signals
that a signal was received, see _PyEval_SignalReceived(). */
if (PyErr_CheckSignals() < 0) {
goto error;
}

for (;;) {
int i;
int (*func)(void *);
Expand All @@ -518,13 +548,16 @@ Py_MakePendingCalls(void)
arg = pendingcalls[i].arg;
pendingfirst = (i + 1) % NPENDINGCALLS;
if (func(arg) < 0) {
busy = 0;
SIGNAL_PENDING_CALLS(); /* We're not done yet */
return -1;
goto error:
}
}
busy = 0;
return 0;

error:
busy = 0;
SIGNAL_PENDING_CALLS(); /* We're not done yet */
return -1;
}

#endif /* WITH_THREAD */
Expand Down