Skip to content

Commit c08177a

Browse files
authored
bpo-30703: Improve signal delivery (#2415)
* Improve signal delivery Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-unsafe functions. * Remove unused function * Improve comments * Add stress test * Adapt for --without-threads * Add second stress test * Add NEWS blurb * Address comments @Haypo
1 parent 9f3bdcb commit c08177a

File tree

5 files changed

+171
-37
lines changed

5 files changed

+171
-37
lines changed

Include/ceval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
5454
#endif
5555

5656
PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
57+
PyAPI_FUNC(void) _PyEval_SignalReceived(void);
5758
PyAPI_FUNC(int) Py_MakePendingCalls(void);
5859

5960
/* Protection against deeply nested recursive calls

Lib/test/test_signal.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import random
23
import signal
34
import socket
45
import subprocess
@@ -941,6 +942,101 @@ def handler(signum, frame):
941942
(exitcode, stdout))
942943

943944

945+
class StressTest(unittest.TestCase):
946+
"""
947+
Stress signal delivery, especially when a signal arrives in
948+
the middle of recomputing the signal state or executing
949+
previously tripped signal handlers.
950+
"""
951+
952+
@unittest.skipUnless(hasattr(signal, "setitimer"),
953+
"test needs setitimer()")
954+
def test_stress_delivery_dependent(self):
955+
"""
956+
This test uses dependent signal handlers.
957+
"""
958+
N = 10000
959+
sigs = []
960+
961+
def first_handler(signum, frame):
962+
# 1e-6 is the minimum non-zero value for `setitimer()`.
963+
# Choose a random delay so as to improve chances of
964+
# triggering a race condition. Ideally the signal is received
965+
# when inside critical signal-handling routines such as
966+
# Py_MakePendingCalls().
967+
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
968+
969+
def second_handler(signum=None, frame=None):
970+
sigs.append(signum)
971+
972+
def setsig(signum, handler):
973+
old_handler = signal.signal(signum, handler)
974+
self.addCleanup(signal.signal, signum, old_handler)
975+
976+
# Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both
977+
# ascending and descending sequences (SIGUSR1 then SIGALRM,
978+
# SIGPROF then SIGALRM), we maximize chances of hitting a bug.
979+
setsig(signal.SIGPROF, first_handler)
980+
setsig(signal.SIGUSR1, first_handler)
981+
setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL
982+
983+
expected_sigs = 0
984+
deadline = time.time() + 15.0
985+
986+
while expected_sigs < N:
987+
os.kill(os.getpid(), signal.SIGPROF)
988+
expected_sigs += 1
989+
# Wait for handlers to run to avoid signal coalescing
990+
while len(sigs) < expected_sigs and time.time() < deadline:
991+
time.sleep(1e-5)
992+
993+
os.kill(os.getpid(), signal.SIGUSR1)
994+
expected_sigs += 1
995+
while len(sigs) < expected_sigs and time.time() < deadline:
996+
time.sleep(1e-5)
997+
998+
# All ITIMER_REAL signals should have been delivered to the
999+
# Python handler
1000+
self.assertEqual(len(sigs), N, "Some signals were lost")
1001+
1002+
@unittest.skipUnless(hasattr(signal, "setitimer"),
1003+
"test needs setitimer()")
1004+
def test_stress_delivery_simultaneous(self):
1005+
"""
1006+
This test uses simultaneous signal handlers.
1007+
"""
1008+
N = 10000
1009+
sigs = []
1010+
1011+
def handler(signum, frame):
1012+
sigs.append(signum)
1013+
1014+
def setsig(signum, handler):
1015+
old_handler = signal.signal(signum, handler)
1016+
self.addCleanup(signal.signal, signum, old_handler)
1017+
1018+
setsig(signal.SIGUSR1, handler)
1019+
setsig(signal.SIGALRM, handler) # for ITIMER_REAL
1020+
1021+
expected_sigs = 0
1022+
deadline = time.time() + 15.0
1023+
1024+
while expected_sigs < N:
1025+
# Hopefully the SIGALRM will be received somewhere during
1026+
# initial processing of SIGUSR1.
1027+
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
1028+
os.kill(os.getpid(), signal.SIGUSR1)
1029+
1030+
expected_sigs += 2
1031+
# Wait for handlers to run to avoid signal coalescing
1032+
while len(sigs) < expected_sigs and time.time() < deadline:
1033+
time.sleep(1e-5)
1034+
1035+
# All ITIMER_REAL signals should have been delivered to the
1036+
# Python handler
1037+
self.assertEqual(len(sigs), N, "Some signals were lost")
1038+
1039+
9441040
def tearDownModule():
9451041
support.reap_children()
9461042

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Improve signal delivery.
2+
3+
Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
4+
unsafe functions. The tests I'm adding here fail without the rest of the
5+
patch, on Linux and OS X. This means our signal delivery logic had defects
6+
(some signals could be lost).

Modules/signalmodule.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,6 @@ The default handler for SIGINT installed by Python.\n\
188188
It raises KeyboardInterrupt.");
189189

190190

191-
static int
192-
checksignals_witharg(void * unused)
193-
{
194-
return PyErr_CheckSignals();
195-
}
196-
197191
static int
198192
report_wakeup_write_error(void *data)
199193
{
@@ -244,17 +238,15 @@ trip_signal(int sig_num)
244238

245239
Handlers[sig_num].tripped = 1;
246240

247-
if (!is_tripped) {
248-
/* Set is_tripped after setting .tripped, as it gets
249-
cleared in PyErr_CheckSignals() before .tripped. */
250-
is_tripped = 1;
251-
Py_AddPendingCall(checksignals_witharg, NULL);
252-
}
241+
/* Set is_tripped after setting .tripped, as it gets
242+
cleared in PyErr_CheckSignals() before .tripped. */
243+
is_tripped = 1;
244+
_PyEval_SignalReceived();
253245

254246
/* And then write to the wakeup fd *after* setting all the globals and
255-
doing the Py_AddPendingCall. We used to write to the wakeup fd and then
256-
set the flag, but this allowed the following sequence of events
257-
(especially on windows, where trip_signal runs in a new thread):
247+
doing the _PyEval_SignalReceived. We used to write to the wakeup fd
248+
and then set the flag, but this allowed the following sequence of events
249+
(especially on windows, where trip_signal may run in a new thread):
258250
259251
- main thread blocks on select([wakeup_fd], ...)
260252
- signal arrives
@@ -289,6 +281,8 @@ trip_signal(int sig_num)
289281
wakeup.send_err_set = 1;
290282
wakeup.send_errno = errno;
291283
wakeup.send_win_error = GetLastError();
284+
/* Py_AddPendingCall() isn't signal-safe, but we
285+
still use it for this exceptional case. */
292286
Py_AddPendingCall(report_wakeup_send_error, NULL);
293287
}
294288
}
@@ -302,6 +296,8 @@ trip_signal(int sig_num)
302296
rc = _Py_write_noraise(fd, &byte, 1);
303297

304298
if (rc < 0) {
299+
/* Py_AddPendingCall() isn't signal-safe, but we
300+
still use it for this exceptional case. */
305301
Py_AddPendingCall(report_wakeup_write_error,
306302
(void *)(intptr_t)errno);
307303
}
@@ -1556,8 +1552,10 @@ PyErr_CheckSignals(void)
15561552
arglist);
15571553
Py_DECREF(arglist);
15581554
}
1559-
if (!result)
1555+
if (!result) {
1556+
is_tripped = 1;
15601557
return -1;
1558+
}
15611559

15621560
Py_DECREF(result);
15631561
}

Python/ceval.c

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,15 @@ static long dxp[256];
140140
do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
141141

142142

143+
/* This single variable consolidates all requests to break out of the fast path
144+
in the eval loop. */
145+
static _Py_atomic_int eval_breaker = {0};
146+
/* Request for running pending calls. */
147+
static _Py_atomic_int pendingcalls_to_do = {0};
148+
/* Request for looking at the `async_exc` field of the current thread state.
149+
Guarded by the GIL. */
150+
static int pending_async_exc = 0;
151+
143152
#ifdef WITH_THREAD
144153

145154
#ifdef HAVE_ERRNO_H
@@ -149,16 +158,8 @@ static long dxp[256];
149158

150159
static PyThread_type_lock pending_lock = 0; /* for pending calls */
151160
static unsigned long main_thread = 0;
152-
/* This single variable consolidates all requests to break out of the fast path
153-
in the eval loop. */
154-
static _Py_atomic_int eval_breaker = {0};
155161
/* Request for dropping the GIL */
156162
static _Py_atomic_int gil_drop_request = {0};
157-
/* Request for running pending calls. */
158-
static _Py_atomic_int pendingcalls_to_do = {0};
159-
/* Request for looking at the `async_exc` field of the current thread state.
160-
Guarded by the GIL. */
161-
static int pending_async_exc = 0;
162163

163164
#include "ceval_gil.h"
164165

@@ -253,9 +254,6 @@ PyEval_ReInitThreads(void)
253254
_PyThreadState_DeleteExcept(current_tstate);
254255
}
255256

256-
#else
257-
static _Py_atomic_int eval_breaker = {0};
258-
static int pending_async_exc = 0;
259257
#endif /* WITH_THREAD */
260258

261259
/* This function is used to signal that async exceptions are waiting to be
@@ -330,6 +328,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
330328
#endif
331329
*/
332330

331+
void
332+
_PyEval_SignalReceived(void)
333+
{
334+
/* bpo-30703: Function called when the C signal handler of Python gets a
335+
signal. We cannot queue a callback using Py_AddPendingCall() since
336+
that function is not async-signal-safe. */
337+
SIGNAL_PENDING_CALLS();
338+
}
339+
333340
#ifdef WITH_THREAD
334341

335342
/* The WITH_THREAD implementation is thread-safe. It allows
@@ -394,6 +401,8 @@ Py_MakePendingCalls(void)
394401
int i;
395402
int r = 0;
396403

404+
assert(PyGILState_Check());
405+
397406
if (!pending_lock) {
398407
/* initial allocation of the lock */
399408
pending_lock = PyThread_allocate_lock();
@@ -408,6 +417,16 @@ Py_MakePendingCalls(void)
408417
if (busy)
409418
return 0;
410419
busy = 1;
420+
/* unsignal before starting to call callbacks, so that any callback
421+
added in-between re-signals */
422+
UNSIGNAL_PENDING_CALLS();
423+
424+
/* Python signal handler doesn't really queue a callback: it only signals
425+
that a signal was received, see _PyEval_SignalReceived(). */
426+
if (PyErr_CheckSignals() < 0) {
427+
goto error;
428+
}
429+
411430
/* perform a bounded number of calls, in case of recursion */
412431
for (i=0; i<NPENDINGCALLS; i++) {
413432
int j;
@@ -424,20 +443,23 @@ Py_MakePendingCalls(void)
424443
arg = pendingcalls[j].arg;
425444
pendingfirst = (j + 1) % NPENDINGCALLS;
426445
}
427-
if (pendingfirst != pendinglast)
428-
SIGNAL_PENDING_CALLS();
429-
else
430-
UNSIGNAL_PENDING_CALLS();
431446
PyThread_release_lock(pending_lock);
432447
/* having released the lock, perform the callback */
433448
if (func == NULL)
434449
break;
435450
r = func(arg);
436-
if (r)
437-
break;
451+
if (r) {
452+
goto error;
453+
}
438454
}
455+
439456
busy = 0;
440457
return r;
458+
459+
error:
460+
busy = 0;
461+
SIGNAL_PENDING_CALLS(); /* We're not done yet */
462+
return -1;
441463
}
442464

443465
#else /* if ! defined WITH_THREAD */
@@ -472,7 +494,6 @@ static struct {
472494
} pendingcalls[NPENDINGCALLS];
473495
static volatile int pendingfirst = 0;
474496
static volatile int pendinglast = 0;
475-
static _Py_atomic_int pendingcalls_to_do = {0};
476497

477498
int
478499
Py_AddPendingCall(int (*func)(void *), void *arg)
@@ -506,7 +527,16 @@ Py_MakePendingCalls(void)
506527
if (busy)
507528
return 0;
508529
busy = 1;
530+
531+
/* unsignal before starting to call callbacks, so that any callback
532+
added in-between re-signals */
509533
UNSIGNAL_PENDING_CALLS();
534+
/* Python signal handler doesn't really queue a callback: it only signals
535+
that a signal was received, see _PyEval_SignalReceived(). */
536+
if (PyErr_CheckSignals() < 0) {
537+
goto error;
538+
}
539+
510540
for (;;) {
511541
int i;
512542
int (*func)(void *);
@@ -518,13 +548,16 @@ Py_MakePendingCalls(void)
518548
arg = pendingcalls[i].arg;
519549
pendingfirst = (i + 1) % NPENDINGCALLS;
520550
if (func(arg) < 0) {
521-
busy = 0;
522-
SIGNAL_PENDING_CALLS(); /* We're not done yet */
523-
return -1;
551+
goto error:
524552
}
525553
}
526554
busy = 0;
527555
return 0;
556+
557+
error:
558+
busy = 0;
559+
SIGNAL_PENDING_CALLS(); /* We're not done yet */
560+
return -1;
528561
}
529562

530563
#endif /* WITH_THREAD */

0 commit comments

Comments
 (0)