python · pitrou · Jun 28, 2017 · Jun 26, 2017 · Jun 26, 2017 · Jun 26, 2017
diff --git a/Include/ceval.h b/Include/ceval.h
@@ -54,6 +54,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
 #endif
 
 PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
+PyAPI_FUNC(void) _PyEval_SignalReceived(void);
 PyAPI_FUNC(int) Py_MakePendingCalls(void);
 
 /* Protection against deeply nested recursive calls

diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py
@@ -1,4 +1,5 @@
 import os
+import random
 import signal
 import socket
 import subprocess
@@ -941,6 +942,101 @@ def handler(signum, frame):
                                 (exitcode, stdout))
 
 
+class StressTest(unittest.TestCase):
+    """
+    Stress signal delivery, especially when a signal arrives in
+    the middle of recomputing the signal state or executing
+    previously tripped signal handlers.
+    """
+
+    @unittest.skipUnless(hasattr(signal, "setitimer"),
+                         "test needs setitimer()")
+    def test_stress_delivery_dependent(self):
+        """
+        This test uses dependent signal handlers.
+        """
+        N = 10000
+        sigs = []
+
+        def first_handler(signum, frame):
+            # 1e-6 is the minimum non-zero value for `setitimer()`.
+            # Choose a random delay so as to improve chances of
+            # triggering a race condition.  Ideally the signal is received
+            # when inside critical signal-handling routines such as
+            # Py_MakePendingCalls().
+            signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
+
+        def second_handler(signum=None, frame=None):
+            sigs.append(signum)
+
+        def setsig(signum, handler):
+            old_handler = signal.signal(signum, handler)
+            self.addCleanup(signal.signal, signum, old_handler)
+
+        # Here on Linux, SIGPROF > SIGALRM > SIGUSR1.  By using both
+        # ascending and descending sequences (SIGUSR1 then SIGALRM,
+        # SIGPROF then SIGALRM), we maximize chances of hitting a bug.
+        setsig(signal.SIGPROF, first_handler)
+        setsig(signal.SIGUSR1, first_handler)
+        setsig(signal.SIGALRM, second_handler)  # for ITIMER_REAL
+
+        expected_sigs = 0
+        deadline = time.time() + 15.0
+
+        while expected_sigs < N:
+            os.kill(os.getpid(), signal.SIGPROF)
+            expected_sigs += 1
+            # Wait for handlers to run to avoid signal coalescing
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+            os.kill(os.getpid(), signal.SIGUSR1)
+            expected_sigs += 1
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+        # All ITIMER_REAL signals should have been delivered to the
+        # Python handler
+        self.assertEqual(len(sigs), N, "Some signals were lost")
+
+    @unittest.skipUnless(hasattr(signal, "setitimer"),
+                         "test needs setitimer()")
+    def test_stress_delivery_simultaneous(self):
+        """
+        This test uses simultaneous signal handlers.
+        """
+        N = 10000
+        sigs = []
+
+        def handler(signum, frame):
+            sigs.append(signum)
+
+        def setsig(signum, handler):
+            old_handler = signal.signal(signum, handler)
+            self.addCleanup(signal.signal, signum, old_handler)
+
+        setsig(signal.SIGUSR1, handler)
+        setsig(signal.SIGALRM, handler)  # for ITIMER_REAL
+
+        expected_sigs = 0
+        deadline = time.time() + 15.0
+
+        while expected_sigs < N:
+            # Hopefully the SIGALRM will be received somewhere during
+            # initial processing of SIGUSR1.
+            signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
+            os.kill(os.getpid(), signal.SIGUSR1)
+
+            expected_sigs += 2
+            # Wait for handlers to run to avoid signal coalescing
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+        # All ITIMER_REAL signals should have been delivered to the
+        # Python handler
+        self.assertEqual(len(sigs), N, "Some signals were lost")
+
+
 def tearDownModule():
     support.reap_children()
 

diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst
@@ -0,0 +1,6 @@
+Improve signal delivery.
+
+Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
+unsafe functions. The tests I'm adding here fail without the rest of the
+patch, on Linux and OS X. This means our signal delivery logic had defects
+(some signals could be lost).
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
@@ -188,12 +188,6 @@ The default handler for SIGINT installed by Python.\n\
 It raises KeyboardInterrupt.");
 
 
-static int
-checksignals_witharg(void * unused)
-{
-    return PyErr_CheckSignals();
-}
-
 static int
 report_wakeup_write_error(void *data)
 {
@@ -244,17 +238,15 @@ trip_signal(int sig_num)
 
     Handlers[sig_num].tripped = 1;
 
-    if (!is_tripped) {
-        /* Set is_tripped after setting .tripped, as it gets
-           cleared in PyErr_CheckSignals() before .tripped. */
-        is_tripped = 1;
-        Py_AddPendingCall(checksignals_witharg, NULL);
-    }
+    /* Set is_tripped after setting .tripped, as it gets
+       cleared in PyErr_CheckSignals() before .tripped. */
+    is_tripped = 1;
+    _PyEval_SignalReceived();
 
     /* And then write to the wakeup fd *after* setting all the globals and
-       doing the Py_AddPendingCall. We used to write to the wakeup fd and then
-       set the flag, but this allowed the following sequence of events
-       (especially on windows, where trip_signal runs in a new thread):
+       doing the _PyEval_SignalReceived. We used to write to the wakeup fd
+       and then set the flag, but this allowed the following sequence of events
+       (especially on windows, where trip_signal may run in a new thread):
 
        - main thread blocks on select([wakeup_fd], ...)
        - signal arrives
@@ -289,6 +281,8 @@ trip_signal(int sig_num)
                 wakeup.send_err_set = 1;
                 wakeup.send_errno = errno;
                 wakeup.send_win_error = GetLastError();
+                /* Py_AddPendingCall() isn't signal-safe, but we
+                   still use it for this exceptional case. */
                 Py_AddPendingCall(report_wakeup_send_error, NULL);
             }
         }
@@ -302,6 +296,8 @@ trip_signal(int sig_num)
             rc = _Py_write_noraise(fd, &byte, 1);
 
             if (rc < 0) {
+                /* Py_AddPendingCall() isn't signal-safe, but we
+                   still use it for this exceptional case. */
                 Py_AddPendingCall(report_wakeup_write_error,
                                   (void *)(intptr_t)errno);
             }
@@ -1556,8 +1552,10 @@ PyErr_CheckSignals(void)
                                            arglist);
                 Py_DECREF(arglist);
             }
-            if (!result)
+            if (!result) {
+                is_tripped = 1;
                 return -1;
+            }
 
             Py_DECREF(result);
         }

diff --git a/Python/ceval.c b/Python/ceval.c
@@ -140,6 +140,15 @@ static long dxp[256];
     do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
 
 
+/* This single variable consolidates all requests to break out of the fast path
+   in the eval loop. */
+static _Py_atomic_int eval_breaker = {0};
+/* Request for running pending calls. */
+static _Py_atomic_int pendingcalls_to_do = {0};
+/* Request for looking at the `async_exc` field of the current thread state.
+   Guarded by the GIL. */
+static int pending_async_exc = 0;
+
 #ifdef WITH_THREAD
 
 #ifdef HAVE_ERRNO_H
@@ -149,16 +158,8 @@ static long dxp[256];
 
 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static unsigned long main_thread = 0;
-/* This single variable consolidates all requests to break out of the fast path
-   in the eval loop. */
-static _Py_atomic_int eval_breaker = {0};
 /* Request for dropping the GIL */
 static _Py_atomic_int gil_drop_request = {0};
-/* Request for running pending calls. */
-static _Py_atomic_int pendingcalls_to_do = {0};
-/* Request for looking at the `async_exc` field of the current thread state.
-   Guarded by the GIL. */
-static int pending_async_exc = 0;
 
 #include "ceval_gil.h"
 
@@ -253,9 +254,6 @@ PyEval_ReInitThreads(void)
     _PyThreadState_DeleteExcept(current_tstate);
 }
 
-#else
-static _Py_atomic_int eval_breaker = {0};
-static int pending_async_exc = 0;
 #endif /* WITH_THREAD */
 
 /* This function is used to signal that async exceptions are waiting to be
@@ -330,6 +328,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
 #endif
 */
 
+void
+_PyEval_SignalReceived(void)
+{
+    /* bpo-30703: Function called when the C signal handler of Python gets a
+       signal. We cannot queue a callback using Py_AddPendingCall() since
+       that function is not async-signal-safe. */
+    SIGNAL_PENDING_CALLS();
+}
+
 #ifdef WITH_THREAD
 
 /* The WITH_THREAD implementation is thread-safe.  It allows
@@ -394,6 +401,8 @@ Py_MakePendingCalls(void)
     int i;
     int r = 0;
 
+    assert(PyGILState_Check());
+
     if (!pending_lock) {
         /* initial allocation of the lock */
         pending_lock = PyThread_allocate_lock();
@@ -408,6 +417,16 @@ Py_MakePendingCalls(void)
     if (busy)
         return 0;
     busy = 1;
+    /* unsignal before starting to call callbacks, so that any callback
+       added in-between re-signals */
+    UNSIGNAL_PENDING_CALLS();
+
+    /* Python signal handler doesn't really queue a callback: it only signals
+       that a signal was received, see _PyEval_SignalReceived(). */
+    if (PyErr_CheckSignals() < 0) {
+        goto error;
+    }
+
     /* perform a bounded number of calls, in case of recursion */
     for (i=0; i<NPENDINGCALLS; i++) {
         int j;
@@ -424,20 +443,23 @@ Py_MakePendingCalls(void)
             arg = pendingcalls[j].arg;
             pendingfirst = (j + 1) % NPENDINGCALLS;
         }
-        if (pendingfirst != pendinglast)
-            SIGNAL_PENDING_CALLS();
-        else
-            UNSIGNAL_PENDING_CALLS();
         PyThread_release_lock(pending_lock);
         /* having released the lock, perform the callback */
         if (func == NULL)
             break;
         r = func(arg);
-        if (r)
-            break;
+        if (r) {
+            goto error;
+        }
     }
+
     busy = 0;
     return r;
+
+error:
+    busy = 0;
+    SIGNAL_PENDING_CALLS(); /* We're not done yet */
+    return -1;
 }
 
 #else /* if ! defined WITH_THREAD */
@@ -472,7 +494,6 @@ static struct {
 } pendingcalls[NPENDINGCALLS];
 static volatile int pendingfirst = 0;
 static volatile int pendinglast = 0;
-static _Py_atomic_int pendingcalls_to_do = {0};
 
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
@@ -506,7 +527,16 @@ Py_MakePendingCalls(void)
     if (busy)
         return 0;
     busy = 1;
+
+    /* unsignal before starting to call callbacks, so that any callback
+       added in-between re-signals */
     UNSIGNAL_PENDING_CALLS();
+    /* Python signal handler doesn't really queue a callback: it only signals
+       that a signal was received, see _PyEval_SignalReceived(). */
+    if (PyErr_CheckSignals() < 0) {
+        goto error;
+    }
+
     for (;;) {
         int i;
         int (*func)(void *);
@@ -518,13 +548,16 @@ Py_MakePendingCalls(void)
         arg = pendingcalls[i].arg;
         pendingfirst = (i + 1) % NPENDINGCALLS;
         if (func(arg) < 0) {
-            busy = 0;
-            SIGNAL_PENDING_CALLS(); /* We're not done yet */
-            return -1;
+            goto error:
         }
     }
     busy = 0;
     return 0;
+
+error:
+    busy = 0;
+    SIGNAL_PENDING_CALLS(); /* We're not done yet */
+    return -1;
 }
 
 #endif /* WITH_THREAD */