Skip to content

Commit 8ec7a39

Browse files
os/signal: avoid race between Stop and receiving on channel
When Stop is called on a channel, wait until all signals have been delivered to the channel before returning. Use atomic operations in sigqueue to communicate more reliably between the os/signal goroutine and the signal handler. Fixes #14571 Change-Id: I6c5a9eea1cff85e37a34dffe96f4bb2699e12c6e Reviewed-on: https://go-review.googlesource.com/46003 Run-TryBot: Ian Lance Taylor <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Austin Clements <[email protected]>
1 parent 3785457 commit 8ec7a39

File tree

4 files changed

+221
-9
lines changed

4 files changed

+221
-9
lines changed

src/os/signal/signal.go

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,21 @@ import (
1111

1212
var handlers struct {
1313
sync.Mutex
14-
m map[chan<- os.Signal]*handler
14+
// Map a channel to the signals that should be sent to it.
15+
m map[chan<- os.Signal]*handler
16+
// Map a signal to the number of channels receiving it.
1517
ref [numSig]int64
18+
// Map channels to signals while the channel is being stopped.
19+
// Not a map because entries live here only very briefly.
20+
// We need a separate container because we need m to correspond to ref
21+
// at all times, and we also need to keep track of the *handler
22+
// value for a channel being stopped. See the Stop function.
23+
stopping []stopping
24+
}
25+
26+
type stopping struct {
27+
c chan<- os.Signal
28+
h *handler
1629
}
1730

1831
type handler struct {
@@ -142,10 +155,10 @@ func Reset(sig ...os.Signal) {
142155
// When Stop returns, it is guaranteed that c will receive no more signals.
143156
func Stop(c chan<- os.Signal) {
144157
handlers.Lock()
145-
defer handlers.Unlock()
146158

147159
h := handlers.m[c]
148160
if h == nil {
161+
handlers.Unlock()
149162
return
150163
}
151164
delete(handlers.m, c)
@@ -158,8 +171,40 @@ func Stop(c chan<- os.Signal) {
158171
}
159172
}
160173
}
174+
175+
// Signals will no longer be delivered to the channel.
176+
// We want to avoid a race for a signal such as SIGINT:
177+
// it should be either delivered to the channel,
178+
// or the program should take the default action (that is, exit).
179+
// To avoid the possibility that the signal is delivered,
180+
// and the signal handler invoked, and then Stop deregisters
181+
// the channel before the process function below has a chance
182+
// to send it on the channel, put the channel on a list of
183+
// channels being stopped and wait for signal delivery to
184+
// quiesce before fully removing it.
185+
186+
handlers.stopping = append(handlers.stopping, stopping{c, h})
187+
188+
handlers.Unlock()
189+
190+
signalWaitUntilIdle()
191+
192+
handlers.Lock()
193+
194+
for i, s := range handlers.stopping {
195+
if s.c == c {
196+
handlers.stopping = append(handlers.stopping[:i], handlers.stopping[i+1:]...)
197+
break
198+
}
199+
}
200+
201+
handlers.Unlock()
161202
}
162203

204+
// Wait until there are no more signals waiting to be delivered.
205+
// Defined by the runtime package.
206+
func signalWaitUntilIdle()
207+
163208
func process(sig os.Signal) {
164209
n := signum(sig)
165210
if n < 0 {
@@ -178,4 +223,14 @@ func process(sig os.Signal) {
178223
}
179224
}
180225
}
226+
227+
// Avoid the race mentioned in Stop.
228+
for _, d := range handlers.stopping {
229+
if d.h.want(n) {
230+
select {
231+
case d.c <- sig:
232+
default:
233+
}
234+
}
235+
}
181236
}

src/os/signal/signal_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@
77
package signal_test
88

99
import (
10+
"bytes"
1011
"flag"
12+
"fmt"
1113
"io/ioutil"
1214
"os"
1315
"os/exec"
1416
. "os/signal"
1517
"runtime"
1618
"strconv"
19+
"sync"
1720
"syscall"
1821
"testing"
1922
"time"
@@ -302,3 +305,88 @@ func TestSIGCONT(t *testing.T) {
302305
syscall.Kill(syscall.Getpid(), syscall.SIGCONT)
303306
waitSig(t, c, syscall.SIGCONT)
304307
}
308+
309+
// Test race between stopping and receiving a signal (issue 14571).
310+
func TestAtomicStop(t *testing.T) {
311+
if os.Getenv("GO_TEST_ATOMIC_STOP") != "" {
312+
atomicStopTestProgram()
313+
t.Fatal("atomicStopTestProgram returned")
314+
}
315+
316+
const execs = 10
317+
for i := 0; i < execs; i++ {
318+
cmd := exec.Command(os.Args[0], "-test.run=TestAtomicStop")
319+
cmd.Env = append(os.Environ(), "GO_TEST_ATOMIC_STOP=1")
320+
out, err := cmd.CombinedOutput()
321+
if err == nil {
322+
t.Logf("iteration %d: output %s", i, out)
323+
} else {
324+
t.Logf("iteration %d: exit status %q: output: %s", i, err, out)
325+
}
326+
327+
lost := bytes.Contains(out, []byte("lost signal"))
328+
if lost {
329+
t.Errorf("iteration %d: lost signal", i)
330+
}
331+
332+
// The program should either die due to SIGINT,
333+
// or exit with success without printing "lost signal".
334+
if err == nil {
335+
if len(out) > 0 && !lost {
336+
t.Errorf("iteration %d: unexpected output", i)
337+
}
338+
} else {
339+
if ee, ok := err.(*exec.ExitError); !ok {
340+
t.Errorf("iteration %d: error (%v) has type %T; expected exec.ExitError", i, err, err)
341+
} else if ws, ok := ee.Sys().(syscall.WaitStatus); !ok {
342+
t.Errorf("iteration %d: error.Sys (%v) has type %T; expected syscall.WaitStatus", i, ee.Sys(), ee.Sys())
343+
} else if !ws.Signaled() || ws.Signal() != syscall.SIGINT {
344+
t.Errorf("iteration %d: got exit status %v; expected SIGINT", i, ee)
345+
}
346+
}
347+
}
348+
}
349+
350+
// atomicStopTestProgram is run in a subprocess by TestAtomicStop.
351+
// It tries to trigger a signal delivery race. This function should
352+
// either catch a signal or die from it.
353+
func atomicStopTestProgram() {
354+
const tries = 10
355+
pid := syscall.Getpid()
356+
printed := false
357+
for i := 0; i < tries; i++ {
358+
cs := make(chan os.Signal, 1)
359+
Notify(cs, syscall.SIGINT)
360+
361+
var wg sync.WaitGroup
362+
wg.Add(1)
363+
go func() {
364+
defer wg.Done()
365+
Stop(cs)
366+
}()
367+
368+
syscall.Kill(pid, syscall.SIGINT)
369+
370+
// At this point we should either die from SIGINT or
371+
// get a notification on cs. If neither happens, we
372+
// dropped the signal. Give it a second to deliver,
373+
// which is far far longer than it should require.
374+
375+
select {
376+
case <-cs:
377+
case <-time.After(1 * time.Second):
378+
if !printed {
379+
fmt.Print("lost signal on iterations:")
380+
printed = true
381+
}
382+
fmt.Printf(" %d", i)
383+
}
384+
385+
wg.Wait()
386+
}
387+
if printed {
388+
fmt.Print("\n")
389+
}
390+
391+
os.Exit(0)
392+
}

src/runtime/sigqueue.go

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,17 @@ import (
3333
_ "unsafe" // for go:linkname
3434
)
3535

36+
// sig handles communication between the signal handler and os/signal.
37+
// Other than the inuse and recv fields, the fields are accessed atomically.
38+
//
39+
// The wanted and ignored fields are only written by one goroutine at
40+
// a time; access is controlled by the handlers Mutex in os/signal.
41+
// The fields are only read by that one goroutine and by the signal handler.
42+
// We access them atomically to minimize the race between setting them
43+
// in the goroutine calling os/signal and the signal handler,
44+
// which may be running in a different thread. That race is unavoidable,
45+
// as there is no connection between handling a signal and receiving one,
46+
// but atomic instructions should minimize it.
3647
var sig struct {
3748
note note
3849
mask [(_NSIG + 31) / 32]uint32
@@ -53,7 +64,11 @@ const (
5364
// Reports whether the signal was sent. If not, the caller typically crashes the program.
5465
func sigsend(s uint32) bool {
5566
bit := uint32(1) << uint(s&31)
56-
if !sig.inuse || s >= uint32(32*len(sig.wanted)) || sig.wanted[s/32]&bit == 0 {
67+
if !sig.inuse || s >= uint32(32*len(sig.wanted)) {
68+
return false
69+
}
70+
71+
if w := atomic.Load(&sig.wanted[s/32]); w&bit == 0 {
5772
return false
5873
}
5974

@@ -131,6 +146,23 @@ func signal_recv() uint32 {
131146
}
132147
}
133148

149+
// signalWaitUntilIdle waits until the signal delivery mechanism is idle.
150+
// This is used to ensure that we do not drop a signal notification due
151+
// to a race between disabling a signal and receiving a signal.
152+
// This assumes that signal delivery has already been disabled for
153+
// the signal(s) in question, and here we are just waiting to make sure
154+
// that all the signals have been delivered to the user channels
155+
// by the os/signal package.
156+
//go:linkname signalWaitUntilIdle os/signal.signalWaitUntilIdle
157+
func signalWaitUntilIdle() {
158+
// Although WaitUntilIdle seems like the right name for this
159+
// function, the state we are looking for is sigReceiving, not
160+
// sigIdle. The sigIdle state is really more like sigProcessing.
161+
for atomic.Load(&sig.state) != sigReceiving {
162+
Gosched()
163+
}
164+
}
165+
134166
// Must only be called from a single goroutine at a time.
135167
//go:linkname signal_enable os/signal.signal_enable
136168
func signal_enable(s uint32) {
@@ -146,8 +178,15 @@ func signal_enable(s uint32) {
146178
if s >= uint32(len(sig.wanted)*32) {
147179
return
148180
}
149-
sig.wanted[s/32] |= 1 << (s & 31)
150-
sig.ignored[s/32] &^= 1 << (s & 31)
181+
182+
w := sig.wanted[s/32]
183+
w |= 1 << (s & 31)
184+
atomic.Store(&sig.wanted[s/32], w)
185+
186+
i := sig.ignored[s/32]
187+
i &^= 1 << (s & 31)
188+
atomic.Store(&sig.ignored[s/32], i)
189+
151190
sigenable(s)
152191
}
153192

@@ -157,8 +196,11 @@ func signal_disable(s uint32) {
157196
if s >= uint32(len(sig.wanted)*32) {
158197
return
159198
}
160-
sig.wanted[s/32] &^= 1 << (s & 31)
161199
sigdisable(s)
200+
201+
w := sig.wanted[s/32]
202+
w &^= 1 << (s & 31)
203+
atomic.Store(&sig.wanted[s/32], w)
162204
}
163205

164206
// Must only be called from a single goroutine at a time.
@@ -167,12 +209,19 @@ func signal_ignore(s uint32) {
167209
if s >= uint32(len(sig.wanted)*32) {
168210
return
169211
}
170-
sig.wanted[s/32] &^= 1 << (s & 31)
171-
sig.ignored[s/32] |= 1 << (s & 31)
172212
sigignore(s)
213+
214+
w := sig.wanted[s/32]
215+
w &^= 1 << (s & 31)
216+
atomic.Store(&sig.wanted[s/32], w)
217+
218+
i := sig.ignored[s/32]
219+
i |= 1 << (s & 31)
220+
atomic.Store(&sig.ignored[s/32], i)
173221
}
174222

175223
// Checked by signal handlers.
176224
func signal_ignored(s uint32) bool {
177-
return sig.ignored[s/32]&(1<<(s&31)) != 0
225+
i := atomic.Load(&sig.ignored[s/32])
226+
return i&(1<<(s&31)) != 0
178227
}

src/runtime/sigqueue_plan9.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,26 @@ func signal_recv() string {
110110
}
111111
}
112112

113+
// signalWaitUntilIdle waits until the signal delivery mechanism is idle.
114+
// This is used to ensure that we do not drop a signal notification due
115+
// to a race between disabling a signal and receiving a signal.
116+
// This assumes that signal delivery has already been disabled for
117+
// the signal(s) in question, and here we are just waiting to make sure
118+
// that all the signals have been delivered to the user channels
119+
// by the os/signal package.
120+
//go:linkname signalWaitUntilIdle os/signal.signalWaitUntilIdle
121+
func signalWaitUntilIdle() {
122+
for {
123+
lock(&sig.lock)
124+
sleeping := sig.sleeping
125+
unlock(&sig.lock)
126+
if sleeping {
127+
return
128+
}
129+
Gosched()
130+
}
131+
}
132+
113133
// Must only be called from a single goroutine at a time.
114134
//go:linkname signal_enable os/signal.signal_enable
115135
func signal_enable(s uint32) {

0 commit comments

Comments
 (0)