Skip to content

Commit d6c3b0a

Browse files
runtime: don't crash holding locks on racy timer access
If we run into data corruption due to the program accessing timers in a racy way, do a normal panic rather than a hard crash with "panic holding locks". The hope is to make the problem less confusing for users. Fixes #25686 Change-Id: I863417adf21f7f8c088675b67a3acf49a0cdef41 Reviewed-on: https://go-review.googlesource.com/115815 Reviewed-by: Austin Clements <[email protected]>
1 parent cf2c2ea commit d6c3b0a

File tree

2 files changed

+100
-11
lines changed

2 files changed

+100
-11
lines changed

src/runtime/time.go

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,10 @@ func timeSleep(ns int64) {
9898
t.arg = gp
9999
tb := t.assignBucket()
100100
lock(&tb.lock)
101-
tb.addtimerLocked(t)
101+
if !tb.addtimerLocked(t) {
102+
unlock(&tb.lock)
103+
badTimer()
104+
}
102105
goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2)
103106
}
104107

@@ -128,22 +131,29 @@ func goroutineReady(arg interface{}, seq uintptr) {
128131
func addtimer(t *timer) {
129132
tb := t.assignBucket()
130133
lock(&tb.lock)
131-
tb.addtimerLocked(t)
134+
ok := tb.addtimerLocked(t)
132135
unlock(&tb.lock)
136+
if !ok {
137+
badTimer()
138+
}
133139
}
134140

135141
// Add a timer to the heap and start or kick timerproc if the new timer is
136142
// earlier than any of the others.
137143
// Timers are locked.
138-
func (tb *timersBucket) addtimerLocked(t *timer) {
144+
// Returns whether all is well: false if the data structure is corrupt
145+
// due to user-level races.
146+
func (tb *timersBucket) addtimerLocked(t *timer) bool {
139147
// when must never be negative; otherwise timerproc will overflow
140148
// during its delta calculation and never expire other runtime timers.
141149
if t.when < 0 {
142150
t.when = 1<<63 - 1
143151
}
144152
t.i = len(tb.t)
145153
tb.t = append(tb.t, t)
146-
siftupTimer(tb.t, t.i)
154+
if !siftupTimer(tb.t, t.i) {
155+
return false
156+
}
147157
if t.i == 0 {
148158
// siftup moved to top: new earliest deadline.
149159
if tb.sleeping {
@@ -159,6 +169,7 @@ func (tb *timersBucket) addtimerLocked(t *timer) {
159169
tb.created = true
160170
go timerproc(tb)
161171
}
172+
return true
162173
}
163174

164175
// Delete timer t from the heap.
@@ -191,11 +202,19 @@ func deltimer(t *timer) bool {
191202
}
192203
tb.t[last] = nil
193204
tb.t = tb.t[:last]
205+
ok := true
194206
if i != last {
195-
siftupTimer(tb.t, i)
196-
siftdownTimer(tb.t, i)
207+
if !siftupTimer(tb.t, i) {
208+
ok = false
209+
}
210+
if !siftdownTimer(tb.t, i) {
211+
ok = false
212+
}
197213
}
198214
unlock(&tb.lock)
215+
if !ok {
216+
badTimer()
217+
}
199218
return true
200219
}
201220

@@ -219,10 +238,13 @@ func timerproc(tb *timersBucket) {
219238
if delta > 0 {
220239
break
221240
}
241+
ok := true
222242
if t.period > 0 {
223243
// leave in heap but adjust next time to fire
224244
t.when += t.period * (1 + -delta/t.period)
225-
siftdownTimer(tb.t, 0)
245+
if !siftdownTimer(tb.t, 0) {
246+
ok = false
247+
}
226248
} else {
227249
// remove from heap
228250
last := len(tb.t) - 1
@@ -233,14 +255,19 @@ func timerproc(tb *timersBucket) {
233255
tb.t[last] = nil
234256
tb.t = tb.t[:last]
235257
if last > 0 {
236-
siftdownTimer(tb.t, 0)
258+
if !siftdownTimer(tb.t, 0) {
259+
ok = false
260+
}
237261
}
238262
t.i = -1 // mark as removed
239263
}
240264
f := t.f
241265
arg := t.arg
242266
seq := t.seq
243267
unlock(&tb.lock)
268+
if !ok {
269+
badTimer()
270+
}
244271
if raceenabled {
245272
raceacquire(unsafe.Pointer(t))
246273
}
@@ -326,8 +353,20 @@ func timeSleepUntil() int64 {
326353
}
327354

328355
// Heap maintenance algorithms.
329-
330-
func siftupTimer(t []*timer, i int) {
356+
// These algorithms check for slice index errors manually.
357+
// Slice index error can happen if the program is using racy
358+
// access to timers. We don't want to panic here, because
359+
// it will cause the program to crash with a mysterious
360+
// "panic holding locks" message. Instead, we panic while not
361+
// holding a lock.
362+
// The races can occur despite the bucket locks because assignBucket
363+
// itself is called without locks, so racy calls can cause a timer to
364+
// change buckets while executing these functions.
365+
366+
func siftupTimer(t []*timer, i int) bool {
367+
if i >= len(t) {
368+
return false
369+
}
331370
when := t[i].when
332371
tmp := t[i]
333372
for i > 0 {
@@ -343,10 +382,14 @@ func siftupTimer(t []*timer, i int) {
343382
t[i] = tmp
344383
t[i].i = i
345384
}
385+
return true
346386
}
347387

348-
func siftdownTimer(t []*timer, i int) {
388+
func siftdownTimer(t []*timer, i int) bool {
349389
n := len(t)
390+
if i >= n {
391+
return false
392+
}
350393
when := t[i].when
351394
tmp := t[i]
352395
for {
@@ -382,6 +425,15 @@ func siftdownTimer(t []*timer, i int) {
382425
t[i] = tmp
383426
t[i].i = i
384427
}
428+
return true
429+
}
430+
431+
// badTimer is called if the timer data structures have been corrupted,
432+
// presumably due to racy use by the program. We panic here rather than
433+
// panicing due to invalid slice access while holding locks.
434+
// See issue #25686.
435+
func badTimer() {
436+
panic(errorString("racy use of timers"))
385437
}
386438

387439
// Entry points for net, time to call nanotime.

src/time/time_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ import (
99
"encoding/gob"
1010
"encoding/json"
1111
"fmt"
12+
"internal/race"
1213
"math/big"
1314
"math/rand"
1415
"os"
1516
"runtime"
1617
"strings"
18+
"sync"
1719
"testing"
1820
"testing/quick"
1921
. "time"
@@ -1341,3 +1343,38 @@ func TestReadFileLimit(t *testing.T) {
13411343
t.Errorf("readFile(%q) error = %v; want error containing 'is too large'", zero, err)
13421344
}
13431345
}
1346+
1347+
// Issue 25686: hard crash on concurrent timer access.
1348+
// This test deliberately invokes a race condition.
1349+
// We are testing that we don't crash with "fatal error: panic holding locks".
1350+
func TestConcurrentTimerReset(t *testing.T) {
1351+
if race.Enabled {
1352+
t.Skip("skipping test under race detector")
1353+
}
1354+
1355+
// We expect this code to panic rather than crash.
1356+
// Don't worry if it doesn't panic.
1357+
catch := func(i int) {
1358+
if e := recover(); e != nil {
1359+
t.Logf("panic in goroutine %d, as expected, with %q", i, e)
1360+
} else {
1361+
t.Logf("no panic in goroutine %d", i)
1362+
}
1363+
}
1364+
1365+
const goroutines = 8
1366+
const tries = 1000
1367+
var wg sync.WaitGroup
1368+
wg.Add(goroutines)
1369+
timer := NewTimer(Hour)
1370+
for i := 0; i < goroutines; i++ {
1371+
go func(i int) {
1372+
defer wg.Done()
1373+
defer catch(i)
1374+
for j := 0; j < tries; j++ {
1375+
timer.Reset(Hour + Duration(i*j))
1376+
}
1377+
}(i)
1378+
}
1379+
wg.Wait()
1380+
}

0 commit comments

Comments
 (0)