Skip to content

Commit 990124d

Browse files
committed
runtime: use balanced tree for addr lookup in semaphore implementation
CL 36792 fixed #17953, a linear scan caused by n goroutines piling into two different locks that hashed to the same bucket in the semaphore table. In that CL, n goroutines contending for 2 unfortunately chosen locks went from O(n²) to O(n). This CL fixes a different linear scan, when n goroutines are contending for n/2 different locks that all hash to the same bucket in the semaphore table. In this CL, n goroutines contending for n/2 unfortunately chosen locks goes from O(n²) to O(n log n). This case is much less likely, but any linear scan eventually hurts, so we might as well fix it while the problem is fresh in our minds. The new test in this CL checks for both linear scans. The effect of this CL on the sync benchmarks is negligible (but it fixes the new test). name old time/op new time/op delta Cond1-48 576ns ±10% 575ns ±13% ~ (p=0.679 n=71+71) Cond2-48 1.59µs ± 8% 1.61µs ± 9% ~ (p=0.107 n=73+69) Cond4-48 4.56µs ± 7% 4.55µs ± 7% ~ (p=0.670 n=74+72) Cond8-48 9.87µs ± 9% 9.90µs ± 7% ~ (p=0.507 n=69+73) Cond16-48 20.4µs ± 7% 20.4µs ±10% ~ (p=0.588 n=69+71) Cond32-48 45.4µs ±10% 45.4µs ±14% ~ (p=0.944 n=73+73) UncontendedSemaphore-48 19.7ns ±12% 19.7ns ± 8% ~ (p=0.589 n=65+63) ContendedSemaphore-48 55.4ns ±26% 54.9ns ±32% ~ (p=0.441 n=75+75) MutexUncontended-48 0.63ns ± 0% 0.63ns ± 0% ~ (all equal) Mutex-48 210ns ± 6% 213ns ±10% +1.30% (p=0.035 n=70+74) MutexSlack-48 210ns ± 7% 211ns ± 9% ~ (p=0.184 n=71+72) MutexWork-48 299ns ± 5% 300ns ± 5% ~ (p=0.678 n=73+75) MutexWorkSlack-48 302ns ± 6% 300ns ± 5% ~ (p=0.149 n=74+72) MutexNoSpin-48 135ns ± 6% 135ns ±10% ~ (p=0.788 n=67+75) MutexSpin-48 693ns ± 5% 689ns ± 6% ~ (p=0.092 n=65+74) Once-48 0.22ns ±25% 0.22ns ±24% ~ (p=0.882 n=74+73) Pool-48 5.88ns ±36% 5.79ns ±24% ~ (p=0.655 n=69+69) PoolOverflow-48 4.79µs ±18% 4.87µs ±20% ~ (p=0.233 n=75+75) SemaUncontended-48 0.80ns ± 1% 0.82ns ± 8% +2.46% (p=0.000 n=60+74) SemaSyntNonblock-48 103ns ± 4% 102ns ± 5% -1.11% (p=0.003 n=75+75) SemaSyntBlock-48 104ns ± 4% 104ns ± 5% ~ (p=0.231 n=71+75) SemaWorkNonblock-48 128ns ± 4% 129ns ± 6% +1.51% (p=0.000 n=63+75) SemaWorkBlock-48 129ns ± 8% 130ns ± 7% ~ (p=0.072 n=75+74) RWMutexUncontended-48 2.35ns ± 1% 2.35ns ± 0% ~ (p=0.144 n=70+55) RWMutexWrite100-48 139ns ±18% 141ns ±21% ~ (p=0.071 n=75+73) RWMutexWrite10-48 145ns ± 9% 145ns ± 8% ~ (p=0.553 n=75+75) RWMutexWorkWrite100-48 297ns ±13% 297ns ±15% ~ (p=0.519 n=75+74) RWMutexWorkWrite10-48 588ns ± 7% 585ns ± 5% ~ (p=0.173 n=73+70) WaitGroupUncontended-48 0.87ns ± 0% 0.87ns ± 0% ~ (all equal) WaitGroupAddDone-48 63.2ns ± 4% 62.7ns ± 4% -0.82% (p=0.027 n=72+75) WaitGroupAddDoneWork-48 109ns ± 5% 109ns ± 4% ~ (p=0.233 n=75+75) WaitGroupWait-48 0.17ns ± 0% 0.16ns ±16% -8.55% (p=0.000 n=56+75) WaitGroupWaitWork-48 1.78ns ± 1% 2.08ns ± 5% +16.92% (p=0.000 n=74+70) WaitGroupActuallyWait-48 52.0ns ± 3% 50.6ns ± 5% -2.70% (p=0.000 n=71+69) https://perf.golang.org/search?q=upload:20170215.1 Change-Id: Ia29a8bd006c089e401ec4297c3038cca656bcd0a Reviewed-on: https://go-review.googlesource.com/37103 Run-TryBot: Russ Cox <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent fc456c7 commit 990124d

File tree

3 files changed

+296
-40
lines changed

3 files changed

+296
-40
lines changed

src/runtime/runtime2.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ type sudog struct {
286286
acquiretime int64
287287
releasetime int64
288288
ticket uint32
289+
parent *sudog // semaRoot binary tree
289290
waitlink *sudog // g.waiting list or semaRoot
290291
waittail *sudog // semaRoot
291292
c *hchan // channel

src/runtime/sema.go

Lines changed: 156 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,19 @@ import (
2727

2828
// Asynchronous semaphore for sync.Mutex.
2929

30-
// A semaRoot holds a linked list of sudog with distinct addresses (s.elem).
30+
// A semaRoot holds a balanced tree of sudog with distinct addresses (s.elem).
3131
// Each of those sudog may in turn point (through s.waitlink) to a list
3232
// of other sudogs waiting on the same address.
3333
// The operations on the inner lists of sudogs with the same address
34-
// are all O(1). Only the scanning of the top-level semaRoot list is O(n),
34+
// are all O(1). The scanning of the top-level semaRoot list is O(log n),
3535
// where n is the number of distinct addresses with goroutines blocked
3636
// on them that hash to the given semaRoot.
37-
// In systems with many goroutines, most queue up on a few addresses,
38-
// so the linear search across unique addresses is probably OK.
39-
// At least, we'll use this until it's not.
40-
// The next step is probably to make the top-level list a treap instead
41-
// of a linked list.
4237
// See golang.org/issue/17953 for a program that worked badly
43-
// before we introduced the second level of list.
38+
// before we introduced the second level of list, and test/locklinear.go
39+
// for a test that exercises this.
4440
type semaRoot struct {
4541
lock mutex
46-
head *sudog
47-
tail *sudog
42+
treap *sudog // root of balanced tree of unique waiters.
4843
nwait uint32 // Number of waiters. Read w/o the lock.
4944
}
5045

@@ -205,8 +200,12 @@ func cansemacquire(addr *uint32) bool {
205200
func (root *semaRoot) queue(addr *uint32, s *sudog) {
206201
s.g = getg()
207202
s.elem = unsafe.Pointer(addr)
203+
s.next = nil
204+
s.prev = nil
208205

209-
for t := root.head; t != nil; t = t.next {
206+
var last *sudog
207+
pt := &root.treap
208+
for t := *pt; t != nil; t = *pt {
210209
if t.elem == unsafe.Pointer(addr) {
211210
// Already have addr in list; add s to end of per-addr list.
212211
if t.waittail == nil {
@@ -218,29 +217,55 @@ func (root *semaRoot) queue(addr *uint32, s *sudog) {
218217
s.waitlink = nil
219218
return
220219
}
220+
last = t
221+
if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) {
222+
pt = &t.prev
223+
} else {
224+
pt = &t.next
225+
}
221226
}
222227

223-
// Add s as new entry in list of unique addrs.
224-
s.next = nil
225-
s.prev = root.tail
226-
if root.tail != nil {
227-
root.tail.next = s
228-
} else {
229-
root.head = s
228+
// Add s as new leaf in tree of unique addrs.
229+
// The balanced tree is a treap using ticket as the random heap priority.
230+
// That is, it is a binary tree ordered according to the elem addresses,
231+
// but then among the space of possible binary trees respecting those
232+
// addresses, it is kept balanced on average by maintaining a heap ordering
233+
// on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket.
234+
// https://en.wikipedia.org/wiki/Treap
235+
// http://faculty.washington.edu/aragon/pubs/rst89.pdf
236+
s.ticket = fastrand()
237+
s.parent = last
238+
*pt = s
239+
240+
// Rotate up into tree according to ticket (priority).
241+
for s.parent != nil && s.parent.ticket > s.ticket {
242+
if s.parent.prev == s {
243+
root.rotateRight(s.parent)
244+
} else {
245+
if s.parent.next != s {
246+
panic("semaRoot queue")
247+
}
248+
root.rotateLeft(s.parent)
249+
}
230250
}
231-
root.tail = s
232251
}
233252

234253
// dequeue searches for and finds the first goroutine
235254
// in semaRoot blocked on addr.
236255
// If the sudog was being profiled, dequeue returns the time
237256
// at which it was woken up as now. Otherwise now is 0.
238257
func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
239-
s := root.head
240-
for ; s != nil; s = s.next {
258+
ps := &root.treap
259+
s := *ps
260+
for ; s != nil; s = *ps {
241261
if s.elem == unsafe.Pointer(addr) {
242262
goto Found
243263
}
264+
if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) {
265+
ps = &s.prev
266+
} else {
267+
ps = &s.next
268+
}
244269
}
245270
return nil, 0
246271

@@ -250,18 +275,17 @@ Found:
250275
now = cputicks()
251276
}
252277
if t := s.waitlink; t != nil {
253-
// Substitute t, also waiting on addr, for s in root list of unique addrs.
278+
// Substitute t, also waiting on addr, for s in root tree of unique addrs.
279+
*ps = t
280+
t.ticket = s.ticket
281+
t.parent = s.parent
254282
t.prev = s.prev
255-
t.next = s.next
256283
if t.prev != nil {
257-
t.prev.next = t
258-
} else {
259-
root.head = t
284+
t.prev.parent = t
260285
}
286+
t.next = s.next
261287
if t.next != nil {
262-
t.next.prev = t
263-
} else {
264-
root.tail = t
288+
t.next.parent = t
265289
}
266290
if t.waitlink != nil {
267291
t.waittail = s.waittail
@@ -272,24 +296,104 @@ Found:
272296
s.waitlink = nil
273297
s.waittail = nil
274298
} else {
275-
// Remove s from list.
276-
if s.next != nil {
277-
s.next.prev = s.prev
278-
} else {
279-
root.tail = s.prev
299+
// Rotate s down to be leaf of tree for removal, respecting priorities.
300+
for s.next != nil || s.prev != nil {
301+
if s.next == nil || s.prev != nil && s.prev.ticket < s.next.ticket {
302+
root.rotateRight(s)
303+
} else {
304+
root.rotateLeft(s)
305+
}
280306
}
281-
if s.prev != nil {
282-
s.prev.next = s.next
307+
// Remove s, now a leaf.
308+
if s.parent != nil {
309+
if s.parent.prev == s {
310+
s.parent.prev = nil
311+
} else {
312+
s.parent.next = nil
313+
}
283314
} else {
284-
root.head = s.next
315+
root.treap = nil
285316
}
286317
}
318+
s.parent = nil
287319
s.elem = nil
288320
s.next = nil
289321
s.prev = nil
290322
return s, now
291323
}
292324

325+
// rotateLeft rotates the tree rooted at node x.
326+
// turning (x a (y b c)) into (y (x a b) c).
327+
func (root *semaRoot) rotateLeft(x *sudog) {
328+
// p -> (x a (y b c))
329+
p := x.parent
330+
a, y := x.prev, x.next
331+
b, c := y.prev, y.next
332+
333+
y.prev = x
334+
x.parent = y
335+
y.next = c
336+
if c != nil {
337+
c.parent = y
338+
}
339+
x.prev = a
340+
if a != nil {
341+
a.parent = x
342+
}
343+
x.next = b
344+
if b != nil {
345+
b.parent = x
346+
}
347+
348+
y.parent = p
349+
if p == nil {
350+
root.treap = y
351+
} else if p.prev == x {
352+
p.prev = y
353+
} else {
354+
if p.next != x {
355+
throw("semaRoot rotateLeft")
356+
}
357+
p.next = y
358+
}
359+
}
360+
361+
// rotateRight rotates the tree rooted at node y.
362+
// turning (y (x a b) c) into (x a (y b c)).
363+
func (root *semaRoot) rotateRight(y *sudog) {
364+
// p -> (y (x a b) c)
365+
p := y.parent
366+
x, c := y.prev, y.next
367+
a, b := x.prev, x.next
368+
369+
x.prev = a
370+
if a != nil {
371+
a.parent = x
372+
}
373+
x.next = y
374+
y.parent = x
375+
y.prev = b
376+
if b != nil {
377+
b.parent = y
378+
}
379+
y.next = c
380+
if c != nil {
381+
c.parent = y
382+
}
383+
384+
x.parent = p
385+
if p == nil {
386+
root.treap = x
387+
} else if p.prev == y {
388+
p.prev = x
389+
} else {
390+
if p.next != y {
391+
throw("semaRoot rotateRight")
392+
}
393+
p.next = x
394+
}
395+
}
396+
293397
// notifyList is a ticket-based notification list used to implement sync.Cond.
294398
//
295399
// It must be kept in sync with the sync package.
@@ -414,10 +518,22 @@ func notifyListNotifyOne(l *notifyList) {
414518
return
415519
}
416520

417-
// Update the next notify ticket number, and try to find the G that
418-
// needs to be notified. If it hasn't made it to the list yet we won't
419-
// find it, but it won't park itself once it sees the new notify number.
521+
// Update the next notify ticket number.
420522
atomic.Store(&l.notify, t+1)
523+
524+
// Try to find the g that needs to be notified.
525+
// If it hasn't made it to the list yet we won't find it,
526+
// but it won't park itself once it sees the new notify number.
527+
//
528+
// This scan looks linear but essentially always stops quickly.
529+
// Because g's queue separately from taking numbers,
530+
// there may be minor reorderings in the list, but we
531+
// expect the g we're looking for to be near the front.
532+
// The g has others in front of it on the list only to the
533+
// extent that it lost the race, so the iteration will not
534+
// be too long. This applies even when the g is missing:
535+
// it hasn't yet gotten to sleep and has lost the race to
536+
// the (few) other g's that we find on the list.
421537
for p, s := (*sudog)(nil), l.head; s != nil; p, s = s, s.next {
422538
if s.ticket == t {
423539
n := s.next

0 commit comments

Comments
 (0)