Commit d5d24db
kels-ngodeke-em authored and committed
sync: improve sync.Pool object stealing
This CL provides the ability to randomly select a P to steal an object from its shared queue. To provide this ability, the randomOrder structure was copied from runtime/proc.go. It should reduce contention on the first Ps and improve the balance of object stealing across all Ps. The patch also adds a new benchmark, PoolStarvation, which forces Ps to steal objects.

Benchmarks:

name                old time/op    new time/op    delta
Pool-8              2.16ns ±14%    2.14ns ±16%      ~     (p=0.425 n=10+10)
PoolOverflow-8       489ns ± 0%     489ns ± 0%      ~     (p=0.719 n=9+10)
PoolStarvation-8    7.00µs ± 4%    6.59µs ± 2%   -5.86%   (p=0.000 n=10+10)
PoolSTW-8           15.1µs ± 1%    15.2µs ± 1%   +0.99%   (p=0.001 n=10+10)
PoolExpensiveNew-8  1.25ms ±10%    1.31ms ± 9%      ~     (p=0.143 n=10+10)
[Geo mean]          2.68µs         2.68µs        -0.28%

name                old p50-ns/STW  new p50-ns/STW  delta
PoolSTW-8           15.0k ± 1%      15.1k ± 1%    +0.92%   (p=0.000 n=10+10)

name                old p95-ns/STW  new p95-ns/STW  delta
PoolSTW-8           16.2k ± 3%      16.4k ± 2%      ~      (p=0.143 n=10+10)

name                old GCs/op      new GCs/op      delta
PoolExpensiveNew-8  0.29 ± 2%       0.30 ± 1%     +2.84%   (p=0.000 n=8+10)

name                old New/op      new New/op      delta
PoolExpensiveNew-8  8.07 ±11%       8.49 ±10%       ~      (p=0.123 n=10+10)

Change-Id: I3ca1d0bf1f358b1148c58e64740fb2d5bfc0bc02
Reviewed-on: https://go-review.googlesource.com/c/go/+/303949
Reviewed-by: David Chase <[email protected]>
Trust: Emmanuel Odeke <[email protected]>
1 parent 1f7ddf5 commit d5d24db

File tree

2 files changed (+111, -10 lines)

src/sync/pool.go

Lines changed: 93 additions & 10 deletions
@@ -70,6 +70,57 @@ type poolLocal struct {
 	pad [128 - unsafe.Sizeof(poolLocalInternal{})%128]byte
 }
 
+// The randomOrder and randomEnum are copied from runtime/proc.go
+type randomOrder struct {
+	count    uint32
+	coprimes []uint32
+}
+
+type randomEnum struct {
+	i     uint32
+	count uint32
+	pos   uint32
+	inc   uint32
+}
+
+func (ord *randomOrder) reset(count uint32) {
+	ord.count = count
+	ord.coprimes = ord.coprimes[:0]
+	for i := uint32(1); i <= count; i++ {
+		if gcd(i, count) == 1 {
+			ord.coprimes = append(ord.coprimes, i)
+		}
+	}
+}
+
+func (ord *randomOrder) start(i uint32) randomEnum {
+	return randomEnum{
+		count: ord.count,
+		pos:   i % ord.count,
+		inc:   ord.coprimes[i%uint32(len(ord.coprimes))],
+	}
+}
+
+func (enum *randomEnum) done() bool {
+	return enum.i == enum.count
+}
+
+func (enum *randomEnum) next() {
+	enum.i++
+	enum.pos = (enum.pos + enum.inc) % enum.count
+}
+
+func (enum *randomEnum) position() uint32 {
+	return enum.pos
+}
+
+func gcd(a, b uint32) uint32 {
+	for b != 0 {
+		a, b = b, a%b
+	}
+	return a
+}
+
 // from runtime
 func fastrand() uint32
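Why this enumeration works: stepping through indexes by an increment that is coprime with the count walks a full permutation of 0..count-1, so every P is still visited exactly once per steal attempt, while different fastrand seeds start at different positions and step by different strides. A minimal standalone sketch of that property (the package, main, and the count/seed values below are illustrative only, not part of the patch):

package main

import "fmt"

func gcd(a, b uint32) uint32 {
	for b != 0 {
		a, b = b, a%b
	}
	return a
}

func main() {
	const count = 8 // pretend GOMAXPROCS == 8
	// The increments randomOrder.reset would keep for count=8: 1, 3, 5, 7.
	var coprimes []uint32
	for i := uint32(1); i <= count; i++ {
		if gcd(i, count) == 1 {
			coprimes = append(coprimes, i)
		}
	}
	// Replay randomEnum by hand for a few seeds: each walk is a full
	// permutation of 0..7, but the visit order differs per seed.
	for _, seed := range []uint32{0, 1, 6} {
		pos, inc := seed%count, coprimes[seed%uint32(len(coprimes))]
		for i := 0; i < count; i++ {
			fmt.Print(pos, " ")
			pos = (pos + inc) % count
		}
		fmt.Println() // seed 0: 0 1 2 3 4 5 6 7; seed 6: 6 3 0 5 2 7 4 1
	}
}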

@@ -153,29 +204,53 @@ func (p *Pool) Get() interface{} {
 func (p *Pool) getSlow(pid int) interface{} {
 	// See the comment in pin regarding ordering of the loads.
 	size := runtime_LoadAcquintptr(&p.localSize) // load-acquire
-	locals := p.local                            // load-consume
-	// Try to steal one element from other procs.
-	for i := 0; i < int(size); i++ {
-		l := indexLocal(locals, (pid+i+1)%int(size))
-		if x, _ := l.shared.popTail(); x != nil {
-			return x
+	// Load pOrder atomically to prevent possible races
+	order := (*randomOrder)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&pOrder)))) // load-consume
+
+	// Pin always returns a non-zero localSize, and it will remain so until
+	// runtime_procUnpin is called. This invariant is maintained by pin ensuring that
+	// locals is always big enough to account for the current P and that poolCleanup
+	// can never execute concurrently with a pinned P due to disabled preemption.
+	// So we could remove this condition, which protects the loop body from division
+	// by zero, but we leave it here just to be sure there is no possibility of error.
+	if size != 0 {
+		locals := p.local // load-consume
+		// Try to steal one element from other procs.
+		for rndp := order.start(fastrand()); !rndp.done(); rndp.next() {
+			i := int(rndp.position())
+			// While pOrder is limited to returning indexes within the range of Ps,
+			// locals may be smaller either because it was reset or because of a race
+			// with pinSlow. Hence, we must still mod the local index by size.
+			l := indexLocal(locals, (pid+i+1)%int(size))
+			if x, _ := l.shared.popTail(); x != nil {
+				return x
+			}
 		}
 	}
 
 	// Try the victim cache. We do this after attempting to steal
 	// from all primary caches because we want objects in the
 	// victim cache to age out if at all possible.
 	size = atomic.LoadUintptr(&p.victimSize)
+
+	// We also have to ensure that the victim cache is big enough to account for the
+	// current P and that size is not zero (protects from division by zero), similar
+	// to what pin does.
 	if uintptr(pid) >= size {
 		return nil
 	}
-	locals = p.victim
+	locals := p.victim
 	l := indexLocal(locals, pid)
+
+	// Check private cache
 	if x := l.private; x != nil {
 		l.private = nil
 		return x
 	}
-	for i := 0; i < int(size); i++ {
+
+	// Try to fetch from the tail of other P queues
+	for rndp := order.start(fastrand()); !rndp.done(); rndp.next() {
+		i := int(rndp.position())
 		l := indexLocal(locals, (pid+i)%int(size))
 		if x, _ := l.shared.popTail(); x != nil {
 			return x
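The mod-by-size noted in the comments matters because the enumerator is sized to pOrder's P count, while locals may be a smaller snapshot. A tiny illustrative sketch with made-up numbers (not runtime code):

package main

import "fmt"

func main() {
	// Suppose pOrder was rebuilt after GOMAXPROCS grew to 8, but this
	// getSlow call observed an older locals array with size == 4.
	size, pid := 4, 1
	for i := 0; i < 8; i++ { // positions the enumerator may return: 0..7
		fmt.Println((pid + i + 1) % size) // always in [0, size): safe for indexLocal
	}
}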
@@ -224,9 +299,13 @@ func (p *Pool) pinSlow() (*poolLocal, int) {
 	}
 	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
 	size := runtime.GOMAXPROCS(0)
+	// Set count of Ps for random ordering
+	order := &randomOrder{}
+	order.reset(uint32(size))
 	local := make([]poolLocal, size)
-	atomic.StorePointer(&p.local, unsafe.Pointer(&local[0])) // store-release
-	runtime_StoreReluintptr(&p.localSize, uintptr(size))     // store-release
+	atomic.StorePointer(&p.local, unsafe.Pointer(&local[0]))                               // store-release
+	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&pOrder)), unsafe.Pointer(order)) // store-release
+	runtime_StoreReluintptr(&p.localSize, uintptr(size))                                   // store-release
 	return &local[pid], pid
 }
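The pOrder handoff is a publish-by-pointer pattern: build a fully initialized randomOrder, then make it visible with a single atomic pointer store, so a concurrent getSlow sees either the old table or the complete new one, never a half-built struct. A self-contained sketch of the same pattern (the order type and the publish/snapshot names are illustrative, not from this patch):

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type order struct {
	count    uint32
	coprimes []uint32
}

// current is only ever written via atomic.StorePointer and read via
// atomic.LoadPointer, mirroring how the patch treats pOrder.
var current *order

func publish(count uint32) {
	o := &order{count: count, coprimes: []uint32{1}} // fully built first
	// store-release: the writes to *o above are visible to any reader
	// that observes the new pointer.
	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&current)), unsafe.Pointer(o))
}

func snapshot() *order {
	// Returns nil until the first publish.
	return (*order)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&current))))
}

func main() {
	publish(8)
	fmt.Println(snapshot().count) // 8
}

Since Go 1.19 the same handoff can be written without unsafe via atomic.Pointer[T]; this patch predates that API.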

@@ -267,6 +346,10 @@ var (
 	// oldPools is the set of pools that may have non-empty victim
 	// caches. Protected by STW.
 	oldPools []*Pool
+
+	// pOrder is a random order of Ps used for stealing. Writes
+	// are protected by allPoolsMu. Reads are atomic.
+	pOrder *randomOrder
 )
 
 func init() {

src/sync/pool_test.go

Lines changed: 18 additions & 0 deletions
@@ -271,6 +271,24 @@ func BenchmarkPoolOverflow(b *testing.B) {
 	})
 }
 
+// Simulate object starvation in order to force Ps to steal objects
+// from other Ps.
+func BenchmarkPoolStarvation(b *testing.B) {
+	var p Pool
+	count := 100
+	count_starved := count - (count / runtime.GOMAXPROCS(0))
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			for b := 0; b < count_starved; b++ {
+				p.Put(1)
+			}
+			for b := 0; b < count; b++ {
+				p.Get()
+			}
+		}
+	})
+}
+
 var globalSink interface{}
 
 func BenchmarkPoolSTW(b *testing.B) {
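To reproduce the tables in the commit message, one would typically run the benchmarks on the old and new trees, e.g. go test -run NONE -bench Pool -count 10 sync > old.txt (and > new.txt with the patch applied), then compare with benchstat old.txt new.txt from golang.org/x/perf/cmd/benchstat; the "old time/op / new time/op / delta" tables above are benchstat output.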
