Skip to content

Commit fe67ea3

Browse files
committed
runtime: add background scavenger
This change adds a background scavenging goroutine whose pacing is determined when the heap goal changes. The scavenger is paced to use at most 1% of the mutator's time for most systems. Furthermore, the scavenger's pacing is computed based on the estimated number of scavengable huge pages to take advantage of optimizations provided by the OS. The purpose of this scavenger is to deal with a shrinking heap: if the heap goal is falling over time, the scavenger should kick in and start returning free pages from the heap to the OS. Also, now that we have a pacing system, the credit system used by scavengeLocked has become redundant. Replace it with a mechanism which only scavenges on the allocation path if it makes sense to do so with respect to the new pacing system. Fixes #30333. Change-Id: I6203f8dc84affb26c3ab04528889dd9663530edc Reviewed-on: https://go-review.googlesource.com/c/go/+/142960 Run-TryBot: Michael Knyszek <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Austin Clements <[email protected]>
1 parent eaa1c87 commit fe67ea3

File tree

4 files changed

+419
-41
lines changed

4 files changed

+419
-41
lines changed

src/runtime/mgc.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,14 @@ func readgogc() int32 {
202202

203203
// gcenable is called after the bulk of the runtime initialization,
204204
// just before we're about to start letting user code run.
205-
// It kicks off the background sweeper goroutine and enables GC.
205+
// It kicks off the background sweeper goroutine, the background
206+
// scavenger goroutine, and enables GC.
206207
func gcenable() {
207-
c := make(chan int, 1)
208+
// Kick off sweeping and scavenging.
209+
c := make(chan int, 2)
208210
go bgsweep(c)
211+
go bgscavenge(c)
212+
<-c
209213
<-c
210214
memstats.enablegc = true // now that runtime is initialized, GC is okay
211215
}
@@ -850,6 +854,8 @@ func gcSetTriggerRatio(triggerRatio float64) {
850854
atomic.Store64(&mheap_.pagesSweptBasis, pagesSwept)
851855
}
852856
}
857+
858+
gcPaceScavenger()
853859
}
854860

855861
// gcEffectiveGrowthRatio returns the current effective heap growth

src/runtime/mgcscavenge.go

Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
// Copyright 2019 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Scavenging free pages.
6+
//
7+
// This file implements scavenging (the release of physical pages backing mapped
8+
// memory) of free and unused pages in the heap as a way to deal with page-level
9+
// fragmentation and reduce the RSS of Go applications.
10+
//
11+
// Scavenging in Go happens on two fronts: there's the background
12+
// (asynchronous) scavenger and the heap-growth (synchronous) scavenger.
13+
//
14+
// The former happens on a goroutine much like the background sweeper which is
15+
// soft-capped at using scavengePercent of the mutator's time, based on
16+
// order-of-magnitude estimates of the costs of scavenging. The background
17+
// scavenger's primary goal is to bring the estimated heap RSS of the
18+
// application down to a goal.
19+
//
20+
// That goal is defined as (retainExtraPercent+100) / 100 * next_gc.
21+
//
22+
// The goal is updated after each GC and the scavenger's pacing parameters
23+
// (which live in mheap_) are updated to match. The pacing parameters work much
24+
// like the background sweeping parameters. The parameters define a line whose
25+
// horizontal axis is time and vertical axis is estimated heap RSS, and the
26+
// scavenger attempts to stay below that line at all times.
27+
//
28+
// The synchronous heap-growth scavenging happens whenever the heap grows in
29+
// size, for some definition of heap-growth. The intuition behind this is that
30+
// the application had to grow the heap because existing fragments were
31+
// not sufficiently large to satisfy a page-level memory allocation, so we
32+
// scavenge those fragments eagerly to offset the growth in RSS that results.
33+
34+
package runtime
35+
36+
const (
	// The background scavenger is paced according to these parameters.
	//
	// scavengePercent represents the portion of mutator time we're willing
	// to spend on scavenging in percent.
	//
	// scavengePageLatency is a worst-case estimate (order-of-magnitude) of
	// the time it takes to scavenge one (regular-sized) page of memory.
	// scavengeHugePageLatency is the same but for huge pages.
	//
	// scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
	// and represents the average time between scavenging one page that we're
	// aiming for. scavengeHugePagePeriod is the same but for huge pages.
	// These constants are core to the scavenge pacing algorithm.
	scavengePercent         = 1    // 1%
	scavengePageLatency     = 10e3 // 10µs
	scavengeHugePageLatency = 10e3 // 10µs
	scavengePagePeriod      = scavengePageLatency / (scavengePercent / 100.0)
	// Derive the huge-page period from scavengeHugePageLatency, matching the
	// doc comment above. (Previously this used scavengePageLatency; the two
	// latencies are currently equal so the value is unchanged, but the
	// derivation would silently break if the latencies ever diverged.)
	scavengeHugePagePeriod = scavengeHugePageLatency / (scavengePercent / 100.0)

	// retainExtraPercent represents the amount of memory over the heap goal
	// that the scavenger should keep as a buffer space for the allocator.
	//
	// The purpose of maintaining this overhead is to have a greater pool of
	// unscavenged memory available for allocation (since using scavenged memory
	// incurs an additional cost), to account for heap fragmentation and
	// the ever-changing layout of the heap.
	retainExtraPercent = 10
)
65+
66+
// heapRetained returns an estimate of the current heap RSS.
67+
//
68+
// mheap_.lock must be held or the world must be stopped.
69+
func heapRetained() uint64 {
70+
return memstats.heap_sys - memstats.heap_released
71+
}
72+
73+
// gcPaceScavenger updates the scavenger's pacing, particularly
74+
// its rate and RSS goal.
75+
//
76+
// The RSS goal is based on the current heap goal with a small overhead
77+
// to accomodate non-determinism in the allocator.
78+
//
79+
// The pacing is based on scavengePageRate, which applies to both regular and
80+
// huge pages. See that constant for more information.
81+
//
82+
// mheap_.lock must be held or the world must be stopped.
83+
func gcPaceScavenger() {
84+
// Compute our scavenging goal and align it to a physical page boundary
85+
// to make the following calculations more exact.
86+
retainedGoal := memstats.next_gc
87+
// Add retainExtraPercent overhead to retainedGoal. This calculation
88+
// looks strange but the purpose is to arrive at an integer division
89+
// (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8)
90+
// that also avoids the overflow from a multiplication.
91+
retainedGoal += retainedGoal / (1.0 / (retainExtraPercent / 100.0))
92+
retainedGoal = (retainedGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1)
93+
94+
// Represents where we are now in the heap's contribution to RSS in bytes.
95+
//
96+
// Guaranteed to always be a multiple of physPageSize on systems where
97+
// physPageSize <= pageSize since we map heap_sys at a rate larger than
98+
// any physPageSize and released memory in multiples of the physPageSize.
99+
//
100+
// However, certain functions recategorize heap_sys as other stats (e.g.
101+
// stack_sys) and this happens in multiples of pageSize, so on systems
102+
// where physPageSize > pageSize the calculations below will not be exact.
103+
// Generally this is OK since we'll be off by at most one regular
104+
// physical page.
105+
retainedNow := heapRetained()
106+
107+
// If we're already below our goal, publish the goal in case it changed
108+
// then disable the background scavenger.
109+
if retainedNow <= retainedGoal {
110+
mheap_.scavengeRetainedGoal = retainedGoal
111+
mheap_.scavengeBytesPerNS = 0
112+
return
113+
}
114+
115+
// Now we start to compute the total amount of work necessary and the total
116+
// amount of time we're willing to give the scavenger to complete this work.
117+
// This will involve calculating how much of the work consists of huge pages
118+
// and how much consists of regular pages since the former can let us scavenge
119+
// more memory in the same time.
120+
totalWork := retainedNow - retainedGoal
121+
122+
// On systems without huge page support, all work is regular work.
123+
regularWork := totalWork
124+
hugeTime := uint64(0)
125+
126+
// On systems where we have huge pages, we want to do as much of the
127+
// scavenging work as possible on huge pages, because the costs are the
128+
// same per page, but we can give back more more memory in a shorter
129+
// period of time.
130+
if physHugePageSize != 0 {
131+
// Start by computing the amount of free memory we have in huge pages
132+
// in total. Trivially, this is all the huge page work we need to do.
133+
hugeWork := uint64(mheap_.free.unscavHugePages * physHugePageSize)
134+
135+
// ...but it could turn out that there's more huge work to do than
136+
// total work, so cap it at total work. This might happen for very large
137+
// heaps where the additional factor of retainExtraPercent can make it so
138+
// that there are free chunks of memory larger than a huge page that we don't want
139+
// to scavenge.
140+
if hugeWork >= totalWork {
141+
hugePages := totalWork / uint64(physHugePageSize)
142+
hugeWork = hugePages * uint64(physHugePageSize)
143+
}
144+
// Everything that's not huge work is regular work. At this point we
145+
// know huge work so we can calculate how much time that will take
146+
// based on scavengePageRate (which applies to pages of any size).
147+
regularWork = totalWork - hugeWork
148+
hugeTime = hugeWork / uint64(physHugePageSize) * scavengeHugePagePeriod
149+
}
150+
// Finally, we can compute how much time it'll take to do the regular work
151+
// and the total time to do all the work.
152+
regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
153+
totalTime := hugeTime + regularTime
154+
155+
now := nanotime()
156+
157+
lock(&scavenge.lock)
158+
159+
// Update all the pacing parameters in mheap with scavenge.lock held,
160+
// so that scavenge.gen is kept in sync with the updated values.
161+
mheap_.scavengeRetainedGoal = retainedGoal
162+
mheap_.scavengeRetainedBasis = retainedNow
163+
mheap_.scavengeTimeBasis = now
164+
mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
165+
scavenge.gen++ // increase scavenge generation
166+
167+
// Wake up background scavenger if needed, since the pacing was just updated.
168+
wakeScavengerLocked()
169+
170+
unlock(&scavenge.lock)
171+
}
172+
173+
// State of the background scavenger.
174+
var scavenge struct {
175+
lock mutex
176+
g *g
177+
parked bool
178+
timer *timer
179+
gen uint32 // read with either lock or mheap_.lock, write with both
180+
}
181+
182+
// wakeScavengerLocked unparks the scavenger if necessary. It must be called
183+
// after any pacing update.
184+
//
185+
// scavenge.lock must be held.
186+
func wakeScavengerLocked() {
187+
if scavenge.parked {
188+
// Try to stop the timer but we don't really care if we succeed.
189+
// It's possible that either a timer was never started, or that
190+
// we're racing with it.
191+
// In the case that we're racing with there's the low chance that
192+
// we experience a spurious wake-up of the scavenger, but that's
193+
// totally safe.
194+
stopTimer(scavenge.timer)
195+
196+
// Unpark the goroutine and tell it that there may have been a pacing
197+
// change.
198+
scavenge.parked = false
199+
ready(scavenge.g, 0, true)
200+
}
201+
}
202+
203+
// scavengeSleep attempts to put the scavenger to sleep for ns.
204+
// It also checks to see if gen != scavenge.gen before going to sleep,
205+
// and aborts if true (meaning an update had occurred).
206+
//
207+
// Note that this function should only be called by the scavenger.
208+
//
209+
// The scavenger may be woken up earlier by a pacing change, and it may not go
210+
// to sleep at all if there's a pending pacing change.
211+
//
212+
// Returns false if awoken early (i.e. true means a complete sleep).
213+
func scavengeSleep(gen uint32, ns int64) bool {
214+
lock(&scavenge.lock)
215+
216+
// If there was an update, just abort the sleep.
217+
if scavenge.gen != gen {
218+
unlock(&scavenge.lock)
219+
return false
220+
}
221+
222+
// Set the timer.
223+
now := nanotime()
224+
scavenge.timer.when = now + ns
225+
startTimer(scavenge.timer)
226+
227+
// Park the goroutine. It's fine that we don't publish the
228+
// fact that the timer was set; even if the timer wakes up
229+
// and fire scavengeReady before we park, it'll block on
230+
// scavenge.lock.
231+
scavenge.parked = true
232+
goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
233+
234+
// Return true if we completed the full sleep.
235+
return (nanotime() - now) >= ns
236+
}
237+
238+
// Background scavenger.
239+
//
240+
// The background scavenger maintains the RSS of the application below
241+
// the line described by the proportional scavenging statistics in
242+
// the mheap struct.
243+
func bgscavenge(c chan int) {
244+
scavenge.g = getg()
245+
246+
lock(&scavenge.lock)
247+
scavenge.parked = true
248+
249+
scavenge.timer = new(timer)
250+
scavenge.timer.f = func(_ interface{}, _ uintptr) {
251+
lock(&scavenge.lock)
252+
wakeScavengerLocked()
253+
unlock(&scavenge.lock)
254+
}
255+
256+
c <- 1
257+
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
258+
259+
// Parameters for sleeping.
260+
//
261+
// If we end up doing more work than we need, we should avoid spinning
262+
// until we have more work to do: instead, we know exactly how much time
263+
// until more work will need to be done, so we sleep.
264+
//
265+
// We should avoid sleeping for less than minSleepNS because Gosched()
266+
// overheads among other things will work out better in that case.
267+
//
268+
// There's no reason to set a maximum on sleep time because we'll always
269+
// get woken up earlier if there's any kind of update that could change
270+
// the scavenger's pacing.
271+
//
272+
// retryDelayNS tracks how much to sleep next time we fail to do any
273+
// useful work.
274+
const minSleepNS = int64(100 * 1000) // 100 µs
275+
276+
retryDelayNS := minSleepNS
277+
278+
for {
279+
released := uintptr(0)
280+
park := false
281+
ttnext := int64(0)
282+
gen := uint32(0)
283+
284+
// Run on the system stack since we grab the heap lock,
285+
// and a stack growth with the heap lock means a deadlock.
286+
systemstack(func() {
287+
lock(&mheap_.lock)
288+
289+
gen = scavenge.gen
290+
291+
// If background scavenging is disabled or if there's no work to do just park.
292+
retained := heapRetained()
293+
if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
294+
unlock(&mheap_.lock)
295+
park = true
296+
return
297+
}
298+
299+
// Calculate how big we want the retained heap to be
300+
// at this point in time.
301+
//
302+
// The formula is for that of a line, y = b - mx
303+
// We want y (want),
304+
// m = scavengeBytesPerNS (> 0)
305+
// x = time between scavengeTimeBasis and now
306+
// b = scavengeRetainedBasis
307+
rate := mheap_.scavengeBytesPerNS
308+
tdist := nanotime() - mheap_.scavengeTimeBasis
309+
rdist := uint64(rate * float64(tdist))
310+
want := mheap_.scavengeRetainedBasis - rdist
311+
312+
// If we're above the line, scavenge to get below the
313+
// line.
314+
if retained > want {
315+
released = mheap_.scavengeLocked(uintptr(retained - want))
316+
}
317+
unlock(&mheap_.lock)
318+
319+
// If we over-scavenged a bit, calculate how much time it'll
320+
// take at the current rate for us to make that up. We definitely
321+
// won't have any work to do until at least that amount of time
322+
// passes.
323+
if released > uintptr(retained-want) {
324+
extra := released - uintptr(retained-want)
325+
ttnext = int64(float64(extra) / rate)
326+
}
327+
})
328+
329+
if park {
330+
lock(&scavenge.lock)
331+
scavenge.parked = true
332+
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
333+
continue
334+
}
335+
336+
if debug.gctrace > 0 {
337+
if released > 0 {
338+
print("scvg: ", released>>20, " MB released\n")
339+
}
340+
print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
341+
}
342+
343+
if released == 0 {
344+
// If we were unable to release anything this may be because there's
345+
// no free memory available to scavenge. Go to sleep and try again.
346+
if scavengeSleep(gen, retryDelayNS) {
347+
// If we successfully slept through the delay, back off exponentially.
348+
retryDelayNS *= 2
349+
}
350+
continue
351+
}
352+
retryDelayNS = minSleepNS
353+
354+
if ttnext > 0 && ttnext > minSleepNS {
355+
// If there's an appreciable amount of time until the next scavenging
356+
// goal, just sleep. We'll get woken up if anything changes and this
357+
// way we avoid spinning.
358+
scavengeSleep(gen, ttnext)
359+
continue
360+
}
361+
362+
// Give something else a chance to run, no locks are held.
363+
Gosched()
364+
}
365+
}

0 commit comments

Comments
 (0)