Commit 1b1fbb3
runtime: use inUse ranges to map in summary memory only as needed
Prior to this change, if the heap was very discontiguous (such as in TestArenaCollision) it's possible we could map a large amount of memory as R/W and commit it. We would use only the start and end to track what should be mapped, and we would extend that mapping as needed to accommodate a potentially fragmented address space.

After this change, we only map exactly the part of the summary arrays that we need by using the inUse ranges from the previous change. This reduces the GCSys footprint of TestArenaCollision from 300 MiB to 18 MiB.

Because summaries are no longer mapped contiguously, the scavenger can no longer iterate over them directly. This change also updates the scavenger to borrow ranges out of inUse and iterate over only the parts of the heap which are actually currently in use. This is both an optimization and necessary for correctness.

Fixes #35514.

Change-Id: I96bf0c73ed0d2d89a00202ece7b9d089a53bac90
Reviewed-on: https://go-review.googlesource.com/c/go/+/207758
Run-TryBot: Michael Knyszek <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
1 parent 6f2b834 commit 1b1fbb3
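
Editor's note: the change hinges on the page allocator's inUse set, a sorted list of address ranges with a findSucc lookup. Below is a minimal, self-contained sketch of that bookkeeping, assuming simplified types; the names addrRange, addrRanges, and findSucc mirror the runtime, but the implementation is illustrative only, not the runtime's code.

package main

import (
    "fmt"
    "sort"
)

// addrRange is a region of address space, [base, limit).
type addrRange struct {
    base, limit uintptr
}

func (r addrRange) size() uintptr { return r.limit - r.base }

// addrRanges is a sorted list of non-overlapping address ranges.
type addrRanges struct {
    ranges []addrRange
}

// findSucc returns the index of the first range whose base is strictly
// greater than addr, or len(ranges) if there is none.
func (a *addrRanges) findSucc(addr uintptr) int {
    return sort.Search(len(a.ranges), func(i int) bool {
        return a.ranges[i].base > addr
    })
}

func main() {
    inUse := addrRanges{ranges: []addrRange{
        {base: 0x1000, limit: 0x3000},
        {base: 0x8000, limit: 0x9000},
    }}
    addr := uintptr(0x8800)
    i := inUse.findSucc(addr)
    if i == 0 {
        fmt.Println("no in-use range at or below addr")
        return
    }
    // The range at index i-1 is the highest one that can intersect [0, addr].
    // Clamping its limit to addr+1 yields that intersection, which is the kind
    // of lookup the scavenger uses to pick the next region to walk.
    r := inUse.ranges[i-1]
    if r.limit > addr+1 {
        r.limit = addr + 1
    }
    fmt.Printf("highest in-use range at or below %#x: [%#x, %#x), %d bytes\n",
        addr, r.base, r.limit, r.size())
}

This is the lookup shape that both sysGrow (to find already-mapped neighbors) and the scavenger (to find the next in-use region below scavAddr) rely on in the diffs below.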

File tree: 6 files changed (+196, -129 lines)
src/runtime/export_test.go (+1)

@@ -578,6 +578,7 @@ func RunGetgThreadSwitchTest() {
 const (
     PageSize         = pageSize
     PallocChunkPages = pallocChunkPages
+    PageAlloc64Bit   = pageAlloc64Bit
 )

 // Expose pallocSum for testing.

src/runtime/mgcscavenge.go (+84, -47)

@@ -405,15 +405,14 @@ func (s *pageAlloc) scavengeOne(max uintptr, locked bool) uintptr {
     }

     lockHeap()
-    top := chunkIndex(s.scavAddr)
-    if top < s.start {
+    ci := chunkIndex(s.scavAddr)
+    if ci < s.start {
         unlockHeap()
         return 0
     }

     // Check the chunk containing the scav addr, starting at the addr
     // and see if there are any free and unscavenged pages.
-    ci := chunkIndex(s.scavAddr)
     if s.summary[len(s.summary)-1][ci].max() >= uint(minPages) {
         // We only bother looking for a candidate if there at least
         // minPages free pages at all. It's important that we only
@@ -429,59 +428,97 @@ func (s *pageAlloc) scavengeOne(max uintptr, locked bool) uintptr {
             return uintptr(npages) * pageSize
         }
     }
-    unlockHeap()

-    // Slow path: iterate optimistically looking for any free and unscavenged page.
-    // If we think we see something, stop and verify it!
-    for i := top - 1; i >= s.start; i-- {
-        // If this chunk is totally in-use or has no unscavenged pages, don't bother
-        // doing a more sophisticated check.
-        //
-        // Note we're accessing the summary and the chunks without a lock, but
-        // that's fine. We're being optimistic anyway.
-
-        // Check if there are enough free pages at all. It's imperative that we
-        // check this before the chunk itself so that we quickly skip over
-        // unused parts of the address space, which may have a cleared bitmap
-        // but a zero'd summary which indicates not to allocate from there.
-        if s.summary[len(s.summary)-1][i].max() < uint(minPages) {
-            continue
+    // getInUseRange returns the highest range in the
+    // intersection of [0, addr] and s.inUse.
+    //
+    // s.mheapLock must be held.
+    getInUseRange := func(addr uintptr) addrRange {
+        top := s.inUse.findSucc(addr)
+        if top == 0 {
+            return addrRange{}
+        }
+        r := s.inUse.ranges[top-1]
+        // addr is inclusive, so treat it as such when
+        // updating the limit, which is exclusive.
+        if r.limit > addr+1 {
+            r.limit = addr + 1
         }
+        return r
+    }

-        // Run over the chunk looking harder for a candidate. Again, we could
-        // race with a lot of different pieces of code, but we're just being
-        // optimistic. Make sure we load the l2 pointer atomically though, to
-        // avoid races with heap growth. It may or may not be possible to also
-        // see a nil pointer in this case if we do race with heap growth, but
-        // just defensively ignore the nils. This operation is optimistic anyway.
-        l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
-        if l2 == nil || !l2[i.l2()].hasScavengeCandidate(minPages) {
-            continue
+    // Slow path: iterate optimistically over the in-use address space
+    // looking for any free and unscavenged page. If we think we see something,
+    // lock and verify it!
+    //
+    // We iterate over the address space by taking ranges from inUse.
+newRange:
+    for {
+        r := getInUseRange(s.scavAddr)
+        if r.size() == 0 {
+            break
         }
+        unlockHeap()

-        // We found a candidate, so let's lock and verify it.
-        lockHeap()
+        // Iterate over all of the chunks described by r.
+        // Note that r.limit is the exclusive upper bound, but what
+        // we want is the top chunk instead, inclusive, so subtract 1.
+        bot, top := chunkIndex(r.base), chunkIndex(r.limit-1)
+        for i := top; i >= bot; i-- {
+            // If this chunk is totally in-use or has no unscavenged pages, don't bother
+            // doing a more sophisticated check.
+            //
+            // Note we're accessing the summary and the chunks without a lock, but
+            // that's fine. We're being optimistic anyway.
+
+            // Check quickly if there are enough free pages at all.
+            if s.summary[len(s.summary)-1][i].max() < uint(minPages) {
+                continue
+            }

-        // Find, verify, and scavenge if we can.
-        chunk := s.chunkOf(i)
-        base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
-        if npages > 0 {
-            // We found memory to scavenge! Mark the bits and report that up.
-            s.scavengeRangeLocked(i, base, npages)
-            unlockHeap()
-            return uintptr(npages) * pageSize
+            // Run over the chunk looking harder for a candidate. Again, we could
+            // race with a lot of different pieces of code, but we're just being
+            // optimistic. Make sure we load the l2 pointer atomically though, to
+            // avoid races with heap growth. It may or may not be possible to also
+            // see a nil pointer in this case if we do race with heap growth, but
+            // just defensively ignore the nils. This operation is optimistic anyway.
+            l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
+            if l2 == nil || !l2[i.l2()].hasScavengeCandidate(minPages) {
+                continue
+            }
+
+            // We found a candidate, so let's lock and verify it.
+            lockHeap()
+
+            // Find, verify, and scavenge if we can.
+            chunk := s.chunkOf(i)
+            base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
+            if npages > 0 {
+                // We found memory to scavenge! Mark the bits and report that up.
+                // scavengeRangeLocked will update scavAddr for us, also.
+                s.scavengeRangeLocked(i, base, npages)
+                unlockHeap()
+                return uintptr(npages) * pageSize
+            }
+
+            // We were fooled, let's take this opportunity to move the scavAddr
+            // all the way down to where we searched as scavenged for future calls
+            // and keep iterating. Then, go get a new range.
+            s.scavAddr = chunkBase(i-1) + pallocChunkPages*pageSize - 1
+            continue newRange
         }
+        lockHeap()

-        // We were fooled, let's take this opportunity to move the scavAddr
-        // all the way down to where we searched as scavenged for future calls
-        // and keep iterating.
-        s.scavAddr = chunkBase(i-1) + pallocChunkPages*pageSize - 1
-        unlockHeap()
+        // Move the scavenger down the heap, past everything we just searched.
+        // Since we don't check if scavAddr moved while we let go of the heap lock,
+        // it's possible that it moved down and we're moving it up here. This
+        // raciness could result in us searching parts of the heap unnecessarily.
+        // TODO(mknyszek): Remove this racy behavior through explicit address
+        // space reservations, which are difficult to do with just scavAddr.
+        s.scavAddr = r.base - 1
     }
-
-    lockHeap()
-    // We couldn't find anything, so signal that there's nothing left
-    // to scavenge.
+    // We reached the end of the in-use address space and couldn't find anything,
+    // so signal that there's nothing left to scavenge.
     s.scavAddr = minScavAddr
     unlockHeap()
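
Editor's note: a rough sketch of the scavenger's new iteration order, under simplified assumptions (a toy chunk size, a plain slice standing in for s.inUse, and a hypothetical highestRangeBelow helper that plays the role of the getInUseRange closure above). It only illustrates the shape of the loop: take the highest in-use range at or below scavAddr, walk its chunks from top to bottom, then move scavAddr just below that range and repeat.

package main

import "fmt"

const chunkBytes = 0x1000 // toy chunk size; the runtime uses pallocChunkBytes

// addrRange is a region of address space, [base, limit).
type addrRange struct{ base, limit uintptr }

// highestRangeBelow returns the highest range that intersects [0, addr],
// clamped so its limit does not exceed addr+1, and ok == false if none exists.
func highestRangeBelow(inUse []addrRange, addr uintptr) (r addrRange, ok bool) {
    for i := len(inUse) - 1; i >= 0; i-- {
        r = inUse[i]
        if r.base <= addr {
            if r.limit > addr+1 {
                r.limit = addr + 1
            }
            return r, true
        }
    }
    return addrRange{}, false
}

func main() {
    inUse := []addrRange{
        {base: 0x10000, limit: 0x12000}, // two chunks
        {base: 0x80000, limit: 0x81000}, // one chunk, far away
    }
    scavAddr := uintptr(0x90000)
    for {
        r, ok := highestRangeBelow(inUse, scavAddr)
        if !ok {
            break
        }
        // Walk the chunks covered by r from highest to lowest
        // (r.limit is exclusive, hence the -1).
        bot, top := r.base/chunkBytes, (r.limit-1)/chunkBytes
        for ci := top; ci >= bot; ci-- {
            fmt.Printf("inspect chunk %#x\n", ci)
        }
        // Move past everything just searched and grab the next range down.
        scavAddr = r.base - 1
    }
    fmt.Println("nothing left to scavenge")
}

The gap between the two toy ranges is never visited, which is the point of the change: with discontiguously mapped summaries, the scavenger must skip address space that was never grown into rather than scanning from start to end.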

src/runtime/mpagealloc.go (+7, -53)

@@ -182,6 +182,10 @@ type pageAlloc struct {
     // runtime segmentation fault, we get a much friendlier out-of-bounds
     // error.
     //
+    // To iterate over a summary level, use inUse to determine which ranges
+    // are currently available. Otherwise one might try to access
+    // memory which is only Reserved which may result in a hard fault.
+    //
     // We may still get segmentation faults < len since some of that
     // memory may not be committed yet.
     summary [summaryLevels][]pallocSum
@@ -212,12 +216,9 @@ type pageAlloc struct {
     // making the impact on BSS too high (note the L1 is stored directly
    // in pageAlloc).
     //
-    // summary[len(s.summary)-1][i] should always be checked, at least
-    // for a zero max value, before accessing chunks[i]. It's possible the
-    // bitmap at that index is mapped in and zeroed, indicating that it
-    // contains free space, but in actuality it is unused since its
-    // corresponding summary was never updated. Tests may ignore this
-    // and assume the zero value (and that it is mapped).
+    // To iterate over the bitmap, use inUse to determine which ranges
+    // are currently available. Otherwise one might iterate over unused
+    // ranges.
     //
     // TODO(mknyszek): Consider changing the definition of the bitmap
     // such that 1 means free and 0 means in-use so that summaries and
@@ -297,53 +298,6 @@ func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
     s.mheapLock = mheapLock
 }

-// extendMappedRegion ensures that all the memory in the range
-// [base+nbase, base+nlimit) is in the Ready state.
-// base must refer to the beginning of a memory region in the
-// Reserved state. extendMappedRegion assumes that the region
-// [base+mbase, base+mlimit) is already mapped.
-//
-// Note that extendMappedRegion only supports extending
-// mappings in one direction. Therefore,
-// nbase < mbase && nlimit > mlimit is an invalid input
-// and this function will throw.
-func extendMappedRegion(base unsafe.Pointer, mbase, mlimit, nbase, nlimit uintptr, sysStat *uint64) {
-    if uintptr(base)%physPageSize != 0 {
-        print("runtime: base = ", base, "\n")
-        throw("extendMappedRegion: base not page-aligned")
-    }
-    // Round the offsets to a physical page.
-    mbase = alignDown(mbase, physPageSize)
-    nbase = alignDown(nbase, physPageSize)
-    mlimit = alignUp(mlimit, physPageSize)
-    nlimit = alignUp(nlimit, physPageSize)
-
-    // If none of the region is mapped, don't bother
-    // trying to figure out which parts are.
-    if mlimit-mbase != 0 {
-        // Determine which part of the region actually needs
-        // mapping.
-        if nbase < mbase && nlimit > mlimit {
-            // TODO(mknyszek): Consider supporting this case. It can't
-            // ever happen currently in the page allocator, but may be
-            // useful in the future. Also, it would make this function's
-            // purpose simpler to explain.
-            throw("mapped region extended in two directions")
-        } else if nbase < mbase && nlimit <= mlimit {
-            nlimit = mbase
-        } else if nbase >= mbase && nlimit > mlimit {
-            nbase = mlimit
-        } else {
-            return
-        }
-    }
-
-    // Transition from Reserved to Ready.
-    rbase := add(base, nbase)
-    sysMap(rbase, nlimit-nbase, sysStat)
-    sysUsed(rbase, nlimit-nbase)
-}
-
 // compareSearchAddrTo compares an address against s.searchAddr in a linearized
 // view of the address space on systems with discontinuous process address spaces.
 // This linearized view is the same one generated by chunkIndex and arenaIndex,

src/runtime/mpagealloc_64bit.go (+64, -27)

@@ -102,42 +102,79 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
     throw("sysGrow bounds not aligned to pallocChunkBytes")
     }

+    // addrRangeToSummaryRange converts a range of addresses into a range
+    // of summary indices which must be mapped to support those addresses
+    // in the summary range.
+    addrRangeToSummaryRange := func(level int, r addrRange) (int, int) {
+        sumIdxBase, sumIdxLimit := addrsToSummaryRange(level, r.base, r.limit)
+        return blockAlignSummaryRange(level, sumIdxBase, sumIdxLimit)
+    }
+
+    // summaryRangeToSumAddrRange converts a range of indices in any
+    // level of s.summary into page-aligned addresses which cover that
+    // range of indices.
+    summaryRangeToSumAddrRange := func(level, sumIdxBase, sumIdxLimit int) addrRange {
+        baseOffset := alignDown(uintptr(sumIdxBase)*pallocSumBytes, physPageSize)
+        limitOffset := alignUp(uintptr(sumIdxLimit)*pallocSumBytes, physPageSize)
+        base := unsafe.Pointer(&s.summary[level][0])
+        return addrRange{
+            uintptr(add(base, baseOffset)),
+            uintptr(add(base, limitOffset)),
+        }
+    }
+
+    // addrRangeToSumAddrRange is a convenience function that converts
+    // an address range r to the address range of the given summary level
+    // that stores the summaries for r.
+    addrRangeToSumAddrRange := func(level int, r addrRange) addrRange {
+        sumIdxBase, sumIdxLimit := addrRangeToSummaryRange(level, r)
+        return summaryRangeToSumAddrRange(level, sumIdxBase, sumIdxLimit)
+    }
+
+    // Find the first inUse index which is strictly greater than base.
+    //
+    // Because this function will never be asked to remap the same memory
+    // twice, this index is effectively the index at which we would insert
+    // this new growth, and base will never overlap/be contained within
+    // any existing range.
+    //
+    // This will be used to look at what memory in the summary array is already
+    // mapped before and after this new range.
+    inUseIndex := s.inUse.findSucc(base)
+
     // Walk up the radix tree and map summaries in as needed.
-    cbase, climit := chunkBase(s.start), chunkBase(s.end)
-    for l := len(s.summary) - 1; l >= 0; l-- {
+    for l := range s.summary {
         // Figure out what part of the summary array this new address space needs.
-        // Note that we need to align the ranges to the block width (1<<levelBits[l])
-        // at this level because the full block is needed to compute the summary for
-        // the next level.
-        lo, hi := addrsToSummaryRange(l, base, limit)
-        lo, hi = blockAlignSummaryRange(l, lo, hi)
+        needIdxBase, needIdxLimit := addrRangeToSummaryRange(l, addrRange{base, limit})

         // Update the summary slices with a new upper-bound. This ensures
         // we get tight bounds checks on at least the top bound.
         //
-        // We must do this regardless of whether we map new memory, because we
-        // may be extending further into the mapped memory.
-        if hi > len(s.summary[l]) {
-            s.summary[l] = s.summary[l][:hi]
+        // We must do this regardless of whether we map new memory.
+        if needIdxLimit > len(s.summary[l]) {
+            s.summary[l] = s.summary[l][:needIdxLimit]
         }

-        // Figure out what part of the summary array is already mapped.
-        // If we're doing our first growth, just pass zero.
-        // addrsToSummaryRange won't accept cbase == climit.
-        var mlo, mhi int
-        if s.start != 0 {
-            mlo, mhi = addrsToSummaryRange(l, cbase, climit)
-            mlo, mhi = blockAlignSummaryRange(l, mlo, mhi)
+        // Compute the needed address range in the summary array for level l.
+        need := summaryRangeToSumAddrRange(l, needIdxBase, needIdxLimit)
+
+        // Prune need down to what needs to be newly mapped. Some parts of it may
+        // already be mapped by what inUse describes due to page alignment requirements
+        // for mapping. prune's invariants are guaranteed by the fact that this
+        // function will never be asked to remap the same memory twice.
+        if inUseIndex > 0 {
+            need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex-1]))
+        }
+        if inUseIndex < len(s.inUse.ranges) {
+            need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex]))
+        }
+        // It's possible that after our pruning above, there's nothing new to map.
+        if need.size() == 0 {
+            continue
         }

-        // Extend the mappings for this summary level.
-        extendMappedRegion(
-            unsafe.Pointer(&s.summary[l][0]),
-            uintptr(mlo)*pallocSumBytes,
-            uintptr(mhi)*pallocSumBytes,
-            uintptr(lo)*pallocSumBytes,
-            uintptr(hi)*pallocSumBytes,
-            s.sysStat,
-        )
+        // Map and commit need.
+        sysMap(unsafe.Pointer(need.base), need.size(), s.sysStat)
+        sysUsed(unsafe.Pointer(need.base), need.size())
     }
 }
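
Editor's note: to make the pruning step in sysGrow above concrete, here is an illustrative sketch, assuming plain uintptr ranges and a subtract helper that, like the runtime's addrRange.subtract, only expects overlap at one end of the range (it never splits a range in two). The values and helper here are made up for the example and are not the runtime's API.

package main

import "fmt"

// addrRange is a region of address space, [base, limit).
type addrRange struct{ base, limit uintptr }

func (r addrRange) size() uintptr {
    if r.limit <= r.base {
        return 0
    }
    return r.limit - r.base
}

// subtract removes the portion of r covered by s, assuming s overlaps
// at most one end of r.
func (r addrRange) subtract(s addrRange) addrRange {
    if s.base <= r.base && s.limit >= r.limit {
        return addrRange{} // fully covered, nothing left
    }
    if s.limit > r.base && s.base <= r.base {
        r.base = s.limit // trim from the front
    } else if s.base < r.limit && s.limit >= r.limit {
        r.limit = s.base // trim from the back
    }
    return r
}

func main() {
    // Page-aligned span of the summary array needed for a new heap growth.
    need := addrRange{base: 0x7000, limit: 0xC000}
    // Spans already mapped for the neighboring inUse ranges.
    mappedBefore := addrRange{base: 0x5000, limit: 0x8000}
    mappedAfter := addrRange{base: 0xB000, limit: 0xD000}

    need = need.subtract(mappedBefore)
    need = need.subtract(mappedAfter)
    if need.size() == 0 {
        fmt.Println("nothing new to map")
        return
    }
    fmt.Printf("sysMap+sysUsed [%#x, %#x)\n", need.base, need.limit)
}

With these inputs only the middle span [0x8000, 0xB000) still needs mapping, which mirrors the effect the new sysGrow code achieves for the summary arrays: already-mapped neighbors are pruned away and only genuinely new memory is committed.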

src/runtime/mpagealloc_test.go (+15, -2)

@@ -41,10 +41,11 @@ func checkPageAlloc(t *testing.T, want, got *PageAlloc) {
 }

 func TestPageAllocGrow(t *testing.T) {
-    tests := map[string]struct {
+    type test struct {
         chunks []ChunkIdx
         inUse  []AddrRange
-    }{
+    }
+    tests := map[string]test{
         "One": {
             chunks: []ChunkIdx{
                 BaseChunkIdx,
@@ -112,6 +113,18 @@ func TestPageAllocGrow(t *testing.T) {
             },
         },
     }
+    if PageAlloc64Bit != 0 {
+        tests["ExtremelyDiscontiguous"] = test{
+            chunks: []ChunkIdx{
+                BaseChunkIdx,
+                BaseChunkIdx + 0x100000, // constant translates to O(TiB)
+            },
+            inUse: []AddrRange{
+                {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+                {PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)},
+            },
+        }
+    }
     for name, v := range tests {
         v := v
         t.Run(name, func(t *testing.T) {
