
Commit a2cd2bd

runtime: add per-p page allocation cache

This change adds a per-p free page cache which the page allocator may
allocate out of without a lock. The change also introduces a completely
lockless page allocator fast path. Although the cache contains at most
64 pages (and usually fewer), the vast majority (85%+) of page
allocations are exactly 1 page in size.

Updates #35112.

Change-Id: I170bf0a9375873e7e3230845eb1df7e5cf741b78
Reviewed-on: https://go-review.googlesource.com/c/go/+/195701
Run-TryBot: Michael Knyszek <[email protected]>
Reviewed-by: Austin Clements <[email protected]>

1 parent 81640ea commit a2cd2bd
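The heart of the change is a tiny per-P structure: a base address plus two 64-bit bitmaps, one bit per page, so a P can hand out pages with plain loads and stores. Below is a minimal, self-contained sketch of that idea. The names mirror the runtime's pageCache, but the code is illustrative only; the 8 KiB page size and the single-page-only alloc are assumptions for the example, not the runtime's implementation.

package main

import (
    "fmt"
    "math/bits"
)

// Illustrative page size; the runtime defines its own internally.
const pageSize = 8192

// pageCacheSketch mirrors the shape of the runtime's per-P pageCache:
// a base address plus two 64-bit bitmaps covering 64 consecutive pages.
type pageCacheSketch struct {
    base  uintptr // base address of the 64-page run
    cache uint64  // bitmap of free pages (1 means free)
    scav  uint64  // bitmap of scavenged pages (1 means scavenged)
}

func (c *pageCacheSketch) empty() bool { return c.cache == 0 }

// alloc hands out one page with no locking at all: the cache is owned
// by a single P, so plain loads and stores suffice.
func (c *pageCacheSketch) alloc() (base, scav uintptr) {
    i := uintptr(bits.TrailingZeros64(c.cache))
    if i == 64 {
        return 0, 0 // cache exhausted
    }
    c.cache &^= 1 << i // mark the page allocated
    wasScav := (c.scav >> i) & 1
    c.scav &^= 1 << i // the page is about to be reused
    return c.base + i*pageSize, uintptr(wasScav) * pageSize
}

func main() {
    c := pageCacheSketch{base: 0x100000, cache: 0b1011, scav: 0b0010}
    for !c.empty() {
        base, scav := c.alloc()
        fmt.Printf("page at %#x, scavenged bytes: %d\n", base, scav)
    }
}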

File tree

5 files changed: +86 −20 lines changed

src/runtime/export_test.go

Lines changed: 22 additions & 0 deletions

@@ -7,6 +7,7 @@
 package runtime
 
 import (
+    "math/bits"
     "runtime/internal/atomic"
     "runtime/internal/sys"
     "unsafe"
@@ -358,6 +359,10 @@ func ReadMemStatsSlow() (base, slow MemStats) {
         pg := mheap_.pages.chunks[i].scavenged.popcntRange(0, pallocChunkPages)
         slow.HeapReleased += uint64(pg) * pageSize
     }
+    for _, p := range allp {
+        pg := bits.OnesCount64(p.pcache.scav)
+        slow.HeapReleased += uint64(pg) * pageSize
+    }
 
     // Unused space in the current arena also counts as released space.
     slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
@@ -879,3 +884,20 @@ func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
     })
     return
 }
+
+func PageCachePagesLeaked() (leaked uintptr) {
+    stopTheWorld("PageCachePagesLeaked")
+
+    // Walk over destroyed Ps and look for unflushed caches.
+    deadp := allp[len(allp):cap(allp)]
+    for _, p := range deadp {
+        // Since we're going past len(allp) we may see nil Ps.
+        // Just ignore them.
+        if p != nil {
+            leaked += uintptr(bits.OnesCount64(p.pcache.cache))
+        }
+    }
+
+    startTheWorld()
+    return
+}
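The deadp := allp[len(allp):cap(allp)] line in PageCachePagesLeaked leans on a slice subtlety: shrinking GOMAXPROCS reslices allp down, but the destroyed Ps linger in the backing array between len and cap. The same pattern in miniature, with a plain string slice standing in for allp:

package main

import "fmt"

func main() {
    // Four "Ps", then a shrink to two: the slice gets shorter, but the
    // backing array still holds the old tail.
    ps := []string{"p0", "p1", "p2", "p3"}
    ps = ps[:2]

    // Walk the dead tail between len and cap, as PageCachePagesLeaked does.
    dead := ps[len(ps):cap(ps)]
    fmt.Println(dead) // [p2 p3]
}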

src/runtime/malloc_test.go

Lines changed: 8 additions & 0 deletions

@@ -168,6 +168,14 @@ func TestTinyAlloc(t *testing.T) {
     }
 }
 
+func TestPageCacheLeak(t *testing.T) {
+    defer GOMAXPROCS(GOMAXPROCS(1))
+    leaked := PageCachePagesLeaked()
+    if leaked != 0 {
+        t.Fatalf("found %d leaked pages in page caches", leaked)
+    }
+}
+
 func TestPhysicalMemoryUtilization(t *testing.T) {
     got := runTestProg(t, "testprog", "GCPhys")
     want := "OK\n"

src/runtime/mheap.go

Lines changed: 53 additions & 19 deletions

@@ -1073,28 +1073,60 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS
     gp := getg()
     base, scav := uintptr(0), uintptr(0)
 
-    // Try to allocate a cached span.
-    s = h.tryAllocMSpan()
+    // If the allocation is small enough, try the page cache!
+    pp := gp.m.p.ptr()
+    if pp != nil && npages < pageCachePages/4 {
+        c := &pp.pcache
 
-    // We failed to do what we need to do without the lock.
-    lock(&h.lock)
+        // If the cache is empty, refill it.
+        if c.empty() {
+            lock(&h.lock)
+            *c = h.pages.allocToCache()
+            unlock(&h.lock)
+        }
 
-    // Try to acquire a base address.
-    base, scav = h.pages.alloc(npages)
-    if base != 0 {
-        goto HaveBase
-    }
-    if !h.grow(npages) {
-        unlock(&h.lock)
-        return nil
-    }
-    base, scav = h.pages.alloc(npages)
-    if base != 0 {
-        goto HaveBase
+        // Try to allocate from the cache.
+        base, scav = c.alloc(npages)
+        if base != 0 {
+            s = h.tryAllocMSpan()
+
+            if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
+                goto HaveSpan
+            }
+            // We're either running during GC, failed to acquire a mspan,
+            // or the allocation is for a large object. This means we
+            // have to lock the heap and do a bunch of extra work,
+            // so go down the HaveBaseLocked path.
+            //
+            // We must do this during GC to avoid skew with heap_scan
+            // since we flush mcache stats whenever we lock.
+            //
+            // TODO(mknyszek): It would be nice to not have to
+            // lock the heap if it's a large allocation, but
+            // it's fine for now. The critical section here is
+            // short and large object allocations are relatively
+            // infrequent.
+        }
     }
-    throw("grew heap, but no adequate free space found")
 
-HaveBase:
+    // For one reason or another, we couldn't get the
+    // whole job done without the heap lock.
+    lock(&h.lock)
+
+    if base == 0 {
+        // Try to acquire a base address.
+        base, scav = h.pages.alloc(npages)
+        if base == 0 {
+            if !h.grow(npages) {
+                unlock(&h.lock)
+                return nil
+            }
+            base, scav = h.pages.alloc(npages)
+            if base == 0 {
+                throw("grew heap, but no adequate free space found")
+            }
+        }
+    }
     if s == nil {
         // We failed to get an mspan earlier, so grab
         // one now that we have the heap lock.
@@ -1124,7 +1156,9 @@ HaveBase:
     }
     unlock(&h.lock)
 
-    // Initialize the span.
+HaveSpan:
+    // At this point, both s != nil and base != 0, and the heap
+    // lock is no longer held. Initialize the span.
     s.init(base, npages)
     if h.allocNeedsZero(base, npages) {
         s.needzero = 1
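A note on the npages < pageCachePages/4 guard: the cache holds 64 pages (one bit of a uint64 each), so only requests for fewer than 16 pages take the lockless path, and a single locked refill can serve several allocations, up to 64 of them in the dominant 1-page case. The arithmetic, trivially:

package main

import "fmt"

// pageCachePages matches the commit's cache size: one bit per page.
const pageCachePages = 64

func main() {
    // Only allocations below a quarter of the cache size qualify, so
    // a refill is never spent on a single oversized request.
    fmt.Println("fast path covers npages <", pageCachePages/4) // 16
}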

src/runtime/proc.go

Lines changed: 1 addition & 0 deletions

@@ -4088,6 +4088,7 @@ func (pp *p) destroy() {
             mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
         }
         pp.mspancache.len = 0
+        pp.pcache.flush(&mheap_.pages)
     })
     freemcache(pp.mcache)
     pp.mcache = nil
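The flush call keeps a dying P from stranding pages: whatever its cache still holds goes back to the global page allocator. A hedged sketch of the shape of such a flush; the freePage callback is a stand-in for the heap's page-allocator bitmaps, which the real pageCache.flush updates directly:

package main

import (
    "fmt"
    "math/bits"
)

const pageSize = 8192 // assumed for illustration

// flushCache walks the free-page bitmap and hands each still-free page
// back via freePage, leaving the cache logically empty.
func flushCache(base uintptr, cache uint64, freePage func(addr uintptr)) {
    for cache != 0 {
        i := uintptr(bits.TrailingZeros64(cache))
        cache &^= 1 << i
        freePage(base + i*pageSize)
    }
}

func main() {
    flushCache(0x100000, 0b101, func(addr uintptr) {
        fmt.Printf("returned page %#x to the heap\n", addr)
    })
}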

src/runtime/runtime2.go

Lines changed: 2 additions & 1 deletion

@@ -555,6 +555,7 @@ type p struct {
     sysmontick  sysmontick // last tick observed by sysmon
     m           muintptr   // back-link to associated m (nil if idle)
     mcache      *mcache
+    pcache      pageCache
     raceprocctx uintptr
 
     deferpool [5][]*_defer // pool of available defer structs of different sizes (see panic.go)
@@ -611,7 +612,7 @@ type p struct {
 
     palloc persistentAlloc // per-P to avoid mutex
 
-    // _ uint32 // Alignment for atomic fields below
+    _ uint32 // Alignment for atomic fields below
 
     // Per-P GC state
     gcAssistTime int64 // Nanoseconds in assistAlloc

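Why the commented-out pad comes back: sync/atomic's 64-bit operations require 8-byte alignment, which 32-bit platforms do not guarantee for int64/uint64 struct fields, and the new pcache field (a uintptr plus two uint64s, 20 bytes on 32-bit) shifts the offsets of everything after it. A small, hypothetical demonstration of the rule; build it for GOARCH=386 to see the offsets diverge:

package main

import (
    "fmt"
    "unsafe"
)

// On 32-bit platforms an int64 field is only 4-byte aligned by default,
// so a lone uint32 before an atomically accessed int64 can leave it
// misaligned; a blank uint32 pad restores 8-byte alignment.
type unpadded struct {
    flag    uint32
    counter int64 // offset 4 on 386: unsafe for sync/atomic
}

type padded struct {
    flag    uint32
    _       uint32 // alignment for the atomic field below
    counter int64  // offset 8 everywhere: safe for sync/atomic
}

func main() {
    var u unpadded
    var p padded
    fmt.Println(unsafe.Offsetof(u.counter), unsafe.Offsetof(p.counter))
}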