Commit f449452

nsrip-dd authored and gopherbot committed
runtime: use frame pointer unwinding for block and mutex profilers
Use frame pointer unwinding, where supported, to collect call stacks for the
block and mutex profilers. This method of collecting call stacks is typically
an order of magnitude faster than callers/tracebackPCs.

The marginal benefit for these profile types is likely small compared to using
frame pointer unwinding for the execution tracer. However, the block profiler
can have noticeable overhead unless the sampling rate is very high. Additionally,
using frame pointer unwinding in more places helps ensure more testing/support,
which benefits systems like the execution tracer which rely on frame pointer
unwinding to be practical to use.

Change-Id: I4b36c90cd2df844645fd275a41b247352d635727
Reviewed-on: https://go-review.googlesource.com/c/go/+/533258
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Auto-Submit: Cherry Mui <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent b5bfb5a commit f449452

1 file changed: src/runtime/mprof.go (43 additions, 12 deletions)
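For orientation, the profilers affected are the ones enabled through runtime.SetBlockProfileRate and runtime.SetMutexProfileFraction. Below is a minimal sketch of turning them on and writing a profile out; it is not taken from this commit, and the rate values and output path are arbitrary choices.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	// Record every blocking event (the rate is a nanoseconds threshold;
	// 1 samples everything, which is where collection cost matters most).
	runtime.SetBlockProfileRate(1)
	// Sample roughly 1 in 5 mutex contention events.
	runtime.SetMutexProfileFraction(5)

	// ... run the workload of interest ...

	f, err := os.Create("block.pprof") // arbitrary output path
	if err != nil {
		panic(err)
	}
	defer f.Close()
	// Writing the profile is when the runtime expands the recorded
	// stacks for reporting.
	if err := pprof.Lookup("block").WriteTo(f, 0); err != nil {
		panic(err)
	}
}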
@@ -43,7 +43,10 @@ const (
 	// Note that it's only used internally as a guard against
 	// wildly out-of-bounds slicing of the PCs that come after
 	// a bucket struct, and it could increase in the future.
-	maxStack = 32
+	// The "+ 1" is to account for the first stack entry being
+	// taken up by a "skip" sentinel value for profilers which
+	// defer inline frame expansion until the profile is reported.
+	maxStack = 32 + 1
 )

 type bucketType int
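The sentinel slot described in the new comment can be pictured with a small user-level model. This is a simplification, not the runtime's fpunwindExpand: it ignores inline frame expansion, and logicalStackSentinel here is a stand-in constant rather than the runtime's value.

// Stand-in for the runtime-internal sentinel value; illustrative only.
const logicalStackSentinel = ^uintptr(0)

// expandForReport models how a stored profile stack might be interpreted
// when a profile is read: slot 0 is either a sentinel marking an
// already-logical stack, or a count of leading frames to skip.
func expandForReport(stored []uintptr) []uintptr {
	if len(stored) == 0 {
		return nil
	}
	if stored[0] == logicalStackSentinel {
		// Collected with the conventional unwinder: already expanded.
		return stored[1:]
	}
	// Collected via frame pointers: stored[0] holds the skip count.
	// (The real runtime also expands inlined frames at this point.)
	skip := int(stored[0])
	pcs := stored[1:]
	if skip >= len(pcs) {
		return nil
	}
	return pcs[skip:]
}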
@@ -502,14 +505,40 @@ func blocksampled(cycles, rate int64) bool {
 	return true
 }

+// saveblockevent records a profile event of the type specified by which.
+// cycles is the quantity associated with this event and rate is the sampling rate,
+// used to adjust the cycles value in the manner determined by the profile type.
+// skip is the number of frames to omit from the traceback associated with the event.
+// The traceback will be recorded from the stack of the goroutine associated with the current m.
+// skip should be positive if this event is recorded from the current stack
+// (e.g. when this is not called from a system stack)
 func saveblockevent(cycles, rate int64, skip int, which bucketType) {
-	var nstk int
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
-	if gp.m.curg == nil || gp.m.curg == gp {
-		nstk = callers(skip, mp.profStack)
+	nstk := 1
+	if tracefpunwindoff() || gp.m.hasCgoOnStack() {
+		mp.profStack[0] = logicalStackSentinel
+		if gp.m.curg == nil || gp.m.curg == gp {
+			nstk = callers(skip, mp.profStack[1:])
+		} else {
+			nstk = gcallers(gp.m.curg, skip, mp.profStack[1:])
+		}
 	} else {
-		nstk = gcallers(gp.m.curg, skip, mp.profStack)
+		mp.profStack[0] = uintptr(skip)
+		if gp.m.curg == nil || gp.m.curg == gp {
+			if skip > 0 {
+				// We skip one fewer frame than the provided value for frame
+				// pointer unwinding because the skip value includes the current
+				// frame, whereas the saved frame pointer will give us the
+				// caller's return address first (so, not including
+				// saveblockevent)
+				mp.profStack[0] -= 1
+			}
+			nstk += fpTracebackPCs(unsafe.Pointer(getfp()), mp.profStack[1:])
+		} else {
+			mp.profStack[1] = gp.m.curg.sched.pc
+			nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[2:])
+		}
 	}

 	saveBlockEventStack(cycles, rate, mp.profStack[:nstk], which)
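The new non-cgo path above walks saved frame pointers instead of running the conventional unwinder. Here is a self-contained sketch of that walk over a toy frame chain; the frame type and fpWalk are illustrative stand-ins, while the real fpTracebackPCs reads the same two words (saved frame pointer and return PC) directly from stack memory.

package main

import "fmt"

// frame models the pair of words the unwinder reads at each frame pointer:
// a link to the caller's frame record and the return PC stored next to it.
type frame struct {
	callerFP *frame  // saved caller frame pointer
	retPC    uintptr // return address for this frame
}

// fpWalk follows the frame pointer chain and collects return PCs until the
// chain ends or the buffer is full, mirroring the shape of a frame pointer
// traceback.
func fpWalk(fp *frame, pcBuf []uintptr) int {
	n := 0
	for n < len(pcBuf) && fp != nil {
		pcBuf[n] = fp.retPC
		fp = fp.callerFP
		n++
	}
	return n
}

func main() {
	// A fake three-frame call chain: leaf -> mid -> root.
	root := &frame{retPC: 0x4010a0}
	mid := &frame{callerFP: root, retPC: 0x4020b0}
	leaf := &frame{callerFP: mid, retPC: 0x4030c0}

	pcs := make([]uintptr, 32)
	n := fpWalk(leaf, pcs)
	fmt.Printf("collected %d PCs: %#x\n", n, pcs[:n])
}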
@@ -689,9 +718,10 @@ func (prof *mLockProfile) captureStack() {
 	}
 	prof.pending = 0

+	prof.stack[0] = logicalStackSentinel
 	if debug.runtimeContentionStacks.Load() == 0 {
-		prof.stack[0] = abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum
-		prof.stack[1] = 0
+		prof.stack[1] = abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum
+		prof.stack[2] = 0
 		return
 	}

@@ -702,7 +732,7 @@ func (prof *mLockProfile) captureStack() {
 	systemstack(func() {
 		var u unwinder
 		u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack)
-		nstk = tracebackPCs(&u, skip, prof.stack)
+		nstk = 1 + tracebackPCs(&u, skip, prof.stack[1:])
 	})
 	if nstk < len(prof.stack) {
 		prof.stack[nstk] = 0
@@ -732,6 +762,7 @@ func (prof *mLockProfile) store() {
 	saveBlockEventStack(cycles, rate, prof.stack[:nstk], mutexProfile)
 	if lost > 0 {
 		lostStk := [...]uintptr{
+			logicalStackSentinel,
 			abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum,
 		}
 		saveBlockEventStack(lost, rate, lostStk[:], mutexProfile)
@@ -952,8 +983,8 @@ func record(r *MemProfileRecord, b *bucket) {
 	if asanenabled {
 		asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
 	}
-	copy(r.Stack0[:], b.stk())
-	clear(r.Stack0[b.nstk:])
+	i := copy(r.Stack0[:], b.stk())
+	clear(r.Stack0[i:])
 }

 func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
@@ -1008,7 +1039,7 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
 		if asanenabled {
 			asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
 		}
-		i := copy(r.Stack0[:], b.stk())
+		i := fpunwindExpand(r.Stack0[:], b.stk())
 		clear(r.Stack0[i:])
 		p = p[1:]
 	}
@@ -1036,7 +1067,7 @@ func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
 		r := &p[0]
 		r.Count = int64(bp.count)
 		r.Cycles = bp.cycles
-		i := copy(r.Stack0[:], b.stk())
+		i := fpunwindExpand(r.Stack0[:], b.stk())
 		clear(r.Stack0[i:])
 		p = p[1:]
 	}
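Since BlockProfile and MutexProfile now expand stored stacks with fpunwindExpand when the profile is read, here is a hedged sketch of calling one of these snapshot APIs directly; the grow-until-it-fits loop is the usual pattern for these functions, and the blocking workload is elided.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.SetBlockProfileRate(1)

	// ... workload that blocks on channels, mutexes, etc. ...

	// Grow the record slice until the snapshot fits.
	var recs []runtime.BlockProfileRecord
	n, ok := runtime.BlockProfile(nil)
	for !ok {
		recs = make([]runtime.BlockProfileRecord, n+50)
		n, ok = runtime.BlockProfile(recs)
	}
	recs = recs[:n]

	for _, r := range recs {
		// Stack() returns the PCs filled in by BlockProfile; after this
		// change those PCs are produced by expanding the stored frame
		// pointer stack at read time.
		fmt.Println(r.Count, r.Cycles, r.Stack())
	}
}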
