Skip to content

Commit 7398169

Browse files
committed
runtime: push down systemstack requirement for tracer where possible
Currently lots of functions require systemstack because the trace buffer might get flushed, but that will already switch to the systemstack for the most critical bits (grabbing trace.lock). That means a lot of this code is non-preemptible when it doesn't need to be. We've seen this cause problems at scale, when dumping very large numbers of stacks at once, for example. This is a re-land of CL 572095 which was reverted in CL 577376. This re-land includes a fix of the test that broke on the longtest builders. Change-Id: Ia8d7cbe3aaa8398cf4a1818bac66c3415a399348 Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest Reviewed-on: https://go-review.googlesource.com/c/go/+/577377 Reviewed-by: David Chase <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Carlos Amedee <[email protected]>
1 parent 236fe24 commit 7398169

File tree

5 files changed

+42
-42
lines changed

5 files changed

+42
-42
lines changed

src/runtime/crash_test.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -909,18 +909,22 @@ func TestCrashWhileTracing(t *testing.T) {
909909
if err != nil {
910910
t.Fatalf("could not create trace.NewReader: %v", err)
911911
}
912-
var seen bool
912+
var seen, seenSync bool
913913
i := 1
914914
loop:
915915
for ; ; i++ {
916916
ev, err := r.ReadEvent()
917917
if err != nil {
918+
// We may have a broken tail to the trace -- that's OK.
919+
// We'll make sure we saw at least one complete generation.
918920
if err != io.EOF {
919-
t.Errorf("error at event %d: %v", i, err)
921+
t.Logf("error at event %d: %v", i, err)
920922
}
921923
break loop
922924
}
923925
switch ev.Kind() {
926+
case tracev2.EventSync:
927+
seenSync = true
924928
case tracev2.EventLog:
925929
v := ev.Log()
926930
if v.Category == "xyzzy-cat" && v.Message == "xyzzy-msg" {
@@ -934,6 +938,9 @@ loop:
934938
if err := cmd.Wait(); err == nil {
935939
t.Error("the process should have panicked")
936940
}
941+
if !seenSync {
942+
t.Errorf("expected at least one full generation to have been emitted before the trace was considered broken")
943+
}
937944
if !seen {
938945
t.Errorf("expected one matching log event matching, but none of the %d received trace events match", i)
939946
}

src/runtime/trace2.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -551,17 +551,17 @@ func traceAdvance(stopTrace bool) {
551551
// Read everything out of the last gen's CPU profile buffer.
552552
traceReadCPU(gen)
553553

554-
systemstack(func() {
555-
// Flush CPU samples, stacks, and strings for the last generation. This is safe,
556-
// because we're now certain no M is writing to the last generation.
557-
//
558-
// Ordering is important here. traceCPUFlush may generate new stacks and dumping
559-
// stacks may generate new strings.
560-
traceCPUFlush(gen)
561-
trace.stackTab[gen%2].dump(gen)
562-
trace.stringTab[gen%2].reset(gen)
554+
// Flush CPU samples, stacks, and strings for the last generation. This is safe,
555+
// because we're now certain no M is writing to the last generation.
556+
//
557+
// Ordering is important here. traceCPUFlush may generate new stacks and dumping
558+
// stacks may generate new strings.
559+
traceCPUFlush(gen)
560+
trace.stackTab[gen%2].dump(gen)
561+
trace.stringTab[gen%2].reset(gen)
563562

564-
// That's it. This generation is done producing buffers.
563+
// That's it. This generation is done producing buffers.
564+
systemstack(func() {
565565
lock(&trace.lock)
566566
trace.flushedGen.Store(gen)
567567
unlock(&trace.lock)

src/runtime/trace2cpu.go

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,18 +195,15 @@ func traceReadCPU(gen uintptr) bool {
195195

196196
// traceCPUFlush flushes trace.cpuBuf[gen%2]. The caller must be certain that gen
197197
// has completed and that there are no more writers to it.
198-
//
199-
// Must run on the systemstack because it flushes buffers and acquires trace.lock
200-
// to do so.
201-
//
202-
//go:systemstack
203198
func traceCPUFlush(gen uintptr) {
204199
// Flush any remaining trace buffers containing CPU samples.
205200
if buf := trace.cpuBuf[gen%2]; buf != nil {
206-
lock(&trace.lock)
207-
traceBufFlush(buf, gen)
208-
unlock(&trace.lock)
209-
trace.cpuBuf[gen%2] = nil
201+
systemstack(func() {
202+
lock(&trace.lock)
203+
traceBufFlush(buf, gen)
204+
unlock(&trace.lock)
205+
trace.cpuBuf[gen%2] = nil
206+
})
210207
}
211208
}
212209

src/runtime/trace2stack.go

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,6 @@ func (t *traceStackTable) put(pcs []uintptr) uint64 {
138138
// dump writes all previously cached stacks to trace buffers,
139139
// releases all memory and resets state. It must only be called once the caller
140140
// can guarantee that there are no more writers to the table.
141-
//
142-
// This must run on the system stack because it flushes buffers and thus
143-
// may acquire trace.lock.
144-
//
145-
//go:systemstack
146141
func (t *traceStackTable) dump(gen uintptr) {
147142
w := unsafeTraceWriter(gen, nil)
148143

@@ -194,9 +189,11 @@ func (t *traceStackTable) dump(gen uintptr) {
194189
}
195190
// Still, hold the lock over reset. The callee expects it, even though it's
196191
// not strictly necessary.
197-
lock(&t.tab.lock)
198-
t.tab.reset()
199-
unlock(&t.tab.lock)
192+
systemstack(func() {
193+
lock(&t.tab.lock)
194+
t.tab.reset()
195+
unlock(&t.tab.lock)
196+
})
200197

201198
w.flush().end()
202199
}

src/runtime/trace2string.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func (t *traceStringTable) emit(gen uintptr, s string) uint64 {
4949

5050
// writeString writes the string to t.buf.
5151
//
52-
// Must run on the systemstack because it may flush buffers and thus could acquire trace.lock.
52+
// Must run on the systemstack because it acquires t.lock.
5353
//
5454
//go:systemstack
5555
func (t *traceStringTable) writeString(gen uintptr, id uint64, s string) {
@@ -75,7 +75,7 @@ func (t *traceStringTable) writeString(gen uintptr, id uint64, s string) {
7575
w.varint(uint64(len(s)))
7676
w.stringData(s)
7777

78-
// Store back buf if it was updated during ensure.
78+
// Store back buf in case it was updated during ensure.
7979
t.buf = w.traceBuf
8080
unlock(&t.lock)
8181
}
@@ -84,21 +84,20 @@ func (t *traceStringTable) writeString(gen uintptr, id uint64, s string) {
8484
//
8585
// Must be called only once the caller is certain nothing else will be
8686
// added to this table.
87-
//
88-
// Because it flushes buffers, this may acquire trace.lock and thus
89-
// must run on the systemstack.
90-
//
91-
//go:systemstack
9287
func (t *traceStringTable) reset(gen uintptr) {
9388
if t.buf != nil {
94-
lock(&trace.lock)
95-
traceBufFlush(t.buf, gen)
96-
unlock(&trace.lock)
89+
systemstack(func() {
90+
lock(&trace.lock)
91+
traceBufFlush(t.buf, gen)
92+
unlock(&trace.lock)
93+
})
9794
t.buf = nil
9895
}
9996

10097
// Reset the table.
101-
lock(&t.tab.lock)
102-
t.tab.reset()
103-
unlock(&t.tab.lock)
98+
systemstack(func() {
99+
lock(&t.tab.lock)
100+
t.tab.reset()
101+
unlock(&t.tab.lock)
102+
})
104103
}

0 commit comments

Comments
 (0)