Commit b9dee7e

rhysh authored and mknyszek committed
runtime/pprof: stress test goroutine profiler
For #33250

Change-Id: Ic7aa74b1bb5da9c4319718bac96316b236cb40b2
Reviewed-on: https://go-review.googlesource.com/c/go/+/387414
Run-TryBot: Rhys Hiltner <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Reviewed-by: David Chase <[email protected]>
1 parent 209942f commit b9dee7e
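
The new test exercises the public runtime/pprof goroutine-profile API from several goroutines at once. As a minimal sketch of that API surface (not part of this commit; the label key/value and the output destination are illustrative only):

package main

import (
	"context"
	"os"
	"runtime/pprof"
)

func main() {
	ctx := context.Background()
	// Attach a pprof label so this goroutine is identifiable in the output.
	pprof.Do(ctx, pprof.Labels("worker", "demo"), func(ctx context.Context) {
		// debug=1 writes the legacy text format, which includes goroutine labels;
		// debug=0 writes the protobuf format that the new test parses.
		pprof.Lookup("goroutine").WriteTo(os.Stdout, 1)
	})
}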

File tree

3 files changed: +371 -1 lines changed


src/runtime/pprof/pprof_test.go

+273
@@ -1389,6 +1389,279 @@ func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[strin
 	return true
 }
 
+func TestGoroutineProfileConcurrency(t *testing.T) {
+	goroutineProf := Lookup("goroutine")
+
+	profilerCalls := func(s string) int {
+		return strings.Count(s, "\truntime/pprof.runtime_goroutineProfileWithLabels+")
+	}
+
+	includesFinalizer := func(s string) bool {
+		return strings.Contains(s, "runtime.runfinq")
+	}
+
+	// Concurrent calls to the goroutine profiler should not trigger data races
+	// or corruption.
+	t.Run("overlapping profile requests", func(t *testing.T) {
+		ctx := context.Background()
+		ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
+		defer cancel()
+
+		var wg sync.WaitGroup
+		for i := 0; i < 2; i++ {
+			wg.Add(1)
+			Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) {
+				go func() {
+					defer wg.Done()
+					for ctx.Err() == nil {
+						var w bytes.Buffer
+						goroutineProf.WriteTo(&w, 1)
+						prof := w.String()
+						count := profilerCalls(prof)
+						if count >= 2 {
+							t.Logf("prof %d\n%s", count, prof)
+							cancel()
+						}
+					}
+				}()
+			})
+		}
+		wg.Wait()
+	})
+
+	// The finalizer goroutine should not show up in most profiles, since it's
+	// marked as a system goroutine when idle.
+	t.Run("finalizer not present", func(t *testing.T) {
+		var w bytes.Buffer
+		goroutineProf.WriteTo(&w, 1)
+		prof := w.String()
+		if includesFinalizer(prof) {
+			t.Errorf("profile includes finalizer (but finalizer should be marked as system):\n%s", prof)
+		}
+	})
+
+	// The finalizer goroutine should show up when it's running user code.
+	t.Run("finalizer present", func(t *testing.T) {
+		obj := new(byte)
+		ch1, ch2 := make(chan int), make(chan int)
+		defer close(ch2)
+		runtime.SetFinalizer(obj, func(_ interface{}) {
+			close(ch1)
+			<-ch2
+		})
+		obj = nil
+		for i := 10; i >= 0; i-- {
+			select {
+			case <-ch1:
+			default:
+				if i == 0 {
+					t.Fatalf("finalizer did not run")
+				}
+				runtime.GC()
+			}
+		}
+		var w bytes.Buffer
+		goroutineProf.WriteTo(&w, 1)
+		prof := w.String()
+		if !includesFinalizer(prof) {
+			t.Errorf("profile does not include finalizer (and it should be marked as user):\n%s", prof)
+		}
+	})
+
+	// Check that new goroutines only show up in order.
+	testLaunches := func(t *testing.T) {
+		var done sync.WaitGroup
+		defer done.Wait()
+
+		ctx := context.Background()
+		ctx, cancel := context.WithCancel(ctx)
+		defer cancel()
+
+		ch := make(chan int)
+		defer close(ch)
+
+		var ready sync.WaitGroup
+
+		// These goroutines all survive until the end of the subtest, so we can
+		// check that a (numbered) goroutine appearing in the profile implies
+		// that all older goroutines also appear in the profile.
+		ready.Add(1)
+		done.Add(1)
+		go func() {
+			defer done.Done()
+			for i := 0; ctx.Err() == nil; i++ {
+				// Use SetGoroutineLabels rather than Do so we can always expect
+				// an extra goroutine (this one) with the most recent label.
+				SetGoroutineLabels(WithLabels(ctx, Labels(t.Name()+"-loop-i", fmt.Sprint(i))))
+				done.Add(1)
+				go func() {
+					<-ch
+					done.Done()
+				}()
+				for j := 0; j < i; j++ {
+					// Spin for longer and longer as the test goes on. This
+					// goroutine will do O(N^2) work with the number of
+					// goroutines it launches. This should be slow relative to
+					// the work involved in collecting a goroutine profile,
+					// which is O(N) with the high-water mark of the number of
+					// goroutines in this process (in the allgs slice).
+					runtime.Gosched()
+				}
+				if i == 0 {
+					ready.Done()
+				}
+			}
+		}()
+
+		// Short-lived goroutines exercise different code paths (goroutines with
+		// status _Gdead, for instance). This churn doesn't have behavior that
+		// we can test directly, but does help to shake out data races.
+		ready.Add(1)
+		var churn func(i int)
+		churn = func(i int) {
+			SetGoroutineLabels(WithLabels(ctx, Labels(t.Name()+"-churn-i", fmt.Sprint(i))))
+			if i == 0 {
+				ready.Done()
+			}
+			if ctx.Err() == nil {
+				go churn(i + 1)
+			}
+		}
+		go func() {
+			churn(0)
+		}()
+
+		ready.Wait()
+
+		var w [3]bytes.Buffer
+		for i := range w {
+			goroutineProf.WriteTo(&w[i], 0)
+		}
+		for i := range w {
+			p, err := profile.Parse(bytes.NewReader(w[i].Bytes()))
+			if err != nil {
+				t.Errorf("error parsing protobuf profile: %v", err)
+			}
+
+			// High-numbered loop-i goroutines imply that every lower-numbered
+			// loop-i goroutine should be present in the profile too.
+			counts := make(map[string]int)
+			for _, s := range p.Sample {
+				label := s.Label[t.Name()+"-loop-i"]
+				if len(label) > 0 {
+					counts[label[0]]++
+				}
+			}
+			for j, max := 0, len(counts)-1; j <= max; j++ {
+				n := counts[fmt.Sprint(j)]
+				if n == 1 || (n == 2 && j == max) {
+					continue
+				}
+				t.Errorf("profile #%d's goroutines with label loop-i:%d; %d != 1 (or 2 for the last entry, %d)",
+					i+1, j, n, max)
+				t.Logf("counts %v", counts)
+				break
+			}
+		}
+	}
+
+	runs := 100
+	if testing.Short() {
+		runs = 5
+	}
+	for i := 0; i < runs; i++ {
+		// Run multiple times to shake out data races.
+		t.Run("goroutine launches", testLaunches)
+	}
+}
+
+func BenchmarkGoroutine(b *testing.B) {
+	withIdle := func(n int, fn func(b *testing.B)) func(b *testing.B) {
+		return func(b *testing.B) {
+			c := make(chan int)
+			var ready, done sync.WaitGroup
+			defer func() {
+				close(c)
+				done.Wait()
+			}()
+
+			for i := 0; i < n; i++ {
+				ready.Add(1)
+				done.Add(1)
+				go func() {
+					ready.Done()
+					<-c
+					done.Done()
+				}()
+			}
+			// Let the goroutines block on the channel.
+			ready.Wait()
+			for i := 0; i < 5; i++ {
+				runtime.Gosched()
+			}
+
+			fn(b)
+		}
+	}
+
+	withChurn := func(fn func(b *testing.B)) func(b *testing.B) {
+		return func(b *testing.B) {
+			ctx := context.Background()
+			ctx, cancel := context.WithCancel(ctx)
+			defer cancel()
+
+			var ready sync.WaitGroup
+			ready.Add(1)
+			var count int64
+			var churn func(i int)
+			churn = func(i int) {
+				SetGoroutineLabels(WithLabels(ctx, Labels("churn-i", fmt.Sprint(i))))
+				atomic.AddInt64(&count, 1)
+				if i == 0 {
+					ready.Done()
+				}
+				if ctx.Err() == nil {
+					go churn(i + 1)
+				}
+			}
+			go func() {
+				churn(0)
+			}()
+			ready.Wait()
+
+			fn(b)
+			b.ReportMetric(float64(atomic.LoadInt64(&count))/float64(b.N), "concurrent_launches/op")
+		}
+	}
+
+	benchWriteTo := func(b *testing.B) {
+		goroutineProf := Lookup("goroutine")
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			goroutineProf.WriteTo(io.Discard, 0)
+		}
+		b.StopTimer()
+	}
+
+	benchGoroutineProfile := func(b *testing.B) {
+		p := make([]runtime.StackRecord, 10000)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			runtime.GoroutineProfile(p)
+		}
+		b.StopTimer()
+	}
+
+	// Note that some costs of collecting a goroutine profile depend on the
+	// length of the runtime.allgs slice, which never shrinks. Stay within the
+	// race detector's 8k-goroutine limit.
+	for _, n := range []int{50, 500, 5000} {
+		b.Run(fmt.Sprintf("Profile.WriteTo idle %d", n), withIdle(n, benchWriteTo))
+		b.Run(fmt.Sprintf("Profile.WriteTo churn %d", n), withIdle(n, withChurn(benchWriteTo)))
+		b.Run(fmt.Sprintf("runtime.GoroutineProfile churn %d", n), withIdle(n, withChurn(benchGoroutineProfile)))
+	}
+}
+
 var emptyCallStackTestRun int64
 
 // Issue 18836.
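
BenchmarkGoroutine compares the two public collection paths: Profile.WriteTo and runtime.GoroutineProfile with a fixed 10000-entry record slice. As a standalone sketch (not part of this commit) of the documented grow-and-retry pattern for sizing that slice:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// The first call reports how many records exist; retry with headroom in
	// case more goroutines start between the two calls.
	n, _ := runtime.GoroutineProfile(nil)
	for {
		records := make([]runtime.StackRecord, n+10)
		m, ok := runtime.GoroutineProfile(records)
		if ok {
			fmt.Printf("captured %d goroutine stack records\n", m)
			return
		}
		n = m // slice was too small; m is the count needed now
	}
}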

src/runtime/proc.go

+1 -1
@@ -5053,7 +5053,7 @@ func checkdead() {
 		}
 	}
 
-	unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
+	unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
 	fatal("all goroutines are asleep - deadlock!")
 }
 
