Skip to content

Commit 4bea115

Browse files
committed
gopls/internal/lsp/source/typerefs: collapse SCCs of ref graph
This CL uses Tarjan's algorithm to collapse the strong components of the intra-package reference graph. It is the first of several graph optimizations I plan to evaluate for the reference graph to make it compactly serializable and efficiently searchable. By eliminating cycles, the number of edges that must be traversed by the reachability query is reduced, but more importantly, the resulting graph becomes acyclic, which means that there is no need to revisit any node, whereas previously a complete traversal was initiated from every node. Also: - use two-pass algorithm in Refs to gather decls and then build cross-references; - document the algorithm; - unexport various fields of Package - stop indexing PackageIDs in the local preprocessing (it's not necessary). Details: k8s benchmark on M1 MBP shows a 60% reduction in real time: xtools$ go test ./gopls/internal/lsp/source/typerefs/ -run=NONE -bench=BuildP -dir=$(pwd)/../kubernetes/ -verify=false -query=./... Before: BenchmarkBuildPackageGraph-8 1 2997634916 ns/op BenchmarkBuildPackageGraph-8 1 2623176209 ns/op After: BenchmarkBuildPackageGraph-8 1 1142697000 ns/op BenchmarkBuildPackageGraph-8 1 1144183250 ns/op Next steps: simplify the reference graph; serialize it. Change-Id: Ic8a6154c0d8d7174529b90774a8683ea9450f484 Reviewed-on: https://go-review.googlesource.com/c/tools/+/481784 TryBot-Result: Gopher Robot <[email protected]> Run-TryBot: Alan Donovan <[email protected]> Reviewed-by: Robert Findley <[email protected]>
1 parent 5ef3193 commit 4bea115

File tree

5 files changed

+491
-301
lines changed

5 files changed

+491
-301
lines changed

gopls/internal/lsp/source/typerefs/doc.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,51 @@
5959
// analysis. Notably, we also skip identifiers that refer to type parameters in
6060
// generic declarations.
6161
//
62+
// # Graph optimizations
63+
//
64+
// The references extracted from the syntax are used to construct
65+
// edges between declNodes. Edges are of two kinds: internal
66+
// references, from one package-level declaration to another; and
67+
// external references, from a symbol in this package to a symbol
68+
// imported from a direct dependency.
69+
//
70+
// Once the symbol reference graph is constructed, we find its
71+
// strongly connected components (SCCs) using Tarjan's algorithm. A
72+
// node from each SCC is chosen arbitrarily to be its representative,
73+
// and all the edges (internal and external) of the SCC are
74+
// accumulated into the representative, thus forming the strong
75+
// component graph, which is acyclic. This property simplifies the
76+
// logic and improves the efficiency of the reachability query.
77+
//
78+
// TODO(adonovan): opt: subsequent planned optimizations include:
79+
//
80+
// - The Hash-Value Numbering optimization described in
81+
// Hardekopf and Lin; see golang.org/x/go/pointer/hvn.go for an
82+
// implementation. (Like pointer analysis, our problem is
83+
// fundamentally one of graph reachability.)
84+
//
85+
// The "pointer equivalence" (PE) portion of this algorithm uses a
86+
// hash table to create a mapping from unique sets of external
87+
// references to small integers. Each of the n external symbols
88+
// referenced by the package is assigned an integer from 1 to n;
89+
// this number stands for a singleton set. Higher numbers refer to
90+
// unions of strictly smaller sets. The PE algorithm allows us to
91+
// coalesce redundant graph nodes. For example, all functions that
92+
// ultimately reference only {fmt.Println,fmt.Sprintf} would be
93+
// marked as equivalent to each other, and to the union of
94+
// the sets of {fmt.Sprintf} and {fmt.Println}.
95+
//
96+
// This reduces the worst-case size of the Refs() result. Consider
97+
// M decls that each reference type t, which references N imported
98+
// types. The source code has O(M + N) lines but the Refs result
99+
// is currently O(M*N). Preserving the essential structure of the
100+
// reference graph (as a DAG of union operations) will reduce the
101+
// asymptote.
102+
//
103+
// - Serializing the SC graph obtained for each package and saving it in
104+
// the file cache. Once we have a DAG of unions, we can serialize
105+
// it easily and amortize the cost of the local preprocessing.
106+
//
62107
// # API
63108
//
64109
// The main entry point for this analysis is the [Refs] function, which

gopls/internal/lsp/source/typerefs/pkgrefs.go

Lines changed: 56 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ package typerefs
66

77
import (
88
"context"
9-
"go/token"
109
"runtime"
1110
"sync"
1211

@@ -24,42 +23,20 @@ const (
2423

2524
// A Package holds reference information for a single package.
2625
type Package struct {
27-
idx packageIdx // memoized index of this package's ID, to save map lookups
26+
// metadata holds metadata about this package and its dependencies.
27+
metadata *source.Metadata
2828

29-
// Metadata holds metadata about this package and its dependencies.
30-
Metadata *source.Metadata
31-
32-
// Refs records syntactic edges between declarations in this package and
33-
// declarations in this package or another package. See the package
34-
// documentation for a detailed description of what these edges do (and do
35-
// not) represent.
36-
Refs map[string][]Ref
37-
38-
// TransitiveRefs records, for each declaration in the package, the
29+
// transitiveRefs records, for each exported declaration in the package, the
3930
// transitive set of packages within the containing graph that are
4031
// transitively reachable through references, starting with the given decl.
41-
TransitiveRefs map[string]*PackageSet
32+
transitiveRefs map[string]*PackageSet
4233

4334
// ReachesByDeps records the set of packages in the containing graph whose
4435
// syntax may affect the current package's types. See the package
4536
// documentation for more details of what this means.
4637
ReachesByDeps *PackageSet
4738
}
4839

49-
// A Ref is a referenced declaration.
50-
//
51-
// Unpack it using the Unpack method, with the PackageIndex instance that was
52-
// used to construct the references.
53-
type Ref struct {
54-
pkg packageIdx
55-
name string
56-
}
57-
58-
// UnpackRef unpacks the actual PackageID an name encoded in ref.
59-
func (r Ref) Unpack(index *PackageIndex) (PackageID source.PackageID, name string) {
60-
return index.id(r.pkg), r.name
61-
}
62-
6340
// A PackageGraph represents a fully analyzed graph of packages and their
6441
// dependencies.
6542
type PackageGraph struct {
@@ -139,111 +116,78 @@ func (g *PackageGraph) Package(ctx context.Context, id source.PackageID) (*Packa
139116
// only be called from Package.
140117
func (g *PackageGraph) buildPackage(ctx context.Context, id source.PackageID) (*Package, error) {
141118
p := &Package{
142-
idx: g.pkgIndex.idx(id),
143-
Metadata: g.meta.Metadata(id),
144-
Refs: make(map[string][]Ref),
145-
TransitiveRefs: make(map[string]*PackageSet),
119+
metadata: g.meta.Metadata(id),
120+
transitiveRefs: make(map[string]*PackageSet),
146121
}
147122
var files []*source.ParsedGoFile
148-
for _, filename := range p.Metadata.CompiledGoFiles {
123+
for _, filename := range p.metadata.CompiledGoFiles {
149124
f, err := g.parse(ctx, filename)
150125
if err != nil {
151126
return nil, err
152127
}
153128
files = append(files, f)
154129
}
155130
imports := make(map[source.ImportPath]*source.Metadata)
156-
for impPath, depID := range p.Metadata.DepsByImpPath {
131+
for impPath, depID := range p.metadata.DepsByImpPath {
157132
if depID != "" {
158133
imports[impPath] = g.meta.Metadata(depID)
159134
}
160135
}
161-
p.Refs = Refs(files, id, imports, g.pkgIndex)
162-
163-
// Compute packages reachable from each exported symbol of this package.
164-
for name := range p.Refs {
165-
if token.IsExported(name) {
166-
set := g.pkgIndex.New()
167-
g.reachableByName(ctx, p, name, set, make(map[string]bool))
168-
p.TransitiveRefs[name] = set
169-
}
170-
}
171136

172-
var err error
173-
p.ReachesByDeps, err = g.reachesByDeps(ctx, p.Metadata)
174-
if err != nil {
175-
return nil, err
176-
}
177-
return p, nil
178-
}
179-
180-
// ExternalRefs returns a new map whose keys are the exported symbols
181-
// of the package (of the specified id, pkgIndex, and refs). The
182-
// corresponding value of each key is the set of exported symbols
183-
// indirectly referenced by it.
184-
//
185-
// TODO(adonovan): simplify the API once the SCC-based optimization lands.
186-
func ExternalRefs(pkgIndex *PackageIndex, id source.PackageID, refs map[string][]Ref) map[string]map[Ref]bool {
187-
// (This intrapackage recursion will go away in a follow-up CL.)
188-
var visit func(name string, res map[Ref]bool, seen map[string]bool)
189-
visit = func(name string, res map[Ref]bool, seen map[string]bool) {
190-
if !seen[name] {
191-
seen[name] = true
192-
for _, ref := range refs[name] {
193-
if pkgIndex.id(ref.pkg) == id {
194-
visit(ref.name, res, seen) // intrapackage recursion
195-
} else {
196-
res[ref] = true // cross-package ref
197-
}
198-
}
199-
}
200-
}
137+
// Compute the symbol-level dependencies through this package.
138+
//
139+
// refs records syntactic edges between declarations in this
140+
// package and declarations in this package or another
141+
// package. See the package documentation for a detailed
142+
// description of what these edges do (and do not) represent.
143+
//
144+
// TODO(adonovan): opt: serialize and deserialize the refs
145+
// result computed below and persist it in the filecache.
146+
refs := Refs(files, id, imports)
201147

202-
results := make(map[string]map[Ref]bool)
203-
for name := range refs {
204-
if token.IsExported(name) {
205-
res := make(map[Ref]bool)
206-
seen := make(map[string]bool)
207-
visit(name, res, seen)
208-
results[name] = res
209-
}
210-
}
211-
return results
212-
}
148+
// This point separates the local preprocessing
149+
// -- of a single package (above) from the global --
150+
// transitive reachability query (below).
213151

214-
// reachableByName computes the set of packages that are reachable through
215-
// references, starting with the declaration for name in package p.
216-
func (g *PackageGraph) reachableByName(ctx context.Context, p *Package, name string, set *PackageSet, seen map[string]bool) error {
217-
if seen[name] {
218-
return nil
219-
}
220-
seen[name] = true
221-
222-
// Opt: when we compact reachable edges inside the Refs algorithm, we handle
223-
// all edges to a given package in a batch, so they should be adjacent to
224-
// each other in the resulting slice. Therefore remembering the last P here
225-
// can save on lookups.
226-
depP := p
227-
for _, node := range p.Refs[name] {
228-
if node.pkg == p.idx {
229-
// same package
230-
g.reachableByName(ctx, p, node.name, set, seen)
231-
} else {
232-
// cross-package ref
233-
if depP.idx != node.pkg {
234-
id := g.pkgIndex.id(node.pkg)
152+
// Now compute the transitive closure of packages reachable
153+
// from any exported symbol of this package.
154+
//
155+
// TODO(adonovan): opt: many elements of refs[name] are
156+
// identical, so this does redundant work. Choose a data type
157+
// for the result of Refs() that expresses the M:N structure
158+
// explicitly.
159+
for name, nodes := range refs {
160+
set := g.pkgIndex.New()
161+
162+
// The nodes slice is sorted by (package, name),
163+
// so we can economize by calling g.Package only
164+
// when the package id changes.
165+
depP := p
166+
for _, node := range nodes {
167+
assert(node.PkgID != id, "intra-package edge")
168+
if depP.metadata.ID != node.PkgID {
169+
// package changed
235170
var err error
236-
depP, err = g.Package(ctx, id)
171+
depP, err = g.Package(ctx, node.PkgID)
237172
if err != nil {
238-
return err
173+
return nil, err
239174
}
240175
}
241-
set.add(node.pkg)
242-
set.Union(depP.TransitiveRefs[node.name])
176+
set.add(g.pkgIndex.idx(node.PkgID))
177+
set.Union(depP.transitiveRefs[node.Name])
243178
}
179+
p.transitiveRefs[name] = set
180+
}
181+
182+
// Finally compute the union of transitiveRefs
183+
// across the direct deps of this package.
184+
byDeps, err := g.reachesByDeps(ctx, p.metadata)
185+
if err != nil {
186+
return nil, err
244187
}
188+
p.ReachesByDeps = byDeps
245189

246-
return nil
190+
return p, nil
247191
}
248192

249193
// reachesByDeps computes the set of packages that are reachable through
@@ -255,11 +199,9 @@ func (g *PackageGraph) reachesByDeps(ctx context.Context, m *source.Metadata) (*
255199
if err != nil {
256200
return nil, err
257201
}
258-
transitive.add(dep.idx)
259-
for name, set := range dep.TransitiveRefs {
260-
if token.IsExported(name) {
261-
transitive.Union(set)
262-
}
202+
transitive.add(g.pkgIndex.idx(dep.metadata.ID))
203+
for _, set := range dep.transitiveRefs {
204+
transitive.Union(set)
263205
}
264206
}
265207
return transitive, nil

gopls/internal/lsp/source/typerefs/pkgrefs_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,6 @@ func TestBuildPackageGraph(t *testing.T) {
195195
}
196196
fmt.Printf("%52s%8.1f%8.1f%8.1f%8.1f%8.1f%8.1f\n", "averages:", avg(imports), avg(importedBy), avg(reaches), avg(reachedBy), avg(loads), avg(loadedBy))
197197
}
198-
199198
}
200199

201200
func importMap(id PackageID, meta MetadataSource) map[PackagePath]PackageID {

0 commit comments

Comments
 (0)