Skip to content

Commit 04f8c52

Browse files
committed
maintner: flesh out godata package, add caching network mutation source
Also, the maintner.Corpus constructor is now gone. An API is added for maintnerd to become the leader and specify the cache dir.

Updates golang/go#19866

Change-Id: Ia726aa00ca1337b6c130cfee040ff9a1f935d0c2
Reviewed-on: https://go-review.googlesource.com/42148
Reviewed-by: Kevin Burke <[email protected]>
1 parent a5b059a commit 04f8c52

File tree

8 files changed

+310
-50
lines changed

8 files changed

+310
-50
lines changed

maintner/gerrit.go

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import (
3131
// Gerrit holds information about a number of Gerrit projects.
3232
type Gerrit struct {
3333
c *Corpus
34-
dataDir string // the root Corpus data directory
3534
projects map[string]*GerritProject // keyed by "go.googlesource.com/build"
3635

3736
clsReferencingGithubIssue map[GitHubIssueRef][]*GerritCL
@@ -46,7 +45,6 @@ func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject {
4645
proj = &GerritProject{
4746
gerrit: g,
4847
proj: gerritProj,
49-
gitDir: filepath.Join(g.dataDir, url.PathEscape(gerritProj)),
5048
cls: map[int32]*GerritCL{},
5149
remote: map[gerritCLVersion]GitHash{},
5250
}
@@ -69,15 +67,15 @@ func (g *Gerrit) ForeachProjectUnsorted(fn func(*GerritProject) error) error {
6967
type GerritProject struct {
7068
gerrit *Gerrit
7169
proj string // "go.googlesource.com/net"
72-
// TODO: Many different Git remotes can share the same Gerrit instance, e.g.
73-
// the Go Gerrit instance supports build, gddo, go. For the moment these are
74-
// all treated separately, since the remotes are separate.
75-
gitDir string
7670
cls map[int32]*GerritCL
7771
remote map[gerritCLVersion]GitHash
7872
need map[GitHash]bool
7973
}
8074

75+
func (gp *GerritProject) gitDir() string {
76+
return filepath.Join(gp.gerrit.c.getDataDir(), url.PathEscape(gp.proj))
77+
}
78+
8179
// ServerSlashProject returns the project in "server/project" form, such as
// "go.googlesource.com/net".
func (gp *GerritProject) ServerSlashProject() string {
	return gp.proj
}
8280

8381
// Server returns the Gerrit server, such as "go.googlesource.com".
@@ -215,7 +213,6 @@ func (c *Corpus) initGerrit() {
215213
}
216214
c.gerrit = &Gerrit{
217215
c: c,
218-
dataDir: c.dataDir,
219216
projects: map[string]*GerritProject{},
220217
clsReferencingGithubIssue: map[GitHubIssueRef][]*GerritCL{},
221218
}
@@ -231,6 +228,7 @@ type watchedGerritRepo struct {
231228
func (c *Corpus) AddGerrit(gerritProj string) {
232229
c.mu.Lock()
233230
defer c.mu.Unlock()
231+
234232
if strings.Count(gerritProj, "/") != 1 {
235233
panic(fmt.Sprintf("gerrit project argument %q expected to contain exactly 1 slash", gerritProj))
236234
}
@@ -463,18 +461,19 @@ func (gp *GerritProject) sync(ctx context.Context, loop bool) error {
463461

464462
func (gp *GerritProject) syncOnce(ctx context.Context) error {
465463
c := gp.gerrit.c
464+
gitDir := gp.gitDir()
466465

467466
fetchCtx, cancel := context.WithTimeout(ctx, time.Minute)
468467
cmd := exec.CommandContext(fetchCtx, "git", "fetch", "origin")
469-
cmd.Dir = gp.gitDir
468+
cmd.Dir = gitDir
470469
out, err := cmd.CombinedOutput()
471470
cancel()
472471
if err != nil {
473472
return fmt.Errorf("git fetch origin: %v, %s", err, out)
474473
}
475474

476475
cmd = exec.CommandContext(ctx, "git", "ls-remote")
477-
cmd.Dir = gp.gitDir
476+
cmd.Dir = gitDir
478477
out, err = cmd.CombinedOutput()
479478
if err != nil {
480479
return fmt.Errorf("git ls-remote: %v, %s", err, out)
@@ -567,7 +566,7 @@ func (gp *GerritProject) syncCommits(ctx context.Context) (n int, err error) {
567566
lastLog = now
568567
gp.logf("parsing commits (%v done)", n)
569568
}
570-
commit, err := parseCommitFromGit(gp.gitDir, hash)
569+
commit, err := parseCommitFromGit(gp.gitDir(), hash)
571570
if err != nil {
572571
return n, err
573572
}
@@ -603,7 +602,7 @@ func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []GitHash) erro
603602
}
604603
gp.logf("fetching %v hashes...", len(hashes))
605604
cmd := exec.CommandContext(ctx, "git", args...)
606-
cmd.Dir = gp.gitDir
605+
cmd.Dir = gp.gitDir()
607606
if out, err := cmd.CombinedOutput(); err != nil {
608607
log.Printf("error fetching %d hashes from gerrit project %s: %s", len(hashes), gp.proj, out)
609608
return err
@@ -620,7 +619,8 @@ func formatExecError(err error) string {
620619
}
621620

622621
func (gp *GerritProject) init(ctx context.Context) error {
623-
if err := os.MkdirAll(gp.gitDir, 0755); err != nil {
622+
gitDir := gp.gitDir()
623+
if err := os.MkdirAll(gitDir, 0755); err != nil {
624624
return err
625625
}
626626
// try to short circuit a git init error, since the init error matching is
@@ -629,12 +629,12 @@ func (gp *GerritProject) init(ctx context.Context) error {
629629
return fmt.Errorf("looking for git binary: %v", err)
630630
}
631631

632-
if _, err := os.Stat(filepath.Join(gp.gitDir, ".git", "config")); err == nil {
632+
if _, err := os.Stat(filepath.Join(gitDir, ".git", "config")); err == nil {
633633
cmd := exec.CommandContext(ctx, "git", "remote", "-v")
634-
cmd.Dir = gp.gitDir
634+
cmd.Dir = gitDir
635635
remoteBytes, err := cmd.Output()
636636
if err != nil {
637-
return fmt.Errorf("running git remote -v in %v: %v", gp.gitDir, formatExecError(err))
637+
return fmt.Errorf("running git remote -v in %v: %v", gitDir, formatExecError(err))
638638
}
639639
if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) {
640640
return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes))
@@ -647,7 +647,7 @@ func (gp *GerritProject) init(ctx context.Context) error {
647647
buf := new(bytes.Buffer)
648648
cmd.Stdout = buf
649649
cmd.Stderr = buf
650-
cmd.Dir = gp.gitDir
650+
cmd.Dir = gitDir
651651
if err := cmd.Run(); err != nil {
652652
log.Printf(`Error running "git init": %s`, buf.String())
653653
return err
@@ -656,7 +656,7 @@ func (gp *GerritProject) init(ctx context.Context) error {
656656
cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj)
657657
cmd.Stdout = buf
658658
cmd.Stderr = buf
659-
cmd.Dir = gp.gitDir
659+
cmd.Dir = gitDir
660660
if err := cmd.Run(); err != nil {
661661
log.Printf(`Error running "git remote add origin": %s`, buf.String())
662662
return err

maintner/git.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error)
333333
if c.gitCommitTodo != nil {
334334
delete(c.gitCommitTodo, hash)
335335
}
336-
if c.Verbose {
336+
if c.verbose {
337337
now := time.Now()
338338
if now.After(c.lastGitCount.Add(time.Second)) {
339339
c.lastGitCount = now

maintner/github.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ type githubIssueDiffer struct {
717717
}
718718

719719
// verbose reports whether verbose mode is enabled on the corpus reached
// through d.gr.github.c. It returns false when either d.gr.github or its
// corpus pointer is nil.
func (d githubIssueDiffer) verbose() bool {
720-
return d.gr.github != nil && d.gr.github.c != nil && d.gr.github.c.Verbose
720+
return d.gr.github != nil && d.gr.github.c != nil && d.gr.github.c.verbose
721721
}
722722

723723
// returns nil if no changes.

maintner/godata/godata.go

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,55 @@ package godata
77

88
import (
99
"context"
10+
"log"
1011
"os"
12+
"os/user"
1113
"path/filepath"
14+
"runtime"
1215

1316
"golang.org/x/build/maintner"
1417
)
1518

1619
// Get returns the Go project's corpus.
//
// It ensures that a "golang-maintner" directory (mode 0700) exists under the
// user's cache directory, then initializes a fresh Corpus from a network
// mutation source that reads https://maintner.golang.org/logs and is given
// that directory (presumably as its on-disk cache; confirm against
// NewNetworkMutationSource). Any error from creating the directory or from
// initializing the corpus is returned with a nil corpus.
1720
func Get(ctx context.Context) (*maintner.Corpus, error) {
18-
// TODO: this is a dummy implementation for now. It should
19-
// really create a cache dir, and slurp as-needed from the
20-
// network (once we run a server), and then load it. For now
21-
// we assume it's already on disk.
22-
dir := filepath.Join(os.Getenv("HOME"), "var", "maintnerd")
23-
logger := maintner.NewDiskMutationLogger(dir)
24-
corpus := maintner.NewCorpus(logger, dir)
25-
if err := corpus.Initialize(ctx, logger); err != nil {
21+
targetDir := filepath.Join(xdgCacheDir(), "golang-maintner")
22+
if err := os.MkdirAll(targetDir, 0700); err != nil {
23+
return nil, err
24+
}
25+
mutSrc := maintner.NewNetworkMutationSource("https://maintner.golang.org/logs", targetDir)
26+
corpus := new(maintner.Corpus)
27+
if err := corpus.Initialize(ctx, mutSrc); err != nil {
2628
return nil, err
2729
}
2830
return corpus, nil
2931
}
32+
33+
// xdgCacheDir returns the XDG Base Directory Specification cache
34+
// directory.
35+
func xdgCacheDir() string {
36+
cache := os.Getenv("XDG_CACHE_HOME")
37+
if cache != "" {
38+
return cache
39+
}
40+
home := homeDir()
41+
// Not XDG but standard for OS X.
42+
if runtime.GOOS == "darwin" {
43+
return filepath.Join(home, "Library/Caches")
44+
}
45+
return filepath.Join(home, ".cache")
46+
}
47+
48+
// homeDir returns the current user's home directory.
//
// On Windows it is %HOMEDRIVE%%HOMEPATH%, falling back to %USERPROFILE% when
// that combination is empty. On other systems it is $HOME. If the environment
// yields nothing, the home directory reported by os/user for the current user
// is returned; if that lookup fails too, the process exits via log.Fatalf.
func homeDir() string {
	var home string
	if runtime.GOOS == "windows" {
		home = os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
		if home == "" {
			// Without this fallback an unset HOMEDRIVE/HOMEPATH pair would
			// yield "", making every path built on top of it relative to
			// the working directory.
			home = os.Getenv("USERPROFILE")
		}
	} else {
		home = os.Getenv("HOME")
	}
	if home != "" {
		return home
	}
	u, err := user.Current()
	if err != nil {
		log.Fatalf("failed to get home directory or current user: %v", err)
	}
	return u.HomeDir
}

maintner/maintner.go

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ import (
3131
// is populated from a MutationSource (disk, database), and the polling phase,
3232
// when the Corpus polls for new events and stores/writes them to disk.
3333
type Corpus struct {
34-
MutationLogger MutationLogger
35-
Verbose bool
34+
mutationLogger MutationLogger // non-nil when this is a self-updating corpus
35+
verbose bool
36+
dataDir string
3637

3738
mu sync.RWMutex // guards all following fields
3839
// corpus state:
@@ -56,17 +57,27 @@ type Corpus struct {
5657
gitCommitTodo map[GitHash]bool // -> true
5758
gitOfHg map[string]GitHash // hg hex hash -> git hash
5859
zoneCache map[string]*time.Location // "+0530" => location
59-
dataDir string
6060
}
6161

6262
// polledGitCommits pairs a git repository record with a directory path.
type polledGitCommits struct {
6363
repo *maintpb.GitRepo
6464
dir string // NOTE(review): presumably the on-disk checkout directory for repo; confirm
6565
}
6666

67-
// NewCorpus creates a new Corpus.
68-
func NewCorpus(logger MutationLogger, dataDir string) *Corpus {
69-
return &Corpus{MutationLogger: logger, dataDir: dataDir}
67+
// EnableLeaderMode prepares c to be the leader.
68+
// The provided scratchDir will store git checkouts.
69+
func (c *Corpus) EnableLeaderMode(logger MutationLogger, scratchDir string) {
70+
c.mutationLogger = logger
71+
c.dataDir = scratchDir
72+
}
73+
74+
// SetVerbose turns verbose logging for c on or off.
func (c *Corpus) SetVerbose(v bool) {
	c.verbose = v
}
75+
76+
func (c *Corpus) getDataDir() string {
77+
if c.dataDir == "" {
78+
panic("getDataDir called before Corpus.EnableLeaderMode")
79+
}
80+
return c.dataDir
7081
}
7182

7283
// GitHub returns the corpus's github data.
@@ -214,17 +225,17 @@ func (c *Corpus) Update(ctx context.Context) error {
214225

215226
// addMutation adds a mutation to the log and immediately processes it.
216227
func (c *Corpus) addMutation(m *maintpb.Mutation) {
217-
if c.Verbose {
228+
if c.verbose {
218229
log.Printf("mutation: %v", m)
219230
}
220231
c.mu.Lock()
221232
c.processMutationLocked(m)
222233
c.mu.Unlock()
223234

224-
if c.MutationLogger == nil {
235+
if c.mutationLogger == nil {
225236
return
226237
}
227-
err := c.MutationLogger.Log(m)
238+
err := c.mutationLogger.Log(m)
228239
if err != nil {
229240
// TODO: handle errors better? failing is only safe option.
230241
log.Fatalf("could not log mutation %v: %v\n", m, err)

maintner/maintnerd/gcslog.go

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,6 @@ func (gl *gcsLog) initLoad(ctx context.Context) error {
153153
return nil
154154
}
155155

156-
type LogSegmentJSON struct {
157-
Number int `json:"number"`
158-
Size int64 `json:"size"`
159-
SHA224 string `json:"sha224"`
160-
URL string `json:"url"` // TODO ....
161-
// TODO ....
162-
}
163-
164156
func (gl *gcsLog) serveLogFile(w http.ResponseWriter, r *http.Request) {
165157
if r.Method != "GET" && r.Method != "HEAD" {
166158
http.Error(w, "bad method", http.StatusBadRequest)
@@ -210,24 +202,24 @@ func (gl *gcsLog) serveJSONLogsIndex(w http.ResponseWriter, r *http.Request) {
210202
w.Write(body)
211203
}
212204

213-
func (gl *gcsLog) getJSONLogs(startSeg int) (segs []LogSegmentJSON) {
205+
func (gl *gcsLog) getJSONLogs(startSeg int) (segs []maintner.LogSegmentJSON) {
214206
gl.mu.Lock()
215207
defer gl.mu.Unlock()
216208
if startSeg > gl.curNum || startSeg < 0 {
217209
startSeg = 0
218210
}
219-
segs = make([]LogSegmentJSON, 0, gl.curNum-startSeg)
211+
segs = make([]maintner.LogSegmentJSON, 0, gl.curNum-startSeg)
220212
for i := startSeg; i < gl.curNum; i++ {
221213
seg := gl.seg[i]
222-
segs = append(segs, LogSegmentJSON{
214+
segs = append(segs, maintner.LogSegmentJSON{
223215
Number: i,
224216
Size: seg.size,
225217
SHA224: seg.sha224,
226218
URL: fmt.Sprintf("https://storage.googleapis.com/%s/%s", gl.bucketName, seg.ObjectName()),
227219
})
228220
}
229221
if gl.logBuf.Len() > 0 {
230-
segs = append(segs, LogSegmentJSON{
222+
segs = append(segs, maintner.LogSegmentJSON{
231223
Number: gl.curNum,
232224
Size: int64(gl.logBuf.Len()),
233225
SHA224: fmt.Sprintf("%x", gl.logSHA224.Sum(nil)),

maintner/maintnerd/maintnerd.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,12 @@ func main() {
106106
logger = maintner.NewDiskMutationLogger(*dataDir)
107107
}
108108

109-
corpus := maintner.NewCorpus(logger, *dataDir)
109+
corpus := new(maintner.Corpus)
110+
corpus.EnableLeaderMode(logger, *dataDir)
110111
if *debug {
111112
corpus.SetDebug()
112113
}
113-
corpus.Verbose = *verbose
114+
corpus.SetVerbose(*verbose)
114115
switch *config {
115116
case "":
116117
// Nothing

0 commit comments

Comments
 (0)