Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 104 additions & 1 deletion cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,49 @@ type ActionID [HashSize]byte
// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// Cacher is the cache interface consumed by cmd/go.
// NOTE: the name differs from upstream's Cache to avoid breaking changes.
type Cacher interface {
	// Get looks up the cache entry recorded for the given ActionID.
	// A miss should be reported with an error of type *entryNotFoundError.
	//
	// Once Get has succeeded, OutputFile(Entry.OutputID) must stay on
	// disk until Close is called (at the end of the process).
	Get(id ActionID) (Entry, error)

	// Put adds an item to the cache and reports its OutputID and size.
	//
	// The seeker is used solely to rewind to the beginning; callers must
	// not rely on the seek position after Put returns.
	//
	// Special case: when the ReadSeeker is a noVerifyReadSeeker, the
	// verification enabled by GODEBUG=goverifycache=1 is skipped.
	//
	// Once Put has succeeded, OutputFile(OutputID) must stay on
	// disk until Close is called (at the end of the process).
	Put(id ActionID, r io.ReadSeeker) (out OutputID, size int64, err error)

	// OutputFile returns the on-disk path at which OutputID is stored.
	//
	// It is only called after a successful Get or Put, so it has no error
	// result: if that earlier call succeeded, the output is assumed to be
	// on disk already.
	OutputFile(out OutputID) string

	// FuzzDir returns where fuzz files are stored.
	FuzzDir() string

	// Close is called at the end of the go process. An implementation may
	// perform cache cleanup here, or wait for and report errors from
	// background cleanup it started earlier.
	//
	// Trimming done by one process must not break this interface's
	// invariants for another process: in particular, it must not delete
	// from disk an OutputID that another process recently obtained through
	// Get or Put. As a rule of thumb, don't trim things used in the last
	// day.
	Close() error
}

// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
dir string
Expand Down Expand Up @@ -266,6 +309,39 @@ func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
}
*/

// GetFile resolves the action ID through c and returns the path of the
// data file holding its output, together with the cache entry.
//
// The file must exist and have exactly the size recorded in the entry;
// otherwise the lookup is reported as a miss via *entryNotFoundError.
// Errors from c.Get are returned unchanged.
func GetFile(c Cacher, id ActionID) (file string, entry Entry, err error) {
	hit, getErr := c.Get(id)
	if getErr != nil {
		return "", Entry{}, getErr
	}

	path := c.OutputFile(hit.OutputID)
	fi, statErr := os.Stat(path)
	switch {
	case statErr != nil:
		// The index knows the entry but the data file cannot be stat'ed.
		return "", Entry{}, &entryNotFoundError{Err: statErr}
	case fi.Size() != hit.Size:
		// The on-disk size disagrees with the size recorded in the entry.
		return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
	}
	return path, hit, nil
}

// GetBytes looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetBytes should only be used for data that can be expected to fit in memory.
//
// Errors from c.Get are returned unchanged. If the output file cannot be
// read, or its SHA-256 does not match the entry's OutputID, the lookup is
// reported as a miss via *entryNotFoundError. On every error path the
// returned byte slice is nil; the Entry obtained from c.Get is still returned.
func GetBytes(c Cacher, id ActionID) ([]byte, Entry, error) {
	entry, err := c.Get(id)
	if err != nil {
		return nil, entry, err
	}
	data, err := os.ReadFile(c.OutputFile(entry.OutputID))
	if err != nil {
		// Surface the real I/O failure instead of letting it masquerade as
		// a checksum mismatch (or, for an empty output, as a hit).
		return nil, entry, &entryNotFoundError{Err: err}
	}
	if sha256.Sum256(data) != entry.OutputID {
		return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
	}
	return data, entry, nil
}

// OutputFile returns the name of the cache file storing output with the given OutputID.
func (c *Cache) OutputFile(out OutputID) string {
file := c.fileName(out, "d")
Expand Down Expand Up @@ -308,6 +384,8 @@ func (c *Cache) used(file string) {
os.Chtimes(file, c.now(), c.now())
}

// Close runs Trim on the cache and returns Trim's error, if any.
func (c *Cache) Close() error {
	return c.Trim()
}

// Trim removes old cache entries that are likely not to be reused.
func (c *Cache) Trim() error {
now := c.now()
Expand Down Expand Up @@ -431,10 +509,21 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
return nil
}

// noVerifyReadSeeker is a sentinel wrapper type around an io.ReadSeeker.
// Passing one to Cache.Put says that the verify check
// (from GODEBUG=goverifycache=1) should be skipped for that item.
type noVerifyReadSeeker struct {
	// ReadSeeker is the wrapped reader holding the data to store.
	io.ReadSeeker
}

// Put stores the given output in the cache as the output for the action ID.
// It may read file twice. The content of file must not change between the two passes.
//
// If file is a noVerifyReadSeeker, the wrapped ReadSeeker is stored instead
// and the verify check (from GODEBUG=goverifycache=1) is disabled for this
// call; any other ReadSeeker is stored with verification allowed.
func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	// Unwrap the sentinel so put sees the caller's real ReadSeeker.
	wrapper, isNoVerify := file.(noVerifyReadSeeker)
	if isNoVerify {
		file = wrapper.ReadSeeker
	}
	return c.put(id, file, !isNoVerify)
}

// PutNoVerify is like Put but disables the verify check
Expand All @@ -445,6 +534,14 @@ func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, e
return c.put(id, file, false)
}

// PutNoVerify stores file under the action ID via c.Put, with the verify
// check that GODEBUG=goverifycache=1 enables turned off.
// Use it for data that is fine to cache but is expected to differ slightly
// from run to run, like test output containing times and the like.
//
// The request to skip verification is conveyed by wrapping file in a
// noVerifyReadSeeker before handing it to c.Put.
func PutNoVerify(c Cacher, id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	unverified := noVerifyReadSeeker{ReadSeeker: file}
	return c.Put(id, unverified)
}

func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
// Compute output ID.
h := sha256.New()
Expand Down Expand Up @@ -473,6 +570,12 @@ func (c *Cache) PutBytes(id ActionID, data []byte) error {
return err
}

// PutBytes records data in the cache c as the output for the action ID.
// The OutputID and size reported by c.Put are discarded; only its error
// is returned.
func PutBytes(c Cacher, id ActionID, data []byte) error {
	if _, _, err := c.Put(id, bytes.NewReader(data)); err != nil {
		return err
	}
	return nil
}

// copyFile copies file into the cache, expecting it to have the given
// output ID and size, if that file is not present already.
func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
Expand Down
126 changes: 126 additions & 0 deletions cache/cacheprog/cacheprog.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cacheprog defines the protocol for a GOCACHEPROG program.
//
// By default, the go command manages a build cache stored in the file system
// itself. GOCACHEPROG can be set to the name of a command (with optional
// space-separated flags) that implements the go command build cache externally.
// This permits defining a different cache policy.
//
// The go command will start the GOCACHEPROG as a subprocess and communicate
// with it via JSON messages over stdin/stdout. The subprocess's stderr will be
// connected to the go command's stderr.
//
// The subprocess should immediately send a [Response] with its capabilities.
// After that, the go command will send a stream of [Request] messages and the
// subprocess should reply to each [Request] with a [Response] message.
package cacheprog

import (
"io"
"time"
)

// Cmd names a command that the go command can issue to a child process.
//
// Should the interface need to grow, the go command can introduce new
// commands, or new versioned commands such as "get2". The child process
// declares which commands it supports in its initial [Response].
type Cmd string

const (
	// CmdPut asks the cache program to store an object in the cache.
	//
	// [Request.ActionID] is the cache key of the object. The cache should
	// keep [Request.OutputID] and [Request.Body] under that key so that a
	// later "get" request can return them. It must also write the Body to
	// a file in the local file system and report that file's path in
	// [Response.DiskPath]; the file must exist at least until a "close"
	// request.
	CmdPut Cmd = "put"

	// CmdGet asks the cache program to retrieve an object from the cache.
	//
	// [Request.ActionID] is the key of the object to fetch. When the cache
	// does not hold it, the program should set [Response.Miss] to true.
	// Otherwise it should fill in the fields of [Response], including
	// [Response.OutputID] (the OutputID of the original "put" request) and
	// [Response.DiskPath] (the path of a local file containing the Body of
	// the original "put" request). That file must continue to exist at
	// least until a "close" request.
	CmdGet Cmd = "get"

	// CmdClose asks the cache program to exit gracefully.
	//
	// The cache program should reply to this request and then exit
	// (thus closing its stdout).
	CmdClose Cmd = "close"
)

// Request is the JSON-encoded message that's sent from the go command to
// the GOCACHEPROG child process over stdin. Each JSON object is on its own
// line. A Request with Command "put" and BodySize > 0 will be followed by a
// line containing a base64-encoded JSON string literal of the body.
type Request struct {
	// ID is a unique number per process across all requests.
	// It must be echoed in the Response from the child.
	ID int64

	// Command is the type of request.
	// The go command will only send commands that were declared
	// as supported by the child.
	Command Cmd

	// ActionID is the cache key for "put" and "get" requests.
	ActionID []byte `json:",omitempty"` // or nil if not used

	// OutputID is stored with the body for "put" requests.
	OutputID []byte `json:",omitempty"` // or nil if not used

	// Body is the body for "put" requests. It's sent after the JSON object
	// as a base64-encoded JSON string when BodySize is non-zero.
	// It's sent as a separate JSON value instead of being a struct field
	// sent in this JSON object so large values can be streamed in both directions.
	// The base64 string body of a Request will always be written
	// immediately after the JSON object and a newline.
	//
	// The `json:"-"` tag keeps Body out of the encoded JSON object itself.
	Body io.Reader `json:"-"`

	// BodySize is the number of bytes of Body. If zero, the body isn't written.
	BodySize int64 `json:",omitempty"`
}

// Response is the JSON response from the child process to the go command.
//
// With the exception of the first protocol message that the child writes to its
// stdout with ID==0 and KnownCommands populated, these are only sent in
// response to a Request from the go command.
//
// Responses can be sent in any order. The ID must match the ID of the
// Request they're replying to.
type Response struct {
	ID  int64  // that corresponds to Request; they can be answered out of order
	Err string `json:",omitempty"` // if non-empty, the error

	// KnownCommands is included in the first message that the cache helper
	// program writes to stdout on startup (with ID==0). It includes the
	// Request.Command types that are supported by the program.
	//
	// This lets the go command extend the protocol gracefully over time (adding
	// "get2", etc), or fail gracefully when needed. It also lets the go command
	// verify the program wants to be a cache helper.
	KnownCommands []Cmd `json:",omitempty"`

	// For "get" requests.

	Miss     bool       `json:",omitempty"` // cache miss
	OutputID []byte     `json:",omitempty"` // the OutputID stored with the body
	Size     int64      `json:",omitempty"` // body size in bytes
	Time     *time.Time `json:",omitempty"` // when the object was put in the cache (optional; used for cache expiration)

	// For "get" and "put" requests.

	// DiskPath is the absolute path on disk of the body corresponding to a
	// "get" (on cache hit) or "put" request's ActionID.
	DiskPath string `json:",omitempty"`
}
23 changes: 13 additions & 10 deletions cache/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@ import (

// Default returns the default cache to use.
// It never returns nil.
func Default() *Cache {
defaultOnce.Do(initDefaultCache)
return defaultCache
func Default() Cacher {
return initDefaultCacheOnce()
}

// NOTE(review): defaultOnce and defaultCache appear to belong to the
// sync.Once-based initialization that initDefaultCacheOnce replaces —
// confirm there are no remaining users before removing them.
var (
	defaultOnce sync.Once
	defaultCache *Cache
)

// initDefaultCacheOnce wraps initDefaultCache with sync.OnceValue: the first
// call runs initDefaultCache, and every call returns that same result.
var initDefaultCacheOnce = sync.OnceValue(initDefaultCache)

// cacheREADME is a message stored in a README in the cache directory.
// Because the cache lives outside the normal Go trees, we leave the
Expand All @@ -35,7 +31,7 @@ See golang.org to learn more about Go.

// initDefaultCache does the work of finding the default cache
// the first time Default is called.
func initDefaultCache() {
func initDefaultCache() Cacher {
dir := DefaultDir()
if dir == "off" {
if defaultDirErr != nil {
Expand All @@ -51,11 +47,18 @@ func initDefaultCache() {
os.WriteFile(filepath.Join(dir, "README"), []byte(cacheREADME), 0666)
}

c, err := Open(dir)
diskCache, err := Open(dir)
if err != nil {
log.Fatalf("failed to initialize build cache at %s: %s\n", dir, err)
}
defaultCache = c

// NOTE: changed from upstream's cfg.Getenv, so it will ignore "go env -w".
// Consider calling "go env" or copying the cfg package instead.
if cacheprog := os.Getenv("GOCACHEPROG"); cacheprog != "" {
return startCacheProg(cacheprog, diskCache)
}

return diskCache
}

var (
Expand Down
Loading