Skip to content

Commit 530a108

Browse files
authored
Merge pull request #34 from infosiftr/cache-data
Update registry cache to (ab)use `Data` field of `Descriptor` objects
2 parents d54fee8 + 46e854a commit 530a108

File tree

11 files changed

+728
-169
lines changed

.test/lookup-test.json

Lines changed: 359 additions & 64 deletions
Large diffs are not rendered by default.

.test/test.sh

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,50 @@ lookup=(
4848
# tianon/test:index-no-platform-smaller - a "broken" index with *zero* platform objects in it (so every manifest requires a platform lookup)
4949
'tianon/test@sha256:347290ddd775c1b85a3e381b09edde95242478eb65153e9b17225356f4c072ac'
5050
# (doing these in the same run means the manifest from above should be cached and exercise more codepaths for better coverage)
51+
52+
--type manifest 'tianon/test@sha256:347290ddd775c1b85a3e381b09edde95242478eb65153e9b17225356f4c072ac' # same manifest again, but without SynthesizeIndex
53+
--type blob 'tianon/test@sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a' # first config blob from the above
54+
# and again, but this time HEADs
55+
--head --type manifest 'tianon/test@sha256:347290ddd775c1b85a3e381b09edde95242478eb65153e9b17225356f4c072ac'
56+
--head --type blob 'tianon/test@sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a'
57+
58+
# again with things that aren't cached yet (tianon/true:oci, specifically)
59+
--head --type blob 'tianon/true@sha256:25be82253336f0b8c4347bc4ecbbcdc85d0e0f118ccf8dc2e119c0a47a0a486e' # config blob
60+
--head --type manifest 'tianon/true:oci@sha256:9ef42f1d602fb423fad935aac1caa0cfdbce1ad7edce64d080a4eb7b13f7cd9d'
61+
--type blob 'tianon/true@sha256:25be82253336f0b8c4347bc4ecbbcdc85d0e0f118ccf8dc2e119c0a47a0a486e' # config blob
62+
--type manifest 'tianon/true:oci@sha256:9ef42f1d602fb423fad935aac1caa0cfdbce1ad7edce64d080a4eb7b13f7cd9d'
63+
'tianon/true:oci@sha256:9ef42f1d602fb423fad935aac1caa0cfdbce1ad7edce64d080a4eb7b13f7cd9d'
64+
65+
# tag lookup! (but with a hopefully stable example tag -- a build of notary:server)
66+
--head 'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694'
67+
--head 'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694' # twice, to exercise "tag is cached" case
68+
--type manifest 'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694'
69+
'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694'
70+
71+
# exercise 404 codepaths
72+
"tianon/this-is-a-repository-that-will-never-ever-exist-$RANDOM-$RANDOM:$RANDOM-$RANDOM"
73+
--head "tianon/this-is-a-repository-that-will-never-ever-exist-$RANDOM-$RANDOM:$RANDOM-$RANDOM"
74+
'tianon/test@sha256:0000000000000000000000000000000000000000000000000000000000000000'
5175
)
52-
"$dir/../bin/lookup" "${lookup[@]}" | jq -s > "$dir/lookup-test.json"
76+
"$dir/../bin/lookup" "${lookup[@]}" | jq -s '
77+
[
78+
reduce (
79+
$ARGS.positional[]
80+
| if startswith("tianon/this-is-a-repository-that-will-never-ever-exist-") then
81+
gsub("[0-9]+"; "$RANDOM")
82+
else . end
83+
) as $a ([];
84+
if .[-1][-1] == "--type" then
85+
.[-1][-1] += " " + $a
86+
elif length > 0 and (.[-1][-1] | startswith("--")) then
87+
.[-1] += [$a]
88+
else
89+
. += [[$a]]
90+
end
91+
),
92+
.
93+
] | transpose
94+
' --args -- "${lookup[@]}" > "$dir/lookup-test.json"
5395

5496
# don't leave around the "-cover" versions of these binaries
5597
rm -f "$dir/../bin/builds" "$dir/../bin/lookup"

cmd/lookup/main.go

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package main
55
import (
66
"context"
77
"encoding/json"
8+
"io"
89
"os"
910
"os/signal"
1011

@@ -15,21 +16,79 @@ func main() {
1516
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
1617
defer stop()
1718

18-
for _, img := range os.Args[1:] {
19+
var (
20+
zeroOpts registry.LookupOptions
21+
opts = zeroOpts
22+
)
23+
24+
args := os.Args[1:]
25+
for len(args) > 0 {
26+
img := args[0]
27+
args = args[1:]
28+
switch img {
29+
case "--type":
30+
opts.Type = registry.LookupType(args[0])
31+
args = args[1:]
32+
continue
33+
case "--head":
34+
opts.Head = true
35+
continue
36+
}
37+
1938
ref, err := registry.ParseRef(img)
2039
if err != nil {
2140
panic(err)
2241
}
2342

24-
index, err := registry.SynthesizeIndex(ctx, ref)
25-
if err != nil {
26-
panic(err)
43+
var obj any
44+
if opts == zeroOpts {
45+
// if we have no explicit type and didn't request a HEAD, invoke SynthesizeIndex instead of Lookup
46+
obj, err = registry.SynthesizeIndex(ctx, ref)
47+
if err != nil {
48+
panic(err)
49+
}
50+
} else {
51+
r, err := registry.Lookup(ctx, ref, &opts)
52+
if err != nil {
53+
panic(err)
54+
}
55+
if r != nil {
56+
desc := r.Descriptor()
57+
if opts.Head {
58+
obj = desc
59+
} else {
60+
b, err := io.ReadAll(r)
61+
if err != nil {
62+
r.Close()
63+
panic(err)
64+
}
65+
if opts.Type == registry.LookupTypeManifest {
66+
// if it was a manifest lookup, cast the byte slice to json.RawMessage so we get the actual JSON (not base64)
67+
obj = json.RawMessage(b)
68+
} else {
69+
obj = b
70+
}
71+
}
72+
err = r.Close()
73+
if err != nil {
74+
panic(err)
75+
}
76+
} else {
77+
obj = nil
78+
}
2779
}
2880

2981
e := json.NewEncoder(os.Stdout)
3082
e.SetIndent("", "\t")
31-
if err := e.Encode(index); err != nil {
83+
if err := e.Encode(obj); err != nil {
3284
panic(err)
3385
}
86+
87+
// reset state
88+
opts = zeroOpts
89+
}
90+
91+
if opts != zeroOpts {
92+
panic("dangling --type, --head, etc (without a following reference for it to apply to)")
3493
}
3594
}

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ require (
2020
google.golang.org/protobuf v1.28.1 // indirect
2121
)
2222

23-
// https://github.com/cue-labs/oci/pull/27
24-
replace cuelabs.dev/go/oci/ociregistry => github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240216044210-8aa0c990bd77
23+
// https://github.com/cue-labs/oci/pull/29
24+
replace cuelabs.dev/go/oci/ociregistry => github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240322151419-7d3242933116

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs
3232
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
3333
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
3434
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
35-
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240216044210-8aa0c990bd77 h1:9EPZm+sGlYHo6LleMXWR6s3P8SJEYA7/aovpJ76JSpw=
36-
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240216044210-8aa0c990bd77/go.mod h1:ApHceQLLwcOkCEXM1+DyCXTHEJhNGDpJ2kmV6axsx24=
35+
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240322151419-7d3242933116 h1:ZDy4uRAhzODJXRo4EoNpJTCiSeOs8wwrkfMJy3JyDps=
36+
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240322151419-7d3242933116/go.mod h1:pK23AUVXuNzzTpfMCA06sxZGeVQ/75FdVtW249de9Uo=
3737
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
3838
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
3939
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

registry/cache.go

Lines changed: 99 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ func RegistryCache(r ociregistry.Interface) ociregistry.Interface {
2121
registry: r, // TODO support "nil" here so this can be a poor-man's ocimem implementation? 👀 see also https://github.com/cue-labs/oci/issues/24
2222
has: map[string]bool{},
2323
tags: map[string]ociregistry.Digest{},
24-
types: map[ociregistry.Digest]string{},
25-
data: map[ociregistry.Digest][]byte{},
24+
data: map[ociregistry.Digest]ociregistry.Descriptor{},
2625
}
2726
}
2827

@@ -32,11 +31,10 @@ type registryCache struct {
3231
registry ociregistry.Interface
3332

3433
// https://github.com/cue-labs/oci/issues/24
35-
mu sync.Mutex // TODO some kind of per-object/name/digest mutex so we don't request the same object from the upstream registry concurrently (on *top* of our maps mutex)?
36-
has map[string]bool // "repo/name@digest" => true (whether a given repo has the given digest)
37-
tags map[string]ociregistry.Digest // "repo/name:tag" => digest
38-
types map[ociregistry.Digest]string // digest => "mediaType" (most recent *storing* / "cache-miss" lookup wins, in the case of upstream/cross-repo ambiguity)
39-
data map[ociregistry.Digest][]byte // digest => data
34+
mu sync.Mutex // TODO some kind of per-object/name/digest mutex so we don't request the same object from the upstream registry concurrently (on *top* of our maps mutex)?
35+
has map[string]bool // "repo/name@digest" => true (whether a given repo has the given digest)
36+
tags map[string]ociregistry.Digest // "repo/name:tag" => digest
37+
data map[ociregistry.Digest]ociregistry.Descriptor // digest => mediaType+size(+data) (most recent *storing* / "cache-miss" lookup wins, in the case of upstream/cross-repo ambiguity)
4038
}
4139

4240
func cacheKeyDigest(repo string, digest ociregistry.Digest) string {
@@ -52,41 +50,38 @@ func (rc *registryCache) getBlob(ctx context.Context, repo string, digest ocireg
5250
rc.mu.Lock()
5351
defer rc.mu.Unlock()
5452

55-
if b, ok := rc.data[digest]; ok && rc.has[cacheKeyDigest(repo, digest)] {
56-
return ocimem.NewBytesReader(b, ociregistry.Descriptor{
57-
MediaType: rc.types[digest],
58-
Digest: digest,
59-
Size: int64(len(b)),
60-
}), nil
53+
if desc, ok := rc.data[digest]; ok && desc.Data != nil && rc.has[cacheKeyDigest(repo, digest)] {
54+
return ocimem.NewBytesReader(desc.Data, desc), nil
6155
}
6256

6357
r, err := f(ctx, repo, digest)
6458
if err != nil {
6559
return nil, err
6660
}
67-
//defer r.Close()
61+
// defer r.Close() happens later when we know we aren't making Close the caller's responsibility
6862

6963
desc := r.Descriptor()
64+
digest = desc.Digest // if this isn't a no-op, we've got a naughty registry
7065

71-
rc.has[cacheKeyDigest(repo, desc.Digest)] = true
72-
rc.types[desc.Digest] = desc.MediaType
66+
rc.has[cacheKeyDigest(repo, digest)] = true
67+
68+
if desc.Size > manifestSizeLimit {
69+
rc.data[digest] = desc
70+
return r, nil
71+
}
72+
defer r.Close()
7373

74-
b, err := io.ReadAll(r)
74+
desc.Data, err = io.ReadAll(r)
7575
if err != nil {
76-
r.Close()
7776
return nil, err
7877
}
7978
if err := r.Close(); err != nil {
8079
return nil, err
8180
}
8281

83-
if len(b) <= manifestSizeLimit {
84-
rc.data[desc.Digest] = b
85-
} else {
86-
delete(rc.data, desc.Digest)
87-
}
82+
rc.data[digest] = desc
8883

89-
return ocimem.NewBytesReader(b, desc), nil
84+
return ocimem.NewBytesReader(desc.Data, desc), nil
9085
}
9186

9287
func (rc *registryCache) GetBlob(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.BlobReader, error) {
@@ -104,43 +99,108 @@ func (rc *registryCache) GetTag(ctx context.Context, repo string, tag string) (o
10499
tagKey := cacheKeyTag(repo, tag)
105100

106101
if digest, ok := rc.tags[tagKey]; ok {
107-
if b, ok := rc.data[digest]; ok {
108-
return ocimem.NewBytesReader(b, ociregistry.Descriptor{
109-
MediaType: rc.types[digest],
110-
Digest: digest,
111-
Size: int64(len(b)),
112-
}), nil
102+
if desc, ok := rc.data[digest]; ok && desc.Data != nil {
103+
return ocimem.NewBytesReader(desc.Data, desc), nil
113104
}
114105
}
115106

116107
r, err := rc.registry.GetTag(ctx, repo, tag)
117108
if err != nil {
118109
return nil, err
119110
}
120-
//defer r.Close()
111+
// defer r.Close() happens later when we know we aren't making Close the caller's responsibility
121112

122113
desc := r.Descriptor()
123114

124115
rc.has[cacheKeyDigest(repo, desc.Digest)] = true
125116
rc.tags[tagKey] = desc.Digest
126-
rc.types[desc.Digest] = desc.MediaType
127117

128-
b, err := io.ReadAll(r)
118+
if desc.Size > manifestSizeLimit {
119+
rc.data[desc.Digest] = desc
120+
return r, nil
121+
}
122+
defer r.Close()
123+
124+
desc.Data, err = io.ReadAll(r)
129125
if err != nil {
130-
r.Close()
131126
return nil, err
132127
}
133128
if err := r.Close(); err != nil {
134129
return nil, err
135130
}
136131

137-
if len(b) <= manifestSizeLimit {
138-
rc.data[desc.Digest] = b
139-
} else {
140-
delete(rc.data, desc.Digest)
132+
rc.data[desc.Digest] = desc
133+
134+
return ocimem.NewBytesReader(desc.Data, desc), nil
135+
}
136+
137+
func (rc *registryCache) resolveBlob(ctx context.Context, repo string, digest ociregistry.Digest, f func(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.Descriptor, error)) (ociregistry.Descriptor, error) {
138+
rc.mu.Lock()
139+
defer rc.mu.Unlock()
140+
141+
if desc, ok := rc.data[digest]; ok && rc.has[cacheKeyDigest(repo, digest)] {
142+
return desc, nil
143+
}
144+
145+
desc, err := f(ctx, repo, digest)
146+
if err != nil {
147+
return desc, err
148+
}
149+
150+
digest = desc.Digest // if this isn't a no-op, we've got a naughty registry
151+
152+
rc.has[cacheKeyDigest(repo, digest)] = true
153+
154+
// carefully copy only valid Resolve* fields such that any other existing fields are kept (this matters more if we ever make our mutexes better/less aggressive 👀)
155+
if d, ok := rc.data[digest]; ok {
156+
d.MediaType = desc.MediaType
157+
d.Digest = desc.Digest
158+
d.Size = desc.Size
159+
desc = d
160+
}
161+
rc.data[digest] = desc
162+
163+
return desc, nil
164+
}
165+
166+
func (rc *registryCache) ResolveManifest(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.Descriptor, error) {
167+
return rc.resolveBlob(ctx, repo, digest, rc.registry.ResolveManifest)
168+
}
169+
170+
func (rc *registryCache) ResolveBlob(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.Descriptor, error) {
171+
return rc.resolveBlob(ctx, repo, digest, rc.registry.ResolveBlob)
172+
}
173+
174+
func (rc *registryCache) ResolveTag(ctx context.Context, repo string, tag string) (ociregistry.Descriptor, error) {
175+
rc.mu.Lock()
176+
defer rc.mu.Unlock()
177+
178+
tagKey := cacheKeyTag(repo, tag)
179+
180+
if digest, ok := rc.tags[tagKey]; ok {
181+
if desc, ok := rc.data[digest]; ok {
182+
return desc, nil
183+
}
184+
}
185+
186+
desc, err := rc.registry.ResolveTag(ctx, repo, tag)
187+
if err != nil {
188+
return desc, err
189+
}
190+
191+
rc.has[cacheKeyDigest(repo, desc.Digest)] = true
192+
rc.tags[tagKey] = desc.Digest
193+
194+
// carefully copy only valid Resolve* fields such that any other existing fields are kept (this matters more if we ever make our mutexes better/less aggressive 👀)
195+
if d, ok := rc.data[desc.Digest]; ok {
196+
d.MediaType = desc.MediaType
197+
d.Digest = desc.Digest
198+
d.Size = desc.Size
199+
desc = d
141200
}
201+
rc.data[desc.Digest] = desc
142202

143-
return ocimem.NewBytesReader(b, desc), nil
203+
return desc, nil
144204
}
145205

146206
// TODO more methods (currently only implements what's actually necessary for SynthesizeIndex)

0 commit comments

Comments
 (0)