Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 8f52c50

Browse files
filipnavaramcuadros
authored andcommitted
plumbing: format/packfile, performance optimizations for reading large commit histories (#963)
Signed-off-by: Filip Navara <[email protected]>
1 parent 3dbfb89 commit 8f52c50

File tree

7 files changed

+126
-61
lines changed

7 files changed

+126
-61
lines changed

plumbing/format/packfile/fsobject.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ func NewFSObject(
4848
// Reader implements the plumbing.EncodedObject interface.
4949
func (o *FSObject) Reader() (io.ReadCloser, error) {
5050
obj, ok := o.cache.Get(o.hash)
51-
if ok {
51+
if ok && obj != o {
5252
reader, err := obj.Reader()
5353
if err != nil {
5454
return nil, err

plumbing/format/packfile/packfile.go

+28-23
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ var (
2121
ErrZLib = NewError("zlib reading error")
2222
)
2323

24+
// When reading small objects from packfile it is beneficial to do so at
25+
// once to exploit the buffered I/O. In many cases the objects are so small
26+
// that they were already loaded to memory when the object header was
27+
// loaded from the packfile. Wrapping in FSObject would cause this buffered
28+
// data to be thrown away and then re-read later, with the additional
29+
// seeking causing reloads from disk. Objects smaller than this threshold
30+
// are now always read into memory and stored in cache instead of being
31+
// wrapped in FSObject.
32+
const smallObjectThreshold = 16 * 1024
33+
2434
// Packfile allows retrieving information from inside a packfile.
2535
type Packfile struct {
2636
idxfile.Index
@@ -79,15 +89,7 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
7989
}
8090
}
8191

82-
if _, err := p.s.SeekFromStart(o); err != nil {
83-
if err == io.EOF || isInvalid(err) {
84-
return nil, plumbing.ErrObjectNotFound
85-
}
86-
87-
return nil, err
88-
}
89-
90-
return p.nextObject()
92+
return p.objectAtOffset(o)
9193
}
9294

9395
// GetSizeByOffset retrieves the size of the encoded object from the
@@ -108,6 +110,12 @@ func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) {
108110
return h.Length, nil
109111
}
110112

113+
func (p *Packfile) objectHeaderAtOffset(offset int64) (*ObjectHeader, error) {
114+
h, err := p.s.SeekObjectHeader(offset)
115+
p.s.pendingObject = nil
116+
return h, err
117+
}
118+
111119
func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
112120
h, err := p.s.NextObjectHeader()
113121
p.s.pendingObject = nil
@@ -154,11 +162,7 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err
154162
if baseType, ok := p.offsetToType[offset]; ok {
155163
typ = baseType
156164
} else {
157-
if _, err = p.s.SeekFromStart(offset); err != nil {
158-
return
159-
}
160-
161-
h, err = p.nextObjectHeader()
165+
h, err = p.objectHeaderAtOffset(offset)
162166
if err != nil {
163167
return
164168
}
@@ -175,8 +179,8 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err
175179
return
176180
}
177181

178-
func (p *Packfile) nextObject() (plumbing.EncodedObject, error) {
179-
h, err := p.nextObjectHeader()
182+
func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) {
183+
h, err := p.objectHeaderAtOffset(offset)
180184
if err != nil {
181185
if err == io.EOF || isInvalid(err) {
182186
return nil, plumbing.ErrObjectNotFound
@@ -190,6 +194,13 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) {
190194
return p.getNextObject(h)
191195
}
192196

197+
// If the object is not a delta and it's small enough then read it
198+
// completely into memory now since it is already read from disk
199+
// into buffer anyway.
200+
if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
201+
return p.getNextObject(h)
202+
}
203+
193204
hash, err := p.FindHash(h.Offset)
194205
if err != nil {
195206
return nil, err
@@ -233,11 +244,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
233244
}
234245
}
235246

236-
if _, err := p.s.SeekFromStart(offset); err != nil {
237-
return nil, err
238-
}
239-
240-
h, err := p.nextObjectHeader()
247+
h, err := p.objectHeaderAtOffset(offset)
241248
if err != nil {
242249
return nil, err
243250
}
@@ -329,8 +336,6 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
329336
if err != nil {
330337
return err
331338
}
332-
333-
p.cachePut(base)
334339
}
335340

336341
obj.SetType(base.Type())

plumbing/format/packfile/parser.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -398,11 +398,7 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) {
398398
return data, nil
399399
}
400400

401-
if _, err := p.scanner.SeekFromStart(o.Offset); err != nil {
402-
return nil, err
403-
}
404-
405-
if _, err := p.scanner.NextObjectHeader(); err != nil {
401+
if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
406402
return nil, err
407403
}
408404

plumbing/format/packfile/scanner.go

+42-4
Original file line numberDiff line numberDiff line change
@@ -138,14 +138,52 @@ func (s *Scanner) readCount() (uint32, error) {
138138
return binary.ReadUint32(s.r)
139139
}
140140

141+
// SeekObjectHeader seeks to specified offset and returns the ObjectHeader
142+
// for the next object in the reader
143+
func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
144+
// if seeking we assume that you are not interested in the header
145+
if s.version == 0 {
146+
s.version = VersionSupported
147+
}
148+
149+
if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
150+
return nil, err
151+
}
152+
153+
h, err := s.nextObjectHeader()
154+
if err != nil {
155+
return nil, err
156+
}
157+
158+
h.Offset = offset
159+
return h, nil
160+
}
161+
141162
// NextObjectHeader returns the ObjectHeader for the next object in the reader
142163
func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
143-
defer s.Flush()
144-
145164
if err := s.doPending(); err != nil {
146165
return nil, err
147166
}
148167

168+
offset, err := s.r.Seek(0, io.SeekCurrent)
169+
if err != nil {
170+
return nil, err
171+
}
172+
173+
h, err := s.nextObjectHeader()
174+
if err != nil {
175+
return nil, err
176+
}
177+
178+
h.Offset = offset
179+
return h, nil
180+
}
181+
182+
// nextObjectHeader returns the ObjectHeader for the next object in the reader
183+
// without the Offset field
184+
func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
185+
defer s.Flush()
186+
149187
s.crc.Reset()
150188

151189
h := &ObjectHeader{}
@@ -308,7 +346,7 @@ var byteSlicePool = sync.Pool{
308346
// SeekFromStart sets a new offset from start, returns the old position before
309347
// the change.
310348
func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
311-
// if seeking we assume that you are not interested on the header
349+
// if seeking we assume that you are not interested in the header
312350
if s.version == 0 {
313351
s.version = VersionSupported
314352
}
@@ -385,7 +423,7 @@ type bufferedSeeker struct {
385423
}
386424

387425
func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
388-
if whence == io.SeekCurrent {
426+
if whence == io.SeekCurrent && offset == 0 {
389427
current, err := r.r.Seek(offset, whence)
390428
if err != nil {
391429
return current, err

plumbing/format/packfile/scanner_test.go

+17
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,23 @@ func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObjectNonSeekable(c *C) {
118118
c.Assert(n, Equals, f.PackfileHash)
119119
}
120120

121+
func (s *ScannerSuite) TestSeekObjectHeader(c *C) {
122+
r := fixtures.Basic().One().Packfile()
123+
p := NewScanner(r)
124+
125+
h, err := p.SeekObjectHeader(expectedHeadersOFS[4].Offset)
126+
c.Assert(err, IsNil)
127+
c.Assert(h, DeepEquals, &expectedHeadersOFS[4])
128+
}
129+
130+
func (s *ScannerSuite) TestSeekObjectHeaderNonSeekable(c *C) {
131+
r := io.MultiReader(fixtures.Basic().One().Packfile())
132+
p := NewScanner(r)
133+
134+
_, err := p.SeekObjectHeader(expectedHeadersOFS[4].Offset)
135+
c.Assert(err, Equals, ErrSeekNotSupported)
136+
}
137+
121138
var expectedHeadersOFS = []ObjectHeader{
122139
{Type: plumbing.CommitObject, Offset: 12, Length: 254},
123140
{Type: plumbing.OFSDeltaObject, Offset: 186, Length: 93, OffsetReference: 12},

storage/filesystem/object.go

+36-23
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,25 @@ import (
2020
type ObjectStorage struct {
2121
options Options
2222

23-
// deltaBaseCache is an object cache uses to cache delta's bases when
24-
deltaBaseCache cache.Object
23+
// objectCache is an object cache uses to cache delta's bases and also recently
24+
// loaded loose objects
25+
objectCache cache.Object
2526

2627
dir *dotgit.DotGit
2728
index map[plumbing.Hash]idxfile.Index
2829
}
2930

3031
// NewObjectStorage creates a new ObjectStorage with the given .git directory and cache.
31-
func NewObjectStorage(dir *dotgit.DotGit, cache cache.Object) *ObjectStorage {
32-
return NewObjectStorageWithOptions(dir, cache, Options{})
32+
func NewObjectStorage(dir *dotgit.DotGit, objectCache cache.Object) *ObjectStorage {
33+
return NewObjectStorageWithOptions(dir, objectCache, Options{})
3334
}
3435

3536
// NewObjectStorageWithOptions creates a new ObjectStorage with the given .git directory, cache and extra options
36-
func NewObjectStorageWithOptions(dir *dotgit.DotGit, cache cache.Object, ops Options) *ObjectStorage {
37+
func NewObjectStorageWithOptions(dir *dotgit.DotGit, objectCache cache.Object, ops Options) *ObjectStorage {
3738
return &ObjectStorage{
38-
options: ops,
39-
deltaBaseCache: cache,
40-
dir: dir,
39+
options: ops,
40+
objectCache: objectCache,
41+
dir: dir,
4142
}
4243
}
4344

@@ -206,7 +207,7 @@ func (s *ObjectStorage) encodedObjectSizeFromPackfile(h plumbing.Hash) (
206207
idx := s.index[pack]
207208
hash, err := idx.FindHash(offset)
208209
if err == nil {
209-
obj, ok := s.deltaBaseCache.Get(hash)
210+
obj, ok := s.objectCache.Get(hash)
210211
if ok {
211212
return obj.Size(), nil
212213
}
@@ -215,8 +216,8 @@ func (s *ObjectStorage) encodedObjectSizeFromPackfile(h plumbing.Hash) (
215216
}
216217

217218
var p *packfile.Packfile
218-
if s.deltaBaseCache != nil {
219-
p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.deltaBaseCache)
219+
if s.objectCache != nil {
220+
p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.objectCache)
220221
} else {
221222
p = packfile.NewPackfile(idx, s.dir.Fs(), f)
222223
}
@@ -241,9 +242,19 @@ func (s *ObjectStorage) EncodedObjectSize(h plumbing.Hash) (
241242
// EncodedObject returns the object with the given hash, by searching for it in
242243
// the packfile and the git object directories.
243244
func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) {
244-
obj, err := s.getFromUnpacked(h)
245-
if err == plumbing.ErrObjectNotFound {
245+
var obj plumbing.EncodedObject
246+
var err error
247+
248+
if s.index != nil {
246249
obj, err = s.getFromPackfile(h, false)
250+
if err == plumbing.ErrObjectNotFound {
251+
obj, err = s.getFromUnpacked(h)
252+
}
253+
} else {
254+
obj, err = s.getFromUnpacked(h)
255+
if err == plumbing.ErrObjectNotFound {
256+
obj, err = s.getFromPackfile(h, false)
257+
}
247258
}
248259

249260
// If the error is still object not found, check if it's a shared object
@@ -254,7 +265,7 @@ func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (p
254265
// Create a new object storage with the DotGit(s) and check for the
255266
// required hash object. Skip when not found.
256267
for _, dg := range dotgits {
257-
o := NewObjectStorage(dg, s.deltaBaseCache)
268+
o := NewObjectStorage(dg, s.objectCache)
258269
enobj, enerr := o.EncodedObject(t, h)
259270
if enerr != nil {
260271
continue
@@ -296,6 +307,10 @@ func (s *ObjectStorage) DeltaObject(t plumbing.ObjectType,
296307
}
297308

298309
func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedObject, err error) {
310+
if cacheObj, found := s.objectCache.Get(h); found {
311+
return cacheObj, nil
312+
}
313+
299314
f, err := s.dir.Object(h)
300315
if err != nil {
301316
if os.IsNotExist(err) {
@@ -327,6 +342,8 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb
327342
return nil, err
328343
}
329344

345+
s.objectCache.Put(obj)
346+
330347
_, err = io.Copy(w, r)
331348
return obj, err
332349
}
@@ -369,7 +386,7 @@ func (s *ObjectStorage) decodeObjectAt(
369386
) (plumbing.EncodedObject, error) {
370387
hash, err := idx.FindHash(offset)
371388
if err == nil {
372-
obj, ok := s.deltaBaseCache.Get(hash)
389+
obj, ok := s.objectCache.Get(hash)
373390
if ok {
374391
return obj, nil
375392
}
@@ -380,8 +397,8 @@ func (s *ObjectStorage) decodeObjectAt(
380397
}
381398

382399
var p *packfile.Packfile
383-
if s.deltaBaseCache != nil {
384-
p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.deltaBaseCache)
400+
if s.objectCache != nil {
401+
p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.objectCache)
385402
} else {
386403
p = packfile.NewPackfile(idx, s.dir.Fs(), f)
387404
}
@@ -400,11 +417,7 @@ func (s *ObjectStorage) decodeDeltaObjectAt(
400417
}
401418

402419
p := packfile.NewScanner(f)
403-
if _, err := p.SeekFromStart(offset); err != nil {
404-
return nil, err
405-
}
406-
407-
header, err := p.NextObjectHeader()
420+
header, err := p.SeekObjectHeader(offset)
408421
if err != nil {
409422
return nil, err
410423
}
@@ -495,7 +508,7 @@ func (s *ObjectStorage) buildPackfileIters(
495508
}
496509
return newPackfileIter(
497510
s.dir.Fs(), pack, t, seen, s.index[h],
498-
s.deltaBaseCache, s.options.KeepDescriptors,
511+
s.objectCache, s.options.KeepDescriptors,
499512
)
500513
},
501514
}, nil

storage/filesystem/storage.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,7 @@ func NewStorageWithOptions(fs billy.Filesystem, cache cache.Object, ops Options)
5151
fs: fs,
5252
dir: dir,
5353

54-
ObjectStorage: ObjectStorage{
55-
options: ops,
56-
deltaBaseCache: cache,
57-
dir: dir,
58-
},
54+
ObjectStorage: *NewObjectStorageWithOptions(dir, cache, ops),
5955
ReferenceStorage: ReferenceStorage{dir: dir},
6056
IndexStorage: IndexStorage{dir: dir},
6157
ShallowStorage: ShallowStorage{dir: dir},

0 commit comments

Comments
 (0)