|
9 | 9 | "context"
|
10 | 10 | "crypto/sha256"
|
11 | 11 | "fmt"
|
| 12 | + "io/fs" |
12 | 13 | "io/ioutil"
|
13 | 14 | "os"
|
14 | 15 | "path/filepath"
|
@@ -103,49 +104,27 @@ type Workdir struct {
|
103 | 104 | files map[string]fileID
|
104 | 105 | }
|
105 | 106 |
|
106 |
| -// fileID is a file identity for the purposes of detecting on-disk |
107 |
| -// modifications. |
108 |
| -type fileID struct { |
109 |
| - hash string |
110 |
| - mtime time.Time |
111 |
| -} |
112 |
| - |
113 | 107 | // NewWorkdir writes the txtar-encoded file data in txt to dir, and returns a
|
114 | 108 | // Workdir for operating on these files.
|
115 |
| -func NewWorkdir(dir string) *Workdir { |
116 |
| - return &Workdir{RelativeTo: RelativeTo(dir)} |
117 |
| -} |
118 |
| - |
119 |
| -func hashFile(data []byte) string { |
120 |
| - return fmt.Sprintf("%x", sha256.Sum256(data)) |
121 |
| -} |
122 |
| - |
123 |
| -func (w *Workdir) writeInitialFiles(files map[string][]byte) error { |
124 |
| - w.files = map[string]fileID{} |
| 109 | +func NewWorkdir(dir string, files map[string][]byte) (*Workdir, error) { |
| 110 | + w := &Workdir{RelativeTo: RelativeTo(dir)} |
125 | 111 | for name, data := range files {
|
126 | 112 | if err := writeFileData(name, data, w.RelativeTo); err != nil {
|
127 |
| - return fmt.Errorf("writing to workdir: %w", err) |
| 113 | + return nil, fmt.Errorf("writing to workdir: %w", err) |
128 | 114 | }
|
129 |
| - fp := w.AbsPath(name) |
| 115 | + } |
| 116 | + _, err := w.pollFiles() // poll files to populate the files map. |
| 117 | + return w, err |
| 118 | +} |
130 | 119 |
|
131 |
| - // We need the mtime of the file just written for the purposes of tracking |
132 |
| - // file identity. Calling Stat here could theoretically return an mtime |
133 |
| - // that is inconsistent with the file contents represented by the hash, but |
134 |
| - // since we "own" this file we assume that the mtime is correct. |
135 |
| - // |
136 |
| - // Furthermore, see the documentation for Workdir.files for why mismatches |
137 |
| - // between identifiers are considered to be benign. |
138 |
| - fi, err := os.Stat(fp) |
139 |
| - if err != nil { |
140 |
| - return fmt.Errorf("reading file info: %v", err) |
141 |
| - } |
| 120 | +// fileID identifies a file version on disk. |
| 121 | +type fileID struct { |
| 122 | + mtime time.Time |
| 123 | + hash string // empty if mtime is old enough to be reliable; otherwise a file digest |
| 124 | +} |
142 | 125 |
|
143 |
| - w.files[name] = fileID{ |
144 |
| - hash: hashFile(data), |
145 |
| - mtime: fi.ModTime(), |
146 |
| - } |
147 |
| - } |
148 |
| - return nil |
| 126 | +func hashFile(data []byte) string { |
| 127 | + return fmt.Sprintf("%x", sha256.Sum256(data)) |
149 | 128 | }
|
150 | 129 |
|
151 | 130 | // RootURI returns the root URI for this working directory of this scratch
|
@@ -335,49 +314,21 @@ func (w *Workdir) RenameFile(ctx context.Context, oldPath, newPath string) error
|
335 | 314 | // ListFiles returns a new sorted list of the relative paths of files in dir,
|
336 | 315 | // recursively.
|
337 | 316 | func (w *Workdir) ListFiles(dir string) ([]string, error) {
|
338 |
| - m, err := w.listFiles(dir) |
339 |
| - if err != nil { |
340 |
| - return nil, err |
341 |
| - } |
342 |
| - |
343 |
| - var paths []string |
344 |
| - for p := range m { |
345 |
| - paths = append(paths, p) |
346 |
| - } |
347 |
| - sort.Strings(paths) |
348 |
| - return paths, nil |
349 |
| -} |
350 |
| - |
351 |
| -// listFiles lists files in the given directory, returning a map of relative |
352 |
| -// path to contents and modification time. |
353 |
| -func (w *Workdir) listFiles(dir string) (map[string]fileID, error) { |
354 |
| - files := make(map[string]fileID) |
355 | 317 | absDir := w.AbsPath(dir)
|
| 318 | + var paths []string |
356 | 319 | if err := filepath.Walk(absDir, func(fp string, info os.FileInfo, err error) error {
|
357 | 320 | if err != nil {
|
358 | 321 | return err
|
359 | 322 | }
|
360 |
| - if info.IsDir() { |
361 |
| - return nil |
362 |
| - } |
363 |
| - path := w.RelPath(fp) |
364 |
| - |
365 |
| - data, err := ioutil.ReadFile(fp) |
366 |
| - if err != nil { |
367 |
| - return err |
368 |
| - } |
369 |
| - // The content returned by ioutil.ReadFile could be inconsistent with |
370 |
| - // info.ModTime(), due to a subsequent modification. See the documentation |
371 |
| - // for w.files for why we consider this to be benign. |
372 |
| - files[path] = fileID{ |
373 |
| - hash: hashFile(data), |
374 |
| - mtime: info.ModTime(), |
| 323 | + if info.Mode()&(fs.ModeDir|fs.ModeSymlink) == 0 { |
| 324 | + paths = append(paths, w.RelPath(fp)) |
375 | 325 | }
|
376 | 326 | return nil
|
377 | 327 | }); err != nil {
|
378 | 328 | return nil, err
|
379 | 329 | }
|
380 |
| - return files, nil |
| 330 | + sort.Strings(paths) |
| 331 | + return paths, nil |
381 | 332 | }
|
382 | 333 |
|
383 | 334 | // CheckForFileChanges walks the working directory and checks for any files
|
@@ -406,36 +357,82 @@ func (w *Workdir) pollFiles() ([]protocol.FileEvent, error) {
|
406 | 357 | w.fileMu.Lock()
|
407 | 358 | defer w.fileMu.Unlock()
|
408 | 359 |
|
409 |
| - files, err := w.listFiles(".") |
410 |
| - if err != nil { |
411 |
| - return nil, err |
412 |
| - } |
| 360 | + newFiles := make(map[string]fileID) |
413 | 361 | var evts []protocol.FileEvent
|
414 |
| - // Check which files have been added or modified. |
415 |
| - for path, id := range files { |
416 |
| - oldID, ok := w.files[path] |
417 |
| - delete(w.files, path) |
418 |
| - var typ protocol.FileChangeType |
419 |
| - switch { |
420 |
| - case !ok: |
421 |
| - typ = protocol.Created |
422 |
| - case oldID != id: |
423 |
| - typ = protocol.Changed |
424 |
| - default: |
425 |
| - continue |
| 362 | + if err := filepath.Walk(string(w.RelativeTo), func(fp string, info os.FileInfo, err error) error { |
| 363 | + if err != nil { |
| 364 | + return err |
426 | 365 | }
|
427 |
| - evts = append(evts, protocol.FileEvent{ |
428 |
| - URI: w.URI(path), |
429 |
| - Type: typ, |
430 |
| - }) |
| 366 | + // Skip directories and symbolic links (which may be links to directories). |
| 367 | + // |
| 368 | + // The latter matters for repos like Kubernetes, which use symlinks. |
| 369 | + if info.Mode()&(fs.ModeDir|fs.ModeSymlink) != 0 { |
| 370 | + return nil |
| 371 | + } |
| 372 | + |
| 373 | + // Opt: avoid reading the file if mtime is sufficiently old to be reliable. |
| 374 | + // |
| 375 | + // If mtime is recent, it may not sufficiently identify the file contents: |
| 376 | + // a subsequent write could result in the same mtime. For these cases, we |
| 377 | + // must read the file contents. |
| 378 | + id := fileID{mtime: info.ModTime()} |
| 379 | + if time.Since(info.ModTime()) < 2*time.Second { |
| 380 | + data, err := ioutil.ReadFile(fp) |
| 381 | + if err != nil { |
| 382 | + return err |
| 383 | + } |
| 384 | + id.hash = hashFile(data) |
| 385 | + } |
| 386 | + path := w.RelPath(fp) |
| 387 | + newFiles[path] = id |
| 388 | + |
| 389 | + if w.files != nil { |
| 390 | + oldID, ok := w.files[path] |
| 391 | + delete(w.files, path) |
| 392 | + switch { |
| 393 | + case !ok: |
| 394 | + evts = append(evts, protocol.FileEvent{ |
| 395 | + URI: w.URI(path), |
| 396 | + Type: protocol.Created, |
| 397 | + }) |
| 398 | + case oldID != id: |
| 399 | + changed := true |
| 400 | + |
| 401 | + // Check whether oldID and id do not match because oldID was polled at |
| 402 | + // a recent enough time to require hashing. |
| 403 | + // |
| 404 | + // In this case, read the content to check whether the file actually |
| 405 | + // changed. |
| 406 | + if oldID.mtime.Equal(id.mtime) && oldID.hash != "" && id.hash == "" { |
| 407 | + data, err := ioutil.ReadFile(fp) |
| 408 | + if err != nil { |
| 409 | + return err |
| 410 | + } |
| 411 | + if hashFile(data) == oldID.hash { |
| 412 | + changed = false |
| 413 | + } |
| 414 | + } |
| 415 | + if changed { |
| 416 | + evts = append(evts, protocol.FileEvent{ |
| 417 | + URI: w.URI(path), |
| 418 | + Type: protocol.Changed, |
| 419 | + }) |
| 420 | + } |
| 421 | + } |
| 422 | + } |
| 423 | + |
| 424 | + return nil |
| 425 | + }); err != nil { |
| 426 | + return nil, err |
431 | 427 | }
|
| 428 | + |
432 | 429 | // Any remaining files must have been deleted.
|
433 | 430 | for path := range w.files {
|
434 | 431 | evts = append(evts, protocol.FileEvent{
|
435 | 432 | URI: w.URI(path),
|
436 | 433 | Type: protocol.Deleted,
|
437 | 434 | })
|
438 | 435 | }
|
439 |
| - w.files = files |
| 436 | + w.files = newFiles |
440 | 437 | return evts, nil
|
441 | 438 | }
|
0 commit comments