Skip to content

Commit f85dc05

Browse files
committed
archive/tar: require opt-in to PAX or GNU format for time features
Nearly every Header obtained from FileInfoHeader via the FS has timestamps with sub-second resolution and the AccessTime and ChangeTime fields populated. This forces the PAX format to almost always be used, which has the following problems:
* PAX is still not as widely supported as USTAR
* The PAX headers will occupy at minimum 1KiB for every entry

The old behavior of tar Writer had no support for sub-second resolution nor any support for AccessTime or ChangeTime, so it had neither problem. Instead the Writer would just truncate sub-second information and ignore the AccessTime and ChangeTime fields.

In this CL, we preserve the behavior such that the *default* behavior would output a USTAR header for most cases by truncating sub-second time measurements and ignoring AccessTime and ChangeTime. To use either of the features, users will need to explicitly specify that the format is PAX or GNU.

The exact policy chosen is this:
* USTAR and GNU may still be chosen even if sub-second measurements are present; they simply truncate the timestamp to the nearest second. As before, PAX uses sub-second resolutions.
* If the Format is unspecified, then WriteHeader ignores AccessTime and ChangeTime when using the USTAR format. This ensures that USTAR may still be chosen for a vast majority of file entries obtained through FileInfoHeader.

Updates #11171
Updates #17876

Change-Id: Icc5274d4245922924498fd79b8d3ae94d5717271
Reviewed-on: https://go-review.googlesource.com/59230
Run-TryBot: Joe Tsai <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 0592a1a commit f85dc05

File tree

3 files changed

+105
-14
lines changed

3 files changed

+105
-14
lines changed

src/archive/tar/common.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ type Header struct {
151151
Uname string // User name of owner
152152
Gname string // Group name of owner
153153

154+
// The PAX format encodes the timestamps with sub-second resolution,
155+
// while the other formats (USTAR and GNU) truncate to the nearest second.
156+
// If the Format is unspecified, then Writer.WriteHeader ignores
157+
// AccessTime and ChangeTime when using the USTAR format.
154158
ModTime time.Time // Modification time
155159
AccessTime time.Time // Access time (requires either PAX or GNU support)
156160
ChangeTime time.Time // Change time (requires either PAX or GNU support)
@@ -203,9 +207,9 @@ type Header struct {
203207
// Since the Reader liberally reads some non-compliant files,
204208
// it is possible for this to be FormatUnknown.
205209
//
206-
// When Writer.WriteHeader is called, if this is FormatUnknown,
207-
// then it tries to encode the header in the order of USTAR, PAX, then GNU.
208-
// Otherwise, it tries to use the specified format.
210+
// If the format is unspecified when Writer.WriteHeader is called,
211+
// then it uses the first format (in the order of USTAR, PAX, GNU)
212+
// capable of encoding this Header (see Format).
209213
Format Format
210214
}
211215

@@ -338,6 +342,7 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
338342
paxHdrs = make(map[string]string)
339343

340344
var whyNoUSTAR, whyNoPAX, whyNoGNU string
345+
var preferPAX bool // Prefer PAX over USTAR
341346
verifyString := func(s string, size int, name, paxKey string) {
342347
// NUL-terminator is optional for path and linkpath.
343348
// Technically, it is required for uname and gname,
@@ -388,15 +393,20 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
388393
if ts.IsZero() {
389394
return // Always okay
390395
}
391-
needsNano := ts.Nanosecond() != 0
392-
hasFieldUSTAR := paxKey == paxMtime
393-
if !fitsInBase256(size, ts.Unix()) || needsNano {
396+
if !fitsInBase256(size, ts.Unix()) {
394397
whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
395398
format.mustNotBe(FormatGNU)
396399
}
397-
if !fitsInOctal(size, ts.Unix()) || needsNano || !hasFieldUSTAR {
400+
isMtime := paxKey == paxMtime
401+
fitsOctal := fitsInOctal(size, ts.Unix())
402+
noACTime := !isMtime && h.Format != FormatUnknown
403+
if (isMtime && !fitsOctal) || noACTime {
398404
whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
399405
format.mustNotBe(FormatUSTAR)
406+
}
407+
needsNano := ts.Nanosecond() != 0
408+
if !isMtime || !fitsOctal || needsNano {
409+
preferPAX = true // USTAR may truncate sub-second measurements
400410
if paxKey == paxNone {
401411
whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
402412
format.mustNotBe(FormatPAX)
@@ -493,7 +503,7 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
493503

494504
// Check desired format.
495505
if wantFormat := h.Format; wantFormat != FormatUnknown {
496-
if wantFormat.has(FormatPAX) {
506+
if wantFormat.has(FormatPAX) && !preferPAX {
497507
wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
498508
}
499509
format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted

src/archive/tar/format.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,41 @@ package tar
66

77
import "strings"
88

9+
// Format represents the tar archive format.
10+
//
11+
// The original tar format was introduced in Unix V7.
12+
// Since then, there have been multiple competing formats attempting to
13+
// standardize or extend the V7 format to overcome its limitations.
14+
// The most common formats are the USTAR, PAX, and GNU formats,
15+
// each with their own advantages and limitations.
16+
//
17+
// The following table captures the capabilities of each format:
18+
//
19+
// | USTAR | PAX | GNU
20+
// ------------------+--------+-----------+----------
21+
// Name | 256B | unlimited | unlimited
22+
// Linkname | 100B | unlimited | unlimited
23+
// Size | uint33 | unlimited | uint89
24+
// Mode | uint21 | uint21 | uint57
25+
// Uid/Gid | uint21 | unlimited | uint57
26+
// Uname/Gname | 32B | unlimited | 32B
27+
// ModTime | uint33 | unlimited | int89
28+
// AccessTime | n/a | unlimited | int89
29+
// ChangeTime | n/a | unlimited | int89
30+
// Devmajor/Devminor | uint21 | uint21 | uint57
31+
// ------------------+--------+-----------+----------
32+
// string encoding | ASCII | UTF-8 | binary
33+
// sub-second times | no | yes | no
34+
// sparse files | no | yes | yes
35+
//
36+
// The table's upper portion shows the Header fields, where each format reports
37+
// the maximum number of bytes allowed for each string field and
38+
// the integer type used to store each numeric field
39+
// (where timestamps are stored as the number of seconds since the Unix epoch).
40+
//
41+
// The table's lower portion shows specialized features of each format,
42+
// such as supported string encodings, support for sub-second timestamps,
43+
// or support for sparse files.
944
type Format int
1045

1146
// Constants to identify various tar formats.

src/archive/tar/tar_test.go

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -583,30 +583,76 @@ func TestHeaderAllowedFormats(t *testing.T) {
583583
header: &Header{ModTime: time.Unix(-1, 0)},
584584
paxHdrs: map[string]string{paxMtime: "-1"},
585585
formats: FormatPAX | FormatGNU,
586+
}, {
587+
header: &Header{ModTime: time.Unix(1, 500)},
588+
paxHdrs: map[string]string{paxMtime: "1.0000005"},
589+
formats: FormatUSTAR | FormatPAX | FormatGNU,
590+
}, {
591+
header: &Header{ModTime: time.Unix(1, 0)},
592+
formats: FormatUSTAR | FormatPAX | FormatGNU,
593+
}, {
594+
header: &Header{ModTime: time.Unix(1, 0), Format: FormatPAX},
595+
formats: FormatUSTAR | FormatPAX,
596+
}, {
597+
header: &Header{ModTime: time.Unix(1, 500), Format: FormatUSTAR},
598+
paxHdrs: map[string]string{paxMtime: "1.0000005"},
599+
formats: FormatUSTAR,
600+
}, {
601+
header: &Header{ModTime: time.Unix(1, 500), Format: FormatPAX},
602+
paxHdrs: map[string]string{paxMtime: "1.0000005"},
603+
formats: FormatPAX,
604+
}, {
605+
header: &Header{ModTime: time.Unix(1, 500), Format: FormatGNU},
606+
paxHdrs: map[string]string{paxMtime: "1.0000005"},
607+
formats: FormatGNU,
586608
}, {
587609
header: &Header{ModTime: time.Unix(-1, 500)},
588610
paxHdrs: map[string]string{paxMtime: "-0.9999995"},
589-
formats: FormatPAX,
611+
formats: FormatPAX | FormatGNU,
590612
}, {
591613
header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU},
592614
paxHdrs: map[string]string{paxMtime: "-0.9999995"},
593-
formats: FormatUnknown,
615+
formats: FormatGNU,
594616
}, {
595617
header: &Header{AccessTime: time.Unix(0, 0)},
596618
paxHdrs: map[string]string{paxAtime: "0"},
597-
formats: FormatPAX | FormatGNU,
619+
formats: FormatUSTAR | FormatPAX | FormatGNU,
620+
}, {
621+
header: &Header{AccessTime: time.Unix(0, 0), Format: FormatUSTAR},
622+
paxHdrs: map[string]string{paxAtime: "0"},
623+
formats: FormatUnknown,
624+
}, {
625+
header: &Header{AccessTime: time.Unix(0, 0), Format: FormatPAX},
626+
paxHdrs: map[string]string{paxAtime: "0"},
627+
formats: FormatPAX,
628+
}, {
629+
header: &Header{AccessTime: time.Unix(0, 0), Format: FormatGNU},
630+
paxHdrs: map[string]string{paxAtime: "0"},
631+
formats: FormatGNU,
598632
}, {
599633
header: &Header{AccessTime: time.Unix(-123, 0)},
600634
paxHdrs: map[string]string{paxAtime: "-123"},
601-
formats: FormatPAX | FormatGNU,
635+
formats: FormatUSTAR | FormatPAX | FormatGNU,
636+
}, {
637+
header: &Header{AccessTime: time.Unix(-123, 0), Format: FormatPAX},
638+
paxHdrs: map[string]string{paxAtime: "-123"},
639+
formats: FormatPAX,
602640
}, {
603641
header: &Header{ChangeTime: time.Unix(123, 456)},
604642
paxHdrs: map[string]string{paxCtime: "123.000000456"},
605-
formats: FormatPAX,
643+
formats: FormatUSTAR | FormatPAX | FormatGNU,
606644
}, {
607-
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU},
645+
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatUSTAR},
608646
paxHdrs: map[string]string{paxCtime: "123.000000456"},
609647
formats: FormatUnknown,
648+
}, {
649+
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU},
650+
paxHdrs: map[string]string{paxCtime: "123.000000456"},
651+
formats: FormatGNU,
652+
}, {
653+
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatPAX},
654+
paxHdrs: map[string]string{paxCtime: "123.000000456"},
655+
formats: FormatPAX,
610656
}, {
611657
header: &Header{Name: "sparse.db", Size: 1000, SparseHoles: []SparseEntry{{0, 500}}},
612658
formats: FormatPAX,

0 commit comments

Comments
 (0)