Skip to content

Commit 30bfafc

Browse files
dsnet authored and bradfitz committed
archive/tar: centralize all information about tar header format
The Reader and Writer have hard-coded constants regarding the offsets and lengths of certain fields in the tar format sprinkled all over. This makes it harder to verify that the offsets are correct since a reviewer would need to search for them throughout the code. Instead, all information about the layout of header fields should be centralized in one single file. This has the advantage of being both centralized, and also acting as a form of documentation about the header struct format.

This method was chosen over using "encoding/binary" since that method would cause an allocation of a header struct every time binary.Read was called. This method causes zero allocations and its logic is no longer than if structs were declared.

Updates #12594

Change-Id: Ic7a0565d2a2cd95d955547ace3b6dea2b57fab34
Reviewed-on: https://go-review.googlesource.com/14669
Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent 88d3db0 commit 30bfafc

File tree

5 files changed

+314
-209
lines changed

5 files changed

+314
-209
lines changed

src/archive/tar/common.go

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,8 @@ import (
2121
"time"
2222
)
2323

24+
// Header type flags.
2425
const (
25-
blockSize = 512
26-
27-
// Types
2826
TypeReg = '0' // regular file
2927
TypeRegA = '\x00' // regular file
3028
TypeLink = '1' // hard link
@@ -61,12 +59,6 @@ type Header struct {
6159
Xattrs map[string]string
6260
}
6361

64-
// File name constants from the tar spec.
65-
const (
66-
fileNameSize = 100 // Maximum number of bytes in a standard tar name.
67-
fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
68-
)
69-
7062
// FileInfo returns an os.FileInfo for the Header.
7163
func (h *Header) FileInfo() os.FileInfo {
7264
return headerFileInfo{h}
@@ -279,33 +271,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
279271
return h, nil
280272
}
281273

282-
var zeroBlock = make([]byte, blockSize)
283-
284-
// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
285-
// We compute and return both.
286-
func checksum(header []byte) (unsigned int64, signed int64) {
287-
for i := 0; i < len(header); i++ {
288-
if i == 148 {
289-
// The chksum field (header[148:156]) is special: it should be treated as space bytes.
290-
unsigned += ' ' * 8
291-
signed += ' ' * 8
292-
i += 7
293-
continue
294-
}
295-
unsigned += int64(header[i])
296-
signed += int64(int8(header[i]))
297-
}
298-
return
299-
}
300-
301-
type slicer []byte
302-
303-
func (sp *slicer) next(n int) (b []byte) {
304-
s := *sp
305-
b, *sp = s[0:n], s[n:]
306-
return
307-
}
308-
309274
func isASCII(s string) bool {
310275
for _, c := range s {
311276
if c >= 0x80 {

src/archive/tar/format.go

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// Copyright 2016 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package tar
6+
7+
// Constants to identify various tar formats.
8+
const (
9+
// The format is unknown.
// With iota == 0 the expression (1 << 0) / 2 truncates to 0. Each constant
// below implicitly repeats the expression with the next iota, yielding
// 1, 2, 4, 8 and 16, so every known format gets its own bit.
10+
formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc...
11+
12+
// The format of the original Unix V7 tar tool prior to standardization.
13+
formatV7
14+
15+
// The old and new GNU formats, which are incompatible with USTAR.
16+
// This does cover the old GNU sparse extension.
17+
// This does not cover the GNU sparse extensions using PAX headers,
18+
// versions 0.0, 0.1, and 1.0; these fall under the PAX format.
19+
formatGNU
20+
21+
// Schily's tar format, which is incompatible with USTAR.
22+
// This does not cover STAR extensions to the PAX format; these fall under
23+
// the PAX format.
24+
formatSTAR
25+
26+
// USTAR is the former standardization of tar defined in POSIX.1-1988.
27+
// This is incompatible with the GNU and STAR formats.
28+
formatUSTAR
29+
30+
// PAX is the latest standardization of tar defined in POSIX.1-2001.
31+
// This is an extension of USTAR and is "backwards compatible" with it.
32+
//
33+
// Some newer formats add their own extensions to PAX, such as GNU sparse
34+
// files and SCHILY extended attributes. Since they are backwards compatible
35+
// with PAX, they will be labelled as "PAX".
36+
formatPAX
37+
)
38+
39+
// Magics used to identify various formats.
40+
const (
// Each magic is 6 bytes and each version is 2 bytes, matching the widths
// returned by the Magic and Version accessors in this file; trailerSTAR is
// 4 bytes, matching headerSTAR.Trailer. The GNU magic ends in a space where
// the USTAR magic ends in a NUL.
41+
magicGNU, versionGNU = "ustar ", " \x00"
42+
magicUSTAR, versionUSTAR = "ustar\x00", "00"
43+
trailerSTAR = "tar\x00"
44+
)
45+
46+
// Size constants from various tar specifications.
47+
const (
// blockSize is the array length of block and of every header view type.
// NOTE(review): the header accessors in this file spell out 100 and 155
// literally instead of referencing nameSize and prefixSize; keep the
// literals and these constants in sync.
48+
blockSize = 512 // Size of each block in a tar stream
49+
nameSize = 100 // Max length of the name field in USTAR format
50+
prefixSize = 155 // Max length of the prefix field in USTAR format
51+
)
52+
53+
// zeroBlock is declared without an initializer, so it starts out as
// blockSize zero bytes.
var zeroBlock block
54+
55+
// block is a single blockSize-byte unit of a tar stream, held as raw bytes.
type block [blockSize]byte
56+
57+
// Convert block to any number of formats.
// Every conversion below reinterprets the same memory. The pointer
// conversions copy nothing and Sparse returns a slice over the whole block,
// so a write made through one view is visible through all the others.
58+
func (b *block) V7() *headerV7 { return (*headerV7)(b) }
59+
func (b *block) GNU() *headerGNU { return (*headerGNU)(b) }
60+
func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) }
61+
func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
62+
func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
63+
64+
// GetFormat checks that the block is a valid tar header based on the checksum.
65+
// It then attempts to guess the specific format based on magic values.
66+
// If the checksum fails, then formatUnknown is returned.
// formatPAX is never returned. A header that passes the checksum but matches
// none of the magic values is reported as formatV7.
67+
func (b *block) GetFormat() (format int) {
68+
// Verify checksum.
69+
var p parser
70+
value := p.parseOctal(b.V7().Chksum())
71+
chksum1, chksum2 := b.ComputeChecksum()
// chksum1 is the unsigned sum and chksum2 the signed sum; the stored value
// is accepted if it matches either one.
// NOTE(review): p.err is presumably set by parseOctal on malformed input;
// parser is defined outside this file, confirm there.
72+
if p.err != nil || (value != chksum1 && value != chksum2) {
73+
return formatUnknown
74+
}
75+
76+
// Guess the magic values.
// All three fields are read up front, whichever case ends up matching.
77+
magic := string(b.USTAR().Magic())
78+
version := string(b.USTAR().Version())
79+
trailer := string(b.STAR().Trailer())
80+
switch {
// SetFormat writes the USTAR magic for STAR headers too, so the trailer
// check has to come before the plain USTAR case.
81+
case magic == magicUSTAR && trailer == trailerSTAR:
82+
return formatSTAR
// The version field is not compared for USTAR, only the magic.
83+
case magic == magicUSTAR:
84+
return formatUSTAR
// GNU is recognized only when both the magic and the version match.
85+
case magic == magicGNU && version == versionGNU:
86+
return formatGNU
87+
default:
88+
return formatV7
89+
}
90+
}
91+
92+
// SetFormat writes the magic values necessary for specified format
93+
// and then updates the checksum accordingly.
// It panics with "invalid format" for any value not listed in the switch
// below, which includes formatUnknown.
94+
func (b *block) SetFormat(format int) {
95+
// Set the magic values.
96+
switch format {
97+
case formatV7:
98+
// Do nothing.
99+
case formatGNU:
100+
copy(b.GNU().Magic(), magicGNU)
101+
copy(b.GNU().Version(), versionGNU)
102+
case formatSTAR:
103+
copy(b.STAR().Magic(), magicUSTAR)
104+
copy(b.STAR().Version(), versionUSTAR)
105+
copy(b.STAR().Trailer(), trailerSTAR)
// PAX headers get exactly the same magic and version bytes as USTAR.
106+
case formatUSTAR, formatPAX:
107+
copy(b.USTAR().Magic(), magicUSTAR)
108+
copy(b.USTAR().Version(), versionUSTAR)
109+
default:
110+
panic("invalid format")
111+
}
112+
113+
// Update checksum.
114+
// This field is special in that it is terminated by a NUL then space.
// ComputeChecksum counts the checksum field as spaces, so the magic bytes
// written above are included in the sum while any stale checksum is not.
115+
var f formatter
116+
field := b.V7().Chksum()
// Only the unsigned sum is stored. The lower bound 256 is the 8 checksum
// bytes counted as spaces (8*32); the upper bound adds 504 bytes of 0xff
// (256 + 504*255 = 128776).
117+
chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
// 262143 is the largest value that fits in six octal digits.
// NOTE(review): formatter is defined outside this file; presumably
// formatOctal writes those digits plus the terminating NUL into field[:7].
118+
f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
119+
field[7] = ' '
120+
}
121+
122+
// ComputeChecksum computes the checksum for the header block.
123+
// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
124+
// signed byte values.
125+
// We compute and return both.
// The block is only read: the substitution below changes the loop's copy of
// each byte, not the block itself.
126+
func (b *block) ComputeChecksum() (unsigned, signed int64) {
127+
for i, c := range b {
// Bytes 148..155 are the same range that headerV7.Chksum returns.
128+
if 148 <= i && i < 156 {
129+
c = ' ' // Treat the checksum field itself as all spaces.
130+
}
// The two sums differ only for bytes >= 0x80, which the int8 conversion
// turns into negative values.
131+
unsigned += int64(uint8(c))
132+
signed += int64(int8(c))
133+
}
134+
return unsigned, signed
135+
}
136+
137+
// headerV7 views a block as a V7 tar header. Each accessor is written as
// h[offset:][:length] and returns a slice that aliases the field's bytes
// inside the header, so callers can read or overwrite the field in place.
// The fields are contiguous and together cover bytes 0 through 256.
type headerV7 [blockSize]byte
138+
139+
func (h *headerV7) Name() []byte { return h[000:][:100] }
140+
func (h *headerV7) Mode() []byte { return h[100:][:8] }
141+
func (h *headerV7) UID() []byte { return h[108:][:8] }
142+
func (h *headerV7) GID() []byte { return h[116:][:8] }
143+
func (h *headerV7) Size() []byte { return h[124:][:12] }
144+
func (h *headerV7) ModTime() []byte { return h[136:][:12] }
145+
func (h *headerV7) Chksum() []byte { return h[148:][:8] }
146+
func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
147+
func (h *headerV7) LinkName() []byte { return h[157:][:100] }
149+
// headerGNU views a block as a GNU tar header. V7 reinterprets the same
// memory to reach the first 257 bytes; the remaining accessors return slices
// aliasing the fields from byte 257 onward.
// Sparse spans 24*4+1 = 97 bytes starting at 386: room for four 24-byte
// entries plus the single byte that sparseArray.IsExtended returns.
// Bytes 369..385 and 495..511 have no accessor in this type.
type headerGNU [blockSize]byte
150+
151+
func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) }
152+
func (h *headerGNU) Magic() []byte { return h[257:][:6] }
153+
func (h *headerGNU) Version() []byte { return h[263:][:2] }
154+
func (h *headerGNU) UserName() []byte { return h[265:][:32] }
155+
func (h *headerGNU) GroupName() []byte { return h[297:][:32] }
156+
func (h *headerGNU) DevMajor() []byte { return h[329:][:8] }
157+
func (h *headerGNU) DevMinor() []byte { return h[337:][:8] }
158+
func (h *headerGNU) AccessTime() []byte { return h[345:][:12] }
159+
func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] }
160+
func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
161+
func (h *headerGNU) RealSize() []byte { return h[483:][:12] }
162+
163+
// headerSTAR views a block as a STAR tar header. The layout matches
// headerUSTAR up to byte 345, but Prefix is 131 bytes here rather than 155,
// which leaves room for AccessTime and ChangeTime at 476 and 488.
// Trailer is the last four bytes of the block; bytes 500..507 have no
// accessor in this type.
// Every accessor returns a slice aliasing the header's own bytes.
type headerSTAR [blockSize]byte
164+
165+
func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) }
166+
func (h *headerSTAR) Magic() []byte { return h[257:][:6] }
167+
func (h *headerSTAR) Version() []byte { return h[263:][:2] }
168+
func (h *headerSTAR) UserName() []byte { return h[265:][:32] }
169+
func (h *headerSTAR) GroupName() []byte { return h[297:][:32] }
170+
func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] }
171+
func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] }
172+
func (h *headerSTAR) Prefix() []byte { return h[345:][:131] }
173+
func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
174+
func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
175+
func (h *headerSTAR) Trailer() []byte { return h[508:][:4] }
176+
177+
// headerUSTAR views a block as a USTAR tar header. V7 reinterprets the same
// memory to reach the first 257 bytes; the other accessors return slices
// aliasing the fields from byte 257 through the end of Prefix at byte 499.
// Bytes 500..511 have no accessor in this type.
type headerUSTAR [blockSize]byte
178+
179+
func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) }
180+
func (h *headerUSTAR) Magic() []byte { return h[257:][:6] }
181+
func (h *headerUSTAR) Version() []byte { return h[263:][:2] }
182+
func (h *headerUSTAR) UserName() []byte { return h[265:][:32] }
183+
func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
184+
func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] }
185+
func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] }
186+
func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
187+
188+
// sparseArray is a byte slice holding consecutive 24-byte sparse entries
// followed by a one-byte flag.
//
// MaxEntries is the number of whole 24-byte entries that fit in the slice.
// Entry(i) returns the slice beginning at entry i; it runs to the end of the
// array rather than stopping after 24 bytes, and i is not range-checked
// beyond the normal slice bounds.
// IsExtended returns the single byte immediately after the last whole entry.
// NOTE(review): that relies on len(s) not being an exact multiple of 24; the
// two constructions in this file (97 and 512 bytes) both satisfy this.
type sparseArray []byte
189+
190+
func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) }
191+
func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
192+
func (s sparseArray) MaxEntries() int { return len(s) / 24 }
193+
194+
// sparseNode is a byte slice positioned at the start of one sparse entry:
// a 12-byte Offset field followed by a 12-byte NumBytes field. Both accessors
// return slices aliasing the underlying bytes, and they panic if fewer than
// 12 or 24 bytes respectively are available.
type sparseNode []byte
195+
196+
func (s sparseNode) Offset() []byte { return s[00:][:12] }
197+
func (s sparseNode) NumBytes() []byte { return s[12:][:12] }

0 commit comments

Comments
 (0)