Skip to content

txtar: add CRLF handling #435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 61 additions & 23 deletions txtar/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,42 @@ type File struct {
// Format returns the serialized form of an Archive.
// It is assumed that the Archive data structure is well-formed:
// a.Comment and all a.File[i].Data contain no file marker lines,
// and all a.File[i].Name is non-empty.
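// and all a.File[i].Name is non-empty. Format ends marker lines, and any
// comment or file data lacking a final newline, with the line separator found
// at the end of a.Comment or, failing that, at the end of the first file whose
// data ends in one; if none is found it uses "\n".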
func Format(a *Archive) []byte {
firstSep, ok := lineEnd(a.Comment)
if !ok {
for _, f := range a.Files {
firstSep, ok = lineEnd(f.Data)
if ok {
break
}
}
}

var buf bytes.Buffer
buf.Write(fixNL(a.Comment))
buf.Write(fixNL(a.Comment, firstSep))
for _, f := range a.Files {
fmt.Fprintf(&buf, "-- %s --\n", f.Name)
buf.Write(fixNL(f.Data))
fmt.Fprintf(&buf, "-- %s --%s", f.Name, firstSep)
buf.Write(fixNL(f.Data, firstSep))
}
return buf.Bytes()
}

// lineEnd reports which line separator terminates b.
// It returns crlf when b ends in "\r\n" and lf when b ends in a bare "\n";
// in both cases the bool result is true. When b has no trailing line
// separator, it returns the default separator lf and false.
func lineEnd(b []byte) ([]byte, bool) {
	// Check crlf first: data ending in "\r\n" also ends in "\n".
	if bytes.HasSuffix(b, crlf) {
		return crlf, true
	}
	if bytes.HasSuffix(b, lf) {
		return lf, true
	}
	return lf, false
}

// ParseFile parses the named file as an archive.
func ParseFile(file string) (*Archive, error) {
data, err := os.ReadFile(file)
Expand All @@ -78,16 +103,19 @@ func ParseFile(file string) (*Archive, error) {
func Parse(data []byte) *Archive {
a := new(Archive)
var name string
a.Comment, name, data = findFileMarker(data)
var lineSeparator []byte
a.Comment, name, lineSeparator, data = findFileMarker(data, nil)
for name != "" {
f := File{name, nil}
f.Data, name, data = findFileMarker(data)
f.Data, name, lineSeparator, data = findFileMarker(data, lineSeparator)
a.Files = append(a.Files, f)
}
return a
}

var (
crlf = []byte("\r\n")
lf = []byte("\n")
newlineMarker = []byte("\n-- ")
marker = []byte("-- ")
markerEnd = []byte(" --")
Expand All @@ -96,45 +124,55 @@ var (
// findFileMarker finds the next file marker in data,
// extracts the file name, and returns the data before the marker,
// the file name, and the data after the marker.
// lineSep is the line separator (\n or \r\n) that fixNL appends when there is
// no next marker and the remaining data does not end in a newline.
// If there is no next marker, findFileMarker returns before = fixNL(data), name = "", after = nil.
func findFileMarker(data []byte) (before []byte, name string, after []byte) {
func findFileMarker(data, lineSep []byte) (before []byte, name string, lineSeparator []byte, after []byte) {
var i int
for {
if name, after = isMarker(data[i:]); name != "" {
return data[:i], name, after
if name, lineSeparator, after = isMarker(data[i:]); name != "" {
return data[:i], name, lineSeparator, after
}
j := bytes.Index(data[i:], newlineMarker)
if j < 0 {
return fixNL(data), "", nil
return fixNL(data, lineSep), "", lineSep, nil
}
i += j + 1 // positioned at start of new possible marker
}
}

// isMarker checks whether data begins with a file marker line.
// If so, it returns the name from the line and the data after the line.
// Otherwise it returns name == "" with an unspecified after.
func isMarker(data []byte) (name string, after []byte) {
// If so, it returns the name from the line, the line separator that ended the
// line, and the data after the line. If the marker line is not terminated by a
// newline, the default line separator ("\n") is returned.
// Otherwise it returns name == "" with a nil lineSeparator and a nil after.
func isMarker(data []byte) (name string, lineSeparator, after []byte) {
if !bytes.HasPrefix(data, marker) {
return "", nil
return "", nil, nil
}
lineSeparator = lf
if i := bytes.IndexByte(data, '\n'); i >= 0 {
data, after = data[:i], data[i+1:]
if len(data) > 0 && data[i-1] == '\r' {
data, after = data[:i-1], data[i+1:]
lineSeparator = crlf
} else {
data, after = data[:i], data[i+1:]
lineSeparator = lf
}
}
if !(bytes.HasSuffix(data, markerEnd) && len(data) >= len(marker)+len(markerEnd)) {
return "", nil
return "", nil, nil
}
return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), after
return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), lineSeparator, after
}

// If data is empty or ends in \n, fixNL returns data.
// Otherwise fixNL returns a new slice consisting of data with a final \n added.
func fixNL(data []byte) []byte {
if len(data) == 0 || data[len(data)-1] == '\n' {
// If data is empty or ends in \n (which includes \r\n), fixNL returns data.
// Otherwise fixNL returns a new slice consisting of data with a final
// lineSeparator (\n or \r\n, as chosen by the caller) added.
func fixNL(data , lineSeparator []byte) []byte {
if len(data) == 0 || bytes.HasSuffix(data, lf) {
return data
}
d := make([]byte, len(data)+1)
d := make([]byte, len(data)+len(lineSeparator))
copy(d, data)
d[len(data)] = '\n'
copy(d[len(data):], lineSeparator)
return d
}
128 changes: 121 additions & 7 deletions txtar/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func TestParse(t *testing.T) {
parsed *Archive
}{
{
name: "basic",
name: "basic with line ending LF",
text: `comment1
comment2
-- file1 --
Expand All @@ -33,14 +33,73 @@ hello world
-- empty filename line --
some content
-- --`,
parsed: &Archive{
Comment: []byte("comment1\ncomment2\n"),
Files: []File{
{"file1", []byte("File 1 text.\n-- foo ---\nMore file 1 text.\n")},
{"file 2", []byte("File 2 text.\n")},
{"empty", []byte{}},
{"noNL", []byte("hello world\n")},
{"empty filename line", []byte("some content\n-- --\n")},
},
},
},
{
name: "basic with line ending CRLF",
text: "comment1\r\n" +
"comment2\r\n" +
"-- file1 --\r\n" +
"File 1 text.\r\n" +
"-- foo ---\r\n" +
"More file 1 text.\r\n" +
"-- file 2 --\r\n" +
"File 2 text.\r\n" +
"-- empty --\r\n" +
"-- noNL --\r\n" +
"hello world\r\n" +
"-- empty filename line --\r\n" +
"some content\r\n" +
"-- --\r\n",
parsed: &Archive{
Comment: []byte("comment1\ncomment2\n"),
Comment: []byte("comment1\r\ncomment2\r\n"),
Files: []File{
{"file1", []byte("File 1 text.\n-- foo ---\nMore file 1 text.\n")},
{"file 2", []byte("File 2 text.\n")},
{"file1", []byte("File 1 text.\r\n-- foo ---\r\nMore file 1 text.\r\n")},
{"file 2", []byte("File 2 text.\r\n")},
{"empty", []byte{}},
{"noNL", []byte("hello world\r\n")},
{"empty filename line", []byte("some content\r\n-- --\r\n")},
},
},
},
{
name: "mixed line endings",
text: "comment1\n" +
"comment2\r\n" +
"-- file1 --\r\n" +
"File 1 text.\n" +
"-- foo ---\r\n" +
"More file 1 text.\r\n" +
"-- file 2 --\r\n" +
"File 2 text.\r\n" +
"-- file 3 --\r\n" +
"File 3 text.\r\n" +
"-- foo ---\r\n" +
"More file 3 text.\r\n" +
"-- empty --\r\n" +
"-- noNL --\r\n" +
"hello world\r\n" +
"-- empty filename line --\r\n" +
"some content\r\n" +
"-- --\n",
parsed: &Archive{
Comment: []byte("comment1\ncomment2\r\n"),
Files: []File{
{"file1", []byte("File 1 text.\n-- foo ---\r\nMore file 1 text.\r\n")},
{"file 2", []byte("File 2 text.\r\n")},
{"file 3", []byte("File 3 text.\r\n-- foo ---\r\nMore file 3 text.\r\n")},
{"empty", []byte{}},
{"noNL", []byte("hello world\n")},
{"empty filename line", []byte("some content\n-- --\n")},
{"noNL", []byte("hello world\r\n")},
{"empty filename line", []byte("some content\r\n-- --\n")},
},
},
},
Expand All @@ -67,7 +126,7 @@ func TestFormat(t *testing.T) {
wanted string
}{
{
name: "basic",
name: "basic with line ending LF",
input: &Archive{
Comment: []byte("comment1\ncomment2\n"),
Files: []File{
Expand All @@ -90,6 +149,61 @@ File 2 text.
hello world
`,
},
{
name: "basic with line ending CRLF",
input: &Archive{
Comment: []byte("comment1\r\ncomment2\r\n"),
Files: []File{
{"file1", []byte("File 1 text.\r\n-- foo ---\r\nMore file 1 text.\r\n")},
{"file 2", []byte("File 2 text.\r\n")},
{"empty", []byte{}},
{"noNL", []byte("hello world")},
},
},
wanted: "comment1\r\n" +
"comment2\r\n" +
"-- file1 --\r\n" +
"File 1 text.\r\n" +
"-- foo ---\r\n" +
"More file 1 text.\r\n" +
"-- file 2 --\r\n" +
"File 2 text.\r\n" +
"-- empty --\r\n" +
"-- noNL --\r\n" +
"hello world\r\n",
},
{
name: "mixed line endings",
input: &Archive{
Comment: []byte("comment1\ncomment2\r\n"),
Files: []File{
{"file1", []byte("File 1 text.\n-- foo ---\r\nMore file 1 text.\r\n")},
{"file 2", []byte("File 2 text.\r\n")},
{"file 3", []byte("File 3 text.\r\n-- foo ---\r\nMore file 3 text.\r\n")},
{"empty", []byte{}},
{"noNL", []byte("hello world\r\n")},
{"empty filename line", []byte("some content\r\n-- --\n")},
},
},
wanted: "comment1\n" +
"comment2\r\n" +
"-- file1 --\r\n" +
"File 1 text.\n" +
"-- foo ---\r\n" +
"More file 1 text.\r\n" +
"-- file 2 --\r\n" +
"File 2 text.\r\n" +
"-- file 3 --\r\n" +
"File 3 text.\r\n" +
"-- foo ---\r\n" +
"More file 3 text.\r\n" +
"-- empty --\r\n" +
"-- noNL --\r\n" +
"hello world\r\n" +
"-- empty filename line --\r\n" +
"some content\r\n" +
"-- --\n",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down