diff --git a/txtar/archive.go b/txtar/archive.go index fd95f1e64a1..72d0b4fe7f3 100644 --- a/txtar/archive.go +++ b/txtar/archive.go @@ -53,17 +53,42 @@ type File struct { // Format returns the serialized form of an Archive. // It is assumed that the Archive data structure is well-formed: // a.Comment and all a.File[i].Data contain no file marker lines, -// and all a.File[i].Name is non-empty. +// and all a.File[i].Name is non-empty. Format uses line separators +// based on the line separator encountered in the comment section. func Format(a *Archive) []byte { + firstSep, ok := lineEnd(a.Comment) + if !ok { + for _, f := range a.Files { + firstSep, ok = lineEnd(f.Data) + if ok { + break + } + } + } + var buf bytes.Buffer - buf.Write(fixNL(a.Comment)) + buf.Write(fixNL(a.Comment, firstSep)) for _, f := range a.Files { - fmt.Fprintf(&buf, "-- %s --\n", f.Name) - buf.Write(fixNL(f.Data)) + fmt.Fprintf(&buf, "-- %s --%s", f.Name, firstSep) + buf.Write(fixNL(f.Data, firstSep)) } return buf.Bytes() } +// lineEnd returns the line separator that was used at the end +// of the passed byte slice and a bool flag that says if the line separator +// was really found or it is a default value (lf). +func lineEnd(b []byte) ([]byte, bool) { + switch { + case bytes.HasSuffix(b, crlf): + return crlf, true + case bytes.HasSuffix(b, lf): + return lf, true + default: + return lf, false + } +} + // ParseFile parses the named file as an archive. func ParseFile(file string) (*Archive, error) { data, err := os.ReadFile(file) @@ -78,16 +103,19 @@ func ParseFile(file string) (*Archive, error) { func Parse(data []byte) *Archive { a := new(Archive) var name string - a.Comment, name, data = findFileMarker(data) + var lineSeparator []byte + a.Comment, name, lineSeparator, data = findFileMarker(data, nil) for name != "" { f := File{name, nil} - f.Data, name, data = findFileMarker(data) + f.Data, name, lineSeparator, data = findFileMarker(data, lineSeparator) a.Files = append(a.Files, f) } return a } var ( + crlf = []byte("\r\n") + lf = []byte("\n") newlineMarker = []byte("\n-- ") marker = []byte("-- ") markerEnd = []byte(" --") @@ -96,45 +124,55 @@ var ( // findFileMarker finds the next file marker in data, // extracts the file name, and returns the data before the marker, // the file name, and the data after the marker. +// lineSep states if \n or \r\n should be appended by fixNL. // If there is no next marker, findFileMarker returns before = fixNL(data), name = "", after = nil. -func findFileMarker(data []byte) (before []byte, name string, after []byte) { +func findFileMarker(data, lineSep []byte) (before []byte, name string, lineSeparator []byte, after []byte) { var i int for { - if name, after = isMarker(data[i:]); name != "" { - return data[:i], name, after + if name, lineSeparator, after = isMarker(data[i:]); name != "" { + return data[:i], name, lineSeparator, after } j := bytes.Index(data[i:], newlineMarker) if j < 0 { - return fixNL(data), "", nil + return fixNL(data, lineSep), "", lineSep, nil } i += j + 1 // positioned at start of new possible marker } } // isMarker checks whether data begins with a file marker line. -// If so, it returns the name from the line and the data after the line. -// Otherwise it returns name == "" with an unspecified after. -func isMarker(data []byte) (name string, after []byte) { +// If so, it returns the name from the line, used line separator and the data after the line. +// Otherwise it returns name == "" with nil lineSeparator ("\n") and after. +// If the data does not contain a new line the default line separator is returned ("\n"). +func isMarker(data []byte) (name string, lineSeparator, after []byte) { if !bytes.HasPrefix(data, marker) { - return "", nil + return "", nil, nil } + lineSeparator = lf if i := bytes.IndexByte(data, '\n'); i >= 0 { - data, after = data[:i], data[i+1:] + if len(data) > 0 && data[i-1] == '\r' { + data, after = data[:i-1], data[i+1:] + lineSeparator = crlf + } else { + data, after = data[:i], data[i+1:] + lineSeparator = lf + } } if !(bytes.HasSuffix(data, markerEnd) && len(data) >= len(marker)+len(markerEnd)) { - return "", nil + return "", nil, nil } - return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), after + return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), lineSeparator, after } -// If data is empty or ends in \n, fixNL returns data. -// Otherwise fixNL returns a new slice consisting of data with a final \n added. -func fixNL(data []byte) []byte { - if len(data) == 0 || data[len(data)-1] == '\n' { +// If data is empty or ends in lineSeparator, fixNL returns data. +// lineSeparator states if \n or \r\n should be appended as a line separator if it is not present. +// Otherwise fixNL returns a new slice consisting of data with a final lineSeparator added. +func fixNL(data , lineSeparator []byte) []byte { + if len(data) == 0 || bytes.HasSuffix(data, lf) { return data } - d := make([]byte, len(data)+1) + d := make([]byte, len(data)+len(lineSeparator)) copy(d, data) - d[len(data)] = '\n' + copy(d[len(data):], lineSeparator) return d } diff --git a/txtar/archive_test.go b/txtar/archive_test.go index 6534f530103..009d0d63c3a 100644 --- a/txtar/archive_test.go +++ b/txtar/archive_test.go @@ -18,7 +18,7 @@ func TestParse(t *testing.T) { parsed *Archive }{ { - name: "basic", + name: "basic with line ending LF", text: `comment1 comment2 -- file1 -- @@ -33,14 +33,73 @@ hello world -- empty filename line -- some content -- --`, +parsed: &Archive{ + Comment: []byte("comment1\ncomment2\n"), + Files: []File{ + {"file1", []byte("File 1 text.\n-- foo ---\nMore file 1 text.\n")}, + {"file 2", []byte("File 2 text.\n")}, + {"empty", []byte{}}, + {"noNL", []byte("hello world\n")}, + {"empty filename line", []byte("some content\n-- --\n")}, + }, +}, + }, + { + name: "basic with line ending CRLF", + text: "comment1\r\n" + + "comment2\r\n" + + "-- file1 --\r\n" + + "File 1 text.\r\n" + + "-- foo ---\r\n" + + "More file 1 text.\r\n" + + "-- file 2 --\r\n" + + "File 2 text.\r\n" + + "-- empty --\r\n" + + "-- noNL --\r\n" + + "hello world\r\n" + + "-- empty filename line --\r\n" + + "some content\r\n" + + "-- --\r\n", parsed: &Archive{ - Comment: []byte("comment1\ncomment2\n"), + Comment: []byte("comment1\r\ncomment2\r\n"), Files: []File{ - {"file1", []byte("File 1 text.\n-- foo ---\nMore file 1 text.\n")}, - {"file 2", []byte("File 2 text.\n")}, + {"file1", []byte("File 1 text.\r\n-- foo ---\r\nMore file 1 text.\r\n")}, + {"file 2", []byte("File 2 text.\r\n")}, + {"empty", []byte{}}, + {"noNL", []byte("hello world\r\n")}, + {"empty filename line", []byte("some content\r\n-- --\r\n")}, + }, + }, + }, + { + name: "mixed line endings", + text: "comment1\n" + + "comment2\r\n" + + "-- file1 --\r\n" + + "File 1 text.\n" + + "-- foo ---\r\n" + + "More file 1 text.\r\n" + + "-- file 2 --\r\n" + + "File 2 text.\r\n" + + "-- file 3 --\r\n" + + "File 3 text.\r\n" + + "-- foo ---\r\n" + + "More file 3 text.\r\n" + + "-- empty --\r\n" + + "-- noNL --\r\n" + + "hello world\r\n" + + "-- empty filename line --\r\n" + + "some content\r\n" + + "-- --\n", + parsed: &Archive{ + Comment: []byte("comment1\ncomment2\r\n"), + Files: []File{ + {"file1", []byte("File 1 text.\n-- foo ---\r\nMore file 1 text.\r\n")}, + {"file 2", []byte("File 2 text.\r\n")}, + {"file 3", []byte("File 3 text.\r\n-- foo ---\r\nMore file 3 text.\r\n")}, {"empty", []byte{}}, - {"noNL", []byte("hello world\n")}, - {"empty filename line", []byte("some content\n-- --\n")}, + {"noNL", []byte("hello world\r\n")}, + {"empty filename line", []byte("some content\r\n-- --\n")}, }, }, }, @@ -67,7 +126,7 @@ func TestFormat(t *testing.T) { wanted string }{ { - name: "basic", + name: "basic with line ending LF", input: &Archive{ Comment: []byte("comment1\ncomment2\n"), Files: []File{ @@ -90,6 +149,61 @@ File 2 text. hello world `, }, + { + name: "basic with line ending CRLF", + input: &Archive{ + Comment: []byte("comment1\r\ncomment2\r\n"), + Files: []File{ + {"file1", []byte("File 1 text.\r\n-- foo ---\r\nMore file 1 text.\r\n")}, + {"file 2", []byte("File 2 text.\r\n")}, + {"empty", []byte{}}, + {"noNL", []byte("hello world")}, + }, + }, + wanted: "comment1\r\n" + + "comment2\r\n" + + "-- file1 --\r\n" + + "File 1 text.\r\n" + + "-- foo ---\r\n" + + "More file 1 text.\r\n" + + "-- file 2 --\r\n" + + "File 2 text.\r\n" + + "-- empty --\r\n" + + "-- noNL --\r\n" + + "hello world\r\n", + }, + { + name: "mixed line endings", + input: &Archive{ + Comment: []byte("comment1\ncomment2\r\n"), + Files: []File{ + {"file1", []byte("File 1 text.\n-- foo ---\r\nMore file 1 text.\r\n")}, + {"file 2", []byte("File 2 text.\r\n")}, + {"file 3", []byte("File 3 text.\r\n-- foo ---\r\nMore file 3 text.\r\n")}, + {"empty", []byte{}}, + {"noNL", []byte("hello world\r\n")}, + {"empty filename line", []byte("some content\r\n-- --\n")}, + }, + }, + wanted: "comment1\n" + + "comment2\r\n" + + "-- file1 --\r\n" + + "File 1 text.\n" + + "-- foo ---\r\n" + + "More file 1 text.\r\n" + + "-- file 2 --\r\n" + + "File 2 text.\r\n" + + "-- file 3 --\r\n" + + "File 3 text.\r\n" + + "-- foo ---\r\n" + + "More file 3 text.\r\n" + + "-- empty --\r\n" + + "-- noNL --\r\n" + + "hello world\r\n" + + "-- empty filename line --\r\n" + + "some content\r\n" + + "-- --\n", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {