diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index ac632535dfd180..1c595d0bc0409d 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -205,4 +205,7 @@ Cannot write the unseekable data stream of entry '{0}' into an unseekable archive stream. + + The extended header contains invalid records. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index bda8886efd5a8e..efa933d02fa70b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -64,23 +64,26 @@ public override long Position public override bool CanWrite => false; - internal bool HasReachedEnd + private long Remaining => _endInSuperStream - _positionInSuperStream; + + private int LimitByRemaining(int bufferSize) => (int)Math.Min(Remaining, bufferSize); + + internal ValueTask AdvanceToEndAsync(CancellationToken cancellationToken) { - get - { - if (!_hasReachedEnd && _positionInSuperStream > _endInSuperStream) - { - _hasReachedEnd = true; - } - return _hasReachedEnd; - } - set - { - if (value) // Don't allow revert to false - { - _hasReachedEnd = true; - } - } + _hasReachedEnd = true; + + long remaining = Remaining; + _positionInSuperStream = _endInSuperStream; + return TarHelpers.AdvanceStreamAsync(_superStream, remaining, cancellationToken); + } + + internal void AdvanceToEnd() + { + _hasReachedEnd = true; + + long remaining = Remaining; + _positionInSuperStream = _endInSuperStream; + TarHelpers.AdvanceStream(_superStream, remaining); } protected void ThrowIfDisposed() @@ -90,7 +93,7 @@ protected void ThrowIfDisposed() private void ThrowIfBeyondEndOfStream() { - if (HasReachedEnd) + if (_hasReachedEnd) { throw new EndOfStreamException(); } @@ -107,21 +110,12 @@ public override int Read(Span destination) ThrowIfDisposed(); ThrowIfBeyondEndOfStream(); - // parameter validation sent to _superStream.Read - int origCount = destination.Length; - int count = destination.Length; - - if (_positionInSuperStream + count > _endInSuperStream) - { - count = (int)(_endInSuperStream - _positionInSuperStream); - } - - Debug.Assert(count >= 0); - Debug.Assert(count <= origCount); + destination = destination[..LimitByRemaining(destination.Length)]; - int ret = _superStream.Read(destination.Slice(0, count)); + int ret = _superStream.Read(destination); _positionInSuperStream += ret; + return ret; } @@ -158,14 +152,12 @@ protected async ValueTask ReadAsyncCore(Memory buffer, CancellationTo cancellationToken.ThrowIfCancellationRequested(); - if (_positionInSuperStream > _endInSuperStream - buffer.Length) - { - buffer = buffer.Slice(0, (int)(_endInSuperStream - _positionInSuperStream)); - } + buffer = buffer[..LimitByRemaining(buffer.Length)]; int ret = await _superStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); _positionInSuperStream += ret; + return ret; } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 37570d0d33a5ea..509bcd02a24abf 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Globalization; using System.IO; using System.Text; using System.Threading; @@ -384,7 +385,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, - name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), + name: TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), mode: TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), mTime: ParseAsTimestamp(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) @@ -393,7 +394,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca _size = size, _uid = TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), _gid = TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), - _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) + _linkName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; if (header._format == TarEntryFormat.Unknown) @@ -517,8 +518,8 @@ private void ReadVersionAttribute(ReadOnlySpan buffer) private void ReadPosixAndGnuSharedAttributes(ReadOnlySpan buffer) { // Convert the byte arrays - _uName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName)); - _gName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName)); + _uName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName)); + _gName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName)); // DevMajor and DevMinor only have values with character devices and block devices. // For all other typeflags, the values in these fields are irrelevant. @@ -560,7 +561,7 @@ private static DateTimeOffset ParseAsTimestamp(ReadOnlySpan buffer) // Throws if a conversion to an expected data type fails. private void ReadUstarAttributes(ReadOnlySpan buffer) { - _prefix = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); + _prefix = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); // In ustar, Prefix is used to store the *leading* path segments of // Name, if the full path did not fit in the Name byte array. @@ -631,8 +632,6 @@ void ThrowSizeFieldTooLarge() => // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer); - while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { if (!ExtendedAttributes.TryAdd(key, value)) @@ -640,6 +639,11 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string throw new InvalidDataException(SR.Format(SR.TarDuplicateExtendedAttribute, name)); } } + + if (buffer.Length > 0) + { + throw new InvalidDataException(SR.Format(SR.ExtHeaderInvalidRecords)); + } } // Reads the long path found in the data section of a GNU entry of type 'K' or 'L' @@ -691,7 +695,7 @@ private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, Canc // Collects the GNU long path info from the buffer and sets it in the right field depending on the type flag. private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) { - string longPath = TarHelpers.GetTrimmedUtf8String(buffer); + string longPath = TarHelpers.ParseUtf8String(buffer); if (_typeFlag == TarEntryType.LongLink) { @@ -725,15 +729,21 @@ private static bool TryGetNextExtendedAttribute( } ReadOnlySpan line = buffer.Slice(0, newlinePos); - // Update buffer to point to the next line for the next call - buffer = buffer.Slice(newlinePos + 1); - - // Find the end of the length and remove everything up through it. + // Find the end of the length int spacePos = line.IndexOf((byte)' '); if (spacePos < 0) { return false; } + + // Check the length matches the line length + ReadOnlySpan length = buffer.Slice(0, spacePos); + if (!int.TryParse(length, NumberStyles.None, CultureInfo.InvariantCulture, out int lengthValue) || lengthValue != (line.Length + 1)) + { + return false; + } + + // Remove the length line = line.Slice(spacePos + 1); // Find the equal separator. @@ -749,6 +759,9 @@ private static bool TryGetNextExtendedAttribute( // Return the parsed key and value. key = Encoding.UTF8.GetString(keySlice); value = Encoding.UTF8.GetString(valueSlice); + + // Update buffer to point to the next line for the next call + buffer = buffer.Slice(newlinePos + 1); return true; } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index a2e037e615e730..77cfc47de0bf70 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -240,8 +240,10 @@ internal static T ParseNumeric(ReadOnlySpan buffer) where T : struct, I /// Parses a byte span that represents an ASCII string containing a number in octal base. internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { - buffer = TrimEndingNullsAndSpaces(buffer); - buffer = TrimLeadingNullsAndSpaces(buffer); + buffer = TrimNullTerminated(buffer); + + // We ignore spaces because some archives seem to have them (even though they shouldn't). + buffer = buffer.Trim((byte)' '); if (buffer.Length == 0) { @@ -268,44 +270,23 @@ internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INu private static void ThrowInvalidNumber() => throw new InvalidDataException(SR.Format(SR.TarInvalidNumber)); - // Returns the string contained in the specified buffer of bytes, - // in the specified encoding, removing the trailing null or space chars. - private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding) + // Returns the null-terminated UTF8 string contained in the specified buffer. + internal static string ParseUtf8String(ReadOnlySpan buffer) { - buffer = TrimEndingNullsAndSpaces(buffer); - return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); - } - - internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) - { - int trimmedLength = buffer.Length; - while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) - { - trimmedLength--; - } - - return buffer.Slice(0, trimmedLength); + buffer = TrimNullTerminated(buffer); + return Encoding.UTF8.GetString(buffer); } - private static ReadOnlySpan TrimLeadingNullsAndSpaces(ReadOnlySpan buffer) + internal static ReadOnlySpan TrimNullTerminated(ReadOnlySpan buffer) { - int newStart = 0; - while (newStart < buffer.Length && buffer[newStart] is 0 or 32) + int i = buffer.IndexOf((byte)0); + if (i != -1) { - newStart++; + buffer = buffer[0..i]; } - - return buffer.Slice(newStart); + return buffer; } - // Returns the ASCII string contained in the specified buffer of bytes, - // removing the trailing null or space chars. - internal static string GetTrimmedAsciiString(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.ASCII); - - // Returns the UTF8 string contained in the specified buffer of bytes, - // removing the trailing null or space chars. - internal static string GetTrimmedUtf8String(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.UTF8); - // After the file contents, there may be zero or more null characters, // which exist to ensure the data is aligned to the record size. Skip them and // set the stream position to the first byte of the next entry. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index fc2e4001a2a661..747cba3102fecc 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -221,17 +221,8 @@ internal void AdvanceDataStreamIfNeeded() return; } - if (!dataStream.HasReachedEnd) - { - // If the user did not advance the position, we need to make sure the position - // pointer is located at the beginning of the next header. - if (dataStream.Position < (_previouslyReadEntry._header._size - 1)) - { - long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; - TarHelpers.AdvanceStream(_archiveStream, bytesToSkip); - dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw - } - } + dataStream.AdvanceToEnd(); + TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); } } @@ -263,17 +254,8 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel return; } - if (!dataStream.HasReachedEnd) - { - // If the user did not advance the position, we need to make sure the position - // pointer is located at the beginning of the next header. - if (dataStream.Position < (_previouslyReadEntry._header._size - 1)) - { - long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; - await TarHelpers.AdvanceStreamAsync(_archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false); - dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw - } - } + await dataStream.AdvanceToEndAsync(cancellationToken).ConfigureAwait(false); + await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs index e603e6e9aa4691..9692dcca89756a 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs @@ -6,6 +6,7 @@ using System.IO; using System.IO.Compression; using System.Linq; +using System.Text; using System.Threading.Tasks; using Xunit; @@ -261,7 +262,8 @@ public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCa [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries [InlineData("neg-size")] // Garbage chars [InlineData("invalid-go17")] // Many octal fields are all zero chars - [InlineData("issue11169")] // Checksum with null in the middle + [InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records + [InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline [InlineData("issue10968")] // Garbage chars public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName) { @@ -308,6 +310,32 @@ public async Task SparseEntryNotSupportedAsync(string testFolderName, string tes await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); } + [Fact] + public async Task ReaderIgnoresFieldValueAfterTrailingNullAsync() + { + // Fields in the tar archives are terminated by a trailing null. + // When reading these fields the reader must ignore all bytes past that null. + + // Construct an archive that has a filename with some data after the trailing null. + const string FileName = " filename "; + const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense"; + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull); + writer.WriteEntry(entry); + } + ms.Position = 0; + // Check the writer serialized the complete name passed to the constructor. + bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1; + Assert.True(archiveIsExpected); + + // Verify the reader doesn't return the data past the trailing null. + using TarReader reader = new(ms); + TarEntry firstEntry = await reader.GetNextEntryAsync(); + Assert.Equal(FileName, firstEntry.Name); + } + [Fact] public async Task DirectoryListRegularFileAndSparseAsync() { diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index 955f29c841849f..0d8557d62a22b2 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -6,6 +6,7 @@ using System.IO; using System.IO.Compression; using System.Linq; +using System.Text; using Xunit; using static System.Formats.Tar.Tests.TarTestsBase; @@ -269,7 +270,8 @@ public void AllowSpacesInOctalFields(string folderName, string testCaseName) [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries [InlineData("neg-size")] // Garbage chars [InlineData("invalid-go17")] // Many octal fields are all zero chars - [InlineData("issue11169")] // Checksum with null in the middle + [InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records + [InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline [InlineData("issue10968")] // Garbage chars public void Throw_ArchivesWithRandomChars(string testCaseName) { @@ -316,6 +318,32 @@ public void SparseEntryNotSupported(string testFolderName, string testCaseName) Assert.Throws(() => reader.GetNextEntry()); } + [Fact] + public void ReaderIgnoresFieldValueAfterTrailingNull() + { + // Fields in the tar archives are terminated by a trailing null. + // When reading these fields the reader must ignore all bytes past that null. + + // Construct an archive that has a filename with some data after the trailing null. + const string FileName = " filename "; + const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense"; + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull); + writer.WriteEntry(entry); + } + ms.Position = 0; + // Check the writer serialized the complete name passed to the constructor. + bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1; + Assert.True(archiveIsExpected); + + // Verify the reader doesn't return the data past the trailing null. + using TarReader reader = new(ms); + TarEntry firstEntry = reader.GetNextEntry(); + Assert.Equal(FileName, firstEntry.Name); + } + [Fact] public void DirectoryListRegularFileAndSparse() { diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs index b68493996a095d..16352979db2217 100644 --- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs +++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs @@ -137,7 +137,6 @@ public abstract partial class TarTestsBase : FileCleanupTestBase "gnu", "hardlink", "nil-uid", - "pax-bad-hdr-file", "pax-bad-mtime-file", "pax-global-records", "pax-nul-path",