diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx
index ac632535dfd180..1c595d0bc0409d 100644
--- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx
+++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx
@@ -205,4 +205,7 @@
Cannot write the unseekable data stream of entry '{0}' into an unseekable archive stream.
+
+ The extended header contains invalid records.
+
diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs
index bda8886efd5a8e..efa933d02fa70b 100644
--- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs
+++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs
@@ -64,23 +64,26 @@ public override long Position
public override bool CanWrite => false;
- internal bool HasReachedEnd
+ private long Remaining => _endInSuperStream - _positionInSuperStream;
+
+ private int LimitByRemaining(int bufferSize) => (int)Math.Min(Remaining, bufferSize);
+
+ internal ValueTask AdvanceToEndAsync(CancellationToken cancellationToken)
{
- get
- {
- if (!_hasReachedEnd && _positionInSuperStream > _endInSuperStream)
- {
- _hasReachedEnd = true;
- }
- return _hasReachedEnd;
- }
- set
- {
- if (value) // Don't allow revert to false
- {
- _hasReachedEnd = true;
- }
- }
+ _hasReachedEnd = true;
+
+ long remaining = Remaining;
+ _positionInSuperStream = _endInSuperStream;
+ return TarHelpers.AdvanceStreamAsync(_superStream, remaining, cancellationToken);
+ }
+
+ internal void AdvanceToEnd()
+ {
+ _hasReachedEnd = true;
+
+ long remaining = Remaining;
+ _positionInSuperStream = _endInSuperStream;
+ TarHelpers.AdvanceStream(_superStream, remaining);
}
protected void ThrowIfDisposed()
@@ -90,7 +93,7 @@ protected void ThrowIfDisposed()
private void ThrowIfBeyondEndOfStream()
{
- if (HasReachedEnd)
+ if (_hasReachedEnd)
{
throw new EndOfStreamException();
}
@@ -107,21 +110,12 @@ public override int Read(Span destination)
ThrowIfDisposed();
ThrowIfBeyondEndOfStream();
- // parameter validation sent to _superStream.Read
- int origCount = destination.Length;
- int count = destination.Length;
-
- if (_positionInSuperStream + count > _endInSuperStream)
- {
- count = (int)(_endInSuperStream - _positionInSuperStream);
- }
-
- Debug.Assert(count >= 0);
- Debug.Assert(count <= origCount);
+ destination = destination[..LimitByRemaining(destination.Length)];
- int ret = _superStream.Read(destination.Slice(0, count));
+ int ret = _superStream.Read(destination);
_positionInSuperStream += ret;
+
return ret;
}
@@ -158,14 +152,12 @@ protected async ValueTask ReadAsyncCore(Memory buffer, CancellationTo
cancellationToken.ThrowIfCancellationRequested();
- if (_positionInSuperStream > _endInSuperStream - buffer.Length)
- {
- buffer = buffer.Slice(0, (int)(_endInSuperStream - _positionInSuperStream));
- }
+ buffer = buffer[..LimitByRemaining(buffer.Length)];
int ret = await _superStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
_positionInSuperStream += ret;
+
return ret;
}
diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs
index 37570d0d33a5ea..509bcd02a24abf 100644
--- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs
+++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs
@@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
using System.IO;
using System.Text;
using System.Threading;
@@ -384,7 +385,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
// Continue with the rest of the fields that require no special checks
TarHeader header = new(initialFormat,
- name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
+ name: TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
mode: TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: ParseAsTimestamp(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)),
typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag])
@@ -393,7 +394,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
_size = size,
_uid = TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
- _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
+ _linkName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
};
if (header._format == TarEntryFormat.Unknown)
@@ -517,8 +518,8 @@ private void ReadVersionAttribute(ReadOnlySpan buffer)
private void ReadPosixAndGnuSharedAttributes(ReadOnlySpan buffer)
{
// Convert the byte arrays
- _uName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
- _gName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName));
+ _uName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
+ _gName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName));
// DevMajor and DevMinor only have values with character devices and block devices.
// For all other typeflags, the values in these fields are irrelevant.
@@ -560,7 +561,7 @@ private static DateTimeOffset ParseAsTimestamp(ReadOnlySpan buffer)
// Throws if a conversion to an expected data type fails.
private void ReadUstarAttributes(ReadOnlySpan buffer)
{
- _prefix = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
+ _prefix = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
// In ustar, Prefix is used to store the *leading* path segments of
// Name, if the full path did not fit in the Name byte array.
@@ -631,8 +632,6 @@ void ThrowSizeFieldTooLarge() =>
// Returns a dictionary containing the extended attributes collected from the provided byte buffer.
private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name)
{
- buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer);
-
while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value))
{
if (!ExtendedAttributes.TryAdd(key, value))
@@ -640,6 +639,11 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string
throw new InvalidDataException(SR.Format(SR.TarDuplicateExtendedAttribute, name));
}
}
+
+ if (buffer.Length > 0)
+ {
+ throw new InvalidDataException(SR.Format(SR.ExtHeaderInvalidRecords));
+ }
}
// Reads the long path found in the data section of a GNU entry of type 'K' or 'L'
@@ -691,7 +695,7 @@ private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, Canc
// Collects the GNU long path info from the buffer and sets it in the right field depending on the type flag.
private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer)
{
- string longPath = TarHelpers.GetTrimmedUtf8String(buffer);
+ string longPath = TarHelpers.ParseUtf8String(buffer);
if (_typeFlag == TarEntryType.LongLink)
{
@@ -725,15 +729,21 @@ private static bool TryGetNextExtendedAttribute(
}
ReadOnlySpan line = buffer.Slice(0, newlinePos);
- // Update buffer to point to the next line for the next call
- buffer = buffer.Slice(newlinePos + 1);
-
- // Find the end of the length and remove everything up through it.
+ // Find the end of the length
int spacePos = line.IndexOf((byte)' ');
if (spacePos < 0)
{
return false;
}
+
+ // Check the length matches the line length
+ ReadOnlySpan length = buffer.Slice(0, spacePos);
+ if (!int.TryParse(length, NumberStyles.None, CultureInfo.InvariantCulture, out int lengthValue) || lengthValue != (line.Length + 1))
+ {
+ return false;
+ }
+
+ // Remove the length
line = line.Slice(spacePos + 1);
// Find the equal separator.
@@ -749,6 +759,9 @@ private static bool TryGetNextExtendedAttribute(
// Return the parsed key and value.
key = Encoding.UTF8.GetString(keySlice);
value = Encoding.UTF8.GetString(valueSlice);
+
+ // Update buffer to point to the next line for the next call
+ buffer = buffer.Slice(newlinePos + 1);
return true;
}
}
diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs
index a2e037e615e730..77cfc47de0bf70 100644
--- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs
+++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs
@@ -240,8 +240,10 @@ internal static T ParseNumeric(ReadOnlySpan buffer) where T : struct, I
/// Parses a byte span that represents an ASCII string containing a number in octal base.
internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber
{
- buffer = TrimEndingNullsAndSpaces(buffer);
- buffer = TrimLeadingNullsAndSpaces(buffer);
+ buffer = TrimNullTerminated(buffer);
+
+ // We ignore spaces because some archives seem to have them (even though they shouldn't).
+ buffer = buffer.Trim((byte)' ');
if (buffer.Length == 0)
{
@@ -268,44 +270,23 @@ internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INu
private static void ThrowInvalidNumber() =>
throw new InvalidDataException(SR.Format(SR.TarInvalidNumber));
- // Returns the string contained in the specified buffer of bytes,
- // in the specified encoding, removing the trailing null or space chars.
- private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding)
+ // Returns the null-terminated UTF8 string contained in the specified buffer.
+ internal static string ParseUtf8String(ReadOnlySpan buffer)
{
- buffer = TrimEndingNullsAndSpaces(buffer);
- return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer);
- }
-
- internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer)
- {
- int trimmedLength = buffer.Length;
- while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32)
- {
- trimmedLength--;
- }
-
- return buffer.Slice(0, trimmedLength);
+ buffer = TrimNullTerminated(buffer);
+ return Encoding.UTF8.GetString(buffer);
}
- private static ReadOnlySpan TrimLeadingNullsAndSpaces(ReadOnlySpan buffer)
+ internal static ReadOnlySpan TrimNullTerminated(ReadOnlySpan buffer)
{
- int newStart = 0;
- while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
+ int i = buffer.IndexOf((byte)0);
+ if (i != -1)
{
- newStart++;
+ buffer = buffer[0..i];
}
-
- return buffer.Slice(newStart);
+ return buffer;
}
- // Returns the ASCII string contained in the specified buffer of bytes,
- // removing the trailing null or space chars.
- internal static string GetTrimmedAsciiString(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.ASCII);
-
- // Returns the UTF8 string contained in the specified buffer of bytes,
- // removing the trailing null or space chars.
- internal static string GetTrimmedUtf8String(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.UTF8);
-
// After the file contents, there may be zero or more null characters,
// which exist to ensure the data is aligned to the record size. Skip them and
// set the stream position to the first byte of the next entry.
diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs
index fc2e4001a2a661..747cba3102fecc 100644
--- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs
+++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs
@@ -221,17 +221,8 @@ internal void AdvanceDataStreamIfNeeded()
return;
}
- if (!dataStream.HasReachedEnd)
- {
- // If the user did not advance the position, we need to make sure the position
- // pointer is located at the beginning of the next header.
- if (dataStream.Position < (_previouslyReadEntry._header._size - 1))
- {
- long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position;
- TarHelpers.AdvanceStream(_archiveStream, bytesToSkip);
- dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw
- }
- }
+ dataStream.AdvanceToEnd();
+
TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size);
}
}
@@ -263,17 +254,8 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
return;
}
- if (!dataStream.HasReachedEnd)
- {
- // If the user did not advance the position, we need to make sure the position
- // pointer is located at the beginning of the next header.
- if (dataStream.Position < (_previouslyReadEntry._header._size - 1))
- {
- long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position;
- await TarHelpers.AdvanceStreamAsync(_archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false);
- dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw
- }
- }
+ await dataStream.AdvanceToEndAsync(cancellationToken).ConfigureAwait(false);
+
await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false);
}
}
diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs
index e603e6e9aa4691..9692dcca89756a 100644
--- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs
+++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs
@@ -6,6 +6,7 @@
using System.IO;
using System.IO.Compression;
using System.Linq;
+using System.Text;
using System.Threading.Tasks;
using Xunit;
@@ -261,7 +262,8 @@ public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCa
[InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries
[InlineData("neg-size")] // Garbage chars
[InlineData("invalid-go17")] // Many octal fields are all zero chars
- [InlineData("issue11169")] // Checksum with null in the middle
+ [InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records
+ [InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline
[InlineData("issue10968")] // Garbage chars
public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName)
{
@@ -308,6 +310,32 @@ public async Task SparseEntryNotSupportedAsync(string testFolderName, string tes
await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync());
}
+ [Fact]
+ public async Task ReaderIgnoresFieldValueAfterTrailingNullAsync()
+ {
+ // Fields in the tar archives are terminated by a trailing null.
+ // When reading these fields the reader must ignore all bytes past that null.
+
+ // Construct an archive that has a filename with some data after the trailing null.
+ const string FileName = " filename ";
+ const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense";
+ using MemoryStream ms = new();
+ using (TarWriter writer = new(ms, leaveOpen: true))
+ {
+ var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull);
+ writer.WriteEntry(entry);
+ }
+ ms.Position = 0;
+ // Check the writer serialized the complete name passed to the constructor.
+ bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1;
+ Assert.True(archiveIsExpected);
+
+ // Verify the reader doesn't return the data past the trailing null.
+ using TarReader reader = new(ms);
+ TarEntry firstEntry = await reader.GetNextEntryAsync();
+ Assert.Equal(FileName, firstEntry.Name);
+ }
+
[Fact]
public async Task DirectoryListRegularFileAndSparseAsync()
{
diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs
index 955f29c841849f..0d8557d62a22b2 100644
--- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs
+++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs
@@ -6,6 +6,7 @@
using System.IO;
using System.IO.Compression;
using System.Linq;
+using System.Text;
using Xunit;
using static System.Formats.Tar.Tests.TarTestsBase;
@@ -269,7 +270,8 @@ public void AllowSpacesInOctalFields(string folderName, string testCaseName)
[InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries
[InlineData("neg-size")] // Garbage chars
[InlineData("invalid-go17")] // Many octal fields are all zero chars
- [InlineData("issue11169")] // Checksum with null in the middle
+ [InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records
+ [InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline
[InlineData("issue10968")] // Garbage chars
public void Throw_ArchivesWithRandomChars(string testCaseName)
{
@@ -316,6 +318,32 @@ public void SparseEntryNotSupported(string testFolderName, string testCaseName)
Assert.Throws(() => reader.GetNextEntry());
}
+ [Fact]
+ public void ReaderIgnoresFieldValueAfterTrailingNull()
+ {
+ // Fields in the tar archives are terminated by a trailing null.
+ // When reading these fields the reader must ignore all bytes past that null.
+
+ // Construct an archive that has a filename with some data after the trailing null.
+ const string FileName = " filename ";
+ const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense";
+ using MemoryStream ms = new();
+ using (TarWriter writer = new(ms, leaveOpen: true))
+ {
+ var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull);
+ writer.WriteEntry(entry);
+ }
+ ms.Position = 0;
+ // Check the writer serialized the complete name passed to the constructor.
+ bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1;
+ Assert.True(archiveIsExpected);
+
+ // Verify the reader doesn't return the data past the trailing null.
+ using TarReader reader = new(ms);
+ TarEntry firstEntry = reader.GetNextEntry();
+ Assert.Equal(FileName, firstEntry.Name);
+ }
+
[Fact]
public void DirectoryListRegularFileAndSparse()
{
diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs
index b68493996a095d..16352979db2217 100644
--- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs
+++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs
@@ -137,7 +137,6 @@ public abstract partial class TarTestsBase : FileCleanupTestBase
"gnu",
"hardlink",
"nil-uid",
- "pax-bad-hdr-file",
"pax-bad-mtime-file",
"pax-global-records",
"pax-nul-path",