Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
<Compile Include="System\Web\IHtmlString.cs" />
<Compile Include="System\Web\HttpUtility.cs" />
<Compile Include="System\Web\Util\HttpEncoder.cs" />
<Compile Include="System\Web\Util\HttpEncoderUtility.cs" />
<Compile Include="System\Web\Util\UriUtil.cs" />
<Compile Include="System\Web\Util\Utf16StringValidator.cs" />
<Compile Include="$(CommonPath)System\HexConverter.cs"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,6 @@ internal static void HtmlEncode(string? value, TextWriter output)
// Returns the index of the first character in <s> that is one of < " ' &,
// or -1 when <s> contains none of them.
private static int IndexOfHtmlAttributeEncodingChars(string s)
{
    ReadOnlySpan<char> text = s.AsSpan();
    return text.IndexOfAny("<\"'&");
}

// True for every byte outside the range 0x20..0x7E, i.e. control bytes below 0x20,
// DEL (0x7F) and everything above it. Note that despite the name, DEL is treated as
// "non-ASCII" here while the space character (0x20) is not.
private static bool IsNonAsciiByte(byte b)
{
    return !(b >= 0x20 && b < 0x7F);
}

internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes)
{
int i = value.AsSpan().IndexOfAny(s_invalidJavaScriptChars);
Expand Down Expand Up @@ -474,58 +472,6 @@ internal static byte[] UrlEncode(string str, Encoding e)
: bytes;
}

// Converts <str> to bytes with encoding <e>, runs those bytes through the
// byte[] overload of UrlEncodeNonAscii, and returns the outcome decoded as ASCII text.
// Callers must pass a non-empty string and a non-null encoding (checked in debug builds only).
private static string UrlEncodeNonAscii(string str, Encoding e)
{
    Debug.Assert(!string.IsNullOrEmpty(str));
    Debug.Assert(e != null);

    byte[] raw = e.GetBytes(str);
    return Encoding.ASCII.GetString(UrlEncodeNonAscii(raw, 0, raw.Length));
}

// Percent-encodes the bytes in bytes[offset .. offset + count) that IsNonAsciiByte flags,
// writing each one as '%' followed by two hex characters produced by HexConverter.ToCharLower.
// All other bytes are copied through unchanged.
//
// Returns:
//   - the original <bytes> instance when nothing needs encoding AND the requested range
//     covers the whole array (no allocation);
//   - otherwise a new array holding only the (possibly expanded) requested range.
//
// Previously the "nothing to encode" path returned <bytes> unconditionally, which handed
// back data outside the requested window whenever offset/count selected a sub-range.
private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count)
{
    ReadOnlySpan<byte> source = bytes.AsSpan(offset, count);

    // Count first so the output buffer can be sized exactly.
    int cNonAscii = 0;
    foreach (byte b in source)
    {
        if (IsNonAsciiByte(b))
        {
            cNonAscii++;
        }
    }

    // Nothing to expand?
    if (cNonAscii == 0)
    {
        return offset == 0 && count == bytes.Length
            ? bytes
            : source.ToArray();
    }

    // Every flagged byte grows from one byte to three ("%XX").
    byte[] expandedBytes = new byte[count + cNonAscii * 2];
    int pos = 0;

    foreach (byte b in source)
    {
        if (IsNonAsciiByte(b))
        {
            expandedBytes[pos++] = (byte)'%';
            expandedBytes[pos++] = (byte)HexConverter.ToCharLower(b >> 4);
            expandedBytes[pos++] = (byte)HexConverter.ToCharLower(b);
        }
        else
        {
            expandedBytes[pos++] = b;
        }
    }

    return expandedBytes;
}

[Obsolete("This method produces non-standards-compliant output and has interoperability issues. The preferred alternative is UrlEncode(*).")]
[return: NotNullIfNotNull(nameof(value))]
internal static string? UrlEncodeUnicode(string? value)
Expand Down Expand Up @@ -580,21 +526,19 @@ private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count)
return value;
}

string? schemeAndAuthority;
ReadOnlySpan<char> schemeAndAuthority;
string? path;
string? queryAndFragment;
ReadOnlySpan<char> queryAndFragment;

if (!UriUtil.TrySplitUriForPathEncode(value, out schemeAndAuthority, out path, out queryAndFragment))
{
// If the value is not a valid url, we treat it as a relative url.
// We don't need to extract query string from the url since UrlPathEncode()
// does not encode query string.
schemeAndAuthority = null;
path = value;
queryAndFragment = null;
return UrlPathEncodeImpl(value);
}

return schemeAndAuthority + UrlPathEncodeImpl(path) + queryAndFragment;
return string.Concat(schemeAndAuthority, UrlPathEncodeImpl(path), queryAndFragment);
}

// This is the original UrlPathEncode(string)
Expand All @@ -605,15 +549,48 @@ private static string UrlPathEncodeImpl(string value)
return value;
}

// recurse in case there is a query string
int i = value.IndexOf('?');
if (i >= 0)
int i = value.AsSpan().IndexOfAnyExceptInRange((char)0x21, (char)0x7F);
if (i < 0)
{
return value;
}

int indexOfQuery = value.IndexOf('?');
if ((uint)indexOfQuery < (uint)i)
{
return string.Concat(UrlPathEncodeImpl(value.Substring(0, i)), value.AsSpan(i));
// Everything before the Query is valid ASCII
return value;
}

ReadOnlySpan<char> toEncode = indexOfQuery >= 0
? value.AsSpan(i, indexOfQuery - i)
: value.AsSpan(i);

byte[] bytes = ArrayPool<byte>.Shared.Rent(Encoding.UTF8.GetMaxByteCount(toEncode.Length));
int utf8Length = Encoding.UTF8.GetBytes(toEncode, bytes);
char[] chars = ArrayPool<char>.Shared.Rent(utf8Length * 3);
int charCount = 0;
foreach (byte b in bytes.AsSpan(0, utf8Length))
{
if (!char.IsBetween((char)b, (char)0x21, (char)0x7F))
{
chars[charCount++] = '%';
chars[charCount++] = HexConverter.ToCharLower(b >> 4);
chars[charCount++] = HexConverter.ToCharLower(b);
}
else
{
chars[charCount++] = (char)b;
}
}

// encode DBCS characters and spaces only
return HttpEncoderUtility.UrlEncodeSpaces(UrlEncodeNonAscii(value, Encoding.UTF8));
ArrayPool<byte>.Shared.Return(bytes);
string result = string.Concat(
value.AsSpan(0, i),
chars.AsSpan(0, charCount),
indexOfQuery >= 0 ? value.AsSpan(indexOfQuery) : ReadOnlySpan<char>.Empty);
ArrayPool<char>.Shared.Return(chars);
return result;
}

private static bool ValidateUrlEncodingParameters([NotNullWhen(true)] byte[]? bytes, int offset, int count)
Expand Down

This file was deleted.

34 changes: 13 additions & 21 deletions src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,25 @@ namespace System.Web.Util
{
internal static class UriUtil
{
// Just extracts the query string and fragment from the input path by splitting on the separator characters.
// Doesn't perform any validation as to whether the input represents a valid URL.
// Attempts to split a URI into its constituent pieces.
// Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment.
// Concatenating the pieces back together will form the original input string.
private static void ExtractQueryAndFragment(string input, out string path, out string? queryAndFragment)
internal static bool TrySplitUriForPathEncode(string input, out ReadOnlySpan<char> schemeAndAuthority, [NotNullWhen(true)] out string? path, out ReadOnlySpan<char> queryAndFragment)
{
// Strip off ?query and #fragment if they exist, since we're not going to look at them
int queryFragmentSeparatorPos = input.AsSpan().IndexOfAny('?', '#'); // query fragment separators
string inputWithoutQueryFragment;
if (queryFragmentSeparatorPos >= 0)
{
path = input.Substring(0, queryFragmentSeparatorPos);
queryAndFragment = input.Substring(queryFragmentSeparatorPos);
inputWithoutQueryFragment = input.Substring(0, queryFragmentSeparatorPos);
queryAndFragment = input.AsSpan(queryFragmentSeparatorPos);
}
else
{
// no query or fragment separator
path = input;
queryAndFragment = null;
inputWithoutQueryFragment = input;
queryAndFragment = ReadOnlySpan<char>.Empty;
}
}

// Attempts to split a URI into its constituent pieces.
// Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment.
// Concatenating the pieces back together will form the original input string.
internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)] out string? schemeAndAuthority, [NotNullWhen(true)] out string? path, out string? queryAndFragment)
{
// Strip off ?query and #fragment if they exist, since we're not going to look at them
string inputWithoutQueryFragment;
ExtractQueryAndFragment(input, out inputWithoutQueryFragment, out queryAndFragment);

// Use Uri class to parse the url into authority and path, use that to help decide
// where to split the string. Do not rebuild the url from the Uri instance, as that
Expand All @@ -48,20 +40,20 @@ internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)]
// To retain the same string as originally given, find the authority in the original url and include
// everything up to that.
int authorityIndex = inputWithoutQueryFragment.IndexOf(authority, StringComparison.OrdinalIgnoreCase);
if (authorityIndex != -1)
if (authorityIndex >= 0)
{
int schemeAndAuthorityLength = authorityIndex + authority.Length;
schemeAndAuthority = inputWithoutQueryFragment.Substring(0, schemeAndAuthorityLength);
schemeAndAuthority = input.AsSpan(0, schemeAndAuthorityLength);
path = inputWithoutQueryFragment.Substring(schemeAndAuthorityLength);
return true;
}
}
}

// Not a safe URL
schemeAndAuthority = null;
schemeAndAuthority = ReadOnlySpan<char>.Empty;
path = null;
queryAndFragment = null;
queryAndFragment = ReadOnlySpan<char>.Empty;
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -776,9 +776,11 @@ public void UrlEncodeUnicodeToBytes(string decoded, string encoded)
[InlineData("http://eXample.net:80/default.xxx?sdsd=sds", "http://eXample.net:80/default.xxx?sdsd=sds")]
[InlineData("http://EXAMPLE.NET/default.xxx?sdsd=sds", "http://EXAMPLE.NET/default.xxx?sdsd=sds")]
[InlineData("http://EXAMPLE.NET/d\u00E9fault.xxx?sdsd=sds", "http://EXAMPLE.NET/d%c3%a9fault.xxx?sdsd=sds")]
[InlineData("http://EXAMPLE.NET/d fault.xxx?sdsd=sds", "http://EXAMPLE.NET/d%20fault.xxx?sdsd=sds")]
[InlineData("file:///C/Users", "file:///C/Users")]
[InlineData("mailto:[email protected]", "mailto:[email protected]")]
[InlineData("http://example\u200E.net/", "http://example%e2%80%8e.net/")]
[InlineData("http://ex ample\u200E.net/", "http://ex%20ample%e2%80%8e.net/")]
public void UrlPathEncode(string decoded, string encoded)
{
Assert.Equal(encoded, HttpUtility.UrlPathEncode(decoded));
Expand Down