diff --git a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj index 5da18b0fae6c82..73aa6d15930f72 100644 --- a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj +++ b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj @@ -10,7 +10,6 @@ - s.AsSpan().IndexOfAny("<\"'&"); - private static bool IsNonAsciiByte(byte b) => b >= 0x7F || b < 0x20; - internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { int i = value.AsSpan().IndexOfAny(s_invalidJavaScriptChars); @@ -474,58 +472,6 @@ internal static byte[] UrlEncode(string str, Encoding e) : bytes; } - // Helper to encode the non-ASCII url characters only - private static string UrlEncodeNonAscii(string str, Encoding e) - { - Debug.Assert(!string.IsNullOrEmpty(str)); - Debug.Assert(e != null); - byte[] bytes = e.GetBytes(str); - byte[] encodedBytes = UrlEncodeNonAscii(bytes, 0, bytes.Length); - return Encoding.ASCII.GetString(encodedBytes); - } - - private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count) - { - int cNonAscii = 0; - - // count them first - for (int i = 0; i < count; i++) - { - if (IsNonAsciiByte(bytes[offset + i])) - { - cNonAscii++; - } - } - - // nothing to expand? - if (cNonAscii == 0) - { - return bytes; - } - - // expand not 'safe' characters into %XX, spaces to +s - byte[] expandedBytes = new byte[count + cNonAscii * 2]; - int pos = 0; - - for (int i = 0; i < count; i++) - { - byte b = bytes[offset + i]; - - if (IsNonAsciiByte(b)) - { - expandedBytes[pos++] = (byte)'%'; - expandedBytes[pos++] = (byte)HexConverter.ToCharLower(b >> 4); - expandedBytes[pos++] = (byte)HexConverter.ToCharLower(b); - } - else - { - expandedBytes[pos++] = b; - } - } - - return expandedBytes; - } - [Obsolete("This method produces non-standards-compliant output and has interoperability issues. 
The preferred alternative is UrlEncode(*).")]
         [return: NotNullIfNotNull(nameof(value))]
         internal static string? UrlEncodeUnicode(string? value)
@@ -580,21 +526,19 @@ private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count)
                 return value;
             }
 
-            string? schemeAndAuthority;
+            ReadOnlySpan<char> schemeAndAuthority;
             string? path;
-            string? queryAndFragment;
+            ReadOnlySpan<char> queryAndFragment;
 
             if (!UriUtil.TrySplitUriForPathEncode(value, out schemeAndAuthority, out path, out queryAndFragment))
             {
                 // If the value is not a valid url, we treat it as a relative url.
                 // We don't need to extract query string from the url since UrlPathEncode()
                 // does not encode query string.
-                schemeAndAuthority = null;
-                path = value;
-                queryAndFragment = null;
+                return UrlPathEncodeImpl(value);
             }
 
-            return schemeAndAuthority + UrlPathEncodeImpl(path) + queryAndFragment;
+            return string.Concat(schemeAndAuthority, UrlPathEncodeImpl(path), queryAndFragment);
         }
 
         // This is the original UrlPathEncode(string)
@@ -605,15 +549,52 @@ private static string UrlPathEncodeImpl(string value)
                 return value;
             }
 
-            // recurse in case there is a query string
-            int i = value.IndexOf('?');
-            if (i >= 0)
+            // Only characters in 0x21..0x7E are copied through unchanged. Space, control characters,
+            // DEL (0x7F) and everything non-ASCII are percent-encoded below.
+            int i = value.AsSpan().IndexOfAnyExceptInRange((char)0x21, (char)0x7E);
+            if (i < 0)
+            {
+                return value;
+            }
+
+            int indexOfQuery = value.IndexOf('?');
+            // Unsigned compare: -1 (no '?') becomes uint.MaxValue, so this only fires when a '?' precedes index i.
+            if ((uint)indexOfQuery < (uint)i)
             {
-                return string.Concat(UrlPathEncodeImpl(value.Substring(0, i)), value.AsSpan(i));
+                // Everything before the Query is valid ASCII
+                return value;
+            }
+
+            ReadOnlySpan<char> toEncode = indexOfQuery >= 0
+                ? value.AsSpan(i, indexOfQuery - i)
+                : value.AsSpan(i);
+
+            byte[] bytes = ArrayPool<byte>.Shared.Rent(Encoding.UTF8.GetMaxByteCount(toEncode.Length));
+            int utf8Length = Encoding.UTF8.GetBytes(toEncode, bytes);
+            char[] chars = ArrayPool<char>.Shared.Rent(utf8Length * 3);
+            int charCount = 0;
+            // Each UTF-8 byte outside 0x21..0x7E expands to three chars (%xx), hence the utf8Length * 3 rental.
+            foreach (byte b in bytes.AsSpan(0, utf8Length))
+            {
+                if (!char.IsBetween((char)b, (char)0x21, (char)0x7E))
+                {
+                    chars[charCount++] = '%';
+                    chars[charCount++] = HexConverter.ToCharLower(b >> 4);
+                    chars[charCount++] = HexConverter.ToCharLower(b);
+                }
+                else
+                {
+                    chars[charCount++] = (char)b;
+                }
             }
 
-            // encode DBCS characters and spaces only
-            return HttpEncoderUtility.UrlEncodeSpaces(UrlEncodeNonAscii(value, Encoding.UTF8));
+            ArrayPool<byte>.Shared.Return(bytes);
+            string result = string.Concat(
+                value.AsSpan(0, i),
+                chars.AsSpan(0, charCount),
+                indexOfQuery >= 0 ? value.AsSpan(indexOfQuery) : ReadOnlySpan<char>.Empty);
+            ArrayPool<char>.Shared.Return(chars);
+            return result;
         }
 
         private static bool ValidateUrlEncodingParameters([NotNullWhen(true)] byte[]? bytes, int offset, int count)
diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoderUtility.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoderUtility.cs
deleted file mode 100644
index a2cb3b9afe5c79..00000000000000
--- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoderUtility.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
-
-namespace System.Web.Util
-{
-    internal static class HttpEncoderUtility
-    {
-        // Helper to encode spaces only
-        [return: NotNullIfNotNull(nameof(str))]
-        internal static string? UrlEncodeSpaces(string? str) => str != null && str.Contains(' ') ? 
str.Replace(" ", "%20") : str;
-    }
-}
diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs
index 9ef10f6f689443..95f7363f8fb9cc 100644
--- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs
+++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs
@@ -7,33 +7,25 @@ namespace System.Web.Util
 {
     internal static class UriUtil
     {
-        // Just extracts the query string and fragment from the input path by splitting on the separator characters.
-        // Doesn't perform any validation as to whether the input represents a valid URL.
+        // Attempts to split a URI into its constituent pieces.
+        // Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment.
         // Concatenating the pieces back together will form the original input string.
-        private static void ExtractQueryAndFragment(string input, out string path, out string? queryAndFragment)
+        internal static bool TrySplitUriForPathEncode(string input, out ReadOnlySpan<char> schemeAndAuthority, [NotNullWhen(true)] out string? path, out ReadOnlySpan<char> queryAndFragment)
         {
+            // Strip off ?query and #fragment if they exist, since we're not going to look at them
             int queryFragmentSeparatorPos = input.AsSpan().IndexOfAny('?', '#'); // query fragment separators
+            string inputWithoutQueryFragment;
             if (queryFragmentSeparatorPos >= 0)
             {
-                path = input.Substring(0, queryFragmentSeparatorPos);
-                queryAndFragment = input.Substring(queryFragmentSeparatorPos);
+                inputWithoutQueryFragment = input.Substring(0, queryFragmentSeparatorPos);
+                queryAndFragment = input.AsSpan(queryFragmentSeparatorPos);
             }
             else
             {
                 // no query or fragment separator
-                path = input;
-                queryAndFragment = null;
+                inputWithoutQueryFragment = input;
+                queryAndFragment = ReadOnlySpan<char>.Empty;
             }
-        }
-
-        // Attempts to split a URI into its constituent pieces.
-        // Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment.
-        // Concatenating the pieces back together will form the original input string.
-        internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)] out string? schemeAndAuthority, [NotNullWhen(true)] out string? path, out string? queryAndFragment)
-        {
-            // Strip off ?query and #fragment if they exist, since we're not going to look at them
-            string inputWithoutQueryFragment;
-            ExtractQueryAndFragment(input, out inputWithoutQueryFragment, out queryAndFragment);
 
             // Use Uri class to parse the url into authority and path, use that to help decide
             // where to split the string. Do not rebuild the url from the Uri instance, as that
@@ -48,10 +40,11 @@ internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)]
                     // To retain the same string as originally given, find the authority in the original url and include
                     // everything up to that.
                     int authorityIndex = inputWithoutQueryFragment.IndexOf(authority, StringComparison.OrdinalIgnoreCase);
-                    if (authorityIndex != -1)
+                    if (authorityIndex >= 0)
                     {
                         int schemeAndAuthorityLength = authorityIndex + authority.Length;
-                        schemeAndAuthority = inputWithoutQueryFragment.Substring(0, schemeAndAuthorityLength);
+                        // inputWithoutQueryFragment is a prefix of input, so slicing input directly yields the same characters.
+                        schemeAndAuthority = input.AsSpan(0, schemeAndAuthorityLength);
                         path = inputWithoutQueryFragment.Substring(schemeAndAuthorityLength);
                         return true;
                     }
@@ -59,9 +52,9 @@ internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)]
             }
             // Not a safe URL
-            schemeAndAuthority = null;
+            schemeAndAuthority = ReadOnlySpan<char>.Empty;
             path = null;
-            queryAndFragment = null;
+            queryAndFragment = ReadOnlySpan<char>.Empty;
             return false;
         }
     }
 }
diff --git a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs
index 27942d50051001..fb4494072cba6c 100644
--- a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs
+++ b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs
@@ -776,9 +776,11 @@ public void UrlEncodeUnicodeToBytes(string decoded, string encoded)
         [InlineData("http://eXample.net:80/default.xxx?sdsd=sds", "http://eXample.net:80/default.xxx?sdsd=sds")]
         [InlineData("http://EXAMPLE.NET/default.xxx?sdsd=sds", "http://EXAMPLE.NET/default.xxx?sdsd=sds")]
         [InlineData("http://EXAMPLE.NET/d\u00E9fault.xxx?sdsd=sds", "http://EXAMPLE.NET/d%c3%a9fault.xxx?sdsd=sds")]
+        [InlineData("http://EXAMPLE.NET/d fault.xxx?sdsd=sds", "http://EXAMPLE.NET/d%20fault.xxx?sdsd=sds")]
         [InlineData("file:///C/Users", "file:///C/Users")]
         [InlineData("mailto:user@example.net", "mailto:user@example.net")]
         [InlineData("http://example\u200E.net/", "http://example%e2%80%8e.net/")]
+        [InlineData("http://ex ample\u200E.net/", "http://ex%20ample%e2%80%8e.net/")]
         public void UrlPathEncode(string decoded, string encoded)
         {
             Assert.Equal(encoded, HttpUtility.UrlPathEncode(decoded));