From e920339311fa0837cd7aba9b6080d68985d1e48d Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Fri, 31 May 2024 10:59:41 +0300 Subject: [PATCH 1/5] Reduce allocation in HttpUtility.UrlPathEncode. --- .../src/System/Web/Util/HttpEncoder.cs | 58 ++++++++++++++----- .../src/System/Web/Util/UriUtil.cs | 32 ++++------ .../tests/HttpUtility/HttpUtilityTest.cs | 2 + 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index 690645e0759147..8ff857fa2e2add 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -126,7 +126,7 @@ internal static void HtmlEncode(string? value, TextWriter output) private static int IndexOfHtmlAttributeEncodingChars(string s) => s.AsSpan().IndexOfAny("<\"'&"); - private static bool IsNonAsciiByte(byte b) => b >= 0x7F || b < 0x20; + private static bool IsNonAsciiOrSpaceByte(byte b) => b >= 0x7F || b <= 0x20; internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { @@ -580,21 +580,19 @@ private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count) return value; } - string? schemeAndAuthority; + ReadOnlySpan schemeAndAuthority; string? path; - string? queryAndFragment; + ReadOnlySpan queryAndFragment; if (!UriUtil.TrySplitUriForPathEncode(value, out schemeAndAuthority, out path, out queryAndFragment)) { // If the value is not a valid url, we treat it as a relative url. // We don't need to extract query string from the url since UrlPathEncode() // does not encode query string. - schemeAndAuthority = null; - path = value; - queryAndFragment = null; + return UrlPathEncodeImpl(value); } - return schemeAndAuthority + UrlPathEncodeImpl(path) + queryAndFragment; + return string.Concat(schemeAndAuthority, UrlPathEncodeImpl(path), queryAndFragment); } // This is the original UrlPathEncode(string) @@ -605,15 +603,49 @@ private static string UrlPathEncodeImpl(string value) return value; } - // recurse in case there is a query string - int i = value.IndexOf('?'); - if (i >= 0) + int i = value.AsSpan().IndexOfAnyExceptInRange((char)0x21, (char)0x7F); + if (i < 0) + { + return value; + } + + int indexOfQuery = value.IndexOf('?'); + if (indexOfQuery >= 0 && indexOfQuery < i) + { + // Everything before the Query is valid ASCII + return value; + } + + ReadOnlySpan toEncode = indexOfQuery >= 0 + ? value.AsSpan(i, indexOfQuery - i) + : value.AsSpan(i); + + byte[] bytes = ArrayPool.Shared.Rent(Encoding.UTF8.GetMaxByteCount(toEncode.Length)); + char[] chars = ArrayPool.Shared.Rent(bytes.Length * 3); + + int utf8Length = Encoding.UTF8.GetBytes(toEncode, bytes); + int charCount = 0; + foreach (byte b in bytes.AsSpan(0, utf8Length)) { - return string.Concat(UrlPathEncodeImpl(value.Substring(0, i)), value.AsSpan(i)); + if (IsNonAsciiOrSpaceByte(b)) + { + chars[charCount++] = '%'; + chars[charCount++] = HexConverter.ToCharLower(b >> 4); + chars[charCount++] = HexConverter.ToCharLower(b); + } + else + { + chars[charCount++] = (char)b; + } } - // encode DBCS characters and spaces only - return HttpEncoderUtility.UrlEncodeSpaces(UrlEncodeNonAscii(value, Encoding.UTF8)); + ArrayPool.Shared.Return(bytes); + string result = string.Concat( + value.AsSpan(0, i), + chars.AsSpan(0, charCount), + indexOfQuery >= 0 ? value.AsSpan(indexOfQuery) : ReadOnlySpan.Empty); + ArrayPool.Shared.Return(chars); + return result; } private static bool ValidateUrlEncodingParameters([NotNullWhen(true)] byte[]? bytes, int offset, int count) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs index 9ef10f6f689443..9d66d6192d5194 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs @@ -7,33 +7,25 @@ namespace System.Web.Util { internal static class UriUtil { - // Just extracts the query string and fragment from the input path by splitting on the separator characters. - // Doesn't perform any validation as to whether the input represents a valid URL. + // Attempts to split a URI into its constituent pieces. + // Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment. // Concatenating the pieces back together will form the original input string. - private static void ExtractQueryAndFragment(string input, out string path, out string? queryAndFragment) + internal static bool TrySplitUriForPathEncode(string input, out ReadOnlySpan schemeAndAuthority, [NotNullWhen(true)] out string? path, out ReadOnlySpan queryAndFragment) { + // Strip off ?query and #fragment if they exist, since we're not going to look at them int queryFragmentSeparatorPos = input.AsSpan().IndexOfAny('?', '#'); // query fragment separators + string inputWithoutQueryFragment; if (queryFragmentSeparatorPos >= 0) { - path = input.Substring(0, queryFragmentSeparatorPos); - queryAndFragment = input.Substring(queryFragmentSeparatorPos); + inputWithoutQueryFragment = input.Substring(0, queryFragmentSeparatorPos); + queryAndFragment = input.AsSpan(queryFragmentSeparatorPos); } else { // no query or fragment separator - path = input; - queryAndFragment = null; + inputWithoutQueryFragment = input; + queryAndFragment = ReadOnlySpan.Empty; } - } - - // Attempts to split a URI into its constituent pieces. - // Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment. - // Concatenating the pieces back together will form the original input string. - internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)] out string? schemeAndAuthority, [NotNullWhen(true)] out string? path, out string? queryAndFragment) - { - // Strip off ?query and #fragment if they exist, since we're not going to look at them - string inputWithoutQueryFragment; - ExtractQueryAndFragment(input, out inputWithoutQueryFragment, out queryAndFragment); // Use Uri class to parse the url into authority and path, use that to help decide // where to split the string. Do not rebuild the url from the Uri instance, as that @@ -51,7 +43,7 @@ internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)] if (authorityIndex != -1) { int schemeAndAuthorityLength = authorityIndex + authority.Length; - schemeAndAuthority = inputWithoutQueryFragment.Substring(0, schemeAndAuthorityLength); + schemeAndAuthority = input.AsSpan(0, schemeAndAuthorityLength); path = inputWithoutQueryFragment.Substring(schemeAndAuthorityLength); return true; } @@ -59,9 +51,9 @@ internal static bool TrySplitUriForPathEncode(string input, [NotNullWhen(true)] } // Not a safe URL - schemeAndAuthority = null; + schemeAndAuthority = ReadOnlySpan.Empty; path = null; - queryAndFragment = null; + queryAndFragment = ReadOnlySpan.Empty; return false; } } diff --git a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs index 27942d50051001..fb4494072cba6c 100644 --- a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs +++ b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs @@ -776,9 +776,11 @@ public void UrlEncodeUnicodeToBytes(string decoded, string encoded) [InlineData("http://eXample.net:80/default.xxx?sdsd=sds", "http://eXample.net:80/default.xxx?sdsd=sds")] [InlineData("http://EXAMPLE.NET/default.xxx?sdsd=sds", "http://EXAMPLE.NET/default.xxx?sdsd=sds")] [InlineData("http://EXAMPLE.NET/d\u00E9fault.xxx?sdsd=sds", "http://EXAMPLE.NET/d%c3%a9fault.xxx?sdsd=sds")] + [InlineData("http://EXAMPLE.NET/d fault.xxx?sdsd=sds", "http://EXAMPLE.NET/d%20fault.xxx?sdsd=sds")] [InlineData("file:///C/Users", "file:///C/Users")] [InlineData("mailto:user@example.net", "mailto:user@example.net")] [InlineData("http://example\u200E.net/", "http://example%e2%80%8e.net/")] + [InlineData("http://ex ample\u200E.net/", "http://ex%20ample%e2%80%8e.net/")] public void UrlPathEncode(string decoded, string encoded) { Assert.Equal(encoded, HttpUtility.UrlPathEncode(decoded)); From 6961eff6a8d2fe396c9b999742750b7440944e66 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Fri, 31 May 2024 14:59:00 +0300 Subject: [PATCH 2/5] Add feedback optimizations --- .../System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs | 4 ++-- .../System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index 8ff857fa2e2add..ff1f6cb0638acc 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -126,7 +126,7 @@ internal static void HtmlEncode(string? value, TextWriter output) private static int IndexOfHtmlAttributeEncodingChars(string s) => s.AsSpan().IndexOfAny("<\"'&"); - private static bool IsNonAsciiOrSpaceByte(byte b) => b >= 0x7F || b <= 0x20; + private static bool IsNonAsciiOrSpaceByte(byte b) => (uint)b - 0x20 - 1 >= 0x7F - 0x20 - 1; internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { @@ -610,7 +610,7 @@ private static string UrlPathEncodeImpl(string value) } int indexOfQuery = value.IndexOf('?'); - if (indexOfQuery >= 0 && indexOfQuery < i) + if ((uint)indexOfQuery < (uint)i) { // Everything before the Query is valid ASCII return value; diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs index 9d66d6192d5194..95f7363f8fb9cc 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/UriUtil.cs @@ -40,7 +40,7 @@ internal static bool TrySplitUriForPathEncode(string input, out ReadOnlySpan= 0) { int schemeAndAuthorityLength = authorityIndex + authority.Length; schemeAndAuthority = input.AsSpan(0, schemeAndAuthorityLength); From 82cdaf20472121311a99b8d3653514042c64b351 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Fri, 31 May 2024 22:21:47 +0300 Subject: [PATCH 3/5] new feedback --- .../src/System/Web/Util/HttpEncoder.cs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index ff1f6cb0638acc..962d4dfbaa0980 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -126,8 +126,6 @@ internal static void HtmlEncode(string? value, TextWriter output) private static int IndexOfHtmlAttributeEncodingChars(string s) => s.AsSpan().IndexOfAny("<\"'&"); - private static bool IsNonAsciiOrSpaceByte(byte b) => (uint)b - 0x20 - 1 >= 0x7F - 0x20 - 1; - internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { int i = value.AsSpan().IndexOfAny(s_invalidJavaScriptChars); @@ -621,13 +619,12 @@ private static string UrlPathEncodeImpl(string value) : value.AsSpan(i); byte[] bytes = ArrayPool.Shared.Rent(Encoding.UTF8.GetMaxByteCount(toEncode.Length)); - char[] chars = ArrayPool.Shared.Rent(bytes.Length * 3); - int utf8Length = Encoding.UTF8.GetBytes(toEncode, bytes); + char[] chars = ArrayPool.Shared.Rent(utf8Length * 3); int charCount = 0; foreach (byte b in bytes.AsSpan(0, utf8Length)) { - if (IsNonAsciiOrSpaceByte(b)) + if (!char.IsBetween((char)b, (char)0x21, (char)0x7F)) { chars[charCount++] = '%'; chars[charCount++] = HexConverter.ToCharLower(b >> 4); From fda0521070219d0e227772a3f973f17e44543686 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Tue, 4 Jun 2024 14:58:56 +0300 Subject: [PATCH 4/5] fix rebase --- .../src/System/Web/Util/HttpEncoder.cs | 52 ------------------- 1 file changed, 52 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index 962d4dfbaa0980..7bdb43b1e91cbc 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -472,58 +472,6 @@ internal static byte[] UrlEncode(string str, Encoding e) : bytes; } - // Helper to encode the non-ASCII url characters only - private static string UrlEncodeNonAscii(string str, Encoding e) - { - Debug.Assert(!string.IsNullOrEmpty(str)); - Debug.Assert(e != null); - byte[] bytes = e.GetBytes(str); - byte[] encodedBytes = UrlEncodeNonAscii(bytes, 0, bytes.Length); - return Encoding.ASCII.GetString(encodedBytes); - } - - private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count) - { - int cNonAscii = 0; - - // count them first - for (int i = 0; i < count; i++) - { - if (IsNonAsciiByte(bytes[offset + i])) - { - cNonAscii++; - } - } - - // nothing to expand? - if (cNonAscii == 0) - { - return bytes; - } - - // expand not 'safe' characters into %XX, spaces to +s - byte[] expandedBytes = new byte[count + cNonAscii * 2]; - int pos = 0; - - for (int i = 0; i < count; i++) - { - byte b = bytes[offset + i]; - - if (IsNonAsciiByte(b)) - { - expandedBytes[pos++] = (byte)'%'; - expandedBytes[pos++] = (byte)HexConverter.ToCharLower(b >> 4); - expandedBytes[pos++] = (byte)HexConverter.ToCharLower(b); - } - else - { - expandedBytes[pos++] = b; - } - } - - return expandedBytes; - } - [Obsolete("This method produces non-standards-compliant output and has interoperability issues. The preferred alternative is UrlEncode(*).")] [return: NotNullIfNotNull(nameof(value))] internal static string? UrlEncodeUnicode(string? value) From 8cb7c0a19b0cea5d8b2653e77011b64a5355aa51 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Wed, 12 Jun 2024 16:42:04 +0300 Subject: [PATCH 5/5] Remove HttpEncoderUtility.cs as it is not used anymore --- .../src/System.Web.HttpUtility.csproj | 1 - .../src/System/Web/Util/HttpEncoderUtility.cs | 15 --------------- 2 files changed, 16 deletions(-) delete mode 100644 src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoderUtility.cs diff --git a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj index 5da18b0fae6c82..73aa6d15930f72 100644 --- a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj +++ b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj @@ -10,7 +10,6 @@ - str != null && str.Contains(' ') ? str.Replace(" ", "%20") : str; - } -}