Skip to content

Commit c0ca2c2

Browse files
AlexRadch, teo-tsirpanis, stephentoub
authored
Includes type forwarding for System.Text.Unicode.Utf8 to the Microsoft.Bcl.Memory library (#111292)
* Add support for System.Text.Unicode.Utf8 This commit introduces the `System.Text.Unicode.Utf8` type to the `Microsoft.Bcl.Memory` library. It includes type forwarding for `Utf8` in `Microsoft.Bcl.Memory.Forwards.cs`, updates the documentation in `PACKAGE.md` to include `Utf8` functionality, and adds corresponding test cases in `Microsoft.Bcl.Memory.Tests.csproj`. The documentation now emphasizes `Utf8` alongside `Index`, `Range`, and `Base64Url`, highlighting its role in converting data between UTF-8 and UTF-16 encodings. * Enhance UTF-8 and Unicode support in BCL Memory Updated `PackageDescription` to include "Utf8" support. Added new `ItemGroup` for conditional compilation of UTF-8 and Unicode handling files for non-net8.0 frameworks. Modified visibility and implementations in `Ascii.Utility.Helpers.cs`, `Utf8.cs`, and `Utf8Utility` based on `MICROSOFT_BCL_MEMORY` define. * Enhance Unicode handling in tests and project structure Updated `Microsoft.Bcl.Memory.Tests.csproj` to include `UnicodeUtility.cs` and removed .NET 8.0 targeting condition. Modified `Utf8Tests.cs` by adjusting using directives and enhancing the `DecodeHex` method with conditional compilation for .NET 5.0+. * Add compilation constant for Microsoft BCL Memory Added a new property `<DefineConstants>$(DefineConstants);MICROSOFT_BCL_MEMORY</DefineConstants>` to the project file to define a new compilation constant for the project. * Apply suggestions from code review Co-authored-by: Theodore Tsirpanis <[email protected]> * Update Microsoft.Bcl.Memory for framework compatibility - Added polyfill for System.Numerics.BitOperations for .NET Standard 2.0. * Space * Remove MICROSOFT_BCL_MEMORY from project constants Removed the `DefineConstants` property from the project file, which included the constant `MICROSOFT_BCL_MEMORY`. This change may impact conditional compilation within the project. * Refactor Utf8Tests for .NET compatibility improvements * Use `Rune.DecodeFromUtf8` on all frameworks. 
For downlevel frameworks we add `Rune.cs` to `Microsoft.Bcl.Memory`. (cherry picked from commit 79ee05d) * Fix compile errors. (cherry picked from commit bf6f989) * Address PR feedback. (cherry picked from commit 445a232) --------- Co-authored-by: Theodore Tsirpanis <[email protected]> Co-authored-by: Stephen Toub <[email protected]>
1 parent 1db7a9c commit c0ca2c2

File tree

15 files changed

+451
-21
lines changed

15 files changed

+451
-21
lines changed

src/libraries/Microsoft.Bcl.Memory/src/Microsoft.Bcl.Memory.Forwards.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Index))]
55
[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Range))]
6+
#if NET
7+
[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Text.Unicode.Utf8))]
8+
#endif
69
#if NET9_0_OR_GREATER
710
[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Buffers.Text.Base64Url))]
811
#endif

src/libraries/Microsoft.Bcl.Memory/src/Microsoft.Bcl.Memory.csproj

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
44
<TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.1;netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
55
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
66
<DefineConstants>$(DefineConstants);MICROSOFT_BCL_MEMORY</DefineConstants>
77
<IsPackable>true</IsPackable>
8-
<PackageDescription>Provides Base64Url, Index and Range types support for .NET Framework and .NET Standard.</PackageDescription>
8+
<PackageDescription>Provides Base64Url, Utf8, Index, and Range types support for .NET Framework and .NET Standard.</PackageDescription>
99
</PropertyGroup>
1010

1111
<!-- DesignTimeBuild requires all the TargetFramework Derived Properties to not be present in the first property group. -->
@@ -27,6 +27,22 @@
2727
<Compile Include="$(CoreLibSharedDir)System\Buffers\Text\Base64Url\Base64UrlValidator.cs" Link="System\Buffers\Text\Base64Url\Base64UrlValidator.cs" />
2828
</ItemGroup>
2929

30+
<ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'">
31+
<Compile Include="Polyfills\System.Numerics.BitOperations.netstandard20.cs" />
32+
<Compile Include="System\ThrowHelper.cs" />
33+
<Compile Include="$(CoreLibSharedDir)System\Text\Ascii.Utility.cs" Link="System\Text\Ascii.Utility.cs" />
34+
<Compile Include="$(CoreLibSharedDir)System\Text\Ascii.Utility.Helpers.cs" Link="System\Text\Ascii.Utility.Helpers.cs" />
35+
<Compile Include="$(CoreLibSharedDir)System\Text\Rune.cs" Link="System\Text\Rune.cs" />
36+
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeDebug.cs" Link="System\Text\UnicodeDebug.cs" />
37+
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs" Link="System\Text\UnicodeUtility.cs" />
38+
<Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf16Utility.cs" Link="System\Text\Unicode\Utf16Utility.cs" />
39+
<Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf8.cs" Link="System\Text\Unicode\Utf8.cs" />
40+
<Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf8Utility.cs" Link="System\Text\Unicode\Utf8Utility.cs" />
41+
<Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf8Utility.Helpers.cs" Link="System\Text\Unicode\Utf8Utility.Helpers.cs" />
42+
<Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf8Utility.Transcoding.cs" Link="System\Text\Unicode\Utf8Utility.Transcoding.cs" />
43+
<Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf8Utility.Validation.cs" Link="System\Text\Unicode\Utf8Utility.Validation.cs" />
44+
</ItemGroup>
45+
3046
<ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'netstandard2.1'))">
3147
<Compile Include="$(CoreLibSharedDir)System\Index.cs" />
3248
<Compile Include="$(CoreLibSharedDir)System\Range.cs" />

src/libraries/Microsoft.Bcl.Memory/src/PACKAGE.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
## About
22

33
Provides `Index` and `Range` types to simplify slicing operations on collections for .NET Framework and .NET Standard 2.0.
4-
Provides `Base64Url` for encoding data in a URL-safe manner on .NET Framework and .NET Standard.
4+
Provides `Base64Url` for encoding data in a URL-safe manner on older .NET platforms.
5+
Provides `Utf8` for converting chunked data between UTF-8 and UTF-16 encodings on .NET Framework and .NET Standard 2.0.
56

67
This library is neither necessary nor recommended when targeting versions of .NET that include the relevant support.
78

89
## Key Features
910

1011
<!-- The key features of this package -->
1112

12-
* Enables the use of `Index` and `Range` types on older .NET platforms.
13+
* Enables the use of `Index` and `Range` types on .NET Framework and .NET Standard 2.0.
1314
* Provides `Base64Url` encoding, decoding, and validation for URL-safe data processing on older .NET platforms.
15+
* Provides `Utf8` for encoding, decoding, and validating chunked data when converting between UTF-8 and UTF-16 on .NET Framework and .NET Standard 2.0.
1416

1517
## How to Use
1618

@@ -64,6 +66,7 @@ The main types provided by this library are:
6466
* `System.Index`
6567
* `System.Range`
6668
* `System.Buffers.Text.Base64Url`
69+
* `System.Text.Unicode.Utf8`
6770

6871
## Additional Documentation
6972

@@ -74,6 +77,7 @@ API documentation
7477
* [System.Index](https://learn.microsoft.com/dotnet/api/system.index)
7578
* [System.Range](https://learn.microsoft.com/dotnet/api/system.range)
7679
* [System.Buffers.Text.Base64Url](https://learn.microsoft.com/dotnet/api/system.buffers.text.base64url)
80+
* [System.Text.Unicode.Utf8](https://learn.microsoft.com/dotnet/api/system.text.unicode.utf8)
7781

7882
## Feedback & Contributing
7983

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Runtime.CompilerServices;
5+
using System.Runtime.InteropServices;
6+
7+
// Contains a polyfill implementation of System.Numerics.BitOperations that works on netstandard2.0.
8+
// Implementation copied from:
9+
// https://github.com/dotnet/runtime/blob/82ab89241b90ca3d64b22971f3a1e248da72828a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs
10+
//
11+
// Some routines inspired by the Stanford Bit Twiddling Hacks by Sean Eron Anderson:
12+
// http://graphics.stanford.edu/~seander/bithacks.html
13+
14+
namespace System.Numerics
15+
{
16+
internal static class BitOperations
17+
{
18+
// C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants)
19+
// https://github.com/dotnet/roslyn/pull/24621
20+
21+
private static ReadOnlySpan<byte> TrailingZeroCountDeBruijn => // 32
22+
[
23+
00, 01, 28, 02, 29, 14, 24, 03,
24+
30, 22, 20, 15, 25, 17, 04, 08,
25+
31, 27, 13, 23, 21, 19, 16, 07,
26+
26, 12, 18, 06, 11, 05, 10, 09
27+
];
28+
29+
/// <summary>
30+
/// Count the number of trailing zero bits in an integer value.
31+
/// Similar in behavior to the x86 instruction TZCNT.
32+
/// </summary>
33+
/// <param name="value">The value.</param>
34+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
35+
public static int TrailingZeroCount(uint value)
36+
{
37+
// Zero must be handled explicitly: the unguarded fallback below would map 0->0, and the BSF contract is 0->undefined
38+
if (value == 0)
39+
{
40+
return 32;
41+
}
42+
43+
// uint.MaxValue >> 27 is always in range [0 - 31] so we use Unsafe.AddByteOffset to avoid bounds check
44+
return Unsafe.AddByteOffset(
45+
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u
46+
ref MemoryMarshal.GetReference(TrailingZeroCountDeBruijn),
47+
// uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
48+
(IntPtr)(int)(((value & (uint)-(int)value) * 0x077CB531u) >> 27)); // Multi-cast mitigates redundant conv.u8
49+
}
50+
51+
/// <summary>
52+
/// Rotates the specified value left by the specified number of bits.
53+
/// Similar in behavior to the x86 instruction ROL.
54+
/// </summary>
55+
/// <param name="value">The value to rotate.</param>
56+
/// <param name="offset">The number of bits to rotate by.
57+
/// Any value outside the range [0..31] is treated as congruent mod 32.</param>
58+
/// <returns>The rotated value.</returns>
59+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
60+
public static uint RotateLeft(uint value, int offset)
61+
=> (value << offset) | (value >> (32 - offset));
62+
63+
/// <summary>
64+
/// Rotates the specified value right by the specified number of bits.
65+
/// Similar in behavior to the x86 instruction ROR.
66+
/// </summary>
67+
/// <param name="value">The value to rotate.</param>
68+
/// <param name="offset">The number of bits to rotate by.
69+
/// Any value outside the range [0..31] is treated as congruent mod 32.</param>
70+
/// <returns>The rotated value.</returns>
71+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
72+
public static uint RotateRight(uint value, int offset)
73+
=> (value >> offset) | (value << (32 - offset));
74+
}
75+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics;
5+
using System.Diagnostics.CodeAnalysis;
6+
7+
namespace System
8+
{
9+
internal static class ThrowHelper
10+
{
11+
[DoesNotReturn]
12+
internal static void ThrowArgumentException_DestinationTooShort()
13+
{
14+
throw new ArgumentException(SR.Argument_DestinationTooShort, "destination");
15+
}
16+
17+
[DoesNotReturn]
18+
internal static void ThrowArgumentNullException(ExceptionArgument argument)
19+
{
20+
throw new ArgumentNullException(GetArgumentName(argument));
21+
}
22+
23+
[DoesNotReturn]
24+
internal static void ThrowArgumentOutOfRangeException(ExceptionArgument argument)
25+
{
26+
throw new ArgumentOutOfRangeException(GetArgumentName(argument));
27+
}
28+
29+
private static string GetArgumentName(ExceptionArgument argument)
30+
{
31+
switch (argument)
32+
{
33+
case ExceptionArgument.ch:
34+
return nameof(ExceptionArgument.ch);
35+
case ExceptionArgument.culture:
36+
return nameof(ExceptionArgument.culture);
37+
case ExceptionArgument.index:
38+
return nameof(ExceptionArgument.index);
39+
case ExceptionArgument.input:
40+
return nameof(ExceptionArgument.input);
41+
case ExceptionArgument.value:
42+
return nameof(ExceptionArgument.value);
43+
default:
44+
Debug.Fail("The enum value is not defined, please check the ExceptionArgument Enum.");
45+
return "";
46+
47+
}
48+
}
49+
}
50+
51+
//
52+
// The convention for this enum is using the argument name as the enum name
53+
//
54+
internal enum ExceptionArgument
55+
{
56+
ch,
57+
culture,
58+
index,
59+
input,
60+
value,
61+
}
62+
}

src/libraries/Microsoft.Bcl.Memory/tests/Microsoft.Bcl.Memory.Tests.csproj

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@
3030
<Compile Include="..\..\System.Runtime\tests\System.Runtime.Tests\System\IndexTests.cs">
3131
<Link>System\IndexTests.cs</Link>
3232
</Compile>
33+
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeDebug.cs" Link="System\Text\UnicodeDebug.cs" />
34+
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs" Link="System\Text\UnicodeUtility.cs" />
35+
<Compile Include="..\..\System.Runtime\tests\System.Runtime.Tests\System\Text\Unicode\Utf8Tests.cs">
36+
<Link>System\Text\Unicode\Utf8Tests.cs</Link>
37+
</Compile>
3338
</ItemGroup>
3439

3540
<ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETFramework'">

src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.Helpers.cs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,16 @@
44
using System.Diagnostics;
55
using System.Numerics;
66
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
78

89
namespace System.Text
910
{
10-
public static partial class Ascii
11+
#if SYSTEM_PRIVATE_CORELIB
12+
public
13+
#else
14+
internal
15+
#endif
16+
static partial class Ascii
1117
{
1218
/// <summary>
1319
/// A mask which selects only the high bit of each byte of the given <see cref="uint"/>.

0 commit comments

Comments
 (0)