diff --git a/src/libraries/System.Memory/ref/System.Memory.cs b/src/libraries/System.Memory/ref/System.Memory.cs index 66b95a12b5c25c..bc4e19712a310d 100644 --- a/src/libraries/System.Memory/ref/System.Memory.cs +++ b/src/libraries/System.Memory/ref/System.Memory.cs @@ -350,6 +350,10 @@ public static void Sort(this System.Span keys, System.Span(this System.Span keys, System.Span items, System.Comparison comparison) { } public static void Sort(this System.Span span, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? { } public static void Sort(this System.Span keys, System.Span items, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? { } + public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, T separator) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, System.ReadOnlySpan separator) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params System.ReadOnlySpan separators) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, System.Buffers.SearchValues separators) where T : IEquatable { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, char separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int SplitAny(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separators, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } @@ 
-411,6 +415,14 @@ public static void Sort(this System.Span keys, Sy public static bool TryWrite(this System.Span destination, System.IFormatProvider? provider, System.Text.CompositeFormat format, out int charsWritten, TArg0 arg0, TArg1 arg1, TArg2 arg2) { throw null; } public static bool TryWrite(this Span destination, System.IFormatProvider? provider, System.Text.CompositeFormat format, out int charsWritten, params object?[] args) { throw null; } public static bool TryWrite(this Span destination, System.IFormatProvider? provider, System.Text.CompositeFormat format, out int charsWritten, params System.ReadOnlySpan args) { throw null; } + public ref struct SpanSplitEnumerator where T : System.IEquatable + { + private object _dummy; + private int _dummyPrimitive; + public readonly System.Range Current { get { throw null; } } + public System.MemoryExtensions.SpanSplitEnumerator GetEnumerator() { throw null; } + public bool MoveNext() { throw null; } + } [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] [System.Runtime.CompilerServices.InterpolatedStringHandlerAttribute] public ref struct TryWriteInterpolatedStringHandler diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs new file mode 100644 index 00000000000000..740e73c50e5f8f --- /dev/null +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs @@ -0,0 +1,214 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using Xunit; + +namespace System.SpanTests +{ + public static partial class ReadOnlySpanTests + { + [Fact] + public static void DefaultSpanSplitEnumeratorBehaviour() + { + var charSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); + Assert.Equal(new Range(0, 0), charSpanEnumerator.Current); + Assert.False(charSpanEnumerator.MoveNext()); + + // Implicit DoesNotThrow assertion + charSpanEnumerator.GetEnumerator(); + + var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); + Assert.Equal(new Range(0, 0), stringSpanEnumerator.Current); + Assert.False(stringSpanEnumerator.MoveNext()); + stringSpanEnumerator.GetEnumerator(); + } + + [Fact] + public static void Split_SingleElementSeparator() + { + // Split on default + Test((char[])['a', ' ', 'b'], default(char), (Range[])[0..3]); + Test((int[]) [1, 2, 3], default(int), (Range[])[0..3]); + Test((long[])[1, 2, 3], default(long), (Range[])[0..3]); + Test((byte[])[1, 2, 3], default(byte), (Range[])[0..3]); + Test((CustomStruct[])[new(1), new(2), new(3)], default(CustomStruct), (Range[])[0..3]); + Test((CustomClass[])[new(1), new(2), new(3)], default(CustomClass), (Range[])[0..3]); + + // Split no matching element + Test((char[])['a', ' ', 'b'], ',', (Range[])[0..3] ); + Test((int[]) [1, 2, 3], (int)4, (Range[])[0..3] ); + Test((long[])[1, 2, 3], (long)4, (Range[])[0..3] ); + Test((byte[])[1, 2, 3], (byte)4, (Range[])[0..3] ); + Test((CustomStruct[])[new(1), new(2), new(3)], new CustomStruct(4), (Range[])[0..3] ); + Test((CustomClass[])[new(1), new(2), new(3)], new CustomClass(4), (Range[])[0..3] ); + + // Split on sequence containing only a separator + Test((char[])[','], ',', (Range[])[0..0, 1..1] ); + Test((int[]) [1], (int)1, (Range[])[0..0, 1..1] ); + Test((long[])[1], (long)1, (Range[])[0..0, 1..1] ); + Test((byte[])[1], (byte)1, (Range[])[0..0, 1..1] ); + Test((CustomStruct[])[new(1)], new CustomStruct(1), (Range[])[0..0, 1..1] ); + Test((CustomClass[]) [new(1)], new 
CustomClass(1), (Range[])[0..0, 1..1] ); + + // Split on empty sequence with default separator + Test((char[])[], default(char), (Range[])[0..0] ); + Test((int[]) [], default(int), (Range[])[0..0] ); + Test((long[])[], default(long), (Range[])[0..0] ); + Test((byte[])[], default(byte), (Range[])[0..0] ); + Test((CustomStruct[])[], default(CustomStruct), (Range[])[0..0] ); + Test((CustomClass[]) [], default(CustomClass), (Range[])[0..0] ); + + Test((char[])['a', ',', 'b'], ',', (Range[]) [ 0..1, 2..3 ] ); + Test((int[]) [1, 2, 3], (int)2, (Range[]) [ 0..1, 2..3 ] ); + Test((long[])[1, 2, 3], (long)2, (Range[]) [ 0..1, 2..3 ] ); + Test((byte[])[1, 2, 3], (byte)2, (Range[]) [ 0..1, 2..3 ] ); + Test((CustomStruct[])[new(1), new(2), new(3)], new CustomStruct(2), (Range[]) [ 0..1, 2..3 ] ); + Test((CustomClass[])[new(1), new(2), new(3)], new CustomClass(2), (Range[]) [ 0..1, 2..3 ] ); + + Test((char[])['a', 'b', ',', ','], ',', (Range[]) [ 0..2, 3..3, 4..4 ] ); + Test((int[]) [1, 3, 2, 2], (int)2, (Range[]) [ 0..2, 3..3, 4..4 ] ); + Test((long[])[1, 3, 2, 2], (long)2, (Range[]) [ 0..2, 3..3, 4..4 ] ); + Test((byte[])[1, 3, 2, 2], (byte)2, (Range[]) [ 0..2, 3..3, 4..4 ] ); + Test((CustomStruct[])[new(1), new(3), new(2), new(2)], new CustomStruct(2), (Range[]) [ 0..2, 3..3, 4..4 ] ); + Test((CustomClass[])[new(1), new(3), new(2), new(2)], new CustomClass(2), (Range[]) [ 0..2, 3..3, 4..4 ] ); + + static void Test(T[] value, T separator, Range[] result) where T : IEquatable => + AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).Split(separator), result); + } + + [Fact] + public static void Split_SequenceSeparator() + { + // Split no separators + Test((char[])['a', ' ', 'b'], (char[])[], (Range[])[0..3]); + Test((int[]) [1, 2, 3], (int[]) [], (Range[])[0..3]); + Test((long[])[1, 2, 3], (long[])[], (Range[])[0..3]); + Test((byte[])[1, 2, 3], (byte[])[], (Range[])[0..3]); + Test((CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[], (Range[])[0..3]); + 
Test((CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[], (Range[])[0..3]); + + // Split no matching elements + Test((char[])['a', ' ', 'b'], (char[])[',', '.'], (Range[])[0..3]); + Test((int[]) [1, 2, 3], (int[]) [4, 3], (Range[])[0..3]); + Test((long[])[1, 2, 3], (long[])[4, 3], (Range[])[0..3]); + Test((byte[])[1, 2, 3], (byte[])[4, 3], (Range[])[0..3]); + Test((CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[new(4), new(3)], (Range[])[0..3]); + Test((CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[new(4), new(3)], (Range[])[0..3]); + + // Split on input span with only a single sequence separator + Test((char[])[',', '.'], (char[])[',', '.'], (Range[])[0..0, 2..2]); + Test((int[]) [4, 3], (int[]) [4, 3], (Range[])[0..0, 2..2]); + Test((long[])[4, 3], (long[])[4, 3], (Range[])[0..0, 2..2]); + Test((byte[])[4, 3], (byte[])[4, 3], (Range[])[0..0, 2..2]); + Test((CustomStruct[])[new(4), new(3)], (CustomStruct[])[new(4), new(3)], (Range[])[0..0, 2..2]); + Test((CustomClass[])[new(4), new(3)], (CustomClass[])[new(4), new(3)], (Range[])[0..0, 2..2]); + + // Split on empty sequence with default separator + Test((char[])[], (char[])[default(char)], (Range[])[0..0]); + Test((int[]) [], (int[]) [default(int)], (Range[])[0..0]); + Test((long[])[], (long[])[default(long)], (Range[])[0..0]); + Test((byte[])[], (byte[])[default(byte)], (Range[])[0..0]); + Test((CustomStruct[])[], (CustomStruct[])[default], (Range[])[0..0]); + Test((CustomClass[]) [], (CustomClass[])[default], (Range[])[0..0]); + + Test((char[])['a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..1, 3..4 ]); + Test((int[]) [1, 2, 4, 3], (int[])[2, 4], (Range[]) [ 0..1, 3..4 ]); + Test((long[])[1, 2, 4, 3], (long[])[2, 4], (Range[]) [ 0..1, 3..4 ]); + Test((byte[])[1, 2, 4, 3], (byte[])[2, 4], (Range[]) [ 0..1, 3..4 ]); + Test((CustomStruct[])[new(1), new(2), new(4), new(3)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..1, 3..4 ]); + Test((CustomClass[])[new(1), new(2), new(4), 
new(3)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..1, 3..4 ]); + + Test((char[])[',', '-', 'a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..0, 2..3, 5..6 ]); + Test((int[]) [2, 4, 3, 2, 4, 5], (int[]) [2, 4], (Range[]) [ 0..0, 2..3, 5..6 ]); + Test((long[])[2, 4, 3, 2, 4, 5], (long[])[2, 4], (Range[]) [ 0..0, 2..3, 5..6 ]); + Test((byte[])[2, 4, 3, 2, 4, 5], (byte[])[2, 4], (Range[]) [ 0..0, 2..3, 5..6 ]); + Test((CustomStruct[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..0, 2..3, 5..6 ]); + Test((CustomClass[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..0, 2..3, 5..6 ]); + + static void Test(T[] value, T[] separator, Range[] result) where T : IEquatable => + AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).Split(separator), result); + } + + [Fact] + public static void SplitAnySeparatorData() + { + // Split no separators + Test((char[])['a', ' ', 'b'], (char[])[], (Range[])[0..1, 2..3]); // an empty span of separators for char is handled as all whitespace being separators + Test((int[]) [1, 2, 3], (int[]) [], (Range[])[0..3]); + Test((long[])[1, 2, 3], (long[])[], (Range[])[0..3]); + Test((byte[])[1, 2, 3], (byte[])[], (Range[])[0..3]); + Test((CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[], (Range[])[0..3]); + Test((CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[], (Range[])[0..3]); + + // Split non-matching separators + Test((char[])['a', ' ', 'b'], (char[])[',', '.'], (Range[])[0..3]); + Test((int[]) [1, 2, 3], (int[]) [4, 5], (Range[])[0..3]); + Test((long[])[1, 2, 3], (long[])[4, 5], (Range[])[0..3]); + Test((byte[])[1, 2, 3], (byte[])[4, 5], (Range[])[0..3]); + Test((CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[new(4), new(5)], (Range[])[0..3]); + Test((CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[new(4), new(5)], (Range[])[0..3]); + + // Split on sequence containing only a separator + 
Test((char[])[','], (char[])[','], (Range[])[0..0, 1..1]); + Test((int[]) [1], (int[]) [1], (Range[])[0..0, 1..1]); + Test((long[])[1], (long[])[1], (Range[])[0..0, 1..1]); + Test((byte[])[1], (byte[])[1], (Range[])[0..0, 1..1]); + Test((CustomStruct[])[new(1)], (CustomStruct[])[new(1)], (Range[])[0..0, 1..1]); + Test((CustomClass[]) [new(1)], (CustomClass[])[new(1)], (Range[])[0..0, 1..1]); + + // Split on empty sequence with default separator + Test((char[])[], (char[])[default(char)], (Range[])[0..0]); + Test((int[]) [], (int[]) [default(int)], (Range[])[0..0]); + Test((long[])[], (long[])[default(long)], (Range[])[0..0]); + Test((byte[])[], (byte[])[default(byte)], (Range[])[0..0]); + Test((CustomStruct[])[], (CustomStruct[])[new(default)], (Range[])[0..0]); + Test((CustomClass[]) [], (CustomClass[])[new(default)], (Range[])[0..0]); + + Test((char[])['a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..1, 2..2, 3..4 ]); + Test((int[]) [1, 2, 4, 3], (int[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ]); + Test((long[])[1, 2, 4, 3], (long[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ]); + Test((byte[])[1, 2, 4, 3], (byte[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ]); + Test((CustomStruct[])[new(1), new(2), new(4), new(3)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..1, 2..2, 3..4 ]); + Test((CustomClass[])[new(1), new(2), new(4), new(3)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..1, 2..2, 3..4 ]); + + Test((char[])[',', '-', 'a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ]); + Test((int[]) [2, 4, 3, 2, 4, 5], (int[]) [2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ]); + Test((long[])[2, 4, 3, 2, 4, 5], (long[])[2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ]); + Test((byte[])[2, 4, 3, 2, 4, 5], (byte[])[2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ]); + Test((CustomStruct[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ]); + 
Test((CustomClass[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ]); + + static void Test(T[] value, T[] separator, Range[] result) where T : IEquatable + { + AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).SplitAny(separator), result); + + if (value is char[] source && + separator is char[] separators && + separators.Length > 0) // the SearchValues overload does not special-case empty + { + var charEnumerator = new ReadOnlySpan(source).SplitAny(SearchValues.Create(separators)); + AssertEnsureCorrectEnumeration(charEnumerator, result); + } + } + } + + private static void AssertEnsureCorrectEnumeration(MemoryExtensions.SpanSplitEnumerator enumerator, Range[] result) where T : IEquatable + { + Assert.Equal(new Range(0, 0), enumerator.Current); + + for (int i = 0; i < result.Length; i++) + { + Assert.True(enumerator.MoveNext()); + Assert.Equal(result[i], enumerator.Current); + } + + Assert.False(enumerator.MoveNext()); + } + + public record struct CustomStruct(int value) : IEquatable; + + public record class CustomClass(int value) : IEquatable; + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 319edd9039f8f4..38b40b284d1b25 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -186,6 +186,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index dccae42b3ef1ab..95ece6f6cb52c8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -3637,6 +3637,63 @@ private static void SliceLongerSpanToMatchShorterLength(ref ReadOnlySpan s Debug.Assert(span.Length == other.Length); } + /// + /// Returns a 
type that allows for enumeration of each element within a split span + /// using the provided separator element. + /// + /// The type of the elements. + /// The source span to be enumerated. + /// The separator element to be used to split the provided span. + /// Returns a . + public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) where T : IEquatable => + new SpanSplitEnumerator(source, separator); + + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using the provided separator span. + /// + /// The type of the elements. + /// The source span to be enumerated. + /// The separator span to be used to split the provided span. + /// Returns a . + public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) where T : IEquatable => + new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); + + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using any of the provided elements. + /// + /// The type of the elements. + /// The source span to be enumerated. + /// The separators to be used to split the provided span. + /// Returns a . + /// + /// If is and if is empty, + /// all Unicode whitespace characters are used as the separators. This matches the behavior of when + /// and related overloads are used with an empty separator array, + /// or when + /// is used with an empty separator span. + /// + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [UnscopedRef] params ReadOnlySpan separators) where T : IEquatable => + new SpanSplitEnumerator(source, separators); + + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using the provided . + /// + /// The type of the elements. + /// The source span to be enumerated. + /// The to be used to split the provided span. + /// Returns a . + /// + /// Unlike , the is not checked for being empty.
+ /// An empty will result in no separators being found, regardless of the type of , + /// whereas will use all Unicode whitespace characters as separators if is + /// empty and is . + /// + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) where T : IEquatable => + new SpanSplitEnumerator(source, separators); + /// /// Parses the source for the specified , populating the span /// with instances representing the regions between the separators. @@ -4252,6 +4309,174 @@ private static bool TryWrite(Span destination, IForma return TryWrite(destination, provider, ref handler, out charsWritten); } + /// + /// Enables enumerating each split within a that has been divided using one or more separators. + /// + public ref struct SpanSplitEnumerator where T : IEquatable + { + /// The input span being split. + private readonly ReadOnlySpan _span; + + /// A single separator to use when is . + private readonly T _separator = default!; + /// + /// A separator span to use when is (in which case + /// it's treated as a single separator) or (in which case it's treated as a set of separators). + /// + private readonly ReadOnlySpan _separatorBuffer; + /// A set of separators to use when is . + private readonly SearchValues _searchValues = default!; + + /// Mode that dictates how the instance was configured and how its fields should be used in . + private SpanSplitEnumeratorMode _splitMode; + /// The inclusive starting index in of the current range. + private int _startCurrent = 0; + /// The exclusive ending index in of the current range. + private int _endCurrent = 0; + /// The index in from which the next separator search should start. + private int _startNext = 0; + + /// Gets an enumerator that allows for iteration over the split span. + /// Returns a that can be used to iterate over the split span. + public SpanSplitEnumerator GetEnumerator() => this; + + /// Gets the current element of the enumeration. 
+ /// Returns a instance that indicates the bounds of the current element within the source span. + public Range Current => new Range(_startCurrent, _endCurrent); + + /// Initializes the enumerator for . + internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) + { + _span = span; + _splitMode = SpanSplitEnumeratorMode.SearchValues; + _searchValues = searchValues; + } + + /// Initializes the enumerator for . + /// + /// If is empty and is , as an optimization + /// it will instead use with a cached + /// for all whitespace characters. + /// + internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separators) + { + _span = span; + if (typeof(T) == typeof(char) && separators.Length == 0) + { + _searchValues = Unsafe.As>(string.SearchValuesStorage.WhiteSpaceChars); + _splitMode = SpanSplitEnumeratorMode.SearchValues; + } + else + { + _separatorBuffer = separators; + _splitMode = SpanSplitEnumeratorMode.Any; + } + } + + /// Initializes the enumerator for (or if the separator is empty). + /// must be true. + internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bool treatAsSingleSeparator) + { + Debug.Assert(treatAsSingleSeparator, "Should only ever be called as true; exists to differentiate from separators overload"); + + _span = span; + _separatorBuffer = separator; + _splitMode = separator.Length == 0 ? + SpanSplitEnumeratorMode.EmptySequence : + SpanSplitEnumeratorMode.Sequence; + } + + /// Initializes the enumerator for . + internal SpanSplitEnumerator(ReadOnlySpan span, T separator) + { + _span = span; + _separator = separator; + _splitMode = SpanSplitEnumeratorMode.SingleElement; + } + + /// + /// Advances the enumerator to the next element of the enumeration. + /// + /// if the enumerator was successfully advanced to the next element; if the enumerator has passed the end of the enumeration. + public bool MoveNext() + { + // Search for the next separator index.
+ int separatorIndex, separatorLength; + switch (_splitMode) + { + case SpanSplitEnumeratorMode.None: + return false; + + case SpanSplitEnumeratorMode.SingleElement: + separatorIndex = _span.Slice(_startNext).IndexOf(_separator); + separatorLength = 1; + break; + + case SpanSplitEnumeratorMode.Any: + separatorIndex = _span.Slice(_startNext).IndexOfAny(_separatorBuffer); + separatorLength = 1; + break; + + case SpanSplitEnumeratorMode.Sequence: + separatorIndex = _span.Slice(_startNext).IndexOf(_separatorBuffer); + separatorLength = _separatorBuffer.Length; + break; + + case SpanSplitEnumeratorMode.EmptySequence: + separatorIndex = -1; + separatorLength = 1; + break; + + default: + Debug.Assert(_splitMode == SpanSplitEnumeratorMode.SearchValues, $"Unknown split mode: {_splitMode}"); + separatorIndex = _span.Slice(_startNext).IndexOfAny(_searchValues); + separatorLength = 1; + break; + } + + _startCurrent = _startNext; + if (separatorIndex >= 0) + { + _endCurrent = _startCurrent + separatorIndex; + _startNext = _endCurrent + separatorLength; + } + else + { + _startNext = _endCurrent = _span.Length; + + // Set _splitMode to None so that subsequent MoveNext calls will return false. + _splitMode = SpanSplitEnumeratorMode.None; + } + + return true; + } + } + + /// Indicates in which mode is operating, with regard to how it should interpret its state. + private enum SpanSplitEnumeratorMode + { + /// Either a default was used, or the enumerator has finished enumerating and there's no more work to do. + None = 0, + + /// A single T separator was provided. + SingleElement, + + /// A span of separators was provided, each of which should be treated independently. + Any, + + /// The separator is a span of elements to be treated as a single sequence. + Sequence, + + /// The separator is an empty sequence, such that no splits should be performed.
+ EmptySequence, + + /// + /// A was provided and should behave the same as with but with the separators in the + /// instance instead of in a . + /// + SearchValues + } + /// Provides a handler used by the language compiler to format interpolated strings into character spans. [EditorBrowsable(EditorBrowsableState.Never)] [InterpolatedStringHandler] diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 3d421f35d99a84..707b25442029a4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -33,6 +33,21 @@ internal static class SearchValuesStorage /// public static readonly SearchValues NewLineChars = SearchValues.Create(NewLineCharsExceptLineFeed + "\n"); + + /// A for all of the Unicode whitespace characters + public static readonly SearchValues WhiteSpaceChars = + SearchValues.Create("\t\n\v\f\r\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"); + +#if DEBUG + static SearchValuesStorage() + { + SearchValues sv = WhiteSpaceChars; + for (int i = 0; i <= char.MaxValue; i++) + { + Debug.Assert(char.IsWhiteSpace((char)i) == sv.Contains((char)i)); + } + } +#endif } internal const int StackallocIntBufferSizeLimit = 128; diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs index 037e7a98a95878..3ed49c1ef14921 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Collections.Generic; using System.Linq; using Xunit; @@ -132,6 +133,7 @@ public static void SplitOneCountSingleResult() public static void SplitNoMatchSingleResult() { const string Value = "a b"; + ReadOnlySpan SpanValue = "a b"; const int Count = int.MaxValue; const StringSplitOptions Options = StringSplitOptions.None; @@ -152,17 +154,22 @@ public static void SplitNoMatchSingleResult() Assert.Equal(expected, Value.Split(new[] { "," }, Count, Options)); Range[] ranges = new Range[10]; - Assert.Equal(1, Value.AsSpan().Split(ranges, ',', Options)); + Assert.Equal(1, SpanValue.Split(ranges, ',', Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); - Assert.Equal(1, Value.AsSpan().Split(ranges, ",", Options)); + Assert.Equal(1, SpanValue.Split(ranges, ",", Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); - Assert.Equal(1, Value.AsSpan().SplitAny(ranges, ",", Options)); + Assert.Equal(1, SpanValue.SplitAny(ranges, ",", Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); + + AssertEqual(expected, SpanValue, SpanValue.Split(',')); + AssertEqual(expected, SpanValue, SpanValue.Split(",")); + AssertEqual(expected, SpanValue, SpanValue.SplitAny(',')); + AssertEqual(expected, SpanValue, SpanValue.SplitAny(Buffers.SearchValues.Create([',']))); } private const int M = int.MaxValue; @@ -519,6 +526,10 @@ public static void SplitCharSeparator(string value, char separator, int count, S Assert.Equal(expected, value.Split(new[] { separator })); Assert.Equal(expected, value.Split((ReadOnlySpan)new[] { separator })); Assert.Equal(expected, value.Split(separator.ToString())); + + AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny([separator])); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create([separator]))); } Range[] ranges = new Range[count == int.MaxValue ? 
value.Length + 1 : count]; @@ -576,6 +587,7 @@ public static void SplitStringSeparator(string value, string separator, int coun if (count == int.MaxValue && options == StringSplitOptions.None) { Assert.Equal(expected, value.Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); } Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; @@ -634,6 +646,15 @@ public static void SplitCharArraySeparator(string value, char[] separators, int Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); + + if (count == int.MaxValue && options is StringSplitOptions.None) + { + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(separators)); + if (separators is { Length: > 0 }) // the SearchValues overload doesn't special-case empty to mean whitespace + { + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); + } + } } [Theory] @@ -683,6 +704,11 @@ public static void SplitStringArraySeparator(string value, string[] separators, Range[] ranges = new Range[count == int.MaxValue ? 
value.Length + 1 : count]; Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); + + if (separators is { Length: 1 } && count == int.MaxValue && options == StringSplitOptions.None) + { + AssertEqual(expected, value, value.AsSpan().Split(separators[0])); + } } private static string[] ToStringArray(char[] source) @@ -697,5 +723,16 @@ private static string[] ToStringArray(char[] source) } return result; } + + private static void AssertEqual(string[] items, ReadOnlySpan source, MemoryExtensions.SpanSplitEnumerator enumerator) + { + foreach (string item in items) + { + Assert.True(enumerator.MoveNext()); + Assert.Equal(item, source[enumerator.Current].ToString()); + } + + Assert.False(enumerator.MoveNext()); + } } }