Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/libraries/System.Data.Odbc/src/System.Data.Odbc.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ System.Data.Odbc.OdbcTransaction</PackageDescription>
Link="Common\DisableRuntimeMarshalling.cs" />
<Compile Include="$(CommonPath)System\Runtime\InteropServices\HandleRefMarshaller.cs"
Link="Common\System\Runtime\InteropServices\HandleRefMarshaller.cs" />
</ItemGroup>

<ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
<ProjectReference Include="..\..\System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj"
SetTargetFramework="TargetFramework=netstandard2.0"
OutputItemType="Analyzer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ System.Data.OleDb.OleDbTransaction</PackageDescription>
<ProjectReference Include="$(LibrariesProjectRoot)System.Diagnostics.PerformanceCounter\src\System.Diagnostics.PerformanceCounter.csproj" />
</ItemGroup>

<ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))">
<ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
<ProjectReference Include="..\..\System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj"
SetTargetFramework="TargetFramework=netstandard2.0"
OutputItemType="Analyzer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,14 +458,15 @@ private static string EmitIndexOfAnyCustomHelper(string set, Dictionary<string,
// characters _not_ contained in the set, and then do a search for the inverse of that, which will be
// all of the target ASCII characters and all of non-ASCII.
var asciiChars = new List<char>();
for (int i = 0; i <= 0x7f; i++)
for (int i = 0; i < 128; i++)
{
if (!RegexCharClass.CharInClass((char)i, set))
{
asciiChars.Add((char)i);
}
}

// If this is a known set, use a predetermined simple name for the helper.
string? helperName = set switch
{
RegexCharClass.DigitClass => "IndexOfAnyDigit",
Expand Down Expand Up @@ -496,6 +497,18 @@ private static string EmitIndexOfAnyCustomHelper(string set, Dictionary<string,

_ => null,
};

// If this set is just from a few Unicode categories, derive a name from the categories.
if (helperName is null)
{
Span<UnicodeCategory> categories = stackalloc UnicodeCategory[5]; // arbitrary limit to keep names from being too unwieldy
if (RegexCharClass.TryGetOnlyCategories(set, categories, out int numCategories, out bool negatedCategory))
{
helperName = $"IndexOfAny{(negatedCategory ? "Except" : "")}{string.Concat(categories.Slice(0, numCategories).ToArray().Select(c => c.ToString()))}";
}
}

// As a final fallback, manufacture a name unique to the full set description.
if (helperName is null)
{
using (SHA256 sha = SHA256.Create())
Expand All @@ -522,7 +535,7 @@ private static string EmitIndexOfAnyCustomHelper(string set, Dictionary<string,
$" int i = span.IndexOfAnyExcept({EmitIndexOfAnyValues(asciiChars.ToArray(), requiredHelpers)});");
lines.Add($" if ((uint)i < (uint)span.Length)");
lines.Add($" {{");
lines.Add($" if (span[i] <= 0x7f)");
lines.Add($" if (char.IsAscii(span[i]))");
lines.Add($" {{");
lines.Add($" return i;");
lines.Add($" }}");
Expand Down Expand Up @@ -1004,9 +1017,13 @@ void EmitFixedSet_LeftToRight()

// Use IndexOf{Any} to accelerate the skip loop via vectorization to match the first prefix.
// But we avoid using it for the relatively common case of the starting set being '.', aka anything other than
// a newline, as it's very rare to have long, uninterrupted sequences of newlines.
// a newline, as it's very rare to have long, uninterrupted sequences of newlines. And we avoid using it
// for the case of the starting set being anything (e.g. '.' with SingleLine), as in that case it'll always match
// the first char.
int setIndex = 0;
bool canUseIndexOf = primarySet.Set != RegexCharClass.NotNewLineClass;
bool canUseIndexOf =
primarySet.Set != RegexCharClass.NotNewLineClass &&
primarySet.Set != RegexCharClass.AnyClass;
bool needLoop = !canUseIndexOf || setsToUse > 1;

FinishEmitBlock loopBlock = default;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1327,23 +1327,21 @@ private static bool CharInClassInternal(char ch, string set, int start, int setL
return false;
}

return CharInCategory(ch, set, start, setLength, categoryLength);
return CharInCategory(ch, set.AsSpan(SetStartIndex + start + setLength, categoryLength));
}

private static bool CharInCategory(char ch, string set, int start, int setLength, int categoryLength)
private static bool CharInCategory(char ch, ReadOnlySpan<char> categorySetSegment)
{
UnicodeCategory chcategory = char.GetUnicodeCategory(ch);

int i = start + SetStartIndex + setLength;
int end = i + categoryLength;
while (i < end)
for (int i = 0; i < categorySetSegment.Length; i++)
{
int curcat = (short)set[i];
int curcat = (short)categorySetSegment[i];

if (curcat == 0)
{
// zero is our marker for a group of categories - treated as a unit
if (CharInCategoryGroup(chcategory, set, ref i))
if (CharInCategoryGroup(chcategory, categorySetSegment, ref i))
{
return true;
}
Expand Down Expand Up @@ -1379,8 +1377,6 @@ private static bool CharInCategory(char ch, string set, int start, int setLength
return true;
}
}

i++;
}

return false;
Expand All @@ -1390,7 +1386,7 @@ private static bool CharInCategory(char ch, string set, int start, int setLength
/// This is used for categories which are composed of other categories - L, N, Z, W...
/// These groups need special treatment when they are negated
/// </summary>
private static bool CharInCategoryGroup(UnicodeCategory chcategory, string category, ref int i)
private static bool CharInCategoryGroup(UnicodeCategory chcategory, ReadOnlySpan<char> category, ref int i)
{
int pos = i + 1;
int curcat = (short)category[pos];
Expand Down Expand Up @@ -1717,6 +1713,52 @@ _subtractor is null &&
}
}
}

// If the class now has a range that includes everything, and if it doesn't have subtraction,
// we can remove all of its categories, as they're duplicative (the set already includes everything).
if (!_negate &&
_subtractor is null &&
_categories?.Length > 0 &&
rangelist.Count == 1 && rangelist[0].First == 0 && rangelist[0].Last == LastChar)
{
_categories.Clear();
}

// If there's only a single character omitted from ranges, if there's no subtractor, and if there are categories,
// see if that character is in the categories. If it is, then we can replace the whole thing with a complete "any" range.
// If it's not, then we can remove the categories, as they're only duplicating the rest of the range, turning the set
// into a "not one". This primarily helps in the case of a synthesized set from analysis that ends up combining '.' with
// categories, as we want to reduce that set down to either [^\n] or [\0-\uFFFF]. (This can be extrapolated to any number
// of missing characters; in fact, categories in general are superfluous and the entire set can be represented as ranges.
// But categories serve as a space optimization, and we strike a balance between testing many characters and the time/complexity
// it takes to do so. Thus, we limit this to the common case of a single missing character.)
if (!_negate &&
_subtractor is null &&
_categories?.Length > 0 &&
rangelist.Count == 2 && rangelist[0].First == 0 && rangelist[0].Last + 2 == rangelist[1].First && rangelist[1].Last == LastChar)
{
var vsb = new ValueStringBuilder(stackalloc char[256]);
foreach (ReadOnlyMemory<char> chunk in _categories!.GetChunks())
{
vsb.Append(chunk.Span);
}

if (CharInCategory((char)(rangelist[0].Last + 1), vsb.AsSpan()))
{
rangelist.RemoveAt(1);
rangelist[0] = ('\0', LastChar);
}
else
{
_negate = true;
rangelist.RemoveAt(1);
char notOne = (char)(rangelist[0].Last + 1);
rangelist[0] = (notOne, notOne);
}
_categories.Clear();

vsb.Dispose();
}
}
}

Expand Down Expand Up @@ -1792,12 +1834,20 @@ public static string DescribeSet(string set)

void RenderRanges()
{
for (; index < SetStartIndex + set[SetLengthIndex]; index += 2)
int rangesEnd = SetStartIndex + set[SetLengthIndex];
while (index < rangesEnd)
{
ch1 = set[index];
ch2 = index + 1 < set.Length ?
(char)(set[index + 1] - 1) :
LastChar;
if (index + 1 < rangesEnd)
{
ch2 = (char)(set[index + 1] - 1);
index += 2;
}
else
{
ch2 = LastChar;
index++;
}

desc.Append(DescribeChar(ch1));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Globalization;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

Expand Down Expand Up @@ -97,6 +98,7 @@ internal abstract class RegexCompiler
private static readonly MethodInfo s_stringGetCharsMethod = typeof(string).GetMethod("get_Chars", new Type[] { typeof(int) })!;
private static readonly MethodInfo s_arrayResize = typeof(Array).GetMethod("Resize")!.MakeGenericMethod(typeof(int));
private static readonly MethodInfo s_mathMinIntInt = typeof(Math).GetMethod("Min", new Type[] { typeof(int), typeof(int) })!;
private static readonly MethodInfo s_memoryMarshalGetArrayDataReferenceIndexOfAnyValues = typeof(MemoryMarshal).GetMethod("GetArrayDataReference", new Type[] { Type.MakeGenericMethodParameter(0).MakeArrayType() })!.MakeGenericMethod(typeof(IndexOfAnyValues<char>))!;
// Note:
// Single-range helpers like IsAsciiLetterLower, IsAsciiLetterUpper, IsAsciiDigit, and IsBetween aren't used here, as the IL generated for those
// single-range checks is as cheap as the method call, and there's no readability issue as with the source generator.
Expand Down Expand Up @@ -834,9 +836,13 @@ void EmitFixedSet_LeftToRight()

// Use IndexOf{Any} to accelerate the skip loop via vectorization to match the first prefix.
// But we avoid using it for the relatively common case of the starting set being '.', aka anything other than
// a newline, as it's very rare to have long, uninterrupted sequences of newlines.
// a newline, as it's very rare to have long, uninterrupted sequences of newlines. And we avoid using it
// for the case of the starting set being anything (e.g. '.' with SingleLine), as in that case it'll always match
// the first char.
int setIndex = 0;
bool canUseIndexOf = primarySet.Set != RegexCharClass.NotNewLineClass;
bool canUseIndexOf =
primarySet.Set != RegexCharClass.NotNewLineClass &&
primarySet.Set != RegexCharClass.AnyClass;
bool needLoop = !canUseIndexOf || setsToUse > 1;

Label checkSpanLengthLabel = default;
Expand Down Expand Up @@ -6098,10 +6104,19 @@ private void LoadIndexOfAnyValues(ReadOnlySpan<char> chars)
int index = list.Count;
list.Add(IndexOfAnyValues.Create(chars));

// this._indexOfAnyValues[index]
// Logically do _indexOfAnyValues[index], but avoid the bounds check on accessing the array,
// and cast to the known derived sealed type to enable devirtualization.

// DerivedIndexOfAnyValues d = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(this._indexOfAnyValues), index);
// ... = d;
Ldthisfld(s_indexOfAnyValuesArrayField);
Ldc(index);
_ilg!.Emit(OpCodes.Ldelem_Ref);
Call(s_memoryMarshalGetArrayDataReferenceIndexOfAnyValues);
Ldc(index * IntPtr.Size);
Add();
_ilg!.Emit(OpCodes.Ldind_Ref);
LocalBuilder ioavLocal = _ilg!.DeclareLocal(list[index].GetType());
Stloc(ioavLocal);
Ldloc(ioavLocal);
}
}
}