Skip to content

Commit f1819bd

Browse files
Improve Ascii (and Utf8) encoding (#85266)
* Improve writing of lower vector part in ASCII conversion * from 10/17 instructions to 1 instruction for 64/32-bit x86 * Add [MethodImpl(MethodImplOptions.AggressiveInlining)] to NarrowUtf16ToAscii_Intrinsified * rewrite StoreLower without Sse2.StoreScalar * move helper to Vector128 and call in case conversion * remove unused helpers
1 parent 0c423af commit f1819bd

File tree

3 files changed

+25
-63
lines changed

3 files changed

+25
-63
lines changed

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2718,6 +2718,24 @@ public static unsafe void StoreAligned<T>(this Vector128<T> source, T* destinati
27182718
public static unsafe void StoreAlignedNonTemporal<T>(this Vector128<T> source, T* destination)
27192719
where T : unmanaged => source.StoreAligned(destination);
27202720

2721+
/// <summary>
2722+
/// Stores the lower 64 bits of <paramref name="source"/> to the memory location at <paramref name="destination"/>[<paramref name="elementOffset"/>] using an unaligned write.
2723+
/// </summary>
2724+
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
2725+
/// <param name="source">The vector whose lower 64 bits will be stored.</param>
2726+
/// <param name="destination">The reference to which <paramref name="elementOffset" /> is added (in units of <typeparamref name="T"/>) to locate where the lower 64 bits are written.</param>
2727+
/// <param name="elementOffset">The element offset from <paramref name="destination" /> at which the lower 64 bits of the vector will be stored.</param>
2728+
/// <remarks>
2729+
/// Uses double instead of long to get a single instruction instead of storing temporaries in a general-purpose register (or on the stack).
2730+
/// </remarks>
2731+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
2732+
internal static void StoreLowerUnsafe<T>(this Vector128<T> source, ref T destination, nuint elementOffset = 0)
2733+
where T : struct
2734+
{
2735+
ref byte address = ref Unsafe.As<T, byte>(ref Unsafe.Add(ref destination, elementOffset));
2736+
Unsafe.WriteUnaligned<double>(ref address, source.AsDouble().ToScalar());
2737+
}
2738+
27212739
/// <summary>Stores a vector at the given destination.</summary>
27222740
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
27232741
/// <param name="source">The vector that will be stored.</param>

src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs

Lines changed: 3 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -463,41 +463,6 @@ private static unsafe nuint ChangeCase<TFrom, TTo, TCasing>(TFrom* pSrc, TTo* pD
463463
return i;
464464
}
465465

466-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
467-
private static unsafe void Widen8To16AndAndWriteTo(Vector128<byte> narrowVector, char* pDest, nuint destOffset)
468-
{
469-
if (Vector256.IsHardwareAccelerated)
470-
{
471-
Vector256<ushort> wide = Vector256.WidenLower(narrowVector.ToVector256Unsafe());
472-
wide.StoreUnsafe(ref *(ushort*)pDest, destOffset);
473-
}
474-
else
475-
{
476-
Vector128.WidenLower(narrowVector).StoreUnsafe(ref *(ushort*)pDest, destOffset);
477-
Vector128.WidenUpper(narrowVector).StoreUnsafe(ref *(ushort*)pDest, destOffset + 8);
478-
}
479-
}
480-
481-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
482-
private static unsafe void Narrow16To8AndAndWriteTo(Vector128<ushort> wideVector, byte* pDest, nuint destOffset)
483-
{
484-
Vector128<byte> narrow = Vector128.Narrow(wideVector, wideVector);
485-
486-
if (Sse2.IsSupported)
487-
{
488-
// MOVQ is supported even on x86, unaligned accesses allowed
489-
Sse2.StoreScalar((ulong*)(pDest + destOffset), narrow.AsUInt64());
490-
}
491-
else if (Vector64.IsHardwareAccelerated)
492-
{
493-
narrow.GetLower().StoreUnsafe(ref *pDest, destOffset);
494-
}
495-
else
496-
{
497-
Unsafe.WriteUnaligned<ulong>(pDest + destOffset, narrow.AsUInt64().ToScalar());
498-
}
499-
}
500-
501466
[MethodImpl(MethodImplOptions.AggressiveInlining)]
502467
private static unsafe void ChangeWidthAndWriteTo<TFrom, TTo>(Vector128<TFrom> vector, TTo* pDest, nuint elementOffset)
503468
where TFrom : unmanaged
@@ -524,12 +489,9 @@ private static unsafe void ChangeWidthAndWriteTo<TFrom, TTo>(Vector128<TFrom> ve
524489
}
525490
else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1)
526491
{
527-
// narrowing operation required
528-
// since we know data is all-ASCII, special-case SSE2 to avoid unneeded PAND in Narrow call
529-
Vector128<byte> narrow = (Sse2.IsSupported)
530-
? Sse2.PackUnsignedSaturate(vector.AsInt16(), vector.AsInt16())
531-
: Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16());
532-
narrow.GetLower().StoreUnsafe(ref *(byte*)pDest, elementOffset);
492+
// narrowing operation required, we know data is all-ASCII so use extract helper
493+
Vector128<byte> narrow = ExtractAsciiVector(vector.AsUInt16(), vector.AsUInt16());
494+
narrow.StoreLowerUnsafe(ref *(byte*)pDest, elementOffset);
533495
}
534496
else
535497
{
@@ -556,25 +518,6 @@ private static unsafe Vector128<T> SignedLessThan<T>(Vector128<T> left, Vector12
556518
}
557519
}
558520

559-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
560-
private static unsafe Vector128<TTo> NarrowOrWidenLowerVectorUnsigned<TFrom, TTo>(Vector128<TFrom> vector)
561-
where TFrom : unmanaged
562-
where TTo : unmanaged
563-
{
564-
if (sizeof(TFrom) == 1 && sizeof(TTo) == 2)
565-
{
566-
return Vector128.WidenLower(vector.AsByte()).As<ushort, TTo>();
567-
}
568-
else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1)
569-
{
570-
return Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()).As<byte, TTo>();
571-
}
572-
else
573-
{
574-
throw new NotSupportedException();
575-
}
576-
}
577-
578521
private struct ToUpperConversion { }
579522
private struct ToLowerConversion { }
580523
}

src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,6 +1518,7 @@ private static Vector128<byte> ExtractAsciiVector(Vector128<ushort> vectorFirst,
15181518
}
15191519
}
15201520

1521+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
15211522
private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
15221523
{
15231524
// This method contains logic optimized using vector instructions for both x64 and Arm64.
@@ -1550,7 +1551,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,
15501551

15511552
ref byte asciiBuffer = ref *pAsciiBuffer;
15521553
Vector128<byte> asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
1553-
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer);
1554+
asciiVector.StoreLowerUnsafe(ref asciiBuffer, 0);
15541555
nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far
15551556

15561557
// We're going to get the best performance when we have aligned writes, so we'll take the
@@ -1577,7 +1578,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,
15771578

15781579
// Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination.
15791580
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
1580-
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
1581+
asciiVector.StoreLowerUnsafe(ref asciiBuffer, currentOffsetInElements);
15811582
}
15821583

15831584
// Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment
@@ -1630,7 +1631,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,
16301631

16311632
Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned.");
16321633
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
1633-
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
1634+
asciiVector.StoreLowerUnsafe(ref asciiBuffer, currentOffsetInElements);
16341635
currentOffsetInElements += SizeOfVector128 / 2;
16351636

16361637
goto Finish;

0 commit comments

Comments
 (0)