Skip to content
289 changes: 269 additions & 20 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,213 @@ CorInfoType Compiler::getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrins
return (diffInsCount >= 2);
}

// HWIntrinsicIsaRange: the span of NamedIntrinsic IDs belonging to a single ISA.
//
// Both endpoints are inclusive. An ISA that declares no intrinsics is
// represented with both fields set to NI_Illegal.
struct HWIntrinsicIsaRange
{
    NamedIntrinsic FirstId; // First NamedIntrinsic of the ISA (inclusive)
    NamedIntrinsic LastId;  // Last NamedIntrinsic of the ISA (inclusive)
};

// hwintrinsicIsaRangeArray: maps each instruction set to the inclusive range
// [FirstId, LastId] of NamedIntrinsic values declared for that ISA.
//
// NOTE(review): entry i is read back as InstructionSet (i + 1) (see
// ValidateHWIntrinsicIsaRangeArray and the lookup in lookupId), so the rows
// must stay in exactly the same order as the target's instruction set
// enumeration. ISAs that declare no intrinsics keep a
// { NI_Illegal, NI_Illegal } placeholder row so later indices are not shifted.
static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
    // clang-format off
#if defined(TARGET_XARCH)
    { FIRST_NI_X86Base, LAST_NI_X86Base },
    { FIRST_NI_SSE, LAST_NI_SSE },
    { FIRST_NI_SSE2, LAST_NI_SSE2 },
    { FIRST_NI_SSE3, LAST_NI_SSE3 },
    { FIRST_NI_SSSE3, LAST_NI_SSSE3 },
    { FIRST_NI_SSE41, LAST_NI_SSE41 },
    { FIRST_NI_SSE42, LAST_NI_SSE42 },
    { FIRST_NI_AVX, LAST_NI_AVX },
    { FIRST_NI_AVX2, LAST_NI_AVX2 },
    { FIRST_NI_AES, LAST_NI_AES },
    { FIRST_NI_BMI1, LAST_NI_BMI1 },
    { FIRST_NI_BMI2, LAST_NI_BMI2 },
    { FIRST_NI_FMA, LAST_NI_FMA },
    { FIRST_NI_LZCNT, LAST_NI_LZCNT },
    { FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ },
    { FIRST_NI_POPCNT, LAST_NI_POPCNT },
    { FIRST_NI_Vector128, LAST_NI_Vector128 },
    { FIRST_NI_Vector256, LAST_NI_Vector256 },
    { FIRST_NI_Vector512, LAST_NI_Vector512 },
    { FIRST_NI_AVXVNNI, LAST_NI_AVXVNNI },
    { NI_Illegal, NI_Illegal }, // MOVBE
    { FIRST_NI_X86Serialize, LAST_NI_X86Serialize },
    { NI_Illegal, NI_Illegal }, // EVEX
    { FIRST_NI_AVX512F, LAST_NI_AVX512F },
    { FIRST_NI_AVX512F_VL, LAST_NI_AVX512F_VL },
    { FIRST_NI_AVX512BW, LAST_NI_AVX512BW },
    { FIRST_NI_AVX512BW_VL, LAST_NI_AVX512BW_VL },
    { FIRST_NI_AVX512CD, LAST_NI_AVX512CD },
    { FIRST_NI_AVX512CD_VL, LAST_NI_AVX512CD_VL },
    { FIRST_NI_AVX512DQ, LAST_NI_AVX512DQ },
    { FIRST_NI_AVX512DQ_VL, LAST_NI_AVX512DQ_VL },
    { FIRST_NI_AVX512VBMI, LAST_NI_AVX512VBMI },
    { FIRST_NI_AVX512VBMI_VL, LAST_NI_AVX512VBMI_VL },
    { FIRST_NI_AVX10v1, LAST_NI_AVX10v1 },
    { FIRST_NI_AVX10v1_V512, LAST_NI_AVX10v1_V512 },
    { NI_Illegal, NI_Illegal }, // VectorT128
    { NI_Illegal, NI_Illegal }, // VectorT256
    { NI_Illegal, NI_Illegal }, // VectorT512
    { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 },
    { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 },
    { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 },
    { NI_Illegal, NI_Illegal }, // SSE3_X64
    { NI_Illegal, NI_Illegal }, // SSSE3_X64
    { FIRST_NI_SSE41_X64, LAST_NI_SSE41_X64 },
    { FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 },
    { NI_Illegal, NI_Illegal }, // AVX_X64
    { NI_Illegal, NI_Illegal }, // AVX2_X64
    { NI_Illegal, NI_Illegal }, // AES_X64
    { FIRST_NI_BMI1_X64, LAST_NI_BMI1_X64 },
    { FIRST_NI_BMI2_X64, LAST_NI_BMI2_X64 },
    { NI_Illegal, NI_Illegal }, // FMA_X64
    { FIRST_NI_LZCNT_X64, LAST_NI_LZCNT_X64 },
    { NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64
    { FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 },
    { NI_Illegal, NI_Illegal }, // AVXVNNI_X64
    { NI_Illegal, NI_Illegal }, // MOVBE_X64
    { NI_Illegal, NI_Illegal }, // X86Serialize_X64
    { NI_Illegal, NI_Illegal }, // EVEX_X64
    { FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 },
    { NI_Illegal, NI_Illegal }, // AVX512F_VL_X64
    { NI_Illegal, NI_Illegal }, // AVX512BW_X64
    { NI_Illegal, NI_Illegal }, // AVX512BW_VL_X64
    { NI_Illegal, NI_Illegal }, // AVX512CD_X64
    { NI_Illegal, NI_Illegal }, // AVX512CD_VL_X64
    { NI_Illegal, NI_Illegal }, // AVX512DQ_X64
    { NI_Illegal, NI_Illegal }, // AVX512DQ_VL_X64
    { NI_Illegal, NI_Illegal }, // AVX512VBMI_X64
    { NI_Illegal, NI_Illegal }, // AVX512VBMI_VL_X64
    { FIRST_NI_AVX10v1_X64, LAST_NI_AVX10v1_X64 },
    { NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64
#elif defined (TARGET_ARM64)
    { FIRST_NI_ArmBase, LAST_NI_ArmBase },
    { FIRST_NI_AdvSimd, LAST_NI_AdvSimd },
    { FIRST_NI_Aes, LAST_NI_Aes },
    { FIRST_NI_Crc32, LAST_NI_Crc32 },
    { FIRST_NI_Dp, LAST_NI_Dp },
    { FIRST_NI_Rdm, LAST_NI_Rdm },
    { FIRST_NI_Sha1, LAST_NI_Sha1 },
    { FIRST_NI_Sha256, LAST_NI_Sha256 },
    { NI_Illegal, NI_Illegal }, // Atomics
    { FIRST_NI_Vector64, LAST_NI_Vector64 },
    { FIRST_NI_Vector128, LAST_NI_Vector128 },
    { NI_Illegal, NI_Illegal }, // Dczva
    { NI_Illegal, NI_Illegal }, // Rcpc
    { NI_Illegal, NI_Illegal }, // VectorT128
    { NI_Illegal, NI_Illegal }, // Rcpc2
    { FIRST_NI_Sve, LAST_NI_Sve },
    { FIRST_NI_ArmBase_Arm64, LAST_NI_ArmBase_Arm64 },
    { FIRST_NI_AdvSimd_Arm64, LAST_NI_AdvSimd_Arm64 },
    { NI_Illegal, NI_Illegal }, // Aes_Arm64
    { FIRST_NI_Crc32_Arm64, LAST_NI_Crc32_Arm64 },
    { NI_Illegal, NI_Illegal }, // Dp_Arm64
    { FIRST_NI_Rdm_Arm64, LAST_NI_Rdm_Arm64 },
    { NI_Illegal, NI_Illegal }, // Sha1_Arm64
    { NI_Illegal, NI_Illegal }, // Sha256_Arm64
    { NI_Illegal, NI_Illegal }, // Sve_Arm64
#else
#error Unsupported platform
#endif
    // clang-format on
};

#if defined(DEBUG)
// ValidateHWIntrinsicInfo: sanity-checks a single intrinsic metadata entry.
//
// Arguments:
//    isa  -- the instruction set the entry is expected to belong to
//    ni   -- the NamedIntrinsic the entry was looked up with
//    info -- the metadata entry being validated
static void ValidateHWIntrinsicInfo(CORINFO_InstructionSet isa, NamedIntrinsic ni, const HWIntrinsicInfo& info)
{
    // The entry must be the one we looked up and must be registered under
    // the ISA that owns the range it was found in.
    assert(info.id == ni);
    assert(info.isa == isa);

    // A simdSize of -1 or 0 is skipped here; any other value must be one of
    // the SIMD sizes the target actually supports.
    const int simdSize = info.simdSize;

    if ((simdSize != 0) && (simdSize != -1))
    {
#if defined(TARGET_ARM64)
        assert((simdSize == 8) || (simdSize == 16));
#elif defined(TARGET_XARCH)
        assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64));
#else
        unreached();
#endif
    }

    // -1 marks a variable argument count; otherwise the count must be within
    // the per-target maximum.
    const int argCount = info.numArgs;

    if (argCount != -1)
    {
#if defined(TARGET_ARM64)
        assert((argCount >= 0) && (argCount <= 4));
#elif defined(TARGET_XARCH)
        assert((argCount >= 0) && (argCount <= 5));
#else
        unreached();
#endif
    }

    // TODO: There's more we could validate here in terms of flags, instructions used, etc.
    // Some of this is already done ad-hoc elsewhere throughout the JIT
}

// ValidateHWIntrinsicIsaRange: checks that one table row describes a valid,
// maximal, name-sorted range of intrinsic IDs for the given ISA.
//
// Arguments:
//    isa      -- the instruction set the row is expected to describe
//    isaRange -- the [FirstId, LastId] row being validated
static void ValidateHWIntrinsicIsaRange(CORINFO_InstructionSet isa, const HWIntrinsicIsaRange& isaRange)
{
    // An ISA with no intrinsics is encoded with both endpoints illegal; a
    // half-illegal row is malformed.
    if (isaRange.FirstId == NI_Illegal)
    {
        assert(isaRange.LastId == NI_Illegal);
        return;
    }
    assert(isaRange.LastId != NI_Illegal);

    // The endpoints must be well ordered and both map back to this ISA.
    assert(isaRange.FirstId <= isaRange.LastId);
    assert(HWIntrinsicInfo::lookupIsa(isaRange.FirstId) == isa);
    assert(HWIntrinsicInfo::lookupIsa(isaRange.LastId) == isa);

    // The range must be maximal: the ID immediately before it must either be
    // outside the intrinsic ID space or belong to a different ISA.
    NamedIntrinsic idBeforeRange = static_cast<NamedIntrinsic>(isaRange.FirstId - 1);
    assert((idBeforeRange == NI_HW_INTRINSIC_START) || (HWIntrinsicInfo::lookupIsa(idBeforeRange) != isa));

    // Likewise for the ID immediately after the range.
    NamedIntrinsic idAfterRange = static_cast<NamedIntrinsic>(isaRange.LastId + 1);
#if defined(TARGET_ARM64)
    assert((idAfterRange == NI_HW_INTRINSIC_END) || (HWIntrinsicInfo::lookupIsa(idAfterRange) != isa) ||
           (idAfterRange == SPECIAL_NI_Sve));
#else
    assert((idAfterRange == NI_HW_INTRINSIC_END) || (HWIntrinsicInfo::lookupIsa(idAfterRange) != isa));
#endif

    // Validate every entry in the range and check that consecutive entries are
    // strictly sorted by name, which the binary search in lookupId relies on.
    const HWIntrinsicInfo* priorEntry = &HWIntrinsicInfo::lookup(isaRange.FirstId);
    ValidateHWIntrinsicInfo(isa, isaRange.FirstId, *priorEntry);

    for (NamedIntrinsic curId = static_cast<NamedIntrinsic>(isaRange.FirstId + 1); curId <= isaRange.LastId;
         curId = static_cast<NamedIntrinsic>(curId + 1))
    {
        const HWIntrinsicInfo* curEntry = &HWIntrinsicInfo::lookup(curId);
        ValidateHWIntrinsicInfo(isa, curId, *curEntry);

        // Each name must sort strictly after its predecessor
        assert(strcmp(curEntry->name, priorEntry->name) > 0);

        priorEntry = curEntry;
    }
}

static void ValidateHWIntrinsicIsaRangeArray()
{
for (size_t i = 0; i < ARRAY_SIZE(hwintrinsicIsaRangeArray); i++)
{
CORINFO_InstructionSet isa = static_cast<CORINFO_InstructionSet>(i + 1);
ValidateHWIntrinsicIsaRange(isa, hwintrinsicIsaRangeArray[i]);
}
}
#endif

//------------------------------------------------------------------------
// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet
//
Expand All @@ -487,7 +694,16 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
const char* methodName,
const char* enclosingClassName)
{
// TODO-Throughput: replace sequential search by binary search
#if defined(DEBUG)
static bool validationCompleted = false;

if (!validationCompleted)
{
ValidateHWIntrinsicIsaRangeArray();
validationCompleted = true;
}
#endif // DEBUG

CORINFO_InstructionSet isa = lookupIsa(className, enclosingClassName);

if (isa == InstructionSet_ILLEGAL)
Expand All @@ -496,9 +712,22 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
}

bool isIsaSupported = comp->compSupportsHWIntrinsic(isa);
bool isHardwareAcceleratedProp = (strcmp(methodName, "get_IsHardwareAccelerated") == 0);
bool isHardwareAcceleratedProp = false;
bool isSupportedProp = false;
uint32_t vectorByteLength = 0;

if (strncmp(methodName, "get_Is", 6) == 0)
{
if (strcmp(methodName + 6, "HardwareAccelerated") == 0)
{
isHardwareAcceleratedProp = true;
}
else if (strcmp(methodName + 6, "Supported") == 0)
{
isSupportedProp = true;
}
}

#ifdef TARGET_XARCH
if (isHardwareAcceleratedProp)
{
Expand All @@ -507,26 +736,29 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
// still can be cases where e.g. Sse41 might give an additional boost for Vector128, but it's
// not important enough to bump the minimal Sse version here)

if (strcmp(className, "Vector128") == 0)
if (isa == InstructionSet_Vector128)
{
isa = InstructionSet_SSE2;
vectorByteLength = 16;
}
else if (strcmp(className, "Vector256") == 0)
else if (isa == InstructionSet_Vector256)
{
isa = InstructionSet_AVX2;
vectorByteLength = 32;
}
else if (strcmp(className, "Vector512") == 0)
else if (isa == InstructionSet_Vector512)
{
isa = InstructionSet_AVX512F;
vectorByteLength = 64;
}
else
{
assert((strcmp(className, "Vector128") != 0) && (strcmp(className, "Vector256") != 0) &&
(strcmp(className, "Vector512") != 0));
}
}
#endif

bool isSupportedProp = (strcmp(methodName, "get_IsSupported") == 0);

if (isSupportedProp && (strncmp(className, "Vector", 6) == 0))
{
// The Vector*<T>.IsSupported props report if T is supported & is specially handled in lookupNamedIntrinsic
Expand Down Expand Up @@ -621,33 +853,50 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
}
#endif

for (int i = 0; i < (NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); i++)
size_t isaIndex = static_cast<size_t>(isa) - 1;
assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray));

const HWIntrinsicIsaRange& isaRange = hwintrinsicIsaRangeArray[isaIndex];

if (isaRange.FirstId == NI_Illegal)
{
const HWIntrinsicInfo& intrinsicInfo = hwIntrinsicInfoArray[i];
return NI_Illegal;
}

if (isa != hwIntrinsicInfoArray[i].isa)
{
continue;
}
size_t rangeLower = isaRange.FirstId;
size_t rangeUpper = isaRange.LastId;

while (rangeLower <= rangeUpper)
{
// This is safe since rangeLower and rangeUpper will never be negative
size_t rangeIndex = (rangeUpper + rangeLower) / 2;

int numArgs = static_cast<unsigned>(intrinsicInfo.numArgs);
NamedIntrinsic ni = static_cast<NamedIntrinsic>(rangeIndex);
const HWIntrinsicInfo& intrinsicInfo = HWIntrinsicInfo::lookup(ni);

if ((numArgs != -1) && (sig->numArgs != static_cast<unsigned>(intrinsicInfo.numArgs)))
int sortOrder = strcmp(methodName, intrinsicInfo.name);

if (sortOrder < 0)
{
continue;
rangeUpper = rangeIndex - 1;
}

if (strcmp(methodName, intrinsicInfo.name) == 0)
else if (sortOrder > 0)
{
NamedIntrinsic ni = intrinsicInfo.id;
rangeLower = rangeIndex + 1;
}
else
{
assert(sortOrder == 0);
assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast<uint8_t>(intrinsicInfo.numArgs)));

#if defined(TARGET_XARCH)
// on AVX1-only CPUs we only support a subset of intrinsics in Vector256
if (isLimitedVector256Isa && !AvxOnlyCompatible(ni))
{
return NI_Illegal;
}
#endif
#endif // TARGET_XARCH

return ni;
}
}
Expand Down
19 changes: 14 additions & 5 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,24 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, const c
{
assert(className != nullptr);

if (strcmp(className, "Arm64") == 0)
if (enclosingClassName == nullptr)
{
assert(enclosingClassName != nullptr);
return Arm64VersionOfIsa(lookupInstructionSet(enclosingClassName));
// No nested class is the most common, so fast path it
return lookupInstructionSet(className);
}
else

// Since lookupId is only called for the xplat intrinsics
// or intrinsics in the platform specific namespace, we assume
// that it will be one we can handle and don't try to early out.

CORINFO_InstructionSet enclosingIsa = lookupInstructionSet(enclosingClassName);

if (strcmp(className, "Arm64") == 0)
{
return lookupInstructionSet(className);
return Arm64VersionOfIsa(enclosingIsa);
}

return InstructionSet_ILLEGAL;
}

//------------------------------------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2087,11 +2087,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Sve_Compute32BitAddresses:
case NI_Sve_Compute64BitAddresses:
{
static_assert_no_msg(AreContiguous(NI_Sve_Compute8BitAddresses, NI_Sve_Compute16BitAddresses,
NI_Sve_Compute32BitAddresses, NI_Sve_Compute64BitAddresses));
static_assert_no_msg(AreContiguous(NI_Sve_Compute16BitAddresses, NI_Sve_Compute32BitAddresses,
NI_Sve_Compute64BitAddresses, NI_Sve_Compute8BitAddresses));

GetEmitter()->emitInsSve_R_R_R_I(ins, EA_SCALABLE, targetReg, op1Reg, op2Reg,
(intrin.id - NI_Sve_Compute8BitAddresses), opt,
(intrin.id - NI_Sve_Compute16BitAddresses), opt,
INS_SCALABLE_OPTS_LSL_N);
break;
}
Expand Down
Loading