Skip to content
Merged
14 changes: 12 additions & 2 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1508,8 +1508,18 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
var_types targetType = tree->TypeGet();
regNumber targetReg = tree->GetRegNum();

inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true);
genTransferRegGCState(targetReg, tree->gtSrcReg);
#ifdef TARGET_ARM64
if (varTypeIsMask(targetType))
{
assert(tree->gtSrcReg == REG_FFR);
GetEmitter()->emitIns_R(INS_sve_rdffr, EA_SCALABLE, targetReg);
}
else
#endif
{
inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true);
genTransferRegGCState(targetReg, tree->gtSrcReg);
}

genProduceReg(tree);
}
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4344,6 +4344,10 @@ class Compiler
#endif // defined(FEATURE_SIMD)

unsigned lvaGSSecurityCookie; // LclVar number
#ifdef TARGET_ARM64
unsigned lvaFfrRegister; // LclVar number
unsigned getFFRegisterVarNum();
#endif
bool lvaTempsHaveLargerOffsetThanVars();

// Returns "true" iff local variable "lclNum" is in SSA form.
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7448,7 +7448,11 @@ GenTreeIntCon* Compiler::gtNewFalse()
// return a new node representing the value in a physical register
GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type)
{
#ifdef TARGET_ARM64
assert(genIsValidIntReg(reg) || (reg == REG_SPBASE) || (reg == REG_FFR));
#else
assert(genIsValidIntReg(reg) || (reg == REG_SPBASE));
#endif
GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type);
return result;
}
Expand Down Expand Up @@ -11533,6 +11537,14 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons
ilKind = "cse";
ilNum = lclNum - optCSEstart;
}
#ifdef TARGET_ARM64
else if (lclNum == lvaFfrRegister)
{
// We introduce this LclVar in lowering, hence special case the printing of
// it instead of handling it in "rationalizer" below.
ilName = "FFReg";
}
#endif
else if (lclNum >= optCSEstart)
{
// Currently any new LclVar's introduced after the CSE phase
Expand Down
29 changes: 10 additions & 19 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2297,10 +2297,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

switch (intrinsic)
{
case NI_Sve_CreateBreakAfterMask:
case NI_Sve_CreateBreakAfterPropagateMask:
case NI_Sve_CreateBreakBeforeMask:
case NI_Sve_CreateBreakBeforePropagateMask:
{
// HWInstrinsic requires a mask for op3
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3));
FALLTHROUGH;
}
case NI_Sve_CreateBreakAfterMask:
case NI_Sve_CreateBreakBeforeMask:
case NI_Sve_CreateMaskForFirstActiveElement:
case NI_Sve_CreateMaskForNextActiveElement:
case NI_Sve_GetActiveElementCount:
Expand All @@ -2310,30 +2315,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
{
// HWInstrinsic requires a mask for op2
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(2));
break;
FALLTHROUGH;
}

default:
break;
}

switch (intrinsic)
{
case NI_Sve_CreateBreakAfterPropagateMask:
case NI_Sve_CreateBreakBeforePropagateMask:
{
// HWInstrinsic requires a mask for op3
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3));
// HWInstrinsic requires a mask for op1
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1));
break;
}

default:
break;
}

// HWInstrinsic requires a mask for op1
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1));

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_LoadVectorFirstFaulting:
{
if (intrin.numOperands == 3)
{
// We have extra argument which means there is a "use" of FFR here. Restore it back in FFR register.
assert(op3Reg != REG_NA);
GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op3Reg, opt);
}

insScalableOpts sopt = (opt == INS_OPTS_SCALABLE_B) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N;
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, REG_ZR, opt, sopt);
break;
}

case NI_Sve_SetFfr:
{
assert(targetReg == REG_NA);
GetEmitter()->emitIns_R(ins, emitSize, op1Reg, opt);
break;
}
case NI_Sve_ConditionalExtractAfterLastActiveElementScalar:
case NI_Sve_ConditionalExtractLastActiveElementScalar:
{
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend,
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
HARDWARE_INTRINSIC(Sve, GetFfrByte, -1, -1, false, {INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrSByte, -1, -1, false, {INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, true, {INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand All @@ -142,6 +150,7 @@ HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64,
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorFirstFaulting, -1, -1, false, {INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down Expand Up @@ -237,6 +246,7 @@ HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing,
HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Scatter8BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, true, {INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffectMask|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, false, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,13 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe
unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
assert(varNum < compiler->lvaCount);
#if CPU_LOAD_STORE_ARCH
#ifdef TARGET_ARM64
// Workaround until https://github.com/dotnet/runtime/issues/105512 is fixed.
assert(GetEmitter()->emitInsIsStore(ins) || (ins == INS_sve_str));
#else
assert(GetEmitter()->emitInsIsStore(ins));
#endif

#endif
GetEmitter()->emitIns_S_R(ins, size, reg, varNum, 0);
}
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ void Compiler::lvaInit()
#endif // JIT32_GCENCODER
lvaNewObjArrayArgs = BAD_VAR_NUM;
lvaGSSecurityCookie = BAD_VAR_NUM;
#ifdef TARGET_ARM64
lvaFfrRegister = BAD_VAR_NUM;
#endif
#ifdef TARGET_X86
lvaVarargsBaseOfStkArgs = BAD_VAR_NUM;
#endif // TARGET_X86
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,9 @@ GenTree* Lowering::LowerNode(GenTree* node)
{
return newNode;
}
#ifdef TARGET_ARM64
m_ffrTrashed = true;
#endif
}
break;

Expand Down Expand Up @@ -7902,6 +7905,7 @@ void Lowering::LowerBlock(BasicBlock* block)
m_block = block;
#ifdef TARGET_ARM64
m_blockIndirs.Reset();
m_ffrTrashed = true;
#endif

// NOTE: some of the lowering methods insert calls before the node being
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ class Lowering final : public Phase
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
void LowerModPow2(GenTree* node);
bool TryLowerAddForPossibleContainment(GenTreeOp* node, GenTree** next);
void StoreFFRValue(GenTreeHWIntrinsic* node);
#endif // !TARGET_XARCH && !TARGET_ARM64
GenTree* InsertNewSimdCreateScalarUnsafeNode(var_types type,
GenTree* op1,
Expand Down Expand Up @@ -629,6 +630,7 @@ class Lowering final : public Phase
}
};
ArrayStack<SavedIndir> m_blockIndirs;
bool m_ffrTrashed;
#endif
};

Expand Down
Loading