diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 16cad5618112b7..b0d5d5727fc50d 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1508,8 +1508,18 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) var_types targetType = tree->TypeGet(); regNumber targetReg = tree->GetRegNum(); - inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true); - genTransferRegGCState(targetReg, tree->gtSrcReg); +#ifdef TARGET_ARM64 + if (varTypeIsMask(targetType)) + { + assert(tree->gtSrcReg == REG_FFR); + GetEmitter()->emitIns_R(INS_sve_rdffr, EA_SCALABLE, targetReg); + } + else +#endif + { + inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true); + genTransferRegGCState(targetReg, tree->gtSrcReg); + } genProduceReg(tree); } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 7013184028a878..60a478e16d5fa1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4344,6 +4344,10 @@ class Compiler #endif // defined(FEATURE_SIMD) unsigned lvaGSSecurityCookie; // LclVar number +#ifdef TARGET_ARM64 + unsigned lvaFfrRegister; // LclVar number + unsigned getFFRegisterVarNum(); +#endif bool lvaTempsHaveLargerOffsetThanVars(); // Returns "true" iff local variable "lclNum" is in SSA form. 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 759dde9b584fb9..eb5966cde50624 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7448,7 +7448,11 @@ GenTreeIntCon* Compiler::gtNewFalse() // return a new node representing the value in a physical register GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type) { +#ifdef TARGET_ARM64 + assert(genIsValidIntReg(reg) || (reg == REG_SPBASE) || (reg == REG_FFR)); +#else assert(genIsValidIntReg(reg) || (reg == REG_SPBASE)); +#endif GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type); return result; } @@ -11533,6 +11537,14 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons ilKind = "cse"; ilNum = lclNum - optCSEstart; } +#ifdef TARGET_ARM64 + else if (lclNum == lvaFfrRegister) + { + // We introduce this LclVar in lowering, hence special case the printing of + // it instead of handling it in "rationalizer" below. + ilName = "FFReg"; + } +#endif else if (lclNum >= optCSEstart) { // Currently any new LclVar's introduced after the CSE phase diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 63455226ed2fbe..e977b649dc5a21 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2297,10 +2297,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_Sve_CreateBreakAfterMask: case NI_Sve_CreateBreakAfterPropagateMask: - case NI_Sve_CreateBreakBeforeMask: case NI_Sve_CreateBreakBeforePropagateMask: + { + // HWInstrinsic requires a mask for op3 + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3)); + FALLTHROUGH; + } + case NI_Sve_CreateBreakAfterMask: + case NI_Sve_CreateBreakBeforeMask: case NI_Sve_CreateMaskForFirstActiveElement: case NI_Sve_CreateMaskForNextActiveElement: case NI_Sve_GetActiveElementCount: @@ -2310,30 +2315,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { // HWInstrinsic 
requires a mask for op2 convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(2)); - break; + FALLTHROUGH; } - default: - break; - } - - switch (intrinsic) - { - case NI_Sve_CreateBreakAfterPropagateMask: - case NI_Sve_CreateBreakBeforePropagateMask: { - // HWInstrinsic requires a mask for op3 - convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3)); + // HWInstrinsic requires a mask for op1 + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1)); break; } - - default: - break; } - // HWInstrinsic requires a mask for op1 - convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1)); - if (HWIntrinsicInfo::IsMultiReg(intrinsic)) { assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId())); diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index a93fa678156ca8..934ed32b81d75d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -2366,6 +2366,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_LoadVectorFirstFaulting: + { + if (intrin.numOperands == 3) + { + // We have extra argument which means there is a "use" of FFR here. Restore it back in FFR register. + assert(op3Reg != REG_NA); + GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op3Reg, opt); + } + + insScalableOpts sopt = (opt == INS_OPTS_SCALABLE_B) ? 
INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N; + GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, REG_ZR, opt, sopt); + break; + } + + case NI_Sve_SetFfr: + { + assert(targetReg == REG_NA); + GetEmitter()->emitIns_R(ins, emitSize, op1Reg, opt); + break; + } case NI_Sve_ConditionalExtractAfterLastActiveElementScalar: case NI_Sve_ConditionalExtractLastActiveElementScalar: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 46dd188785d197..8a531918261a3e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -122,6 +122,14 @@ HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend, HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation) +HARDWARE_INTRINSIC(Sve, GetFfrByte, -1, -1, false, {INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrInt16, 
-1, -1, false, {INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrSByte, -1, -1, false, {INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, true, {INS_sve_insr, INS_sve_insr, 
INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -142,6 +150,7 @@ HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64, HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorFirstFaulting, -1, -1, false, {INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, 
INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialSideEffectMask) HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -237,6 +246,7 @@ HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing, HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, 
Scatter8BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, true, {INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffectMask|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, false, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index f31e8b364c6ed1..bc0300c83a846e 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -721,7 +721,13 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* 
tree, regNumbe unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); assert(varNum < compiler->lvaCount); #if CPU_LOAD_STORE_ARCH +#ifdef TARGET_ARM64 + // Workaround until https://github.com/dotnet/runtime/issues/105512 is fixed. + assert(GetEmitter()->emitInsIsStore(ins) || (ins == INS_sve_str)); +#else assert(GetEmitter()->emitInsIsStore(ins)); +#endif + #endif GetEmitter()->emitIns_S_R(ins, size, reg, varNum, 0); } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 62d3769879b20a..4dc8fec909defc 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -63,6 +63,9 @@ void Compiler::lvaInit() #endif // JIT32_GCENCODER lvaNewObjArrayArgs = BAD_VAR_NUM; lvaGSSecurityCookie = BAD_VAR_NUM; +#ifdef TARGET_ARM64 + lvaFfrRegister = BAD_VAR_NUM; +#endif #ifdef TARGET_X86 lvaVarargsBaseOfStkArgs = BAD_VAR_NUM; #endif // TARGET_X86 diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index d57d1b893d68af..309eae1f9580c7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -479,6 +479,9 @@ GenTree* Lowering::LowerNode(GenTree* node) { return newNode; } +#ifdef TARGET_ARM64 + m_ffrTrashed = true; +#endif } break; @@ -7902,6 +7905,7 @@ void Lowering::LowerBlock(BasicBlock* block) m_block = block; #ifdef TARGET_ARM64 m_blockIndirs.Reset(); + m_ffrTrashed = true; #endif // NOTE: some of the lowering methods insert calls before the node being diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 81e337abdaf668..7785bb48dcd019 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -429,6 +429,7 @@ class Lowering final : public Phase void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); void LowerModPow2(GenTree* node); bool TryLowerAddForPossibleContainment(GenTreeOp* node, GenTree** next); + void StoreFFRValue(GenTreeHWIntrinsic* node); #endif // !TARGET_XARCH && !TARGET_ARM64 GenTree* InsertNewSimdCreateScalarUnsafeNode(var_types type, GenTree* op1, 
@@ -629,6 +630,7 @@ class Lowering final : public Phase } }; ArrayStack m_blockIndirs; + bool m_ffrTrashed; #endif }; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index cd5032921804d2..50615dd4e4a1bb 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1518,6 +1518,73 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_ConditionalSelect: return LowerHWIntrinsicCndSel(node); + case NI_Sve_SetFfr: + { + StoreFFRValue(node); + break; + } + case NI_Sve_GetFfrByte: + case NI_Sve_GetFfrInt16: + case NI_Sve_GetFfrInt32: + case NI_Sve_GetFfrInt64: + case NI_Sve_GetFfrSByte: + case NI_Sve_GetFfrUInt16: + case NI_Sve_GetFfrUInt32: + case NI_Sve_GetFfrUInt64: + { + LIR::Use use; + bool foundUse = BlockRange().TryGetUse(node, &use); + if (foundUse) + { + unsigned lclNum = comp->getFFRegisterVarNum(); + GenTree* lclVar = comp->gtNewLclvNode(lclNum, TYP_MASK); + BlockRange().InsertBefore(node, lclVar); + use.ReplaceWith(lclVar); + GenTree* next = node->gtNext; + BlockRange().Remove(node); + return next; + } + else + { + node->SetUnusedValue(); + } + + break; + } + case NI_Sve_LoadVectorFirstFaulting: + { + LIR::Use use; + bool foundUse = BlockRange().TryGetUse(node, &use); + + if (m_ffrTrashed) + { + // Consume the FFR register value from local variable to simulate "use" of FFR, + // only if it was trashed. If it was not trashed, we do not have to reload the + // contents of the FFR register. 
+ + GenTree* lclVar = comp->gtNewLclvNode(comp->lvaFfrRegister, TYP_MASK); + BlockRange().InsertBefore(node, lclVar); + LowerNode(lclVar); + + node->ResetHWIntrinsicId(intrinsicId, comp, node->Op(1), node->Op(2), lclVar); + } + + if (foundUse) + { + unsigned tmpNum = comp->lvaGrabTemp(true DEBUGARG("Return value result/FFR")); + LclVarDsc* tmpVarDsc = comp->lvaGetDesc(tmpNum); + tmpVarDsc->lvType = node->TypeGet(); + GenTree* storeLclVar; + use.ReplaceWithLclVar(comp, tmpNum, &storeLclVar); + } + else + { + node->SetUnusedValue(); + } + + StoreFFRValue(node); + break; + } default: break; } @@ -3777,6 +3844,37 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) ContainCheckHWIntrinsic(cndSelNode); return cndSelNode->gtNext; } + +#if defined(TARGET_ARM64) +//---------------------------------------------------------------------------------------------- +// StoreFFRValue: For hwintrinsics that produce a first faulting register (FFR) value, create +// nodes to save its value to a local variable. +// +// Arguments: +// node - The node after which the nodes that save the FFR value are inserted +// +void Lowering::StoreFFRValue(GenTreeHWIntrinsic* node) +{ +#ifdef DEBUG + switch (node->GetHWIntrinsicId()) + { + case NI_Sve_SetFfr: + case NI_Sve_LoadVectorFirstFaulting: + break; + default: + assert(!"Unexpected HWIntrinsicId"); + } +#endif + + // Create physReg FFR definition to store FFR register.
+ unsigned lclNum = comp->getFFRegisterVarNum(); + GenTree* ffrReg = comp->gtNewPhysRegNode(REG_FFR, TYP_MASK); + GenTree* storeLclVar = comp->gtNewStoreLclVarNode(lclNum, ffrReg); + BlockRange().InsertAfter(node, ffrReg, storeLclVar); + m_ffrTrashed = false; +} +#endif // TARGET_ARM64 + #endif // FEATURE_HW_INTRINSICS #endif // TARGET_ARMARCH diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 59963e02d7438f..b04af2082c90fe 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -511,7 +511,11 @@ class RegRecord : public Referenceable #if defined(FEATURE_MASKED_HW_INTRINSICS) else { +#ifdef TARGET_ARM64 + assert(emitter::isMaskReg(reg) || (reg == REG_FFR)); +#else assert(emitter::isMaskReg(reg)); +#endif registerType = MaskRegisterType; } #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 5d6cf7f1945e4c..2352fa5ec0239a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -607,9 +607,24 @@ int LinearScan::BuildNode(GenTree* tree) switch (tree->OperGet()) { default: + { srcCount = BuildSimple(tree); break; - + } + case GT_PHYSREG: + { + srcCount = 0; + if (varTypeIsMask(tree)) + { + assert(tree->AsPhysReg()->gtSrcReg == REG_FFR); + BuildDef(tree, getSingleTypeRegMask(tree->AsPhysReg()->gtSrcReg, TYP_MASK)); + } + else + { + BuildSimple(tree); + } + break; + } case GT_LCL_VAR: // We make a final determination about whether a GT_LCL_VAR is a candidate or contained // after liveness. In either case we don't build any uses or defs. 
Otherwise, this is a diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 46577c8050fef3..ee6d423db46d9a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2841,10 +2841,7 @@ void LinearScan::buildIntervals() #ifdef HAS_MORE_THAN_64_REGISTERS else if (availableRegCount < (sizeof(regMaskTP) * 8)) { - // Mask out the bits that are between (8 * regMaskTP) ~ availableRegCount - // Subtract one extra for stack. - unsigned topRegCount = availableRegCount - sizeof(regMaskSmall) * 8 - 1; - actualRegistersMask = regMaskTP(~RBM_NONE, (1ULL << topRegCount) - 1); + actualRegistersMask = regMaskTP(~RBM_NONE, availableMaskRegs); } #endif else diff --git a/src/coreclr/jit/registerarm64.h b/src/coreclr/jit/registerarm64.h index d296ab9497858f..6b8091814251ee 100644 --- a/src/coreclr/jit/registerarm64.h +++ b/src/coreclr/jit/registerarm64.h @@ -116,13 +116,13 @@ REGDEF(P13, 13+PBASE, PMASK(13), "p13", "na") REGDEF(P14, 14+PBASE, PMASK(14), "p14", "na") REGDEF(P15, 15+PBASE, PMASK(15), "p15", "na") - // The registers with values 80 (NBASE) and above are not real register numbers #define NBASE 80 REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?") +REGDEF(FFR, 1+NBASE, 0x0000, "ffr", "na") // This must be last! -REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK") +REGDEF(STK, 2+NBASE, 0x0000, "STK", "STK") /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 1157bf9e5bfc94..6256ee0c37799b 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -130,6 +130,29 @@ unsigned Compiler::getSIMDInitTempVarNum(var_types simdType) return lvaSIMDInitTempVarNum; } +#ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// getFFRegisterVarNum: Get, and allocate if necessary, the local variable +// used to save and restore the value of the FFR register.
 +// +// Arguments: +// None +// +// Returns: +// The local variable number used to save the FFR register value +// +unsigned Compiler::getFFRegisterVarNum() +{ + if (lvaFfrRegister == BAD_VAR_NUM) + { + lvaFfrRegister = lvaGrabTemp(false DEBUGARG("Save the FFR value.")); + lvaTable[lvaFfrRegister].lvType = TYP_MASK; + lvaTable[lvaFfrRegister].lvUsedInSIMDIntrinsic = true; + } + return lvaFfrRegister; +} +#endif + //---------------------------------------------------------------------------------- // Return the base type and size of SIMD vector type given its type handle. //