diff --git a/src/coreclr/inc/patchpointinfo.h b/src/coreclr/inc/patchpointinfo.h
index 02b9fd89f338ae..27346a3c4aeea3 100644
--- a/src/coreclr/inc/patchpointinfo.h
+++ b/src/coreclr/inc/patchpointinfo.h
@@ -37,7 +37,7 @@ struct PatchpointInfo
// Initialize
void Initialize(unsigned localCount, int totalFrameSize)
{
- m_calleeSaveRegisters = 0;
+ m_calleeSaveGprRegisters = 0;
m_totalFrameSize = totalFrameSize;
m_numberOfLocals = localCount;
m_genericContextArgOffset = -1;
@@ -49,7 +49,7 @@ struct PatchpointInfo
// Copy
void Copy(const PatchpointInfo* original)
{
- m_calleeSaveRegisters = original->m_calleeSaveRegisters;
+ m_calleeSaveGprRegisters = original->m_calleeSaveGprRegisters;
m_genericContextArgOffset = original->m_genericContextArgOffset;
m_keptAliveThisOffset = original->m_keptAliveThisOffset;
m_securityCookieOffset = original->m_securityCookieOffset;
@@ -163,14 +163,14 @@ struct PatchpointInfo
// Callee save registers saved by the original method.
// Includes all saves that must be restored (eg includes pushed RBP on x64).
//
- uint64_t CalleeSaveRegisters() const
+ uint64_t CalleeSaveGprRegisters() const
{
- return m_calleeSaveRegisters;
+ return m_calleeSaveGprRegisters;
}
- void SetCalleeSaveRegisters(uint64_t registerMask)
+ void SetCalleeSaveGprRegisters(uint64_t gprRegs)
{
- m_calleeSaveRegisters = registerMask;
+ m_calleeSaveGprRegisters = gprRegs;
}
private:
@@ -180,7 +180,7 @@ struct PatchpointInfo
EXPOSURE_MASK = 0x1
};
- uint64_t m_calleeSaveRegisters;
+ uint64_t m_calleeSaveGprRegisters;
unsigned m_numberOfLocals;
int m_totalFrameSize;
int m_genericContextArgOffset;
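The rename above makes it explicit that PatchpointInfo records only general-purpose callee saves. A minimal sketch of how a producer and a consumer of the renamed accessors pair up, assuming an already-allocated PatchpointInfo*; the helper names here are illustrative, not part of the patch.

// Illustrative only: the original (Tier0) method records which GPRs its prolog
// pushed, and the OSR method later reads the same mask to rebuild the frame.
void RecordCalleeSaves(PatchpointInfo* ppInfo, uint64_t pushedGprMask)
{
    ppInfo->SetCalleeSaveGprRegisters(pushedGprMask); // GPRs only, per the rename
}

uint64_t ReadCalleeSaves(const PatchpointInfo* ppInfo)
{
    return ppInfo->CalleeSaveGprRegisters();
}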
diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp
index e80533fc783a06..958a1a73fee96c 100644
--- a/src/coreclr/jit/abi.cpp
+++ b/src/coreclr/jit/abi.cpp
@@ -48,10 +48,10 @@ regNumber ABIPassingSegment::GetRegister() const
// Return Value:
// The register mask.
//
-regMaskTP ABIPassingSegment::GetRegisterMask() const
+regMaskOnlyOne ABIPassingSegment::GetRegisterMask() const
{
assert(IsPassedInRegister());
- regMaskTP reg = genRegMask(m_register);
+ regMaskOnlyOne reg = genRegMask(m_register);
#ifdef TARGET_ARM
if (genIsValidFloatReg(m_register) && (Size == 8))
diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h
index 1e51a14d9c09a5..10d2fa3672f757 100644
--- a/src/coreclr/jit/abi.h
+++ b/src/coreclr/jit/abi.h
@@ -25,7 +25,7 @@ class ABIPassingSegment
// If this segment is passed in a register, return the particular register.
regNumber GetRegister() const;
- regMaskTP GetRegisterMask() const;
+ regMaskOnlyOne GetRegisterMask() const;
// If this segment is passed on the stack then return the particular stack
// offset, relative to the base of stack arguments.
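regMaskOnlyOne, as used here, is a mask known to contain registers from a single register file. A short sketch of a caller consuming GetRegisterMask(), assuming the mask can be OR-ed into a plain 64-bit accumulator; this illustrates the intended contract and is not code from the patch.

// Sketch: accumulate the registers an ABI segment occupies. The guard mirrors
// the assert inside ABIPassingSegment::GetRegisterMask(): the segment must be
// passed in a register for the mask to be meaningful.
uint64_t AccumulateSegmentRegs(const ABIPassingSegment& seg, uint64_t liveMask)
{
    if (seg.IsPassedInRegister())
    {
        liveMask |= seg.GetRegisterMask(); // one register file; may be two bits on arm for a double
    }
    return liveMask;
}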
diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index 500b5274b6f41c..5c19022df38a2f 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -1601,8 +1601,8 @@ struct BasicBlock : private LIR::Range
// is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
#ifdef DEBUG
- void dspBlockILRange() const; // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for
- // BAD_IL_OFFSET.
+ void dspBlockILRange() const; // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???"
+ // for BAD_IL_OFFSET.
#endif // DEBUG
VARSET_TP bbVarUse; // variables used by block (before a definition)
@@ -1642,8 +1642,8 @@ struct BasicBlock : private LIR::Range
};
static MemoryPhiArg* EmptyMemoryPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
// for Heap.
- MemoryPhiArg* bbMemorySsaPhiFunc[MemoryKindCount]; // If the "in" Heap SSA var is not a phi definition, this value
- // is NULL.
+ MemoryPhiArg* bbMemorySsaPhiFunc[MemoryKindCount]; // If the "in" Heap SSA var is not a phi definition, this
+ // value is NULL.
// Otherwise, it is either the special value EmptyMemoryPhiDefn, to indicate
// that Heap needs a phi definition on entry, or else it is the linked list
// of the phi arguments.
@@ -1677,7 +1677,8 @@ struct BasicBlock : private LIR::Range
{
EXPSET_TP bbCseOut; // CSEs available on exit
ASSERT_TP bbAssertionOut; // assertions available on exit (global prop, local prop & !BBJ_COND)
- ASSERT_TP bbAssertionOutIfFalse; // assertions available on exit along false/next edge (BBJ_COND, local prop)
+ ASSERT_TP bbAssertionOutIfFalse; // assertions available on exit along false/next edge (BBJ_COND, local
+ // prop)
};
void* bbEmitCookie;
@@ -1981,8 +1982,8 @@ struct BasicBlock : private LIR::Range
}
};
- // BBCompilerSuccEdgeList: adapter class for forward iteration of block successors edges, using range-based `for`,
- // normally used via BasicBlock::SuccEdges(), e.g.:
+ // BBCompilerSuccEdgeList: adapter class for forward iteration of block successors edges, using range-based
+ // `for`, normally used via BasicBlock::SuccEdges(), e.g.:
// for (FlowEdge* const succEdge : block->SuccEdges(compiler)) ...
//
// This version uses NumSucc(Compiler*)/GetSucc(Compiler*). See the documentation there for the explanation
@@ -2448,7 +2449,8 @@ inline BasicBlock* BBArrayIterator::operator*() const
return edgeTarget->getDestinationBlock();
}
-// Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and FlowEdge)
+// Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and
+// FlowEdge)
inline PredEdgeList::iterator::iterator(FlowEdge* pred)
: m_pred(pred)
diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis
index cfbc6a181e9743..1b751541aec7e7 100644
--- a/src/coreclr/jit/clrjit.natvis
+++ b/src/coreclr/jit/clrjit.natvis
@@ -139,26 +139,26 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/en-
- this->m_AvailableRegs
-
-
-
- - ((regNumber)regIndex),en
- regIndex++
- reg = reg >> 1
-
+
+
+
+ - ((regNumber)regIndex),en
+ regIndex++
+ reg = reg >> 1
+
- this->m_RegistersWithConstants
-
-
-
- - ((regNumber)regIndex),en
- regIndex++
- reg = reg >> 1
-
+
+
+
+ - ((regNumber)regIndex),en
+ regIndex++
+ reg = reg >> 1
+
-
+
[#{rpNum,d} - {refType,en}]
@@ -177,6 +177,21 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/en-
+
+
+
+
+
+
+
+ - ((regNumber)regIndex),en
+ regIndex++
+ reg = reg >> 1
+
+
+
+
+
[U{this->relatedInterval->varNum,d}, #{this->intervalIndex, d}, reg={(regNumber)physReg, en}]
[V{this->varNum,d}, #{this->intervalIndex, d}, reg={(regNumber)physReg, en}]
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index b5b0d19402f0a3..433af3b2fe89fa 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -261,9 +261,9 @@ class CodeGen final : public CodeGenInterface
// Prolog functions and data (there are a few exceptions for more generally used things)
//
- void genEstablishFramePointer(int delta, bool reportUnwindData);
- void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed);
- regMaskTP genGetParameterHomingTempRegisterCandidates();
+ void genEstablishFramePointer(int delta, bool reportUnwindData);
+ void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed);
+ RegBitSet64 genGetParameterHomingTempRegisterCandidates();
var_types genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg);
void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph);
@@ -335,16 +335,23 @@ class CodeGen final : public CodeGenInterface
}
};
- static void genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack);
+ static void genBuildRegPairsStack(regMaskOnlyOne regsMask,
+ ArrayStack<RegPair>* regStack MORE_THAN_64_REG_ARG(var_types type));
static void genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack);
- static int genGetSlotSizeForRegsInMask(regMaskTP regsMask);
+ static int genGetSlotSizeForRegsInMask(regMaskOnlyOne regsMask);
- void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset);
- void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset);
+ void genSaveCalleeSavedRegisterGroup(regMaskOnlyOne regsMask,
+ int spDelta,
+ int spOffset MORE_THAN_64_REG_ARG(var_types type));
+ void genRestoreCalleeSavedRegisterGroup(regMaskOnlyOne regsMask,
+ int spDelta,
+ int spOffset MORE_THAN_64_REG_ARG(var_types type));
- void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta);
- void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta);
+ void genSaveCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToSaveMask, int lowestCalleeSavedOffset, int spDelta);
+ void genRestoreCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToRestoreMask,
+ int lowestCalleeSavedOffset,
+ int spDelta);
void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed);
@@ -361,9 +368,9 @@ class CodeGen final : public CodeGenInterface
void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize);
#endif
- void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
+ void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskGpr maskArgRegsLiveIn);
- void genPoisonFrame(regMaskTP bbRegLiveIn);
+ void genPoisonFrame(regMaskGpr bbRegLiveIn);
#if defined(TARGET_ARM)
@@ -372,11 +379,9 @@ class CodeGen final : public CodeGenInterface
bool genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg);
- void genPushFltRegs(regMaskTP regMask);
- void genPopFltRegs(regMaskTP regMask);
- regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);
-
- regMaskTP genJmpCallArgMask();
+ void genPushFltRegs(regMaskFloat regMask);
+ void genPopFltRegs(regMaskFloat regMask);
+ regMaskGpr genStackAllocRegisterMask(unsigned frameSize, regMaskFloat maskCalleeSavedFloat);
void genFreeLclFrame(unsigned frameSize,
/* IN OUT */ bool* pUnwindStarted);
@@ -393,11 +398,12 @@ class CodeGen final : public CodeGenInterface
// same.
struct FuncletFrameInfoDsc
{
- regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR)
- unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer
- unsigned fiSpDelta; // Stack pointer delta
- unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP
- int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP
+ regMaskGpr fiSaveGprRegs; // Set of GPR registers saved in the funclet prolog (includes LR)
+ regMaskFloat fiSaveFloatRegs; // Set of float registers saved in the funclet prolog
+ unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer
+ unsigned fiSpDelta; // Stack pointer delta
+ unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP
+ int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP
};
FuncletFrameInfoDsc genFuncletInfo;
@@ -409,7 +415,12 @@ class CodeGen final : public CodeGenInterface
// same.
struct FuncletFrameInfoDsc
{
- regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR)
+ regMaskGpr fiSaveGprRegs; // Set of callee-saved GPR registers saved in the funclet prolog (includes LR)
+ regMaskFloat fiSaveFloatRegs; // Set of callee-saved float registers saved in the funclet prolog
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate fiSavePredicateRegs; // Set of callee-saved predicate registers saved in the funclet prolog
+#endif
int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
// (negative)
int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive)
@@ -444,7 +455,7 @@ class CodeGen final : public CodeGenInterface
// and used by all funclet prologs and epilogs, which must all be the same.
struct FuncletFrameInfoDsc
{
- regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA)
+ regMaskMixed fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA)
int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
// (negative)
int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive)
@@ -463,7 +474,7 @@ class CodeGen final : public CodeGenInterface
// and used by all funclet prologs and epilogs, which must all be the same.
struct FuncletFrameInfoDsc
{
- regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA)
+ regMaskMixed fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA)
int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
// (negative)
int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive)
@@ -485,7 +496,7 @@ class CodeGen final : public CodeGenInterface
#endif // TARGET_XARCH
- void genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg);
+ void genZeroInitFltRegs(const regMaskFloat& initFltRegs, const regMaskFloat& initDblRegs, const regNumber& initReg);
regNumber genGetZeroReg(regNumber initReg, bool* pInitRegZeroed);
@@ -533,7 +544,7 @@ class CodeGen final : public CodeGenInterface
//
#if defined(TARGET_ARM)
- bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog);
+ bool genCanUsePopToReturn(bool jmpEpilog);
#endif
#if defined(TARGET_ARM64)
@@ -545,7 +556,7 @@ class CodeGen final : public CodeGenInterface
void genPopCalleeSavedRegisters(bool jmpEpilog = false);
#if defined(TARGET_XARCH)
- unsigned genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs);
+ unsigned genPopCalleeSavedRegistersFromMask(regMaskGpr rsPopRegs);
#endif // !defined(TARGET_XARCH)
#endif // !defined(TARGET_ARM64)
@@ -671,10 +682,10 @@ class CodeGen final : public CodeGenInterface
//
//-------------------------------------------------------------------------
- void genSinglePush();
- void genSinglePop();
- regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs);
- void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
+ void genSinglePush();
+ void genSinglePop();
+ regMaskGpr genPushRegs(regMaskGpr regs, regMaskGpr* byrefRegs, regMaskGpr* noRefRegs);
+ void genPopRegs(regMaskGpr regs, regMaskGpr byrefRegs, regMaskGpr noRefRegs);
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
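Several of the declarations above grow a trailing var_types argument wrapped in MORE_THAN_64_REG_ARG, so it only exists on targets where a 64-bit mask can no longer identify the register file by itself. A sketch of how such a conditional-argument macro is commonly defined follows; the configuration name used here is an assumption, not the patch's actual definition.

// Illustrative definition only. When the wide-register configuration is enabled
// the macro injects ", x" as an extra trailing argument; otherwise it vanishes.
#ifdef HAS_MORE_THAN_64_REGISTERS // assumed name of the configuration switch
#define MORE_THAN_64_REG_ARG(x) , x
#else
#define MORE_THAN_64_REG_ARG(x)
#endif

// A call site then reads the same way in both configurations, e.g.:
//   genBuildRegPairsStack(regsMask, &regStack MORE_THAN_64_REG_ARG(TYP_FLOAT));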
diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp
index 2c010f116a2657..63287bacebcba8 100644
--- a/src/coreclr/jit/codegenarm.cpp
+++ b/src/coreclr/jit/codegenarm.cpp
@@ -1641,7 +1641,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
else
{
GetEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr);
- regSet.verifyRegUsed(callTargetReg);
+ regSet.verifyGprRegUsed(callTargetReg);
}
GetEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
@@ -1663,7 +1663,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
);
}
- regSet.verifyRegistersUsed(RBM_CALLEE_TRASH);
+ regSet.verifyRegistersUsed(compiler->AllRegsMask_CALLEE_TRASH);
}
#ifdef PROFILING_SUPPORTED
@@ -1692,14 +1692,14 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
// On Arm arguments are prespilled on stack, which frees r0-r3.
// For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
// The call target register could be any free register.
- regNumber argReg = REG_PROFILER_ENTER_ARG;
- regMaskTP argRegMask = genRegMask(argReg);
+ regNumber argReg = REG_PROFILER_ENTER_ARG;
+ regMaskGpr argRegMask = genRegMask(argReg);
assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
if (compiler->compProfilerMethHndIndirected)
{
GetEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
- regSet.verifyRegUsed(argReg);
+ regSet.verifyGprRegUsed(argReg);
}
else
{
@@ -1801,7 +1801,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
// profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
GetEmitter()->emitIns_Mov(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0, /* canSkip */ false);
genTransferRegGCState(REG_PROFILER_RET_SCRATCH, REG_R0);
- regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
+ regSet.verifyGprRegUsed(REG_PROFILER_RET_SCRATCH);
}
if (compiler->compProfilerMethHndIndirected)
@@ -1813,8 +1813,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
}
- gcInfo.gcMarkRegSetNpt(RBM_R0);
- regSet.verifyRegUsed(REG_R0);
+ gcInfo.gcMarkGprRegNpt(REG_R0);
+ regSet.verifyGprRegUsed(REG_R0);
genEmitHelperCall(helper,
0, // argSize
@@ -1825,7 +1825,9 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
{
GetEmitter()->emitIns_Mov(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH, /* canSkip */ false);
genTransferRegGCState(REG_R0, REG_PROFILER_RET_SCRATCH);
- gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
+ assert(compiler->IsGprRegMask(compiler->AllRegsMask_PROFILER_RET_SCRATCH.gprRegs()));
+ assert(compiler->AllRegsMask_PROFILER_RET_SCRATCH.floatRegs(compiler) == RBM_NONE);
+ gcInfo.gcMarkRegSetNpt(compiler->AllRegsMask_PROFILER_RET_SCRATCH.gprRegs());
}
}
@@ -1872,7 +1874,10 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
// Return value:
// None
//
-void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+void CodeGen::genAllocLclFrame(unsigned frameSize,
+ regNumber initReg,
+ bool* pInitRegZeroed,
+ regMaskGpr maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);
@@ -1908,9 +1913,9 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, frameSize,
INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG);
- regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
+ regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_ARG);
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
- regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_CALL_TARGET);
+ regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_CALL_TARGET);
compiler->unwindPadding();
GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false);
@@ -1924,15 +1929,15 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
compiler->unwindAllocStack(frameSize);
}
-void CodeGen::genPushFltRegs(regMaskTP regMask)
+void CodeGen::genPushFltRegs(regMaskFloat regMask)
{
- assert(regMask != 0); // Don't call uness we have some registers to push
- assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
+ assert(regMask != 0); // Don't call unless we have some registers to push
+ assert(compiler->IsFloatRegMask(regMask)); // Only floating point registers should be in regMask
regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
int slots = genCountBits(regMask);
// regMask should be contiguously set
- regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ regMaskFloat tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
assert((tmpMask & (tmpMask - 1)) == 0);
assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
@@ -1943,15 +1948,15 @@ void CodeGen::genPushFltRegs(regMaskTP regMask)
GetEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
}
-void CodeGen::genPopFltRegs(regMaskTP regMask)
+void CodeGen::genPopFltRegs(regMaskFloat regMask)
{
- assert(regMask != 0); // Don't call uness we have some registers to pop
- assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
+ assert(regMask != 0); // Don't call unless we have some registers to pop
+ assert(compiler->IsFloatRegMask(regMask)); // Only floating point registers should be in regMask
regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
int slots = genCountBits(regMask);
// regMask should be contiguously set
- regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ regMaskFloat tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
assert((tmpMask & (tmpMask - 1)) == 0);
// Our calling convention requires that we only use vpop for TYP_DOUBLE registers
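genPushFltRegs/genPopFltRegs above assert that the float save set is one contiguous run starting at the lowest saved register, using the `(regMask >> lowReg) + 1` trick: a contiguous run of ones plus one is a single power of two. A standalone sketch of that check, with made-up register indices:

#include <cassert>
#include <cstdint>

// Mirrors the contiguity check in genPushFltRegs: shift the mask down so the
// lowest saved register sits at bit 0, add one, and require a power of two.
bool IsContiguousFromLowestBit(uint64_t regMask, unsigned lowestRegIndex)
{
    uint64_t shifted = regMask >> lowestRegIndex; // e.g. 0b0111 for three consecutive regs
    uint64_t plusOne = shifted + 1;               // 0b1000 when the run is contiguous
    return (plusOne & (plusOne - 1)) == 0;        // power-of-two test
}

int main()
{
    assert(IsContiguousFromLowestBit(0b01110000, 4));  // regs 4,5,6: accepted
    assert(!IsContiguousFromLowestBit(0b01010000, 4)); // hole at reg 5: rejected
    return 0;
}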
@@ -2081,7 +2086,7 @@ void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber
* instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
* is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
*/
-regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
+regMaskGpr CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskFloat maskCalleeSavedFloat)
{
assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
@@ -2090,6 +2095,8 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC
+ assert(compiler->IsFloatRegMask(maskCalleeSavedFloat));
+
if (maskCalleeSavedFloat != RBM_NONE)
return RBM_NONE;
// Allocate space for small frames by pushing extra registers. It generates smaller and faster code
// that extra sub sp,XXX/add sp,XXX.
// R0 and R1 may be used by return value. Keep things simple and just skip the optimization
@@ -2138,7 +2145,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
}
}
-bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
+bool CodeGen::genCanUsePopToReturn(bool jmpEpilog)
{
assert(compiler->compGeneratingEpilog);
@@ -2152,9 +2159,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
{
assert(compiler->compGeneratingEpilog);
- regMaskTP maskPopRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
- regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
- regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
+ regMaskFloat maskPopRegsFloat = regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_ALLFLOAT;
+ regMaskGpr maskPopRegsInt = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs();
// First, pop float registers
@@ -2168,7 +2174,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
if (!jmpEpilog)
{
- regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
+ regMaskGpr maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
maskPopRegsInt |= maskStackAlloc;
}
@@ -2178,7 +2184,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
maskPopRegsInt |= RBM_FPBASE;
}
- if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
+ if (genCanUsePopToReturn(jmpEpilog))
{
maskPopRegsInt |= RBM_PC;
// Record the fact that we use a pop to the PC to perform the return
@@ -2313,10 +2319,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
compiler->unwindBegProlog();
- regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
- regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
+ regMaskFloat maskPushRegsFloat = genFuncletInfo.fiSaveFloatRegs;
+ regMaskGpr maskPushRegsInt = genFuncletInfo.fiSaveGprRegs;
- regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
+ regMaskGpr maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
maskPushRegsInt |= maskStackAlloc;
assert(FitsIn<int>(maskPushRegsInt));
@@ -2331,7 +2337,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
- regMaskTP maskArgRegsLiveIn;
+ regMaskGpr maskArgRegsLiveIn;
if (isFilter)
{
maskArgRegsLiveIn = RBM_R0 | RBM_R1;
@@ -2367,7 +2373,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// This is the first block of a filter
GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiPSP_slot_CallerSP_offset);
- regSet.verifyRegUsed(REG_R1);
+ regSet.verifyGprRegUsed(REG_R1);
GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset);
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
genFuncletInfo.fiFunctionCallerSPtoFPdelta);
@@ -2377,7 +2383,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// This is a non-filter funclet
GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
genFuncletInfo.fiFunctionCallerSPtoFPdelta);
- regSet.verifyRegUsed(REG_R3);
+ regSet.verifyGprRegUsed(REG_R3);
GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset);
}
}
@@ -2407,12 +2413,12 @@ void CodeGen::genFuncletEpilog()
bool unwindStarted = false;
/* The saved regs info saves the LR register. We need to pop the PC register to return */
- assert(genFuncletInfo.fiSaveRegs & RBM_LR);
+ assert(genFuncletInfo.fiSaveGprRegs & RBM_LR);
- regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
- regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
+ regMaskFloat maskPopRegsFloat = genFuncletInfo.fiSaveFloatRegs;
+ regMaskGpr maskPopRegsInt = genFuncletInfo.fiSaveGprRegs;
- regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
+ regMaskGpr maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
maskPopRegsInt |= maskStackAlloc;
if (maskStackAlloc == RBM_NONE)
@@ -2465,14 +2471,15 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
// of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
// (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
// (also assumed in genFnProlog()).
- assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
+ assert((regSet.rsGprMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
- regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
- unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
- unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
- unsigned saveSizeWithPSP = saveRegsSize + REGSIZE_BYTES /* PSP sym */;
+ regMaskGpr rsGprMaskSaveRegs = regSet.rsGprMaskCalleeSaved;
+ regMaskFloat rsFloatMaskSaveRegs = regSet.rsFloatMaskCalleeSaved;
+ unsigned saveRegsCount = genCountBits(rsGprMaskSaveRegs) + genCountBits(rsFloatMaskSaveRegs);
+ unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
+ unsigned saveSizeWithPSP = saveRegsSize + REGSIZE_BYTES /* PSP sym */;
if (compiler->lvaMonAcquired != BAD_VAR_NUM)
{
saveSizeWithPSP += TARGET_POINTER_SIZE;
@@ -2490,7 +2497,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
/* Now save it for future use */
- genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSaveGprRegs = rsGprMaskSaveRegs;
+ genFuncletInfo.fiSaveFloatRegs = rsFloatMaskSaveRegs;
genFuncletInfo.fiSpDelta = spDelta;
genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
@@ -2502,7 +2510,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
printf("Funclet prolog / epilog info\n");
printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
printf(" Save regs: ");
- dspRegMask(rsMaskSaveRegs);
+ dspRegMask(AllRegsMask(rsGprMaskSaveRegs, rsFloatMaskSaveRegs));
printf("\n");
printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
@@ -2623,11 +2631,11 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
//
// str rZero1,[rAddr] // When cnt is odd
- regNumber rAddr;
- regNumber rCnt = REG_NA; // Invalid
- regMaskTP regMask;
+ regNumber rAddr;
+ regNumber rCnt = REG_NA; // Invalid
+ regMaskGpr regMask;
- regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
+ regMaskGpr availMask = regSet.rsGetModifiedGprRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
// currently live
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index cd1b1558d93e64..11c468fb6f3322 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -36,16 +36,19 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
{
assert(compiler->compGeneratingEpilog);
- regMaskTP rsRestoreRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
+ regMaskFloat rsRestoreFloatRegs =
+ regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_ALLFLOAT;
+ regMaskGpr rsRestoreGprRegs = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs();
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate rsRestorePredicateRegs = regSet.rsGetModifiedPredicateRegsMask() & RBM_MSK_CALLEE_SAVED;
+#endif
if (isFramePointerUsed())
{
- rsRestoreRegs |= RBM_FPBASE;
+ rsRestoreGprRegs |= RBM_FPBASE;
}
- rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
-
- regMaskTP regsToRestoreMask = rsRestoreRegs;
+ rsRestoreGprRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
const int totalFrameSize = genTotalFrameSize();
@@ -71,7 +74,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
compiler->unwindSetFrameReg(REG_FPBASE, 0);
}
- regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+ rsRestoreGprRegs &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
break;
}
@@ -91,7 +94,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta);
}
- regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+ rsRestoreGprRegs &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
break;
}
@@ -104,7 +107,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
JITDUMP(" calleeSaveSpDelta=%d\n", calleeSaveSpDelta);
- regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
+ rsRestoreGprRegs &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
int remainingFrameSz = totalFrameSize - calleeSaveSpDelta;
assert(remainingFrameSz > 0);
@@ -203,7 +206,13 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
}
JITDUMP(" calleeSaveSpOffset=%d, calleeSaveSpDelta=%d\n", calleeSaveSpOffset, calleeSaveSpDelta);
- genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSpOffset, calleeSaveSpDelta);
+ genRestoreCalleeSavedRegistersHelp(AllRegsMask(rsRestoreGprRegs, rsRestoreFloatRegs
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ ,
+ rsRestorePredicateRegs
+#endif
+ ),
+ calleeSaveSpOffset, calleeSaveSpDelta);
switch (frameType)
{
@@ -718,7 +727,8 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg
// no return value; the regStack argument is modified.
//
// static
-void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
+void CodeGen::genBuildRegPairsStack(regMaskOnlyOne regsMask,
+ ArrayStack<RegPair>* regStack MORE_THAN_64_REG_ARG(var_types type))
{
assert(regStack != nullptr);
assert(regStack->Height() == 0);
@@ -727,13 +737,13 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* reg
while (regsMask != RBM_NONE)
{
- regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask);
+ regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask MORE_THAN_64_REG_ARG(type));
regsCount -= 1;
bool isPairSave = false;
if (regsCount > 0)
{
- regNumber reg2 = genFirstRegNumFromMask(regsMask);
+ regNumber reg2 = genFirstRegNumFromMask(regsMask MORE_THAN_64_REG_ARG(type));
if (reg2 == REG_NEXT(reg1))
{
// The JIT doesn't allow saving pair (R28,FP), even though the
@@ -820,7 +830,7 @@ void CodeGen::genSetUseSaveNextPairs(ArrayStack* regStack)
// Note: Because int and float register type sizes match we can call this function with a mask that includes both.
//
// static
-int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
+int CodeGen::genGetSlotSizeForRegsInMask(regMaskOnlyOne regsMask)
{
assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_LR)) == regsMask); // Do not expect anything else.
@@ -835,13 +845,18 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
// regsMask - a mask of registers for prolog generation;
// spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
// spOffset - the offset from SP that is the beginning of the callee-saved register area;
+// type - The type of `regsMask` we are operating on.
//
-void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskOnlyOne regsMask,
+ int spDelta,
+ int spOffset MORE_THAN_64_REG_ARG(var_types type))
{
+ assert(compiler->IsOnlyOneRegMask(regsMask));
+
const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
- genBuildRegPairsStack(regsMask, &regStack);
+ genBuildRegPairsStack(regsMask, &regStack MORE_THAN_64_REG_ARG(type));
for (int i = 0; i < regStack.Height(); ++i)
{
@@ -902,12 +917,27 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i
// The save set can contain LR in which case LR is saved along with the other callee-saved registers.
// But currently Jit doesn't use frames without frame pointer on arm64.
//
-void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+void CodeGen::genSaveCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToSaveMask,
+ int lowestCalleeSavedOffset,
+ int spDelta)
{
assert(spDelta <= 0);
assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES);
- unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+ regMaskGpr maskSaveRegsInt = regsToSaveMask.gprRegs();
+ regMaskFloat maskSaveRegsFloat = regsToSaveMask.floatRegs(compiler);
+
+ assert(compiler->IsGprRegMask(maskSaveRegsInt));
+ assert(compiler->IsFloatRegMask(maskSaveRegsFloat));
+
+ unsigned regsToSaveCount = genCountBits(maskSaveRegsFloat) + genCountBits(maskSaveRegsInt);
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate maskSaveRegsPredicate = regsToSaveMask.predicateRegs(compiler);
+ assert(compiler->IsPredicateRegMask(maskSaveRegsPredicate));
+ regsToSaveCount += genCountBits(maskSaveRegsPredicate);
+#endif
+
if (regsToSaveCount == 0)
{
if (spDelta != 0)
@@ -924,21 +954,29 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
// We also can save FP and LR, even though they are not in RBM_CALLEE_SAVED.
assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_LR));
- // Save integer registers at higher addresses than floating-point registers.
-
- regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
- regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ if (maskSaveRegsPredicate != RBM_NONE)
+ {
+ genSaveCalleeSavedRegisterGroup(maskSaveRegsPredicate, spDelta,
+ lowestCalleeSavedOffset MORE_THAN_64_REG_ARG(TYP_MASK));
+ spDelta = 0;
+ lowestCalleeSavedOffset += genCountBits(maskSaveRegsPredicate) * FPSAVE_REGSIZE_BYTES;
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
+ // Save integer registers at higher addresses than floating-point registers.
if (maskSaveRegsFloat != RBM_NONE)
{
- genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset);
+ genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta,
+ lowestCalleeSavedOffset MORE_THAN_64_REG_ARG(TYP_FLOAT));
spDelta = 0;
lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES;
}
if (maskSaveRegsInt != RBM_NONE)
{
- genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
+ genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta,
+ lowestCalleeSavedOffset MORE_THAN_64_REG_ARG(TYP_INT));
// No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
}
}
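In the new genSaveCalleeSavedRegistersHelp, each register group is stored starting at lowestCalleeSavedOffset and the offset is then advanced by the group's size, so the groups stack up as predicate, then float, then integer (integer registers at the highest addresses). A worked example of that accounting, assuming 8-byte save slots as on arm64 and invented group sizes:

#include <cstdio>

// Worked example of the offset bookkeeping: three register groups laid out
// upward from an assumed starting offset of 16 bytes, 8 bytes per slot.
int main()
{
    const int slotSize       = 8;  // REGSIZE_BYTES / FPSAVE_REGSIZE_BYTES on arm64
    int       lowestOffset   = 16; // assumed start of the callee-saved area
    const int predicateSaves = 2;
    const int floatSaves     = 4;
    const int integerSaves   = 6;

    printf("predicate group starts at SP+%d\n", lowestOffset); // SP+16
    lowestOffset += predicateSaves * slotSize;

    printf("float group starts at SP+%d\n", lowestOffset);     // SP+32
    lowestOffset += floatSaves * slotSize;

    printf("integer group starts at SP+%d\n", lowestOffset);   // SP+64
    lowestOffset += integerSaves * slotSize;

    printf("callee-saved area ends at SP+%d\n", lowestOffset); // SP+112
    return 0;
}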
@@ -949,14 +987,17 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
// Arguments:
// regsMask - a mask of registers for epilog generation;
// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it);
-// spOffset - the offset from SP that is the beginning of the callee-saved register area;
+// spOffset - the offset from SP that is the beginning of the callee-saved register area;
+// type - The type of `regsMask` we are operating on.
//
-void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskOnlyOne regsMask,
+ int spDelta,
+ int spOffset MORE_THAN_64_REG_ARG(var_types type))
{
const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
- genBuildRegPairsStack(regsMask, &regStack);
+ genBuildRegPairsStack(regsMask, &regStack MORE_THAN_64_REG_ARG(type));
int stackDelta = 0;
for (int i = 0; i < regStack.Height(); ++i)
@@ -1017,10 +1058,26 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta
// Return Value:
// None.
-void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
+void CodeGen::genRestoreCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToRestoreMask,
+ int lowestCalleeSavedOffset,
+ int spDelta)
{
assert(spDelta >= 0);
- unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
+
+ regMaskGpr maskRestoreRegsInt = regsToRestoreMask.gprRegs();
+ regMaskFloat maskRestoreRegsFloat = regsToRestoreMask.floatRegs(compiler);
+
+ assert(compiler->IsGprRegMask(maskRestoreRegsInt));
+ assert(compiler->IsFloatRegMask(maskRestoreRegsFloat));
+
+ unsigned regsToRestoreCount = genCountBits(maskRestoreRegsInt) + genCountBits(maskRestoreRegsFloat);
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate maskRestoreRegsPredicate = regsToRestoreMask.predicateRegs(compiler);
+ assert(compiler->IsPredicateRegMask(maskRestoreRegsPredicate));
+ regsToRestoreCount += genCountBits(maskRestoreRegsPredicate);
+#endif
+
if (regsToRestoreCount == 0)
{
if (spDelta != 0)
@@ -1043,24 +1100,28 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
// Save integer registers at higher addresses than floating-point registers.
- regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
- regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
-
// Restore in the opposite order of saving.
-
if (maskRestoreRegsInt != RBM_NONE)
{
int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment?
- genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset);
+ genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset MORE_THAN_64_REG_ARG(TYP_INT));
spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES;
}
if (maskRestoreRegsFloat != RBM_NONE)
{
// If there is any spDelta, it must be used here.
- genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset);
- // No need to update spOffset since it's not used after this.
+ genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset MORE_THAN_64_REG_ARG(TYP_FLOAT));
+ spOffset -= genCountBits(maskRestoreRegsFloat) * FPSAVE_REGSIZE_BYTES;
+ }
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ if (maskRestoreRegsPredicate != RBM_NONE)
+ {
+ // TODO: Do we need to adjust spDelta?
+ genRestoreCalleeSavedRegisterGroup(maskRestoreRegsPredicate, spDelta, spOffset MORE_THAN_64_REG_ARG(TYP_MASK));
}
+#endif
}
// clang-format off
@@ -1368,8 +1429,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
compiler->unwindBegProlog();
- regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
- regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
+ regMaskFloat maskSaveRegsFloat = genFuncletInfo.fiSaveFloatRegs;
+ regMaskGpr maskSaveRegsInt = genFuncletInfo.fiSaveGprRegs;
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate maskSaveRegsPredicate = genFuncletInfo.fiSavePredicateRegs;
+#endif
// Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
assert((maskSaveRegsInt & RBM_LR) != 0);
@@ -1377,7 +1442,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
- regMaskTP maskArgRegsLiveIn;
+ regMaskGpr maskArgRegsLiveIn;
if (isFilter)
{
maskArgRegsLiveIn = RBM_R0 | RBM_R1;
@@ -1486,7 +1551,14 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta +
genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet (if any)
- genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
+
+ genSaveCalleeSavedRegistersHelp(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ ,
+ maskSaveRegsPredicate
+#endif
+ ),
+ lowestCalleeSavedOffset, 0);
if ((genFuncletInfo.fiFrameType == 3) || (genFuncletInfo.fiFrameType == 5))
{
@@ -1522,7 +1594,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// function)
genInstrWithConstant(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta,
REG_R2, false);
- regSet.verifyRegUsed(REG_R1);
+ regSet.verifyGprRegUsed(REG_R1);
// Store the PSP value (aka CallerSP)
genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2,
@@ -1539,7 +1611,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// compute the CallerSP, given the frame pointer. x3 is scratch.
genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
-genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false);
- regSet.verifyRegUsed(REG_R3);
+ regSet.verifyGprRegUsed(REG_R3);
genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2,
false);
@@ -1572,8 +1644,12 @@ void CodeGen::genFuncletEpilog()
unwindStarted = true;
}
- regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
- regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
+ regMaskFloat maskRestoreRegsFloat = genFuncletInfo.fiSaveFloatRegs;
+ regMaskGpr maskRestoreRegsInt = genFuncletInfo.fiSaveGprRegs;
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate maskRestoreRegsPredicate = genFuncletInfo.fiSavePredicateRegs;
+#endif
// Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
assert((maskRestoreRegsInt & RBM_LR) != 0);
@@ -1596,13 +1672,18 @@ void CodeGen::genFuncletEpilog()
}
}
- regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
if ((genFuncletInfo.fiFrameType == 1) || (genFuncletInfo.fiFrameType == 2) || (genFuncletInfo.fiFrameType == 3))
{
- regsToRestoreMask &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
+ maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
}
int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2;
- genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
+ genRestoreCalleeSavedRegistersHelp(AllRegsMask(maskRestoreRegsInt, maskRestoreRegsFloat
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ ,
+ maskRestoreRegsPredicate
+#endif
+ ),
+ lowestCalleeSavedOffset, 0);
if (genFuncletInfo.fiFrameType == 1)
{
@@ -1733,11 +1814,18 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad;
- regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
- assert((rsMaskSaveRegs & RBM_LR) != 0);
- assert((rsMaskSaveRegs & RBM_FP) != 0);
+ regMaskGpr rsMaskSaveGprRegs = regSet.rsGprMaskCalleeSaved;
+ regMaskFloat rsMaskSaveFloatRegs = regSet.rsFloatMaskCalleeSaved;
+ regMaskPredicate rsMaskSavePredicateRegs = RBM_NONE;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ rsMaskSavePredicateRegs = regSet.rsPredicateMaskCalleeSaved;
+#endif
+
+ assert((rsMaskSaveGprRegs & RBM_LR) != 0);
+ assert((rsMaskSaveGprRegs & RBM_FP) != 0);
- unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsCount =
+ genCountBits(rsMaskSaveGprRegs) + genCountBits(rsMaskSaveFloatRegs) + genCountBits(rsMaskSavePredicateRegs);
unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
if (compiler->info.compIsVarArgs)
{
@@ -1856,7 +1944,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
/* Now save it for future use */
- genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSaveGprRegs = rsMaskSaveGprRegs;
+ genFuncletInfo.fiSaveFloatRegs = rsMaskSaveFloatRegs;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ genFuncletInfo.fiSavePredicateRegs = rsMaskSavePredicateRegs;
+#endif
genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + PSPSize;
@@ -1868,7 +1960,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
printf("\n");
printf("Funclet prolog / epilog info\n");
printf(" Save regs: ");
- dspRegMask(genFuncletInfo.fiSaveRegs);
+ dspRegMask(AllRegsMask(genFuncletInfo.fiSaveGprRegs, genFuncletInfo.fiSaveFloatRegs
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ ,
+ genFuncletInfo.fiSavePredicateRegs
+#endif
+ ));
printf("\n");
if (compiler->opts.IsOSR())
{
@@ -3951,7 +4048,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
instGen_MemoryBarrier();
- gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
+ gcInfo.gcMarkRegSetNpt(addr->gtGetGprRegMask());
}
if (targetReg != REG_NA)
@@ -4101,7 +4198,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
instGen_MemoryBarrier();
- gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
+ gcInfo.gcMarkRegSetNpt(addr->gtGetGprRegMask());
}
if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet()))
@@ -4345,10 +4442,14 @@ void CodeGen::genCodeForSwap(GenTreeOp* tree)
// FP swap is not yet implemented (and should have NYI'd in LSRA)
assert(!varTypeIsFloating(type1));
- regNumber oldOp1Reg = lcl1->GetRegNum();
- regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
- regNumber oldOp2Reg = lcl2->GetRegNum();
- regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+ regNumber oldOp1Reg = lcl1->GetRegNum();
+ regNumber oldOp2Reg = lcl2->GetRegNum();
+
+ regMaskGpr oldOp1RegMask = genRegMask(oldOp1Reg);
+ regMaskGpr oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ assert(compiler->IsGprRegMask(oldOp1RegMask));
+ assert(compiler->IsGprRegMask(oldOp2RegMask));
// We don't call genUpdateVarReg because we don't have a tree node with the new register.
varDsc1->SetRegNum(oldOp2Reg);
@@ -5109,11 +5210,11 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
}
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+ regMaskGpr callTargetMask = genRegMask(callTargetReg);
+ CONSTREF_AllRegsMask callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
// assert that all registers in callTargetMask are in the callKillSet
- noway_assert((callTargetMask & callKillSet) == callTargetMask);
+ noway_assert((callTargetMask & callKillSet.gprRegs()) == callTargetMask);
callTarget = callTargetReg;
@@ -5132,7 +5233,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
false /* isJump */
);
- regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+ CONSTREF_AllRegsMask killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
regSet.verifyRegistersUsed(killMask);
}
@@ -5432,6 +5533,7 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode)
void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
{
assert(compiler->compGeneratingProlog);
+ assert(genIsValidIntReg(initReg));
if (!compiler->compIsProfilerHookNeeded())
{
@@ -5458,8 +5560,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
// If initReg is trashed, either because it was an arg to the enter
// callback, or because the enter callback itself trashes it, then it needs
// to be zero'ed again before using.
- if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP) &
- genRegMask(initReg)) != RBM_NONE)
+ AllRegsMask profileEnterTrash = compiler->AllRegsMask_PROFILER_ENTER_TRASH;
+ profileEnterTrash.AddRegMaskForType(RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP, TYP_INT);
+ if (profileEnterTrash.IsRegNumInMask(initReg))
{
*pInitRegZeroed = false;
}
@@ -5559,9 +5662,13 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
// Return value:
// None
//
-void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+void CodeGen::genAllocLclFrame(unsigned frameSize,
+ regNumber initReg,
+ bool* pInitRegZeroed,
+ regMaskGpr maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);
+ assert(compiler->IsGprRegMask(maskArgRegsLiveIn));
if (frameSize == 0)
{
@@ -5617,18 +5724,18 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
// until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
// the stack afterward (which means the stack pointer needs to be known).
- regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
+ regMaskGpr availMask = regSet.rsGetModifiedGprRegsMask() | ~RBM_INT_CALLEE_SAVED;
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
- regNumber rOffset = initReg;
- regNumber rLimit;
- regMaskTP tempMask;
+ regNumber rOffset = initReg;
+ regNumber rLimit;
+ regMaskGpr tempMask;
// We pick the next lowest register number for rLimit
noway_assert(availMask != RBM_NONE);
tempMask = genFindLowestBit(availMask);
- rLimit = genRegNumFromMask(tempMask);
+ rLimit = genRegNumFromMask(tempMask MORE_THAN_64_REG_ARG(TYP_INT));
// Generate:
//
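Call sites in this file now pass an AllRegsMask built from the per-file masks, with the predicate component present only under FEATURE_MASKED_HW_INTRINSICS. A minimal self-contained sketch of such a compound mask type follows, under the assumption that it simply aggregates one 64-bit mask per register file; the real AllRegsMask in this change may have a different layout and API.

#include <cstdint>

// Sketch only: a compound register mask keeping one mask per register file,
// mirroring the AllRegsMask(gpr, float[, predicate]) construction calls above.
using ExRegMask = uint64_t;

struct ExAllRegsMask
{
    ExRegMask gpr = 0;
    ExRegMask flt = 0;
#ifdef FEATURE_MASKED_HW_INTRINSICS
    ExRegMask pred = 0; // predicate/mask registers exist only with SVE support
#endif

    ExAllRegsMask() = default;

#ifdef FEATURE_MASKED_HW_INTRINSICS
    ExAllRegsMask(ExRegMask g, ExRegMask f, ExRegMask p) : gpr(g), flt(f), pred(p) {}
#else
    ExAllRegsMask(ExRegMask g, ExRegMask f) : gpr(g), flt(f) {}
#endif

    // Total number of registers named by the compound mask.
    unsigned Count() const
    {
        unsigned c = Popcount(gpr) + Popcount(flt);
#ifdef FEATURE_MASKED_HW_INTRINSICS
        c += Popcount(pred);
#endif
        return c;
    }

private:
    static unsigned Popcount(ExRegMask m)
    {
        unsigned c = 0;
        for (; m != 0; m &= (m - 1)) // clear the lowest set bit each iteration
        {
            c++;
        }
        return c;
    }
};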
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index a9e2a41f73f945..2dfc9d791a6a99 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -1754,7 +1754,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
GetEmitter()->emitIns_R_R_I(INS_add, emitActualTypeSize(node), node->GetRegNum(), node->GetRegNum(),
node->gtElemOffset);
- gcInfo.gcMarkRegSetNpt(base->gtGetRegMask());
+ gcInfo.gcMarkRegSetNpt(base->gtGetGprRegMask());
genProduceReg(node);
}
@@ -3266,7 +3266,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)
#endif
inst_JMP(EJ_ne, loop);
- gcInfo.gcMarkRegSetNpt(genRegMask(dstReg));
+ gcInfo.gcMarkGprRegNpt(dstReg);
}
}
@@ -3394,15 +3394,16 @@ void CodeGen::genCall(GenTreeCall* call)
// We should not have GC pointers in killed registers live around the call.
// GC info for arg registers were cleared when consuming arg nodes above
// and LSRA should ensure it for other trashed registers.
- regMaskTP killMask = RBM_CALLEE_TRASH;
+ AllRegsMask killMask = compiler->AllRegsMask_CALLEE_TRASH;
+
if (call->IsHelperCall())
{
CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
killMask = compiler->compHelperCallKillSet(helpFunc);
}
- assert((gcInfo.gcRegGCrefSetCur & killMask) == 0);
- assert((gcInfo.gcRegByrefSetCur & killMask) == 0);
+ assert(!killMask.IsGprMaskPresent(gcInfo.gcRegGCrefSetCur));
+ assert(!killMask.IsGprMaskPresent(gcInfo.gcRegByrefSetCur));
#endif
var_types returnType = call->TypeGet();
@@ -3477,7 +3478,7 @@ void CodeGen::genCall(GenTreeCall* call)
// However, for minopts or debuggable code, we keep it live to support managed return value debugging.
if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
{
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ gcInfo.gcMarkGprRegNpt(REG_INTRET);
}
}
@@ -3537,28 +3538,49 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
if (call->IsFastTailCall())
{
- regMaskTP trashedByEpilog = RBM_CALLEE_SAVED;
+ regMaskGpr trashedGprByEpilog = RBM_INT_CALLEE_SAVED;
+ regMaskFloat trashedFloatByEpilog = RBM_FLT_CALLEE_SAVED;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate trashedPredicateByEpilog = RBM_MSK_CALLEE_SAVED;
+#endif // FEATURE_MASKED_HW_INTRINSICS
// The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no
// non-standard args that may be trash if this is a tailcall.
if (compiler->getNeedsGSSecurityCookie())
{
- trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0);
- trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1);
+ trashedGprByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0);
+ trashedGprByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1);
}
for (CallArg& arg : call->gtArgs.Args())
{
for (unsigned j = 0; j < arg.AbiInfo.NumRegs; j++)
{
- regNumber reg = arg.AbiInfo.GetRegNum(j);
- if ((trashedByEpilog & genRegMask(reg)) != 0)
+ regNumber reg = arg.AbiInfo.GetRegNum(j);
+ var_types argType = arg.AbiInfo.ArgType;
+ if (varTypeUsesIntReg(argType) && ((trashedGprByEpilog & genRegMask(reg)) != 0))
+ {
+ JITDUMP("Tail call node:\n");
+ DISPTREE(call);
+ JITDUMP("Gpr Register used: %s\n", getRegName(reg));
+ assert(!"Argument to tailcall may be trashed by epilog");
+ }
+ else if (varTypeUsesFloatArgReg(argType) && ((trashedFloatByEpilog & genRegMask(reg)) != 0))
{
JITDUMP("Tail call node:\n");
DISPTREE(call);
- JITDUMP("Register used: %s\n", getRegName(reg));
+ JITDUMP("Float Register used: %s\n", getRegName(reg));
assert(!"Argument to tailcall may be trashed by epilog");
}
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else if (varTypeUsesMaskReg(argType) && ((trashedPredicateByEpilog & genRegMask(reg)) != 0))
+ {
+ JITDUMP("Tail call node:\n");
+ DISPTREE(call);
+ JITDUMP("Mask Register used: %s\n", getRegName(reg));
+ assert(!"Argument to tailcall may be trashed by epilog");
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
}
}
}
@@ -3786,7 +3808,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)
// are not frequent.
for (varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
{
- varDsc = compiler->lvaGetDesc(varNum);
+ varDsc = compiler->lvaGetDesc(varNum);
+ regNumber varReg = varDsc->GetRegNum();
if (varDsc->lvPromoted)
{
@@ -3797,17 +3820,17 @@ void CodeGen::genJmpMethod(GenTree* jmp)
}
noway_assert(varDsc->lvIsParam);
- if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK))
+ if (varDsc->lvIsRegArg && (varReg != REG_STK))
{
// Skip reg args which are already in its right register for jmp call.
// If not, we will spill such args to their stack locations.
//
// If we need to generate a tail call profiler hook, then spill all
// arg regs to free them up for the callback.
- if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg()))
+ if (!compiler->compIsProfilerHookNeeded() && (varReg == varDsc->GetArgReg()))
continue;
}
- else if (varDsc->GetRegNum() == REG_STK)
+ else if (varReg == REG_STK)
{
// Skip args which are currently living in stack.
continue;
@@ -3816,7 +3839,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
// If we came here it means either a reg argument not in the right register or
// a stack argument currently living in a register. In either case the following
// assert should hold.
- assert(varDsc->GetRegNum() != REG_STK);
+ assert(varReg != REG_STK);
assert(varDsc->IsEnregisterableLcl());
var_types storeType = varDsc->GetStackSlotHomeType();
emitAttr storeSize = emitActualTypeSize(storeType);
@@ -3841,9 +3864,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)
// Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
// Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it.
// Therefore manually update life of varDsc->GetRegNum().
- regMaskTP tempMask = genRegMask(varDsc->GetRegNum());
- regSet.RemoveMaskVars(tempMask);
- gcInfo.gcMarkRegSetNpt(tempMask);
+ regSet.RemoveMaskVars(varDsc->TypeGet(), genRegMask(varReg));
+ gcInfo.gcMarkRegNpt(varReg);
if (compiler->lvaIsGCTracked(varDsc))
{
VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
@@ -3857,8 +3879,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)
#endif
// Next move any un-enregistered register arguments back to their register.
- regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
- unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ regMaskGpr fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
for (varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
{
varDsc = compiler->lvaGetDesc(varNum);
@@ -3930,7 +3952,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
// expecting it. Therefore manually update life of argReg. Note that GT_JMP marks the end of
// the basic block and after which reg life and gc info will be recomputed for the new block
// in genCodeForBBList().
- regSet.AddMaskVars(genRegMask(argReg));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argReg));
gcInfo.gcMarkRegPtrVal(argReg, loadType);
if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
@@ -3942,7 +3964,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
loadSize = emitActualTypeSize(loadType);
GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE);
- regSet.AddMaskVars(genRegMask(argRegNext));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argRegNext));
gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
}
@@ -4040,7 +4062,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, slotReg, varNum, ofs);
}
- regSet.AddMaskVars(genRegMask(slotReg));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(slotReg));
gcInfo.gcMarkRegPtrVal(slotReg, loadType);
if (genIsValidIntReg(slotReg) && compiler->info.compIsVarArgs)
{
@@ -4059,7 +4081,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
GetEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
}
- regSet.AddMaskVars(genRegMask(argReg));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argReg));
gcInfo.gcMarkRegPtrVal(argReg, loadType);
if (genIsValidIntReg(argReg) && compiler->info.compIsVarArgs)
@@ -4089,14 +4111,14 @@ void CodeGen::genJmpMethod(GenTree* jmp)
assert(compiler->info.compIsVarArgs);
assert(firstArgVarNum != BAD_VAR_NUM);
- regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ regMaskGpr remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
if (remainingIntArgMask != RBM_NONE)
{
GetEmitter()->emitDisableGC();
for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
{
- regNumber argReg = intArgRegs[argNum];
- regMaskTP argRegMask = genRegMask(argReg);
+ regNumber argReg = intArgRegs[argNum];
+ regMaskGpr argRegMask = genRegMask(argReg);
if ((remainingIntArgMask & argRegMask) != 0)
{
@@ -4893,7 +4915,12 @@ void CodeGen::genPushCalleeSavedRegisters()
intRegState.rsCalleeRegArgMaskLiveIn);
#endif
- regMaskTP rsPushRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
+ regMaskGpr rsPushGprRegs = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs() & RBM_INT_CALLEE_SAVED;
+ regMaskFloat rsPushFloatRegs =
+ regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_FLT_CALLEE_SAVED;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate rsPushPredicateRegs = regSet.rsGetModifiedPredicateRegsMask() & RBM_MSK_CALLEE_SAVED;
+#endif
#if ETW_EBP_FRAMED
if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -4904,7 +4931,7 @@ void CodeGen::genPushCalleeSavedRegisters()
// On ARM we push the FP (frame-pointer) here along with all other callee saved registers
if (isFramePointerUsed())
- rsPushRegs |= RBM_FPBASE;
+ rsPushGprRegs |= RBM_FPBASE;
//
// It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
@@ -4925,24 +4952,38 @@ void CodeGen::genPushCalleeSavedRegisters()
// Given the limited benefit from this optimization (<10k for CoreLib NGen image), the extra complexity
// is not worth it.
//
- rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
+ rsPushGprRegs |= RBM_LR; // We must save the return address (in the LR register)
- regSet.rsMaskCalleeSaved = rsPushRegs;
+ regSet.rsGprMaskCalleeSaved = rsPushGprRegs;
+ regSet.rsFloatMaskCalleeSaved = rsPushFloatRegs;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regSet.rsPredicateMaskCalleeSaved = rsPushPredicateRegs;
+#endif
#ifdef DEBUG
- if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
+ unsigned pushRegsCnt = genCountBits(rsPushGprRegs) + genCountBits(rsPushFloatRegs);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ pushRegsCnt += genCountBits(rsPushPredicateRegs);
+#endif
+
+ if (compiler->compCalleeRegsPushed != pushRegsCnt)
{
printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
- compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
- dspRegMask(rsPushRegs);
+ compiler->compCalleeRegsPushed, pushRegsCnt);
+ dspRegMask(AllRegsMask(rsPushGprRegs, rsPushFloatRegs
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ ,
+ rsPushPredicateRegs
+#endif
+ ));
printf("\n");
- assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
+ assert(compiler->compCalleeRegsPushed == pushRegsCnt);
}
#endif // DEBUG
#if defined(TARGET_ARM)
- regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
- regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
+ regMaskFloat maskPushRegsFloat = rsPushFloatRegs;
+ regMaskGpr maskPushRegsInt = rsPushGprRegs;
maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
@@ -5048,17 +5089,13 @@ void CodeGen::genPushCalleeSavedRegisters()
int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
- regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
- regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
+ regMaskFloat maskSaveRegsFloat = rsPushFloatRegs;
+ regMaskGpr maskSaveRegsInt = rsPushGprRegs;
#ifdef DEBUG
if (verbose)
{
- printf("Save float regs: ");
- dspRegMask(maskSaveRegsFloat);
- printf("\n");
- printf("Save int regs: ");
- dspRegMask(maskSaveRegsInt);
+ dspRegMask(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat));
printf("\n");
}
#endif // DEBUG
@@ -5303,7 +5340,8 @@ void CodeGen::genPushCalleeSavedRegisters()
const int calleeSaveSpOffset = offset;
JITDUMP(" offset=%d, calleeSaveSpDelta=%d\n", offset, calleeSaveSpDelta);
- genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSpDelta);
+
+ genSaveCalleeSavedRegistersHelp(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat), offset, -calleeSaveSpDelta);
offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
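
[Reviewer note, not part of the diff] The OR on the line above still works because, in this transitional scheme, the GPR and float masks are assumed to occupy disjoint bit ranges of one 64-bit encoding (consistent with GetGprFloatCombinedMask() used elsewhere in this diff), so counting the union counts each saved register exactly once:

    // Disjoint bit ranges => popcount of the union equals the sum of the per-class counts.
    unsigned savedCount = genCountBits(maskSaveRegsInt | maskSaveRegsFloat);
    assert(savedCount == genCountBits(maskSaveRegsInt) + genCountBits(maskSaveRegsFloat));
    offset += savedCount * REGSIZE_BYTES;
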
@@ -5486,10 +5524,10 @@ void CodeGen::genFnEpilog(BasicBlock* block)
dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
printf(", gcRegGCrefSetCur=");
printRegMaskInt(gcInfo.gcRegGCrefSetCur);
- GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ GetEmitter()->emitDispGprRegSet(gcInfo.gcRegGCrefSetCur);
printf(", gcRegByrefSetCur=");
printRegMaskInt(gcInfo.gcRegByrefSetCur);
- GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ GetEmitter()->emitDispGprRegSet(gcInfo.gcRegByrefSetCur);
printf("\n");
}
#endif // DEBUG
@@ -5540,8 +5578,8 @@ void CodeGen::genFnEpilog(BasicBlock* block)
compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
}
- if (jmpEpilog ||
- genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedFltCalleeSavedRegsMask()) == RBM_NONE)
+ if (jmpEpilog || genStackAllocRegisterMask(compiler->compLclFrameSize,
+ regSet.rsGetModifiedFloatRegsMask() & RBM_FLT_CALLEE_SAVED) == RBM_NONE)
{
genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted);
}
@@ -5661,7 +5699,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
if (addrInfo.accessType == IAT_PVALUE)
{
GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regSet.verifyRegUsed(indCallReg);
+ regSet.verifyGprRegUsed(indCallReg);
}
break;
@@ -5675,7 +5713,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
indCallReg = REG_R12;
addr = NULL;
- regSet.verifyRegUsed(indCallReg);
+ regSet.verifyGprRegUsed(indCallReg);
break;
}
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index 1edfb2ea124a72..231e7332d53fcf 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -130,7 +130,7 @@ CodeGen::CodeGen(Compiler* theCompiler)
#if defined(TARGET_XARCH)
// Shouldn't be used before it is set in genFnProlog()
- compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
+ compiler->compCalleeFPRegsSavedMask = (regMaskFloat)-1;
#endif // defined(TARGET_XARCH)
#endif // DEBUG
@@ -484,9 +484,9 @@ void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
// Return the register mask for the given register variable
// inline
-regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
+regMaskOnlyOne CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
{
- regMaskTP regMask = RBM_NONE;
+ regMaskOnlyOne regMask = RBM_NONE;
assert(varDsc->lvIsInReg());
@@ -504,11 +504,11 @@ regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
// Return the register mask for the given lclVar or regVar tree node
// inline
-regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
+regMaskOnlyOne CodeGenInterface::genGetRegMask(GenTree* tree)
{
assert(tree->gtOper == GT_LCL_VAR);
- regMaskTP regMask = RBM_NONE;
+ regMaskOnlyOne regMask = RBM_NONE;
const LclVarDsc* varDsc = compiler->lvaGetDesc(tree->AsLclVarCommon());
if (varDsc->lvPromoted)
{
@@ -535,7 +535,8 @@ regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
// inline
void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
{
- regMaskTP regMask = genGetRegMask(varDsc);
+ regMaskOnlyOne regMask = genGetRegMask(varDsc);
+ assert(compiler->IsOnlyOneRegMask(regMask));
#ifdef DEBUG
if (compiler->verbose)
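
[Reviewer note, not part of the diff] IsOnlyOneRegMask is used here and in later hunks as a debug guard that a mask does not mix register classes. A plausible shape, assumed for illustration only (IsGprRegMask and IsFloatRegMask do appear in this diff; a predicate-mask variant would be a further guess):

    bool Compiler::IsOnlyOneRegMask(regMaskOnlyOne mask)
    {
        // An empty mask, or a mask drawn entirely from a single register class, is valid.
        return (mask == RBM_NONE) || IsGprRegMask(mask) || IsFloatRegMask(mask);
    }
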
@@ -554,15 +555,15 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo
// We'd like to be able to assert the following, however if we are walking
// through a qmark/colon tree, we may encounter multiple last-use nodes.
// assert((regSet.GetMaskVars() & regMask) == regMask);
- regSet.RemoveMaskVars(regMask);
+ regSet.RemoveMaskVars(varDsc->TypeGet(), regMask);
}
else
{
// If this is going live, the register must not have a variable in it, except
// in the case of an exception or "spill at single-def" variable, which may be already treated
// as live in the register.
- assert(varDsc->IsAlwaysAliveInMemory() || ((regSet.GetMaskVars() & regMask) == 0));
- regSet.AddMaskVars(regMask);
+ assert(varDsc->IsAlwaysAliveInMemory() || ((regSet.GetMaskVars(varDsc->TypeGet()) & regMask) == 0));
+ regSet.AddMaskVars(varDsc->TypeGet(), regMask);
}
}
@@ -576,7 +577,7 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo
// Return Value:
// Mask of register kills -- registers whose values are no longer guaranteed to be the same.
//
-regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
+CONSTREF_AllRegsMask Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
{
switch (helper)
{
@@ -588,19 +589,19 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
//
case CORINFO_HELP_ASSIGN_REF:
case CORINFO_HELP_CHECKED_ASSIGN_REF:
- return RBM_CALLEE_TRASH_WRITEBARRIER;
+ return AllRegsMask_CALLEE_TRASH_WRITEBARRIER;
case CORINFO_HELP_ASSIGN_BYREF:
- return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
+ return AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF;
case CORINFO_HELP_PROF_FCN_ENTER:
- return RBM_PROFILER_ENTER_TRASH;
+ return AllRegsMask_PROFILER_ENTER_TRASH;
case CORINFO_HELP_PROF_FCN_LEAVE:
- return RBM_PROFILER_LEAVE_TRASH;
+ return AllRegsMask_PROFILER_LEAVE_TRASH;
case CORINFO_HELP_PROF_FCN_TAILCALL:
- return RBM_PROFILER_TAILCALL_TRASH;
+ return AllRegsMask_PROFILER_TAILCALL_TRASH;
#ifdef TARGET_X86
case CORINFO_HELP_ASSIGN_REF_EAX:
@@ -616,20 +617,20 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
- return RBM_EDX;
+ return AllRegsMask_EDX;
#endif
case CORINFO_HELP_STOP_FOR_GC:
- return RBM_STOP_FOR_GC_TRASH;
+ return AllRegsMask_STOP_FOR_GC_TRASH;
case CORINFO_HELP_INIT_PINVOKE_FRAME:
- return RBM_INIT_PINVOKE_FRAME_TRASH;
+ return AllRegsMask_INIT_PINVOKE_FRAME_TRASH;
case CORINFO_HELP_VALIDATE_INDIRECT_CALL:
- return RBM_VALIDATE_INDIRECT_CALL_TRASH;
+ return AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH;
default:
- return RBM_CALLEE_TRASH;
+ return AllRegsMask_CALLEE_TRASH;
}
}
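
[Reviewer note, not part of the diff] Returning CONSTREF_AllRegsMask lets the kill sets live as prebuilt Compiler constants instead of being materialized per call. The consuming pattern, as it appears in genEmitHelperCall later in this diff (gprRegs() and GetGprFloatCombinedMask() are the only accessors visible in the change; anything beyond them would be an assumption):

    CONSTREF_AllRegsMask killMask   = compiler->compHelperCallKillSet(helper);
    regMaskGpr           killedGprs = killMask.gprRegs(); // GPR portion of the kill set
    noway_assert((genRegMask(callTargetReg) & killedGprs) == genRegMask(callTargetReg));
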
@@ -707,7 +708,7 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
{
// TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
// gc sets
- regMaskTP regMask = varDsc->lvRegMask();
+ regMaskOnlyOne regMask = varDsc->lvRegMask();
if (isGCRef)
{
codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
@@ -752,7 +753,7 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
}
codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
- regMaskTP regMask = varDsc->lvRegMask();
+ regMaskOnlyOne regMask = varDsc->lvRegMask();
if (isGCRef)
{
codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
@@ -3270,17 +3271,17 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA))
{
- var_types copyType = node->outgoing->type;
- regMaskTP tempRegCandidates = genGetParameterHomingTempRegisterCandidates();
+ var_types copyType = node->outgoing->type;
+ RegBitSet64 tempRegCandidates = genGetParameterHomingTempRegisterCandidates();
tempRegCandidates &= ~busyRegs;
- regMaskTP regTypeMask = varTypeUsesFloatReg(copyType) ? RBM_ALLFLOAT : RBM_ALLINT;
- regMaskTP availRegs = tempRegCandidates & regTypeMask;
+ regMaskOnlyOne regTypeMask = varTypeUsesFloatReg(copyType) ? RBM_ALLFLOAT : RBM_ALLINT;
+ regMaskOnlyOne availRegs = tempRegCandidates & regTypeMask;
// We should have ensured temporary registers are available in
// genFinalizeFrame.
noway_assert(availRegs != RBM_NONE);
- node->copiedReg = genFirstRegNumFromMask(availRegs);
+ node->copiedReg = genFirstRegNumFromMask(availRegs MORE_THAN_64_REG_ARG(copyType));
busyRegs |= genRegMask(node->copiedReg);
instruction ins = ins_Copy(node->reg, copyType);
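
[Reviewer note, not part of the diff] MORE_THAN_64_REG_ARG threads an extra var_types argument into the mask helpers only on targets whose register file needs more than 64 mask bits. Its definition is not shown in this excerpt; a plausible sketch (the guard name is hypothetical):

    #ifdef HAS_MORE_THAN_64_REGISTERS          // hypothetical guard
    #define MORE_THAN_64_REG_ARG(x) , (x)      // pass the type so the right mask word is chosen
    #else
    #define MORE_THAN_64_REG_ARG(x)            // expands to nothing on 64-bit-mask targets
    #endif

    // genFirstRegNumFromMask(availRegs MORE_THAN_64_REG_ARG(copyType)) therefore compiles
    // to either a one-argument or a two-argument call.
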
@@ -3359,10 +3360,10 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
// destination register of a parameter, or because a value passed in one of
// these registers is still needed.
//
-regMaskTP CodeGen::genGetParameterHomingTempRegisterCandidates()
+RegBitSet64 CodeGen::genGetParameterHomingTempRegisterCandidates()
{
return RBM_CALLEE_TRASH | intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn |
- regSet.rsGetModifiedRegsMask();
+ regSet.rsGetModifiedRegsMask().GetGprFloatCombinedMask();
}
/*****************************************************************************
@@ -3679,7 +3680,7 @@ void CodeGen::genCheckUseBlockInit()
if (genUseBlockInit)
{
- regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskGpr maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
// If there is a secret stub param, don't count it, as it will no longer
// be live when we do block init.
@@ -3696,11 +3697,11 @@ void CodeGen::genCheckUseBlockInit()
//
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
if (forceSpillRegCount > 0)
- regSet.rsSetRegsModified(RBM_R4);
+ regSet.rsSetGprRegsModified(RBM_R4);
if (forceSpillRegCount > 1)
- regSet.rsSetRegsModified(RBM_R5);
+ regSet.rsSetGprRegsModified(RBM_R5);
if (forceSpillRegCount > 2)
- regSet.rsSetRegsModified(RBM_R6);
+ regSet.rsSetGprRegsModified(RBM_R6);
#endif // TARGET_ARM
}
}
@@ -3715,9 +3716,13 @@ void CodeGen::genCheckUseBlockInit()
* initialized to 0. (Arm Only) Else copies from the integer register which
* is slower.
*/
-void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
+void CodeGen::genZeroInitFltRegs(const regMaskFloat& initFltRegs,
+ const regMaskFloat& initDblRegs,
+ const regNumber& initReg)
{
assert(compiler->compGeneratingProlog);
+ assert(compiler->IsFloatRegMask(initFltRegs));
+ assert(compiler->IsFloatRegMask(initDblRegs));
// The first float/double reg that is initialized to 0. So they can be used to
// initialize the remaining registers.
@@ -3726,7 +3731,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
// Iterate through float/double registers and initialize them to 0 or
// copy from already initialized register of the same type.
- regMaskTP regMask = genRegMask(REG_FP_FIRST);
+ regMaskFloat regMask = genRegMask(REG_FP_FIRST);
for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
{
if (regMask & initFltRegs)
@@ -4162,8 +4167,8 @@ void CodeGen::genHomeSwiftStructParameters(bool handleStack)
if (seg.IsPassedInRegister())
{
- RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState;
- regMaskTP regs = seg.GetRegisterMask();
+ RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState;
+ regMaskOnlyOne regs = seg.GetRegisterMask();
if ((regState->rsCalleeRegArgMaskLiveIn & regs) != RBM_NONE)
{
@@ -4462,8 +4467,10 @@ void CodeGen::genReserveProlog(BasicBlock* block)
void CodeGen::genReserveEpilog(BasicBlock* block)
{
- regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
- regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
+ regMaskGpr gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
+ regMaskGpr byrefRegsArg = gcInfo.gcRegByrefSetCur;
+ assert(compiler->IsGprRegMask(gcrefRegsArg));
+ assert(compiler->IsGprRegMask(byrefRegsArg));
/* The return value is special-cased: make sure it goes live for the epilog */
@@ -4577,7 +4584,7 @@ void CodeGen::genFinalizeFrame()
// registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
// actually get saved.
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
+ regSet.rsSetGprRegsModified(RBM_INT_CALLEE_SAVED);
}
#endif // TARGET_X86
@@ -4585,14 +4592,14 @@ void CodeGen::genFinalizeFrame()
// Make sure that callee-saved registers used by call to a stack probing helper generated are pushed on stack.
if (compiler->compLclFrameSize >= compiler->eeGetPageSize())
{
- regSet.rsSetRegsModified(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
- RBM_STACK_PROBE_HELPER_TRASH);
+ regSet.rsSetGprRegsModified(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
+ RBM_STACK_PROBE_HELPER_TRASH);
}
// If there are any reserved registers, add them to the modified set.
if (regSet.rsMaskResvd != RBM_NONE)
{
- regSet.rsSetRegsModified(regSet.rsMaskResvd);
+ regSet.rsSetGprRegsModified(regSet.rsMaskResvd);
}
#endif // TARGET_ARM
@@ -4611,15 +4618,19 @@ void CodeGen::genFinalizeFrame()
// We always save FP.
noway_assert(isFramePointerUsed());
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
- regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_ENC_CALLEE_SAVED);
if (RBM_ENC_CALLEE_SAVED != 0)
{
- regSet.rsSetRegsModified(RBM_ENC_CALLEE_SAVED);
+ regSet.rsSetGprRegsModified(RBM_ENC_CALLEE_SAVED);
}
- noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
+ noway_assert(
+ (regSet.rsGetModifiedGprRegsMask() & ~(RBM_INT_CALLEE_TRASH | RBM_FPBASE | RBM_ENC_CALLEE_SAVED)) == 0);
+ noway_assert((regSet.rsGetModifiedFloatRegsMask() & ~RBM_FLT_CALLEE_TRASH) == 0);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ noway_assert((regSet.rsGetModifiedPredicateRegsMask() & ~RBM_MSK_CALLEE_TRASH) == 0);
+#endif // FEATURE_MASKED_HW_INTRINSICS
#else // !TARGET_AMD64 && !TARGET_ARM64
// On x86 we save all callee saved regs so the saved reg area size is consistent
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ regSet.rsSetGprRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
#endif // !TARGET_AMD64 && !TARGET_ARM64
}
@@ -4627,26 +4638,26 @@ void CodeGen::genFinalizeFrame()
if (compiler->compMethodRequiresPInvokeFrame())
{
noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ regSet.rsSetGprRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
}
// Parameter homing may need an additional register to handle conflicts if
// all callee trash registers are used by parameters.
- regMaskTP homingCandidates = genGetParameterHomingTempRegisterCandidates();
+ RegBitSet64 homingCandidates = genGetParameterHomingTempRegisterCandidates();
if (((homingCandidates & ~intRegState.rsCalleeRegArgMaskLiveIn) & RBM_ALLINT) == RBM_NONE)
{
- regMaskTP extraRegMask = RBM_ALLINT & ~homingCandidates;
+ regMaskGpr extraRegMask = RBM_ALLINT & ~homingCandidates;
assert(extraRegMask != RBM_NONE);
- regNumber extraReg = genFirstRegNumFromMask(extraRegMask);
+ regNumber extraReg = genFirstRegNumFromMask(extraRegMask MORE_THAN_64_REG_ARG(TYP_INT));
JITDUMP("No temporary registers are available for integer parameter homing. Adding %s\n", getRegName(extraReg));
regSet.rsSetRegsModified(genRegMask(extraReg));
}
if (((homingCandidates & ~floatRegState.rsCalleeRegArgMaskLiveIn) & RBM_ALLFLOAT) == RBM_NONE)
{
- regMaskTP extraRegMask = RBM_ALLFLOAT & ~homingCandidates;
+ regMaskFloat extraRegMask = RBM_ALLFLOAT & ~homingCandidates;
assert(extraRegMask != RBM_NONE);
- regNumber extraReg = genFirstRegNumFromMask(extraRegMask);
+ regNumber extraReg = genFirstRegNumFromMask(extraRegMask MORE_THAN_64_REG_ARG(TYP_FLOAT));
JITDUMP("No temporary registers are available for float parameter homing. Adding %s\n", getRegName(extraReg));
regSet.rsSetRegsModified(genRegMask(extraReg));
}
@@ -4655,7 +4666,7 @@ void CodeGen::genFinalizeFrame()
// On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
if (compiler->compIsProfilerHookNeeded())
{
- regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
+ regSet.rsSetGprRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
}
#endif
@@ -4669,14 +4680,19 @@ void CodeGen::genFinalizeFrame()
noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
#endif
- regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedCalleeSavedRegsMask();
+ regMaskGpr maskPushRegsInt = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs();
+ regMaskFloat maskPushRegsFloat =
+ regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_FLT_CALLEE_SAVED;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate maskPushRegsPredicate = regSet.rsGetModifiedPredicateRegsMask() & RBM_MSK_CALLEE_SAVED;
+#endif
#ifdef TARGET_ARMARCH
if (isFramePointerUsed())
{
// For a FP based frame we have to push/pop the FP register
//
- maskCalleeRegsPushed |= RBM_FPBASE;
+ maskPushRegsInt |= RBM_FPBASE;
// This assert check that we are not using REG_FP
// as both the frame pointer and as a codegen register
@@ -4686,15 +4702,12 @@ void CodeGen::genFinalizeFrame()
// we always push LR. See genPushCalleeSavedRegisters
//
- maskCalleeRegsPushed |= RBM_LR;
+ maskPushRegsInt |= RBM_LR;
#if defined(TARGET_ARM)
// TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
- regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
- regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
-
if ((maskPushRegsFloat != RBM_NONE) ||
- (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
+ (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskPushRegsInt & RBM_OPT_RSVD)))
{
// Here we try to keep stack double-aligned before the vpush
if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
@@ -4707,10 +4720,9 @@ void CodeGen::genFinalizeFrame()
if (extraPushedReg < REG_R11)
{
maskPushRegsInt |= genRegMask(extraPushedReg);
- regSet.rsSetRegsModified(genRegMask(extraPushedReg));
+ regSet.rsSetGprRegsModified(genRegMask(extraPushedReg));
}
}
- maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
}
// We currently only expect to push/pop consecutive FP registers
@@ -4719,7 +4731,7 @@ void CodeGen::genFinalizeFrame()
//
if (maskPushRegsFloat != RBM_NONE)
{
- regMaskTP contiguousMask = genRegMaskFloat(REG_F16);
+ regMaskFloat contiguousMask = genRegMaskFloat(REG_F16);
while (maskPushRegsFloat > contiguousMask)
{
contiguousMask <<= 2;
@@ -4727,10 +4739,10 @@ void CodeGen::genFinalizeFrame()
}
if (maskPushRegsFloat != contiguousMask)
{
- regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
+ regMaskFloat maskExtraRegs = contiguousMask - maskPushRegsFloat;
maskPushRegsFloat |= maskExtraRegs;
- regSet.rsSetRegsModified(maskExtraRegs);
- maskCalleeRegsPushed |= maskExtraRegs;
+ regSet.rsSetFloatRegsModified(maskExtraRegs);
}
}
#endif // TARGET_ARM
@@ -4740,8 +4752,8 @@ void CodeGen::genFinalizeFrame()
// Compute the count of callee saved float regs saved on stack.
// On Amd64 we push only integer regs. Callee saved float (xmm6-xmm31)
// regs are stack allocated and preserved in their stack locations.
- compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
- maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
+ compiler->compCalleeFPRegsSavedMask = maskPushRegsFloat & RBM_FLT_CALLEE_SAVED;
+ maskPushRegsFloat &= ~RBM_FLT_CALLEE_SAVED;
#endif // defined(TARGET_XARCH)
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
@@ -4749,7 +4761,7 @@ void CodeGen::genFinalizeFrame()
{
// For a FP based frame we have to push/pop the FP register
//
- maskCalleeRegsPushed |= RBM_FPBASE;
+ maskPushRegsInt |= RBM_FPBASE;
// This assert check that we are not using REG_FP
// as both the frame pointer and as a codegen register
@@ -4758,16 +4770,24 @@ void CodeGen::genFinalizeFrame()
}
// we always push RA. See genPushCalleeSavedRegisters
- maskCalleeRegsPushed |= RBM_RA;
+ maskPushRegsInt |= RBM_RA;
#endif // TARGET_LOONGARCH64 || TARGET_RISCV64
- compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
+ compiler->compCalleeRegsPushed = genCountBits(maskPushRegsInt) + genCountBits(maskPushRegsFloat);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ compiler->compCalleeRegsPushed += genCountBits(maskPushRegsPredicate);
+#endif
#ifdef DEBUG
if (verbose)
{
printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
- dspRegMask(maskCalleeRegsPushed);
+ dspRegMask(AllRegsMask(maskPushRegsInt, maskPushRegsFloat
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ ,
+ maskPushRegsPredicate
+#endif
+ ));
printf("\n");
}
#endif // DEBUG
@@ -4917,9 +4937,9 @@ void CodeGen::genFnProlog()
int GCrefHi = -INT_MAX;
bool hasGCRef = false;
- regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
- regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
- regMaskTP initDblRegs = RBM_NONE;
+ regMaskGpr initRegs = RBM_NONE; // Registers which must be init'ed.
+ regMaskFloat initFltRegs = RBM_NONE; // FP registers which must be init'ed.
+ regMaskFloat initDblRegs = RBM_NONE;
unsigned varNum;
LclVarDsc* varDsc;
@@ -4992,8 +5012,8 @@ void CodeGen::genFnProlog()
if (isInReg)
{
- regNumber regForVar = varDsc->GetRegNum();
- regMaskTP regMask = genRegMask(regForVar);
+ regNumber regForVar = varDsc->GetRegNum();
+ singleRegMask regMask = genRegMask(regForVar);
if (!genIsValidFloatReg(regForVar))
{
initRegs |= regMask;
@@ -5105,9 +5125,9 @@ void CodeGen::genFnProlog()
// Track if initReg holds non-zero value. Start conservative and assume it has non-zero value.
// If initReg is ever set to zero, this variable is set to true and zero initializing initReg
// will be skipped.
- bool initRegZeroed = false;
- regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
- regMaskTP tempMask;
+ bool initRegZeroed = false;
+ regMaskGpr excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskGpr tempMask;
// We should not use the special PINVOKE registers as the initReg
// since they are trashed by the jithelper call to setup the PINVOKE frame
@@ -5154,18 +5174,18 @@ void CodeGen::genFnProlog()
// We will use one of the registers that we were planning to zero init anyway.
// We pick the lowest register number.
tempMask = genFindLowestBit(tempMask);
- initReg = genRegNumFromMask(tempMask);
+ initReg = genRegNumFromMask(tempMask MORE_THAN_64_REG_ARG(TYP_INT));
}
// Next we prefer to use one of the unused argument registers.
// If they aren't available we use one of the caller-saved integer registers.
else
{
- tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
+ tempMask = regSet.rsGetModifiedGprRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
if (tempMask != RBM_NONE)
{
// We pick the lowest register number
tempMask = genFindLowestBit(tempMask);
- initReg = genRegNumFromMask(tempMask);
+ initReg = genRegNumFromMask(tempMask MORE_THAN_64_REG_ARG(TYP_INT));
}
}
@@ -5343,11 +5363,11 @@ void CodeGen::genFnProlog()
//-------------------------------------------------------------------------
#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
- regMaskTP maskStackAlloc = RBM_NONE;
+ regMaskGpr maskStackAlloc = RBM_NONE;
#ifdef TARGET_ARM
maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize + extraFrameSize,
- regSet.rsGetModifiedFltCalleeSavedRegsMask());
+ regSet.rsGetModifiedFloatRegsMask() & RBM_FLT_CALLEE_SAVED);
#endif // TARGET_ARM
if (maskStackAlloc == RBM_NONE)
@@ -5372,7 +5392,7 @@ void CodeGen::genFnProlog()
if (compiler->compLocallocUsed)
{
GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE, /* canSkip */ false);
- regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
+ regSet.verifyGprRegUsed(REG_SAVED_LOCALLOC_SP);
compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
}
#endif // TARGET_ARMARCH
@@ -5564,7 +5584,7 @@ void CodeGen::genFnProlog()
if (initRegs)
{
- regMaskTP regMask = 0x1;
+ regMaskGpr regMask = 0x1;
for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
{
@@ -5658,7 +5678,7 @@ void CodeGen::genFnProlog()
// MOV EAX,
assert(compiler->lvaVarargsHandleArg == compiler->info.compArgsCount - 1);
GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->lvaVarargsHandleArg, 0);
- regSet.verifyRegUsed(REG_EAX);
+ regSet.verifyGprRegUsed(REG_EAX);
// MOV EAX, [EAX]
GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
@@ -6086,7 +6106,7 @@ void CodeGen::genSinglePop()
// Notes:
// This function does not check if the register is marked as used, etc.
//
-regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
+regMaskGpr CodeGen::genPushRegs(regMaskGpr regs, regMaskGpr* byrefRegs, regMaskGpr* noRefRegs)
{
*byrefRegs = RBM_NONE;
*noRefRegs = RBM_NONE;
@@ -6096,6 +6116,8 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP*
return RBM_NONE;
}
+ assert(compiler->IsGprRegMask(regs));
+
#if FEATURE_FIXED_OUT_ARGS
NYI("Don't call genPushRegs with real regs!");
@@ -6106,11 +6128,11 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP*
noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
- regMaskTP pushedRegs = regs;
+ regMaskGpr pushedRegs = regs;
for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
{
- regMaskTP regBit = regMaskTP(1) << reg;
+ regMaskGpr regBit = regMaskGpr(1) << reg;
if ((regBit & regs) == RBM_NONE)
continue;
@@ -6159,13 +6181,14 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP*
// Return Value:
// None
//
-void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
+void CodeGen::genPopRegs(regMaskGpr regs, regMaskGpr byrefRegs, regMaskGpr noRefRegs)
{
if (regs == RBM_NONE)
{
return;
}
+ assert(compiler->IsGprRegMask(regs));
#if FEATURE_FIXED_OUT_ARGS
NYI("Don't call genPopRegs with real regs!");
@@ -6182,7 +6205,7 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg
// Walk the registers in the reverse order as genPushRegs()
for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
{
- regMaskTP regBit = regMaskTP(1) << reg;
+ regMaskGpr regBit = regMaskGpr(1) << reg;
if ((regBit & regs) == RBM_NONE)
continue;
@@ -7035,7 +7058,7 @@ void CodeGen::genReturn(GenTree* treeNode)
if (compiler->compMethodReturnsRetBufAddr())
{
- gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
+ gcInfo.gcMarkGprRegNpt(REG_INTRET);
}
else
{
@@ -7043,7 +7066,7 @@ void CodeGen::genReturn(GenTree* treeNode)
{
if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
{
- gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)));
+ gcInfo.gcMarkRegNpt(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv));
}
}
}
@@ -7478,30 +7501,37 @@ void CodeGen::genRegCopy(GenTree* treeNode)
unsigned regCount = op1->GetMultiRegCount(compiler);
assert(regCount <= MAX_MULTIREG_COUNT);
- // First set the source registers as busy if they haven't been spilled.
- // (Note that this is just for verification that we don't have circular dependencies.)
- regMaskTP busyRegs = RBM_NONE;
+// First set the source registers as busy if they haven't been spilled.
+// (Note that this is just for verification that we don't have circular dependencies.)
+#ifdef DEBUG
+ AllRegsMask busyRegs;
for (unsigned i = 0; i < regCount; ++i)
{
if ((op1->GetRegSpillFlagByIdx(i) & GTF_SPILLED) == 0)
{
- busyRegs |= genRegMask(op1->GetRegByIndex(i));
+ regNumber reg = op1->GetRegByIndex(i);
+ busyRegs.AddRegNumInMask(reg);
}
}
+#endif // DEBUG
+
for (unsigned i = 0; i < regCount; ++i)
{
regNumber sourceReg = op1->GetRegByIndex(i);
// genRegCopy will consume the source register, perform any required reloads,
// and will return either the register copied to, or the original register if there's no copy.
regNumber targetReg = genRegCopy(treeNode, i);
+
+#ifdef DEBUG
+
if (targetReg != sourceReg)
{
- regMaskTP targetRegMask = genRegMask(targetReg);
- assert((busyRegs & targetRegMask) == 0);
- // Clear sourceReg from the busyRegs, and add targetReg.
- busyRegs &= ~genRegMask(sourceReg);
+ assert(!busyRegs.IsRegNumInMask(targetReg));
+ busyRegs.RemoveRegNumFromMask(sourceReg);
}
- busyRegs |= genRegMask(targetReg);
+ busyRegs.AddRegNumInMask(targetReg);
+#endif // DEBUG
}
return;
}
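
[Reviewer note, not part of the diff] The DEBUG-only busy set above uses the per-register AllRegsMask API introduced by this change. A minimal usage sketch (REG_RAX is just an arbitrary example register):

    AllRegsMask busy;
    busy.AddRegNumInMask(REG_RAX);        // mark a source register busy
    assert(busy.IsRegNumInMask(REG_RAX));
    busy.RemoveRegNumFromMask(REG_RAX);   // release it once the copy is done
    assert(!busy.IsRegNumInMask(REG_RAX));
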
@@ -7540,7 +7570,7 @@ void CodeGen::genRegCopy(GenTree* treeNode)
// The old location is dying
genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
- gcInfo.gcMarkRegSetNpt(genRegMask(op1->GetRegNum()));
+ gcInfo.gcMarkRegNpt(op1->GetRegNum());
genUpdateVarReg(varDsc, treeNode);
@@ -7607,7 +7637,7 @@ regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex)
{
// The old location is dying
genUpdateRegLife(fieldVarDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
- gcInfo.gcMarkRegSetNpt(genRegMask(sourceReg));
+ gcInfo.gcMarkRegNpt(sourceReg);
genUpdateVarReg(fieldVarDsc, treeNode);
// Report the home change for this variable
@@ -7693,7 +7723,7 @@ unsigned CodeGenInterface::getCurrentStackLevel() const
// This function emits code to poison address exposed non-zero-inited local variables. We expect this function
// to be called when emitting code for the scratch BB that comes right after the prolog.
// The variables are poisoned using 0xcdcdcdcd.
-void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
+void CodeGen::genPoisonFrame(regMaskGpr regLiveIn)
{
assert(compiler->compShouldPoisonFrame());
#if defined(TARGET_XARCH)
diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h
index ef87ccca858702..b774c1d41c715a 100644
--- a/src/coreclr/jit/codegeninterface.h
+++ b/src/coreclr/jit/codegeninterface.h
@@ -36,9 +36,9 @@ class emitter;
struct RegState
{
- regMaskTP rsCalleeRegArgMaskLiveIn; // mask of register arguments (live on entry to method)
- unsigned rsCalleeRegArgCount; // total number of incoming register arguments of this kind (int or float)
- bool rsIsFloat; // true for float argument registers, false for integer argument registers
+ regMaskOnlyOne rsCalleeRegArgMaskLiveIn; // mask of register arguments (live on entry to method)
+ unsigned rsCalleeRegArgCount; // total number of incoming register arguments of this kind (int or float)
+ bool rsIsFloat; // true for float argument registers, false for integer argument registers
};
//-------------------- CodeGenInterface ---------------------------------
@@ -60,31 +60,31 @@ class CodeGenInterface
}
#if defined(TARGET_AMD64)
- regMaskTP rbmAllFloat;
- regMaskTP rbmFltCalleeTrash;
+ regMaskFloat rbmAllFloat;
+ regMaskFloat rbmFltCalleeTrash;
- FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const
+ FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const
{
return this->rbmAllFloat;
}
- FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const
+ FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const
{
return this->rbmFltCalleeTrash;
}
#endif // TARGET_AMD64
#if defined(TARGET_XARCH)
- regMaskTP rbmAllMask;
- regMaskTP rbmMskCalleeTrash;
+ regMaskPredicate rbmAllMask;
+ regMaskPredicate rbmMskCalleeTrash;
// Call this function after the equivalent fields in Compiler have been initialized.
void CopyRegisterInfo();
- FORCEINLINE regMaskTP get_RBM_ALLMASK() const
+ FORCEINLINE regMaskPredicate get_RBM_ALLMASK() const
{
return this->rbmAllMask;
}
- FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const
+ FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
@@ -156,8 +156,8 @@ class CodeGenInterface
VARSET_TP genLastLiveSet; // A one element map (genLastLiveSet-> genLastLiveMask)
regMaskTP genLastLiveMask; // these two are used in genLiveMask
- regMaskTP genGetRegMask(const LclVarDsc* varDsc);
- regMaskTP genGetRegMask(GenTree* tree);
+ regMaskOnlyOne genGetRegMask(const LclVarDsc* varDsc);
+ regMaskOnlyOne genGetRegMask(GenTree* tree);
void genUpdateLife(GenTree* tree);
void genUpdateLife(VARSET_VALARG_TP newLife);
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 351ca14942838b..efd21811e81add 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -60,7 +60,7 @@ void CodeGen::genInitializeRegisterState()
regNumber reg = varDsc->GetRegNum();
if (genIsValidIntReg(reg))
{
- regSet.verifyRegUsed(reg);
+ regSet.verifyGprRegUsed(reg);
}
}
}
@@ -202,9 +202,9 @@ void CodeGen::genCodeForBBlist()
// change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
// here. That would require handling the changes in recordVarLocationsAtStartOfBB().
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
+ AllRegsMask newLiveRegSet;
+ regMaskGpr newRegGCrefSet = RBM_NONE;
+ regMaskGpr newRegByrefSet = RBM_NONE;
#ifdef DEBUG
VARSET_TP removedGCVars(VarSetOps::MakeEmpty(compiler));
VARSET_TP addedGCVars(VarSetOps::MakeEmpty(compiler));
@@ -217,14 +217,18 @@ void CodeGen::genCodeForBBlist()
if (varDsc->lvIsInReg())
{
- newLiveRegSet |= varDsc->lvRegMask();
+ regMaskOnlyOne varRegMask = varDsc->lvRegMask();
+ assert(compiler->IsOnlyOneRegMask(varRegMask));
+
+ newLiveRegSet.AddRegMaskForType(varRegMask, varDsc->TypeGet());
+
if (varDsc->lvType == TYP_REF)
{
- newRegGCrefSet |= varDsc->lvRegMask();
+ newRegGCrefSet |= varRegMask;
}
else if (varDsc->lvType == TYP_BYREF)
{
- newRegByrefSet |= varDsc->lvRegMask();
+ newRegByrefSet |= varRegMask;
}
if (!varDsc->IsAlwaysAliveInMemory())
{
@@ -400,7 +404,7 @@ void CodeGen::genCodeForBBlist()
// We cannot emit this code in the prolog as it might make the prolog too large.
if (compiler->compShouldPoisonFrame() && compiler->fgBBisScratch(block))
{
- genPoisonFrame(newLiveRegSet);
+ genPoisonFrame(newLiveRegSet.gprRegs());
}
// Traverse the block in linear order, generating code for each node as we
@@ -489,8 +493,8 @@ void CodeGen::genCodeForBBlist()
/* Make sure we didn't bungle pointer register tracking */
- regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.GetMaskVars();
+ regMaskGpr ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskGpr nonVarPtrRegs = ptrRegs & ~regSet.GetGprMaskVars();
// If return is a GC-type, clear it. Note that if a common
// epilog is generated (genReturnBB) it has a void return
@@ -508,14 +512,14 @@ void CodeGen::genCodeForBBlist()
if (nonVarPtrRegs)
{
printf("Regset after " FMT_BB " gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.GetMaskVars());
- compiler->GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.GetMaskVars());
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.GetGprMaskVars());
+ compiler->GetEmitter()->emitDispGprRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.GetGprMaskVars());
printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.GetMaskVars());
- compiler->GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.GetMaskVars());
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.GetGprMaskVars());
+ compiler->GetEmitter()->emitDispGprRegSet(gcInfo.gcRegByrefSetCur & ~regSet.GetGprMaskVars());
printf(", regVars=");
- printRegMaskInt(regSet.GetMaskVars());
- compiler->GetEmitter()->emitDispRegSet(regSet.GetMaskVars());
+ printRegMaskInt(regSet.GetGprMaskVars());
+ compiler->GetEmitter()->emitDispGprRegSet(regSet.GetGprMaskVars());
printf("\n");
}
@@ -955,7 +959,14 @@ void CodeGen::genSpillVar(GenTree* tree)
// Remove the live var from the register.
genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+ if (varTypeUsesIntReg(varDsc))
+ {
+ // TYP_STRUCT locals are classified as VTR_INT but can live in vector registers.
+ // Pass the individual register so that Npt is only recorded when `reg` is
+ // actually a GPR.
+ regNumber reg = varDsc->GetRegNum();
+ gcInfo.gcMarkRegNpt(reg);
+ }
if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
{
@@ -1102,7 +1113,7 @@ void CodeGen::genUnspillLocal(
}
#endif // DEBUG
- regSet.AddMaskVars(genGetRegMask(varDsc));
+ regSet.AddMaskVars(varDsc->TypeGet(), genGetRegMask(varDsc));
}
gcInfo.gcMarkRegPtrVal(regNum, type);
@@ -1433,11 +1444,11 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex)
if (fldVarDsc->GetRegNum() == REG_STK)
{
// We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ gcInfo.gcMarkRegNpt(reg);
}
else if (lcl->IsLastUse(multiRegIndex))
{
- gcInfo.gcMarkRegSetNpt(genRegMask(fldVarDsc->GetRegNum()));
+ gcInfo.gcMarkRegNpt(fldVarDsc->GetRegNum());
}
}
else
@@ -1445,7 +1456,7 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex)
regNumber regAtIndex = tree->GetRegByIndex(multiRegIndex);
if (regAtIndex != REG_NA)
{
- gcInfo.gcMarkRegSetNpt(genRegMask(regAtIndex));
+ gcInfo.gcMarkRegNpt(regAtIndex);
}
}
return reg;
@@ -1512,11 +1523,11 @@ regNumber CodeGen::genConsumeReg(GenTree* tree)
if (varDsc->GetRegNum() == REG_STK)
{
// We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->GetRegNum()));
+ gcInfo.gcMarkRegNpt(tree->GetRegNum());
}
else if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
{
- gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->GetRegNum()));
+ gcInfo.gcMarkRegNpt(varDsc->GetRegNum());
}
}
else if (tree->gtSkipReloadOrCopy()->IsMultiRegLclVar())
@@ -1542,17 +1553,17 @@ regNumber CodeGen::genConsumeReg(GenTree* tree)
if (fldVarDsc->GetRegNum() == REG_STK)
{
// We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ gcInfo.gcMarkRegNpt(reg);
}
else if (lcl->IsLastUse(i))
{
- gcInfo.gcMarkRegSetNpt(genRegMask(fldVarDsc->GetRegNum()));
+ gcInfo.gcMarkRegNpt(fldVarDsc->GetRegNum());
}
}
}
else
{
- gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
+ gcInfo.gcMarkRegSetNpt(tree->gtGetGprRegMask());
}
genCheckConsumeNode(tree);
@@ -1835,7 +1846,7 @@ void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode)
genUnspillRegIfNeeded(putArgNode);
- gcInfo.gcMarkRegSetNpt(putArgNode->gtGetRegMask());
+ gcInfo.gcMarkRegSetNpt(putArgNode->gtGetGprRegMask());
genCheckConsumeNode(putArgNode);
}
@@ -2133,14 +2144,14 @@ void CodeGen::genProduceReg(GenTree* tree)
{
regNumber reg = tree->GetRegByIndex(i);
regSet.rsSpillTree(reg, tree, i);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ gcInfo.gcMarkRegNpt(reg);
}
}
}
else
{
regSet.rsSpillTree(tree->GetRegNum(), tree);
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->GetRegNum()));
+ gcInfo.gcMarkRegNpt(tree->GetRegNum());
}
tree->gtFlags |= GTF_SPILLED;
@@ -2234,8 +2245,8 @@ void CodeGen::genProduceReg(GenTree* tree)
// transfer gc/byref status of src reg to dst reg
void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
+ regMaskOnlyOne srcMask = genRegMask(src);
+ regMaskOnlyOne dstMask = genRegMask(dst);
if (gcInfo.gcRegGCrefSetCur & srcMask)
{
@@ -2247,7 +2258,7 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
}
else
{
- gcInfo.gcMarkRegSetNpt(dstMask);
+ gcInfo.gcMarkRegNpt(dst);
}
}
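
[Reviewer note, not part of the diff] The regNumber overload used throughout these hunks is shorthand for the mask form restricted to a single register. For a GPR the two calls below are intended to be equivalent; for float or mask registers the regNumber form is expected to be a no-op (an assumption based on the comment added to genSpillVar above):

    gcInfo.gcMarkRegNpt(reg);                  // new: takes a single regNumber
    gcInfo.gcMarkRegSetNpt(genRegMask(reg));   // old: takes a GPR mask
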
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 3e5f1a4b38a691..dda69cff0f874c 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -75,7 +75,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
// mov eax, dword ptr [compiler->gsGlobalSecurityCookieAddr]
// mov dword ptr [frame.GSSecurityCookie], eax
GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_EAX, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
- regSet.verifyRegUsed(REG_EAX);
+ regSet.verifyGprRegUsed(REG_EAX);
GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, REG_EAX, compiler->lvaGSSecurityCookie, 0);
if (initReg == REG_EAX)
{
@@ -127,8 +127,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
}
}
- regNumber regGSCheck;
- regMaskTP regMaskGSCheck = RBM_NONE;
+ regNumber regGSCheck;
+ regMaskGpr regMaskGSCheck = RBM_NONE;
if (!pushReg)
{
@@ -163,9 +163,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
#endif // !TARGET_X86
}
- regMaskTP byrefPushedRegs = RBM_NONE;
- regMaskTP norefPushedRegs = RBM_NONE;
- regMaskTP pushedRegs = RBM_NONE;
+ regMaskGpr byrefPushedRegs = RBM_NONE;
+ regMaskGpr norefPushedRegs = RBM_NONE;
+ regMaskGpr pushedRegs = RBM_NONE;
if (compiler->gsGlobalSecurityCookieAddr == nullptr)
{
@@ -979,7 +979,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
emit->emitIns(INS_cdq, size);
// the cdq instruction writes RDX, So clear the gcInfo for RDX
- gcInfo.gcMarkRegSetNpt(RBM_RDX);
+ gcInfo.gcMarkGprRegNpt(REG_RDX);
}
// Perform the 'targetType' (64-bit or 32-bit) divide instruction
@@ -2382,8 +2382,12 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
// Return value:
// None
//
-void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+void CodeGen::genAllocLclFrame(unsigned frameSize,
+ regNumber initReg,
+ bool* pInitRegZeroed,
+ regMaskGpr maskArgRegsLiveIn)
{
+ assert(compiler->IsGprRegMask(maskArgRegsLiveIn));
assert(compiler->compGeneratingProlog);
if (frameSize == 0)
@@ -2427,7 +2431,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
}
GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, spOffset);
- regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
+ regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_ARG);
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN);
@@ -2445,7 +2449,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
RBM_NONE);
GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize);
- regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
+ regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_ARG);
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN);
@@ -3394,7 +3398,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE);
inst_JMP(EJ_jne, loop);
- gcInfo.gcMarkRegSetNpt(genRegMask(dstReg));
+ gcInfo.gcMarkGprRegNpt(dstReg);
}
}
@@ -4281,8 +4285,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
// Clear the gcInfo for RSI and RDI.
// While we normally update GC info prior to the last instruction that uses them,
// these actually live into the helper call.
- gcInfo.gcMarkRegSetNpt(RBM_RSI);
- gcInfo.gcMarkRegSetNpt(RBM_RDI);
+ gcInfo.gcMarkGprRegNpt(REG_RSI);
+ gcInfo.gcMarkGprRegNpt(REG_RDI);
}
// generate code do a switch statement based on a table of ip-relative offsets
@@ -4429,7 +4433,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* node)
GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, tmpReg, addr->GetRegNum(), 0);
inst_JMP(EJ_jne, loop);
- gcInfo.gcMarkRegSetNpt(genRegMask(addr->GetRegNum()));
+ gcInfo.gcMarkGprRegNpt(addr->GetRegNum());
genProduceReg(node);
}
return;
@@ -5360,7 +5364,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
GetEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node->TypeGet()), dstReg, baseReg, tmpReg, scale,
static_cast<int>(node->gtElemOffset));
- gcInfo.gcMarkRegSetNpt(base->gtGetRegMask());
+ gcInfo.gcMarkRegSetNpt(base->gtGetGprRegMask());
genProduceReg(node);
}
@@ -5726,10 +5730,10 @@ void CodeGen::genCodeForSwap(GenTreeOp* tree)
// FP swap is not yet implemented (and should have NYI'd in LSRA)
assert(varTypeUsesIntReg(type1));
- regNumber oldOp1Reg = lcl1->GetRegNum();
- regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
- regNumber oldOp2Reg = lcl2->GetRegNum();
- regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+ regNumber oldOp1Reg = lcl1->GetRegNum();
+ regMaskGpr oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->GetRegNum();
+ regMaskGpr oldOp2RegMask = genRegMask(oldOp2Reg);
// We don't call genUpdateVarReg because we don't have a tree node with the new register.
varDsc1->SetRegNum(oldOp2Reg);
@@ -6026,15 +6030,16 @@ void CodeGen::genCall(GenTreeCall* call)
// We should not have GC pointers in killed registers live around the call.
// GC info for arg registers were cleared when consuming arg nodes above
// and LSRA should ensure it for other trashed registers.
- regMaskTP killMask = RBM_CALLEE_TRASH;
+ AllRegsMask killMask = compiler->AllRegsMask_CALLEE_TRASH;
+
if (call->IsHelperCall())
{
CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
killMask = compiler->compHelperCallKillSet(helpFunc);
}
- assert((gcInfo.gcRegGCrefSetCur & killMask) == 0);
- assert((gcInfo.gcRegByrefSetCur & killMask) == 0);
+ assert(!killMask.IsGprMaskPresent(gcInfo.gcRegGCrefSetCur));
+ assert(!killMask.IsGprMaskPresent(gcInfo.gcRegByrefSetCur));
#endif
var_types returnType = call->TypeGet();
@@ -6115,7 +6120,7 @@ void CodeGen::genCall(GenTreeCall* call)
// However, for minopts or debuggable code, we keep it live to support managed return value debugging.
if ((call->gtNext == nullptr) && compiler->opts.OptimizationEnabled())
{
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ gcInfo.gcMarkGprRegNpt(REG_INTRET);
}
#if defined(DEBUG) && defined(TARGET_X86)
@@ -6548,9 +6553,12 @@ void CodeGen::genJmpMethod(GenTree* jmp)
// Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
// Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it.
// Therefore manually update life of varDsc->GetRegNum().
- regMaskTP tempMask = varDsc->lvRegMask();
- regSet.RemoveMaskVars(tempMask);
- gcInfo.gcMarkRegSetNpt(tempMask);
+ regMaskOnlyOne tempMask = varDsc->lvRegMask();
+ regSet.RemoveMaskVars(varDsc->TypeGet(), tempMask);
+ if (varTypeUsesIntReg(varDsc))
+ {
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ }
if (compiler->lvaIsGCTracked(varDsc))
{
#ifdef DEBUG
@@ -6575,8 +6583,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)
#endif
// Next move any un-enregistered register arguments back to their register.
- regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
- unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ regMaskGpr fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
for (varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
{
varDsc = compiler->lvaGetDesc(varNum);
@@ -6625,7 +6633,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
if (type0 != TYP_UNKNOWN)
{
GetEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->GetArgReg(), varNum, offset0);
- regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetArgReg()));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(varDsc->GetArgReg()));
gcInfo.gcMarkRegPtrVal(varDsc->GetArgReg(), type0);
}
@@ -6633,7 +6641,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
{
GetEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->GetOtherArgReg(), varNum,
offset1);
- regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetOtherArgReg()));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(varDsc->GetOtherArgReg()));
gcInfo.gcMarkRegPtrVal(varDsc->GetOtherArgReg(), type1);
}
@@ -6679,7 +6687,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
// expecting it. Therefore manually update life of argReg. Note that GT_JMP marks the end of the
// basic block and after which reg life and gc info will be recomputed for the new block in
// genCodeForBBList().
- regSet.AddMaskVars(genRegMask(argReg));
+ regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argReg));
gcInfo.gcMarkRegPtrVal(argReg, loadType);
if (compiler->lvaIsGCTracked(varDsc))
{
@@ -6749,14 +6757,14 @@ void CodeGen::genJmpMethod(GenTree* jmp)
assert(compiler->info.compIsVarArgs);
assert(firstArgVarNum != BAD_VAR_NUM);
- regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ regMaskGpr remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
if (remainingIntArgMask != RBM_NONE)
{
GetEmitter()->emitDisableGC();
for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
{
- regNumber argReg = intArgRegs[argNum];
- regMaskTP argRegMask = genRegMask(argReg);
+ regNumber argReg = intArgRegs[argNum];
+ regMaskGpr argRegMask = genRegMask(argReg);
if ((remainingIntArgMask & argRegMask) != 0)
{
@@ -8351,7 +8359,9 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
regNumber simdTmpReg = REG_NA;
if (putArgStk->AvailableTempRegCount() != 0)
{
- regMaskTP rsvdRegs = putArgStk->gtRsvdRegs;
+ regMaskOnlyOne rsvdRegs = putArgStk->gtRsvdRegs;
+ assert(compiler->IsOnlyOneRegMask(rsvdRegs));
+
if ((rsvdRegs & RBM_ALLINT) != 0)
{
intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT);
@@ -9050,10 +9060,10 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
void* addr = nullptr;
void* pAddr = nullptr;
- emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
- addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
- regNumber callTarget = REG_NA;
- regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+ CONSTREF_AllRegsMask killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
if (!addr)
{
@@ -9082,16 +9092,16 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
{
// If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
// this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
- callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- noway_assert((callTargetMask & killMask) == callTargetMask);
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ regMaskGpr callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & killMask.gprRegs()) == callTargetMask);
}
else
{
// The call target must not overwrite any live variable, though it may not be in the
// kill set for the call.
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- noway_assert((callTargetMask & regSet.GetMaskVars()) == RBM_NONE);
+ regMaskGpr callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & regSet.GetGprMaskVars()) == RBM_NONE);
}
#endif
@@ -9505,7 +9515,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
// If initReg is trashed, either because it was an arg to the enter
// callback, or because the enter callback itself trashes it, then it needs
// to be zero'ed again before using.
- if (((RBM_PROFILER_ENTER_TRASH | RBM_ARG_0 | RBM_ARG_1) & genRegMask(initReg)) != 0)
+ AllRegsMask profileEnterTrash = compiler->AllRegsMask_PROFILER_ENTER_TRASH;
+ profileEnterTrash.AddRegMaskForType((RBM_ARG_0 | RBM_ARG_1), TYP_INT);
+ if (profileEnterTrash.IsRegNumInMask(initReg))
{
*pInitRegZeroed = false;
}
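// --- Editorial sketch (not part of the patch) -------------------------------
// The old check OR'ed RBM_* constants into one regMaskTP and tested
// genRegMask(initReg) against it; once float and predicate registers no longer
// share a single 64-bit mask, that test only works for GPRs. The AllRegsMask
// form keys the lookup off the register's file instead, assuming the
// AllRegsMask helpers introduced elsewhere in this change:
//
//   AllRegsMask trash = compiler->AllRegsMask_PROFILER_ENTER_TRASH;
//   trash.AddRegMaskForType(RBM_ARG_0 | RBM_ARG_1, TYP_INT); // GPR-only addition
//   bool trashed = trash.IsRegNumInMask(initReg);            // valid for any file
// -----------------------------------------------------------------------------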
@@ -9544,7 +9556,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
// If initReg is trashed, either because it was an arg to the enter
// callback, or because the enter callback itself trashes it, then it needs
// to be zero'ed again before using.
- if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1) & genRegMask(initReg)) != 0)
+ AllRegsMask profileEnterTrash = compiler->AllRegsMask_PROFILER_ENTER_TRASH;
+ profileEnterTrash.AddRegMaskForType((RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1), TYP_INT);
+ if (profileEnterTrash.IsRegNumInMask(initReg))
{
*pInitRegZeroed = false;
}
@@ -9584,8 +9598,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
// registers that profiler callback kills.
if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaGetDesc(compiler->info.compThisArg)->lvIsInReg())
{
- regMaskTP thisPtrMask = genRegMask(compiler->lvaGetDesc(compiler->info.compThisArg)->GetRegNum());
- noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
+ regNumber thisPtrReg = compiler->lvaGetDesc(compiler->info.compThisArg)->GetRegNum();
+ noway_assert(!compiler->AllRegsMask_PROFILER_LEAVE_TRASH.IsRegNumInMask(thisPtrReg));
}
// At this point return value is computed and stored in RAX or XMM0.
@@ -9699,13 +9713,12 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame()
// Figure out which set of int callee saves was already saved by Tier0.
// Emit appropriate unwind.
//
- PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
- regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters();
- regMaskTP tier0IntCalleeSaves = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED;
+ PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
+ regMaskGpr tier0IntCalleeSaves = patchpointInfo->CalleeSaveGprRegisters() & RBM_OSR_INT_CALLEE_SAVED;
int const tier0IntCalleeSaveUsedSize = genCountBits(tier0IntCalleeSaves) * REGSIZE_BYTES;
JITDUMP("--OSR--- tier0 has already saved ");
- JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves));
+ JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves, RBM_NONE));
JITDUMP("\n");
// We must account for the Tier0 callee saves.
@@ -9723,7 +9736,7 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame()
//
for (regNumber reg = REG_INT_LAST; tier0IntCalleeSaves != RBM_NONE; reg = REG_PREV(reg))
{
- regMaskTP regBit = genRegMask(reg);
+ regMaskGpr regBit = genRegMask(reg);
if ((regBit & tier0IntCalleeSaves) != 0)
{
@@ -9768,7 +9781,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
// x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
// here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
// here.
- regMaskTP rsPushRegs = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask();
+ regMaskGpr rsPushRegs = regSet.rsGetModifiedGprRegsMask() & RBM_OSR_INT_CALLEE_SAVED;
#if ETW_EBP_FRAMED
if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -9779,19 +9792,18 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
// Figure out which set of int callee saves still needs saving.
//
- PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
- regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters();
- regMaskTP tier0IntCalleeSaves = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED;
+ PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
+ regMaskGpr tier0IntCalleeSaves = patchpointInfo->CalleeSaveGprRegisters() & RBM_OSR_INT_CALLEE_SAVED;
unsigned const tier0IntCalleeSaveUsedSize = genCountBits(tier0IntCalleeSaves) * REGSIZE_BYTES;
- regMaskTP const osrIntCalleeSaves = rsPushRegs & RBM_OSR_INT_CALLEE_SAVED;
- regMaskTP osrAdditionalIntCalleeSaves = osrIntCalleeSaves & ~tier0IntCalleeSaves;
+ regMaskGpr const osrIntCalleeSaves = rsPushRegs & RBM_OSR_INT_CALLEE_SAVED;
+ regMaskGpr osrAdditionalIntCalleeSaves = osrIntCalleeSaves & ~tier0IntCalleeSaves;
JITDUMP("---OSR--- int callee saves are ");
- JITDUMPEXEC(dspRegMask(osrIntCalleeSaves));
+ JITDUMPEXEC(dspRegMask(osrIntCalleeSaves, RBM_NONE));
JITDUMP("; tier0 already saved ");
- JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves));
+ JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves, RBM_NONE));
JITDUMP("; so only saving ");
- JITDUMPEXEC(dspRegMask(osrAdditionalIntCalleeSaves));
+ JITDUMPEXEC(dspRegMask(osrAdditionalIntCalleeSaves, RBM_NONE));
JITDUMP("\n");
// These remaining callee saves will be stored in the Tier0 callee save area
@@ -9807,14 +9819,14 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
// The tier0 frame is always an RBP frame, so the OSR method should never need to save RBP.
//
- assert((tier0CalleeSaves & RBM_FPBASE) == RBM_FPBASE);
+ assert((tier0IntCalleeSaves & RBM_FPBASE) == RBM_FPBASE);
assert((osrAdditionalIntCalleeSaves & RBM_FPBASE) == RBM_NONE);
// The OSR method must use MOVs to save additional callee saves.
//
for (regNumber reg = REG_INT_LAST; osrAdditionalIntCalleeSaves != RBM_NONE; reg = REG_PREV(reg))
{
- regMaskTP regBit = genRegMask(reg);
+ regMaskGpr regBit = genRegMask(reg);
if ((regBit & osrAdditionalIntCalleeSaves) != 0)
{
@@ -9849,7 +9861,7 @@ void CodeGen::genPushCalleeSavedRegisters()
// x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
// here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
// here.
- regMaskTP rsPushRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
+ regMaskGpr rsPushRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
#if ETW_EBP_FRAMED
if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -9869,7 +9881,7 @@ void CodeGen::genPushCalleeSavedRegisters()
{
printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
- dspRegMask(rsPushRegs);
+ dspRegMask(rsPushRegs, RBM_NONE);
printf("\n");
assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
}
@@ -9879,7 +9891,7 @@ void CodeGen::genPushCalleeSavedRegisters()
// and all the other code that expects it to be in this order.
for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
{
- regMaskTP regBit = genRegMask(reg);
+ regMaskGpr regBit = genRegMask(reg);
if ((regBit & rsPushRegs) != 0)
{
@@ -9907,10 +9919,10 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
//
if (doesSupersetOfNormalPops)
{
- regMaskTP rsPopRegs = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask();
- regMaskTP tier0CalleeSaves =
- ((regMaskTP)compiler->info.compPatchpointInfo->CalleeSaveRegisters()) & RBM_OSR_INT_CALLEE_SAVED;
- regMaskTP additionalCalleeSaves = rsPopRegs & ~tier0CalleeSaves;
+ regMaskGpr rsPopRegs = regSet.rsGetModifiedGprRegsMask() & RBM_OSR_INT_CALLEE_SAVED;
+ regMaskGpr tier0CalleeSaves =
+ ((regMaskGpr)compiler->info.compPatchpointInfo->CalleeSaveGprRegisters()) & RBM_OSR_INT_CALLEE_SAVED;
+ regMaskGpr additionalCalleeSaves = rsPopRegs & ~tier0CalleeSaves;
// Registers saved by the OSR prolog.
//
@@ -9927,7 +9939,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
// Registers saved by a normal prolog
//
- regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
+ regMaskGpr rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
const unsigned popCount = genPopCalleeSavedRegistersFromMask(rsPopRegs);
noway_assert(compiler->compCalleeRegsPushed == popCount);
}
@@ -9936,8 +9948,10 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
// genPopCalleeSavedRegistersFromMask: pop specified set of callee saves
// in the "standard" order
//
-unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs)
+unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskGpr rsPopRegs)
{
+ assert(compiler->IsGprRegMask(rsPopRegs));
+
unsigned popCount = 0;
if ((rsPopRegs & RBM_EBX) != 0)
{
@@ -10040,10 +10054,10 @@ void CodeGen::genFnEpilog(BasicBlock* block)
dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
printf(", gcRegGCrefSetCur=");
printRegMaskInt(gcInfo.gcRegGCrefSetCur);
- GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ GetEmitter()->emitDispGprRegSet(gcInfo.gcRegGCrefSetCur);
printf(", gcRegByrefSetCur=");
printRegMaskInt(gcInfo.gcRegByrefSetCur);
- GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ GetEmitter()->emitDispGprRegSet(gcInfo.gcRegByrefSetCur);
printf("\n");
}
#endif
@@ -10112,15 +10126,14 @@ void CodeGen::genFnEpilog(BasicBlock* block)
//
PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo;
- regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters();
- regMaskTP const tier0IntCalleeSaves = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED;
- regMaskTP const osrIntCalleeSaves = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask();
- regMaskTP const allIntCalleeSaves = osrIntCalleeSaves | tier0IntCalleeSaves;
- unsigned const tier0FrameSize = patchpointInfo->TotalFrameSize() + REGSIZE_BYTES;
- unsigned const tier0IntCalleeSaveUsedSize = genCountBits(allIntCalleeSaves) * REGSIZE_BYTES;
- unsigned const osrCalleeSaveSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
- unsigned const osrFramePointerSize = isFramePointerUsed() ? REGSIZE_BYTES : 0;
- unsigned const osrAdjust =
+ regMaskGpr const tier0IntCalleeSaves = patchpointInfo->CalleeSaveGprRegisters() & RBM_OSR_INT_CALLEE_SAVED;
+ regMaskGpr const osrIntCalleeSaves = regSet.rsGetModifiedGprRegsMask() & RBM_OSR_INT_CALLEE_SAVED;
+ regMaskGpr const allIntCalleeSaves = osrIntCalleeSaves | tier0IntCalleeSaves;
+ unsigned const tier0FrameSize = patchpointInfo->TotalFrameSize() + REGSIZE_BYTES;
+ unsigned const tier0IntCalleeSaveUsedSize = genCountBits(allIntCalleeSaves) * REGSIZE_BYTES;
+ unsigned const osrCalleeSaveSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ unsigned const osrFramePointerSize = isFramePointerUsed() ? REGSIZE_BYTES : 0;
+ unsigned const osrAdjust =
tier0FrameSize - tier0IntCalleeSaveUsedSize + osrCalleeSaveSize + osrFramePointerSize;
JITDUMP("OSR epilog adjust factors: tier0 frame %u, tier0 callee saves -%u, osr callee saves %u, osr "
@@ -10141,7 +10154,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
{
inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
- regSet.verifyRegUsed(REG_ECX);
+ regSet.verifyGprRegUsed(REG_ECX);
}
else
#endif // TARGET_X86
@@ -10206,7 +10219,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
// lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
needLea = true;
}
- else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
+ else if (!regSet.rsRegsModified(compiler->AllRegsMask_CALLEE_SAVED))
{
if (compiler->compLclFrameSize != 0)
{
@@ -10229,7 +10242,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
{
// "pop ecx" will make ESP point to the callee-saved registers
inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
- regSet.verifyRegUsed(REG_ECX);
+ regSet.verifyGprRegUsed(REG_ECX);
}
#endif // TARGET_X86
else
@@ -10368,7 +10381,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
indCallReg = REG_RAX;
addr = nullptr;
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
- regSet.verifyRegUsed(indCallReg);
+ regSet.verifyGprRegUsed(indCallReg);
}
}
else
@@ -10553,7 +10566,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// Callee saved int registers are pushed to stack.
genPushCalleeSavedRegisters();
- regMaskTP maskArgRegsLiveIn;
+ regMaskGpr maskArgRegsLiveIn;
if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
{
maskArgRegsLiveIn = RBM_ARG_0;
@@ -10583,7 +10596,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
GetEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
- regSet.verifyRegUsed(REG_FPBASE);
+ regSet.verifyGprRegUsed(REG_FPBASE);
GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
@@ -10642,7 +10655,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
assert(isFramePointerUsed());
assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
// finalized
- assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
+ assert(compiler->compCalleeFPRegsSavedMask != (regMaskFloat)-1); // The float registers to be preserved is finalized
// Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
// that's ok, because we're figuring out an offset in the parent frame.
@@ -11100,10 +11113,10 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
// funclet frames: this will be FuncletInfo.fiSpDelta.
void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
{
- regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+ regMaskFloat regMask = compiler->compCalleeFPRegsSavedMask;
// Only callee saved floating point registers should be in regMask
- assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+ assert(compiler->IsFloatRegMask(regMask));
if (GetEmitter()->ContainsCallNeedingVzeroupper() && !GetEmitter()->Contains256bitOrMoreAVX())
{
@@ -11141,7 +11154,7 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
{
- regMaskTP regBit = genRegMask(reg);
+ regMaskFloat regBit = genRegMask(reg);
if ((regBit & regMask) != 0)
{
// ABI requires us to preserve lower 128-bits of YMM register.
@@ -11163,10 +11176,10 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
// funclet frames: this will be FuncletInfo.fiSpDelta.
void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
{
- regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+ regMaskFloat regMask = compiler->compCalleeFPRegsSavedMask;
// Only callee saved floating point registers should be in regMask
- assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+ assert(compiler->IsFloatRegMask(regMask));
if (GetEmitter()->Contains256bitOrMoreAVX())
{
@@ -11215,7 +11228,7 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
{
- regMaskTP regBit = genRegMask(reg);
+ regMaskFloat regBit = genRegMask(reg);
if ((regBit & regMask) != 0)
{
// ABI requires us to restore lower 128-bits of YMM register.
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index c834a4f6f27810..9a850538649b07 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2019,6 +2019,201 @@ void Compiler::compInit(ArenaAllocator* pAlloc,
new (&Metrics, jitstd::placement_t()) JitMetrics();
}
+void Compiler::compInitAllRegsMask()
+{
+#if defined(TARGET_AMD64)
+ rbmAllFloat = RBM_ALLFLOAT_INIT;
+ rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
+ cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;
+
+ if (canUseEvexEncoding())
+ {
+ rbmAllFloat |= RBM_HIGHFLOAT;
+ rbmFltCalleeTrash |= RBM_HIGHFLOAT;
+ cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
+ }
+#endif // TARGET_AMD64
+
+#if defined(TARGET_XARCH)
+ rbmAllMask = RBM_ALLMASK_INIT;
+ rbmMskCalleeTrash = RBM_MSK_CALLEE_TRASH_INIT;
+ cntCalleeTrashMask = CNT_CALLEE_TRASH_MASK_INIT;
+
+ if (canUseEvexEncoding())
+ {
+ rbmAllMask |= RBM_ALLMASK_EVEX;
+ rbmMskCalleeTrash |= RBM_MSK_CALLEE_TRASH_EVEX;
+ cntCalleeTrashMask += CNT_CALLEE_TRASH_MASK_EVEX;
+ }
+#endif // TARGET_XARCH
+
+ AllRegsMask_NONE = AllRegsMask();
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ AllRegsMask_CALLEE_SAVED = AllRegsMask(RBM_INT_CALLEE_SAVED, RBM_FLT_CALLEE_SAVED, RBM_MSK_CALLEE_SAVED);
+ AllRegsMask_CALLEE_TRASH = AllRegsMask(RBM_INT_CALLEE_TRASH, RBM_FLT_CALLEE_TRASH, RBM_MSK_CALLEE_TRASH);
+#else
+ AllRegsMask_CALLEE_SAVED = AllRegsMask(RBM_CALLEE_SAVED);
+ AllRegsMask_CALLEE_TRASH = AllRegsMask(RBM_CALLEE_TRASH);
+#endif
+
+#if defined(TARGET_X86)
+
+ AllRegsMask_CALLEE_TRASH_NOGC = GprRegsMask(RBM_EDX);
+ // The registers trashed by profiler enter/leave/tailcall hook
+ // See vm\i386\asmhelpers.asm for more details.
+ AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask_NONE;
+ AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask();
+ AllRegsMask_PROFILER_TAILCALL_TRASH = (AllRegsMask_CALLEE_TRASH & GprRegsMask(~RBM_ARG_REGS));
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER = GprRegsMask(RBM_EAX | RBM_EDX);
+#else // !FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH;
+#endif // !FEATURE_USE_ASM_GC_WRITE_BARRIERS
+
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = GprRegsMask(RBM_EDX);
+
+ // Registers killed by CORINFO_HELP_ASSIGN_BYREF.
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_ESI | RBM_EDI | RBM_ECX);
+
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF.
+ // Note that RDI and RSI are still valid byref pointers after this helper call, despite their value being changed.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_ECX);
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ AllRegsMask_STOP_FOR_GC_TRASH = AllRegsMask_CALLEE_TRASH;
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. On x86, this helper has a custom calling
+ // convention that takes EDI as argument (but doesn't trash it), trashes EAX, and returns ESI.
+ AllRegsMask_INIT_PINVOKE_FRAME_TRASH = GprRegsMask(RBM_PINVOKE_SCRATCH | RBM_PINVOKE_TCB);
+
+ AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_INT_CALLEE_TRASH & ~RBM_ECX);
+ AllRegsMask_EDX = GprRegsMask(RBM_EDX);
+
+#elif defined(TARGET_AMD64)
+ AllRegsMask_CALLEE_TRASH_NOGC = AllRegsMask_CALLEE_TRASH;
+ // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_NOGC;
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_NOGC;
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF);
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_CALLEE_TRASH_WRITEBARRIER_BYREF);
+
+#ifdef UNIX_AMD64_ABI
+
+ // See vm\amd64\unixasmhelpers.S for more details.
+ //
+ // On Unix a struct of size >=9 and <=16 bytes in size is returned in two return registers.
+ // The return registers could be any two from the set { RAX, RDX, XMM0, XMM1 }.
+ // STOP_FOR_GC helper preserves all the 4 possible return registers.
+ AllRegsMask_STOP_FOR_GC_TRASH =
+ AllRegsMask(RBM_INT_CALLEE_TRASH & ~(RBM_INTRET | RBM_INTRET_1),
+ (RBM_FLT_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_FLOATRET_1)), RBM_MSK_CALLEE_TRASH);
+ AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask((RBM_INT_CALLEE_TRASH & ~RBM_ARG_REGS),
+ (RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS), RBM_MSK_CALLEE_TRASH);
+#else
+ // See vm\amd64\asmhelpers.asm for more details.
+ AllRegsMask_STOP_FOR_GC_TRASH =
+ AllRegsMask((RBM_INT_CALLEE_TRASH & ~RBM_INTRET), (RBM_FLT_CALLEE_TRASH & ~RBM_FLOATRET), RBM_MSK_CALLEE_TRASH);
+ AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask_CALLEE_TRASH;
+#endif // UNIX_AMD64_ABI
+
+ AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask_STOP_FOR_GC_TRASH;
+ AllRegsMask_PROFILER_TAILCALL_TRASH = AllRegsMask_PROFILER_LEAVE_TRASH;
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ AllRegsMask_INIT_PINVOKE_FRAME_TRASH = AllRegsMask_CALLEE_TRASH;
+ AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_VALIDATE_INDIRECT_CALL_TRASH);
+
+#elif defined(TARGET_ARM)
+
+ AllRegsMask_CALLEE_TRASH_NOGC = GprRegsMask(RBM_CALLEE_TRASH_NOGC);
+ AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask_NONE;
+
+ // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER = GprRegsMask(RBM_R0 | RBM_R3 | RBM_LR | RBM_DEFAULT_HELPER_CALL_TARGET);
+
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_WRITEBARRIER;
+
+ // Registers killed by CORINFO_HELP_ASSIGN_BYREF.
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF =
+ GprRegsMask(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC);
+
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF.
+ // Note that r0 and r1 are still valid byref pointers after this helper call, despite their value being changed.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = AllRegsMask_CALLEE_TRASH_NOGC;
+ AllRegsMask_PROFILER_RET_SCRATCH = GprRegsMask(RBM_R2);
+ // While REG_PROFILER_RET_SCRATCH is not trashed by the method, the register allocator must
+ // consider it killed by the return.
+ AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask_PROFILER_RET_SCRATCH;
+ AllRegsMask_PROFILER_TAILCALL_TRASH = AllRegsMask_NONE;
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper (JIT_RareDisableHelper).
+ // See vm\arm\asmhelpers.asm for more details.
+ AllRegsMask_STOP_FOR_GC_TRASH =
+ AllRegsMask((RBM_INT_CALLEE_TRASH & ~(RBM_LNGRET | RBM_R7 | RBM_R8 | RBM_R11)),
+ (RBM_FLT_CALLEE_TRASH & ~(RBM_DOUBLERET | RBM_F2 | RBM_F3 | RBM_F4 | RBM_F5 | RBM_F6 | RBM_F7)));
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ AllRegsMask_INIT_PINVOKE_FRAME_TRASH =
+ (AllRegsMask_CALLEE_TRASH | GprRegsMask(RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH));
+
+ AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_INT_CALLEE_TRASH);
+
+#elif defined(TARGET_ARM64)
+
+ AllRegsMask_CALLEE_TRASH_NOGC = GprRegsMask(RBM_CALLEE_TRASH_NOGC);
+ AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask((RBM_INT_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_ARG_RET_BUFF | RBM_FP)),
+ (RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS), RBM_MSK_CALLEE_TRASH);
+ // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER = GprRegsMask(RBM_R14 | RBM_CALLEE_TRASH_NOGC);
+
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_NOGC;
+
+ // Registers killed by CORINFO_HELP_ASSIGN_BYREF.
+ AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF =
+ GprRegsMask(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC);
+
+ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF.
+ // Note that x13 and x14 are still valid byref pointers after this helper call, despite their value being changed.
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = AllRegsMask_CALLEE_TRASH_NOGC;
+
+ AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask_PROFILER_ENTER_TRASH;
+ AllRegsMask_PROFILER_TAILCALL_TRASH = AllRegsMask_PROFILER_ENTER_TRASH;
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ AllRegsMask_STOP_FOR_GC_TRASH = AllRegsMask_CALLEE_TRASH;
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ AllRegsMask_INIT_PINVOKE_FRAME_TRASH = AllRegsMask_CALLEE_TRASH;
+ AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_VALIDATE_INDIRECT_CALL_TRASH);
+#endif
+
+#if defined(TARGET_ARM)
+ // profiler scratch remains gc live
+ AllRegsMask_PROF_FNC_LEAVE = AllRegsMask_PROFILER_LEAVE_TRASH & ~AllRegsMask_PROFILER_RET_SCRATCH;
+#else
+ AllRegsMask_PROF_FNC_LEAVE = AllRegsMask_PROFILER_LEAVE_TRASH;
+#endif // TARGET_ARM
+
+#ifdef TARGET_XARCH
+
+ // Make sure we copy the register info and initialize the
+ // trash regs after the underlying fields are initialized
+
+ const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{
+#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr,
+#include "typelist.h"
+#undef DEF_TP
+ };
+ memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT);
+
+ if (codeGen != nullptr)
+ {
+ codeGen->CopyRegisterInfo();
+ }
+#endif // TARGET_XARCH
+}
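// --- Editorial sketch (not part of the patch) -------------------------------
// compInitAllRegsMask() is expected to run once per compilation, right after
// compInitOptions() (see the compCompileHelper hunk later in this diff). After
// that, kill sets are consumed through the per-Compiler AllRegsMask fields
// rather than raw RBM_* constants; a hypothetical call site might look like:
//
//   CONSTREF_AllRegsMask kill = compiler->AllRegsMask_STOP_FOR_GC_TRASH;
//   if (kill.IsRegNumInMask(someReg))
//   {
//       // 'someReg' does not survive a CORINFO_HELP_STOP_FOR_GC call
//   }
// -----------------------------------------------------------------------------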
/*****************************************************************************
*
* Destructor
@@ -3456,44 +3651,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters();
}
#endif // defined(DEBUG) && defined(TARGET_ARM64)
-
-#if defined(TARGET_AMD64)
- rbmAllFloat = RBM_ALLFLOAT_INIT;
- rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
- cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;
-
- if (canUseEvexEncoding())
- {
- rbmAllFloat |= RBM_HIGHFLOAT;
- rbmFltCalleeTrash |= RBM_HIGHFLOAT;
- cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
- }
-#endif // TARGET_AMD64
-
-#if defined(TARGET_XARCH)
- rbmAllMask = RBM_ALLMASK_INIT;
- rbmMskCalleeTrash = RBM_MSK_CALLEE_TRASH_INIT;
- cntCalleeTrashMask = CNT_CALLEE_TRASH_MASK_INIT;
-
- if (canUseEvexEncoding())
- {
- rbmAllMask |= RBM_ALLMASK_EVEX;
- rbmMskCalleeTrash |= RBM_MSK_CALLEE_TRASH_EVEX;
- cntCalleeTrashMask += CNT_CALLEE_TRASH_MASK_EVEX;
- }
-
- // Make sure we copy the register info and initialize the
- // trash regs after the underlying fields are initialized
-
- const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{
-#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr,
-#include "typelist.h"
-#undef DEF_TP
- };
- memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT);
-
- codeGen->CopyRegisterInfo();
-#endif // TARGET_XARCH
}
#ifdef DEBUG
@@ -3735,6 +3892,24 @@ bool Compiler::compPromoteFewerStructs(unsigned lclNum)
return rejectThisPromo;
}
+void Compiler::dumpRegMask(regMaskOnlyOne mask, var_types type) const
+{
+ if (varTypeUsesIntReg(type))
+ {
+ dumpRegMask(AllRegsMask(mask, RBM_NONE, RBM_NONE));
+ }
+ else if (varTypeUsesFloatReg(type))
+ {
+ dumpRegMask(AllRegsMask(RBM_NONE, mask, RBM_NONE));
+ }
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else
+ {
+ dumpRegMask(AllRegsMask(RBM_NONE, RBM_NONE, mask));
+ }
+#endif
+}
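// --- Editorial sketch (not part of the patch) -------------------------------
// The typed overload routes a single-file mask into the matching slot of an
// AllRegsMask before dispatching to the main dump routine, so callers holding
// only a regMaskOnlyOne still get the well-known-set names. Hypothetical calls:
//
//   dumpRegMask(RBM_ALLINT, TYP_INT);      // prints "[allInt]"
//   dumpRegMask(RBM_ALLFLOAT, TYP_DOUBLE); // prints "[allFloat]"
// -----------------------------------------------------------------------------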
+
//------------------------------------------------------------------------
// dumpRegMask: display a register mask. For well-known sets of registers, display a well-known token instead of
// a potentially large number of registers.
@@ -3742,33 +3917,33 @@ bool Compiler::compPromoteFewerStructs(unsigned lclNum)
// Arguments:
// regs - The set of registers to display
//
-void Compiler::dumpRegMask(regMaskTP regs) const
+void Compiler::dumpRegMask(CONSTREF_AllRegsMask mask) const
{
- if (regs == RBM_ALLINT)
+ if (mask.gprRegs() == RBM_ALLINT)
{
printf("[allInt]");
}
- else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
+ else if (mask.gprRegs() == (RBM_ALLINT & ~RBM_FPBASE))
{
printf("[allIntButFP]");
}
- else if (regs == RBM_ALLFLOAT)
+ else if (mask.floatRegs(this) == RBM_ALLFLOAT)
{
printf("[allFloat]");
}
- else if (regs == RBM_ALLDOUBLE)
+ else if (mask.floatRegs(this) == RBM_ALLDOUBLE)
{
printf("[allDouble]");
}
-#ifdef TARGET_XARCH
- else if (regs == RBM_ALLMASK)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else if ((RBM_ALLMASK != RBM_NONE) && (mask.predicateRegs(this) == RBM_ALLMASK))
{
printf("[allMask]");
}
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
else
{
- dspRegMask(regs);
+ dspRegMask(mask);
}
}
@@ -5926,11 +6101,11 @@ void Compiler::generatePatchpointInfo()
// Record callee save registers.
// Currently only needed for x64.
//
- regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask();
- rsPushRegs |= RBM_FPBASE;
- patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs);
+ regMaskGpr rsPushGprRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs();
+ rsPushGprRegs |= RBM_FPBASE;
+ patchpointInfo->SetCalleeSaveGprRegisters(rsPushGprRegs);
JITDUMP("--OSR-- Tier0 callee saves: ");
- JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters()));
+ JITDUMPEXEC(dspRegMask(patchpointInfo->CalleeSaveGprRegisters(), RBM_NONE));
JITDUMP("\n");
#endif
@@ -6961,6 +7136,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
// compInitOptions will set the correct verbose flag.
compInitOptions(compileFlags);
+ compInitAllRegsMask();
if (!compIsForInlining() && !opts.altJit && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT))
{
@@ -9500,7 +9676,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
* cVN, dVN : Display a ValueNum (call vnPrint()).
*
* The following don't require a Compiler* to work:
- * dRegMask : Display a regMaskTP (call dspRegMask(mask)).
+ * dRegMask : Display a regMaskOnlyOne (call dspRegMask(mask)).
* dBlockList : Display a BasicBlockList*.
*
* The following find an object in the IR and return it, as well as setting a global variable with the value that can
@@ -10458,7 +10634,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn)
cVN(JitTls::GetCompiler(), vn);
}
-JITDBGAPI void __cdecl dRegMask(regMaskTP mask)
+JITDBGAPI void __cdecl dRegMask(regMaskOnlyOne mask)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== dRegMask %u\n", sequenceNumber++);
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 05b3e07ebf63d1..188cd2339c558e 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -1013,9 +1013,9 @@ class LclVarDsc
return lvIsRegCandidate() && (GetRegNum() != REG_STK);
}
- regMaskTP lvRegMask() const
+ regMaskOnlyOne lvRegMask() const
{
- regMaskTP regMask = RBM_NONE;
+ regMaskOnlyOne regMask = RBM_NONE;
if (GetRegNum() != REG_STK)
{
if (varTypeUsesFloatReg(this))
@@ -3796,9 +3796,9 @@ class Compiler
VARSET_TP lvaLongVars; // set of long (64-bit) variables
#endif
VARSET_TP lvaFloatVars; // set of floating-point (32-bit and 64-bit) or SIMD variables
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
VARSET_TP lvaMaskVars; // set of mask variables
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
unsigned lvaCurEpoch; // VarSets are relative to a specific set of tracked var indices.
// It that changes, this changes. VarSets from different epochs
@@ -3925,7 +3925,7 @@ class Compiler
unsigned lvaGetMaxSpillTempSize();
#ifdef TARGET_ARM
- bool lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask);
+ bool lvaIsPreSpilled(unsigned lclNum, regMaskGpr preSpillMask);
#endif // TARGET_ARM
void lvaAssignFrameOffsets(FrameLayoutState curState);
void lvaFixVirtualFrameOffsets();
@@ -6774,11 +6774,11 @@ class Compiler
int m_loopVarFPCount;
int m_hoistedFPExprCount;
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
int m_loopVarInOutMskCount;
int m_loopVarMskCount;
int m_hoistedMskExprCount;
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
// Get the VN cache for current loop
VNSet* GetHoistedInCurLoop(Compiler* comp)
@@ -8503,7 +8503,7 @@ class Compiler
// Gets a register mask that represent the kill set for a helper call since
// not all JIT Helper calls follow the standard ABI on the target architecture.
- regMaskTP compHelperCallKillSet(CorInfoHelpFunc helper);
+ CONSTREF_AllRegsMask compHelperCallKillSet(CorInfoHelpFunc helper);
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -8524,6 +8524,28 @@ class Compiler
// Infrastructure functions: start/stop/reserve/emit.
//
+ bool IsGprRegMask(regMaskTP regMask)
+ {
+ return (regMask & RBM_ALLFLOAT) == RBM_NONE;
+ }
+
+ bool IsFloatRegMask(regMaskTP regMask)
+ {
+ return (regMask & ~RBM_ALLFLOAT) == RBM_NONE;
+ }
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ bool IsPredicateRegMask(regMaskTP regMask)
+ {
+ return (regMask & ~RBM_ALLMASK) == RBM_NONE;
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
+ bool IsOnlyOneRegMask(regMaskTP regMask)
+ {
+ return (regMask == RBM_NONE) || (IsGprRegMask(regMask) != IsFloatRegMask(regMask));
+ }
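// --- Editorial sketch (not part of the patch) -------------------------------
// Expected behavior of the classification helpers above, assuming the usual
// x64 RBM_* definitions:
//
//   IsGprRegMask(RBM_RAX | RBM_RCX);        // true  - no float bits set
//   IsFloatRegMask(RBM_XMM0);               // true  - only float bits set
//   IsOnlyOneRegMask(RBM_RAX);              // true  - a single register file
//   IsOnlyOneRegMask(RBM_RAX | RBM_XMM0);   // false - mixes GPR and float
//   IsOnlyOneRegMask(RBM_NONE);             // true  - empty is trivially fine
// -----------------------------------------------------------------------------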
+
void unwindBegProlog();
void unwindEndProlog();
void unwindBegEpilog();
@@ -8542,10 +8564,10 @@ class Compiler
void unwindSaveReg(regNumber reg, unsigned offset);
#if defined(TARGET_ARM)
- void unwindPushMaskInt(regMaskTP mask);
- void unwindPushMaskFloat(regMaskTP mask);
- void unwindPopMaskInt(regMaskTP mask);
- void unwindPopMaskFloat(regMaskTP mask);
+ void unwindPushMaskInt(regMaskGpr mask);
+ void unwindPushMaskFloat(regMaskFloat mask);
+ void unwindPopMaskInt(regMaskGpr mask);
+ void unwindPopMaskFloat(regMaskFloat mask);
void unwindBranch16(); // The epilog terminates with a 16-bit branch (e.g., "bx lr")
void unwindNop(unsigned codeSizeInBytes); // Generate unwind NOP code. 'codeSizeInBytes' is 2 or 4 bytes. Only
// called via unwindPadding().
@@ -8618,8 +8640,8 @@ class Compiler
#endif // UNIX_AMD64_ABI
#elif defined(TARGET_ARM)
- void unwindPushPopMaskInt(regMaskTP mask, bool useOpsize16);
- void unwindPushPopMaskFloat(regMaskTP mask);
+ void unwindPushPopMaskInt(regMaskGpr mask, bool useOpsize16);
+ void unwindPushPopMaskFloat(regMaskFloat mask);
#endif // TARGET_ARM
@@ -8628,7 +8650,7 @@ class Compiler
void createCfiCode(FuncInfoDsc* func, UNATIVE_OFFSET codeOffset, UCHAR opcode, short dwarfReg, INT offset = 0);
void unwindPushPopCFI(regNumber reg);
void unwindBegPrologCFI();
- void unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat);
+ void unwindPushPopMaskCFI(regMaskOnlyOne regMask, bool isFloat);
void unwindAllocStackCFI(unsigned size);
void unwindSetFrameRegCFI(regNumber reg, unsigned offset);
void unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
@@ -10502,7 +10524,7 @@ class Compiler
#if defined(TARGET_XARCH)
// Mask of callee saved float regs on stack.
- regMaskTP compCalleeFPRegsSavedMask;
+ regMaskFloat compCalleeFPRegsSavedMask;
#endif
#ifdef TARGET_AMD64
// Quirk for VS debug-launch scenario to work:
@@ -10549,6 +10571,7 @@ class Compiler
COMP_HANDLE compHnd,
CORINFO_METHOD_INFO* methodInfo,
InlineInfo* inlineInfo);
+ void compInitAllRegsMask();
void compDone();
static void compDisplayStaticSizes();
@@ -10871,7 +10894,8 @@ class Compiler
bool compJitHaltMethod();
- void dumpRegMask(regMaskTP regs) const;
+ void dumpRegMask(regMaskOnlyOne mask, var_types type) const;
+ void dumpRegMask(CONSTREF_AllRegsMask mask) const;
#endif
@@ -11145,8 +11169,8 @@ class Compiler
//
// Users of these values need to define four accessor functions:
//
- // regMaskTP get_RBM_ALLFLOAT();
- // regMaskTP get_RBM_FLT_CALLEE_TRASH();
+ // regMaskFloat get_RBM_ALLFLOAT();
+ // regMaskFloat get_RBM_FLT_CALLEE_TRASH();
// unsigned get_CNT_CALLEE_TRASH_FLOAT();
// unsigned get_AVAILABLE_REG_COUNT();
//
@@ -11155,16 +11179,16 @@ class Compiler
// This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only
// TARGET_AMD64 requires one.
//
- regMaskTP rbmAllFloat;
- regMaskTP rbmFltCalleeTrash;
- unsigned cntCalleeTrashFloat;
+ regMaskFloat rbmAllFloat;
+ regMaskFloat rbmFltCalleeTrash;
+ unsigned cntCalleeTrashFloat;
public:
- FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const
+ FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const
{
return this->rbmAllFloat;
}
- FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const
+ FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const
{
return this->rbmFltCalleeTrash;
}
@@ -11183,8 +11207,8 @@ class Compiler
//
// Users of these values need to define four accessor functions:
//
- // regMaskTP get_RBM_ALLMASK();
- // regMaskTP get_RBM_MSK_CALLEE_TRASH();
+ // regMaskPredicate get_RBM_ALLMASK();
+ // regMaskPredicate get_RBM_MSK_CALLEE_TRASH();
// unsigned get_CNT_CALLEE_TRASH_MASK();
// unsigned get_AVAILABLE_REG_COUNT();
//
@@ -11193,17 +11217,17 @@ class Compiler
// This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only
// TARGET_XARCH requires one.
//
- regMaskTP rbmAllMask;
- regMaskTP rbmMskCalleeTrash;
- unsigned cntCalleeTrashMask;
- regMaskTP varTypeCalleeTrashRegs[TYP_COUNT];
+ regMaskPredicate rbmAllMask;
+ regMaskPredicate rbmMskCalleeTrash;
+ unsigned cntCalleeTrashMask;
+ regMaskOnlyOne varTypeCalleeTrashRegs[TYP_COUNT];
public:
- FORCEINLINE regMaskTP get_RBM_ALLMASK() const
+ FORCEINLINE regMaskPredicate get_RBM_ALLMASK() const
{
return this->rbmAllMask;
}
- FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const
+ FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
@@ -11213,6 +11237,24 @@ class Compiler
}
#endif // TARGET_XARCH
+ AllRegsMask AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER;
+ AllRegsMask AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
+ AllRegsMask AllRegsMask_CALLEE_SAVED;
+ AllRegsMask AllRegsMask_CALLEE_TRASH;
+ AllRegsMask AllRegsMask_CALLEE_TRASH_NOGC;
+ AllRegsMask AllRegsMask_CALLEE_TRASH_WRITEBARRIER;
+ AllRegsMask AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF;
+ AllRegsMask AllRegsMask_EDX;
+ AllRegsMask AllRegsMask_INIT_PINVOKE_FRAME_TRASH;
+ AllRegsMask AllRegsMask_NONE;
+ AllRegsMask AllRegsMask_PROF_FNC_LEAVE;
+ AllRegsMask AllRegsMask_PROFILER_ENTER_TRASH;
+ AllRegsMask AllRegsMask_PROFILER_LEAVE_TRASH;
+ AllRegsMask AllRegsMask_PROFILER_RET_SCRATCH;
+ AllRegsMask AllRegsMask_PROFILER_TAILCALL_TRASH;
+ AllRegsMask AllRegsMask_STOP_FOR_GC_TRASH;
+ AllRegsMask AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH;
+
}; // end of class Compiler
//---------------------------------------------------------------------------------------------------------------------
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 3c9b9ac9e5e284..5a6e7f75a41a03 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -895,7 +895,7 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
// Assumptions:
// The mask contains one and only one register.
-inline regNumber genRegNumFromMask(regMaskTP mask)
+inline regNumber genRegNumFromMask(regMaskOnlyOne mask MORE_THAN_64_REG_ARG(var_types type))
{
assert(mask != 0); // Must have one bit set, so can't have a mask of zero
@@ -907,6 +907,10 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
assert(genRegMask(regNum) == mask);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ // If this type uses a mask register, add `64` to the regNumber
+ return (regNumber)((varTypeUsesMaskReg(type) << 6) + regNum);
+#endif
return regNum;
}
@@ -915,13 +919,14 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
// register number and also toggle the bit in the `mask`.
// Arguments:
// mask - the register mask
+//    type - the register type that `mask` represents.
//
// Return Value:
// The number of the first register contained in the mask and updates the `mask` to toggle
// the bit.
//
-inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask)
+inline regNumber genFirstRegNumFromMaskAndToggle(regMaskOnlyOne& mask MORE_THAN_64_REG_ARG(var_types type))
{
assert(mask != 0); // Must have one bit set, so can't have a mask of zero
@@ -930,6 +935,10 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask)
regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
mask ^= genRegMask(regNum);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ // If this type uses a mask register, add `64` to the regNumber
+ return (regNumber)((varTypeUsesMaskReg(type) << 6) + regNum);
+#endif
return regNum;
}
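// --- Editorial sketch (not part of the patch) -------------------------------
// With HAS_MORE_THAN_64_REGISTERS, predicate (mask) registers live in a
// separate 64-bit field, so a bit position alone no longer identifies a
// register; the extra `type` argument biases mask registers past the first 64
// register numbers (varTypeUsesMaskReg(type) << 6). A sketch, assuming that
// build configuration and that TYP_MASK is a type for which
// varTypeUsesMaskReg() holds:
//
//   regMaskOnlyOne gprMask = 0x4; // bit 2 of the GPR file
//   regNumber r0 = genFirstRegNumFromMaskAndToggle(gprMask, TYP_INT);  // reg 2
//   regMaskOnlyOne kMask = 0x4;   // bit 2 of the predicate file
//   regNumber r1 = genFirstRegNumFromMaskAndToggle(kMask, TYP_MASK);   // reg 64 + 2
// -----------------------------------------------------------------------------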
@@ -943,7 +952,45 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask)
// The number of the first register contained in the mask.
//
-inline regNumber genFirstRegNumFromMask(regMaskTP mask)
+inline regNumber genFirstRegNumFromMask(AllRegsMask& mask)
+{
+ assert(!mask.IsEmpty()); // Must have one bit set, so can't have a mask of zero
+
+ /* Convert the mask to a register number */
+ regNumber regNum;
+
+ RegBitSet64 gprOrFloatMask = mask.GetGprFloatCombinedMask();
+
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ // Only check this condition if predicate register support is present.
+ // If not, then gprOrFloatMask should be non-empty, and we will hit the
+ // above assert of IsEmpty() anyway.
+ if (gprOrFloatMask != RBM_NONE)
+ {
+ regNum = (regNumber)BitOperations::BitScanForward(gprOrFloatMask);
+ }
+ else
+ {
+ regNum = (regNumber)(64 + BitOperations::BitScanForward(mask.predicateRegs(nullptr)));
+ }
+#else
+ regNum = (regNumber)BitOperations::BitScanForward(gprOrFloatMask);
+#endif
+ return regNum;
+}
+
+//------------------------------------------------------------------------------
+// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number.
+//
+// Arguments:
+// mask - the register mask
+//
+// Return Value:
+// The number of the first register contained in the mask.
+//
+
+inline regNumber genFirstRegNumFromMask(regMaskOnlyOne mask MORE_THAN_64_REG_ARG(var_types type))
{
assert(mask != 0); // Must have one bit set, so can't have a mask of zero
@@ -951,6 +998,29 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask)
regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ // If this type uses a mask register, add `64` to the regNumber
+ return (regNumber)((varTypeUsesMaskReg(type) << 6) + regNum);
+#endif
+ return regNum;
+}
+
+//------------------------------------------------------------------------------
+// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a
+// register number and also toggle the bit in the `mask`.
+// Arguments:
+// mask - the register mask
+//
+// Return Value:
+// The number of the first register contained in the mask and updates the `mask` to toggle
+// the bit.
+//
+// TODO: We could make these methods members of the Compiler object, check whether
+// predicate registers are needed and, if so, use an optimized path.
+inline regNumber genFirstRegNumFromMaskAndToggle(AllRegsMask& mask)
+{
+ regNumber regNum = genFirstRegNumFromMask(mask);
+ mask ^= regNum;
return regNum;
}
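// --- Editorial sketch (not part of the patch) -------------------------------
// A typical consumption pattern for the AllRegsMask overloads above: peel
// registers off a combined kill mask until it is empty. The toggle clears the
// returned register's bit, so the loop terminates:
//
//   AllRegsMask kill = compiler->AllRegsMask_CALLEE_TRASH; // hypothetical source
//   while (!kill.IsEmpty())
//   {
//       regNumber reg = genFirstRegNumFromMaskAndToggle(kill);
//       // ... record or spill 'reg' ...
//   }
// -----------------------------------------------------------------------------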
@@ -1105,6 +1175,7 @@ inline const char* varTypeGCstring(var_types type)
/*****************************************************************************/
const char* varTypeName(var_types);
+const int regIndexForRegister(regNumber reg);
/*****************************************************************************/
// Helpers to pull little-endian values out of a byte stream.
@@ -3296,14 +3367,14 @@ __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type,
* (for a double on ARM) is returned.
*/
-inline regMaskTP genMapIntRegArgNumToRegMask(unsigned argNum)
+inline regMaskGpr genMapIntRegArgNumToRegMask(unsigned argNum)
{
assert(argNum < ArrLen(intArgMasks));
return intArgMasks[argNum];
}
-inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum)
+inline regMaskFloat genMapFloatRegArgNumToRegMask(unsigned argNum)
{
#ifndef TARGET_X86
assert(argNum < ArrLen(fltArgMasks));
@@ -3315,9 +3386,9 @@ inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum)
#endif
}
-__forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type)
+__forceinline regMaskOnlyOne genMapArgNumToRegMask(unsigned argNum, var_types type)
{
- regMaskTP result;
+ regMaskOnlyOne result;
if (varTypeUsesFloatArgReg(type))
{
result = genMapFloatRegArgNumToRegMask(argNum);
@@ -3451,39 +3522,6 @@ inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type, CorInf
}
}
-/*****************************************************************************/
-/* Return a register mask with the first 'numRegs' argument registers set.
- */
-
-inline regMaskTP genIntAllRegArgMask(unsigned numRegs)
-{
- assert(numRegs <= MAX_REG_ARG);
-
- regMaskTP result = RBM_NONE;
- for (unsigned i = 0; i < numRegs; i++)
- {
- result |= intArgMasks[i];
- }
- return result;
-}
-
-inline regMaskTP genFltAllRegArgMask(unsigned numRegs)
-{
-#ifndef TARGET_X86
- assert(numRegs <= MAX_FLOAT_REG_ARG);
-
- regMaskTP result = RBM_NONE;
- for (unsigned i = 0; i < numRegs; i++)
- {
- result |= fltArgMasks[i];
- }
- return result;
-#else
- assert(!"no x86 float arg regs\n");
- return RBM_NONE;
-#endif
-}
-
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -4441,27 +4479,24 @@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk)
#ifdef DEBUG
-inline void printRegMask(regMaskTP mask)
-{
- printf(REG_MASK_ALL_FMT, mask);
-}
-
-inline char* regMaskToString(regMaskTP mask, Compiler* context)
+inline void printRegMask(AllRegsMask mask)
{
- const size_t cchRegMask = 24;
- char* regmask = new (context, CMK_Unknown) char[cchRegMask];
+ printf(REG_MASK_ALL_FMT, mask.gprRegs());
+ printf(" ");
+ printf(REG_MASK_ALL_FMT, mask.floatRegs(nullptr));
- sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask);
-
- return regmask;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ printf(" ");
+ printf(REG_MASK_ALL_FMT, mask.predicateRegs(nullptr));
+#endif // FEATURE_MASKED_HW_INTRINSICS
}
-inline void printRegMaskInt(regMaskTP mask)
+inline void printRegMaskInt(regMaskGpr mask)
{
printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT));
}
-inline char* regMaskIntToString(regMaskTP mask, Compiler* context)
+inline char* regMaskIntToString(regMaskGpr mask, Compiler* context)
{
const size_t cchRegMask = 24;
char* regmask = new (context, CMK_Unknown) char[cchRegMask];
@@ -4989,6 +5024,351 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func)
return BasicBlockVisit::Continue;
}
+template <typename T>
+FORCEINLINE int regIndexForType(T vt)
+{
+ int type = varTypeRegister[TypeGet(vt)];
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ assert(type <= 3);
+#endif
+
+#ifndef FEATURE_MASKED_HW_INTRINSICS
+ assert(type != VTR_MASK);
+#endif
+ return (type - 1);
+}
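// --- Editorial sketch (not part of the patch) -------------------------------
// regIndexForType() maps a type to the register-file slot used by AllRegsMask.
// Given the `type - 1` return, this assumes VTR_INT/VTR_FLOAT/VTR_MASK are
// 1/2/3 respectively:
//
//   regIndexForType(TYP_INT)    == 0   // GPR slot
//   regIndexForType(TYP_DOUBLE) == 1   // float/SIMD slot
//   regIndexForType(TYP_MASK)   == 2   // predicate slot (FEATURE_MASKED_HW_INTRINSICS)
// -----------------------------------------------------------------------------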
+
+void AllRegsMask::operator|=(CONSTREF_AllRegsMask other)
+{
+ _combinedRegisters |= other._combinedRegisters;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ _predicateRegs |= other._predicateRegs;
+#endif
+}
+
+void AllRegsMask::operator&=(CONSTREF_AllRegsMask other)
+{
+ _combinedRegisters &= other._combinedRegisters;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ _predicateRegs &= other._predicateRegs;
+#endif
+}
+
+void AllRegsMask::operator|=(const regNumber reg)
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForRegister(reg);
+ RegBitSet64 value = genRegMask(reg);
+ _registers[index] |= encodeForIndex(index, value);
+#else
+ _combinedRegisters |= genRegMask(reg);
+#endif
+}
+
+void AllRegsMask::operator^=(const regNumber reg)
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForRegister(reg);
+ RegBitSet64 value = genRegMask(reg);
+ _registers[index] ^= encodeForIndex(index, value);
+#else
+ _combinedRegisters ^= genRegMask(reg);
+#endif
+}
+
+AllRegsMask AllRegsMask::operator~() const
+{
+ AllRegsMask result;
+ result._combinedRegisters = ~_combinedRegisters;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ result._predicateRegs = ~_predicateRegs;
+#endif
+ return result;
+}
+
+bool AllRegsMask::operator==(CONSTREF_AllRegsMask other) const
+{
+ return (_combinedRegisters == other._combinedRegisters)
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ && (_predicateRegs == other._predicateRegs);
+#endif
+ ;
+}
+
+bool AllRegsMask::operator!=(CONSTREF_AllRegsMask other) const
+{
+ return !(*this == other);
+}
+
+AllRegsMask AllRegsMask::operator&(CONSTREF_AllRegsMask other) const
+{
+ AllRegsMask result;
+ result._combinedRegisters = _combinedRegisters & other._combinedRegisters;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ result._predicateRegs = _predicateRegs & other._predicateRegs;
+#endif
+ return result;
+}
+
+AllRegsMask AllRegsMask::operator|(CONSTREF_AllRegsMask other) const
+{
+ AllRegsMask result;
+ result._combinedRegisters = _combinedRegisters | other._combinedRegisters;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ result._predicateRegs = _predicateRegs | other._predicateRegs;
+#endif
+ return result;
+}
+
+void AllRegsMask::Clear()
+{
+ _combinedRegisters = RBM_NONE;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ _predicateRegs = RBM_NONE;
+#endif
+}
+
+bool AllRegsMask::IsEmpty() const
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ return ((_combinedRegisters | _predicateRegs) == RBM_NONE);
+#else
+ return _combinedRegisters == RBM_NONE;
+#endif
+}
+
+unsigned AllRegsMask::Count() const
+{
+ return genCountBits(_combinedRegisters)
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ + genCountBits(_predicateRegs)
+#endif
+ ;
+}
+
+regMaskOnlyOne AllRegsMask::operator[](int index) const
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ assert(index <= 2);
+ RegBitSet32 value = _registers[index];
+ return decodeForIndex(index, value);
+#else
+ return _combinedRegisters;
+#endif
+}
+
+void AllRegsMask::AddRegMaskForType(regMaskOnlyOne maskToAdd, var_types type)
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForType(type);
+ _registers[index] |= encodeForIndex(index, maskToAdd);
+#else
+ _combinedRegisters |= maskToAdd;
+#endif
+}
+
+void AllRegsMask::AddGprRegMask(regMaskOnlyOne maskToAdd)
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ _registers[0] |= maskToAdd;
+#else
+ _combinedRegisters |= maskToAdd;
+#endif
+}
+
+void AllRegsMask::AddFloatRegMask(regMaskOnlyOne maskToAdd)
+{
+ _combinedRegisters |= maskToAdd;
+}
+
+// Adds `reg` to the gpr portion of the mask; `reg` is expected to be a GPR register.
+void AllRegsMask::AddGprRegInMask(regNumber reg)
+{
+ AddGprRegMask(genRegMask(reg));
+}
+
+// ----------------------------------------------------------
+// AddRegNumInMask: Adds `reg` to the mask.
+//
+void AllRegsMask::AddRegNumInMask(regNumber reg)
+{
+ RegBitSet64 value = genRegMask(reg);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForRegister(reg);
+ _registers[index] |= encodeForIndex(index, value);
+#else
+ _combinedRegisters |= value;
+#endif
+}
+
+// This behaves like AddRegNumInMask(reg) on all platforms except Arm. For Arm,
+// it calls getRegMask() instead of genRegMask() to create the mask that needs
+// to be added.
+void AllRegsMask::AddRegNum(regNumber reg, var_types type)
+{
+#ifdef TARGET_ARM
+ _combinedRegisters |= getRegMask(reg, type);
+#else
+ AddRegNumInMask(reg);
+#endif
+}
+
+// ----------------------------------------------------------
+// RemoveRegNumFromMask: Removes `reg` from the mask.
+//
+void AllRegsMask::RemoveRegNumFromMask(regNumber reg)
+{
+ RegBitSet64 value = genRegMask(reg);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForRegister(reg);
+ _registers[index] &= ~encodeForIndex(index, value);
+#else
+ _combinedRegisters &= ~value;
+#endif
+}
+
+// This behaves like RemoveRegNumFromMask(reg) on all platforms except Arm. For
+// Arm, it calls getRegMask() instead of genRegMask() to create the mask that
+// needs to be removed.
+void AllRegsMask::RemoveRegNum(regNumber reg, var_types type)
+{
+#ifdef TARGET_ARM
+ _combinedRegisters &= ~getRegMask(reg, type);
+#else
+ RemoveRegNumFromMask(reg);
+#endif
+}
+
+// ----------------------------------------------------------
+// IsRegNumInMask: Checks if `reg` is present in the mask.
+//
+bool AllRegsMask::IsRegNumInMask(regNumber reg) const
+{
+ RegBitSet64 value = genRegMask(reg);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForRegister(reg);
+ return (_registers[index] & encodeForIndex(index, value)) != RBM_NONE;
+#else
+ return (_combinedRegisters & value) != RBM_NONE;
+#endif
+}
+
+// This behaves like IsRegNumInMask(reg) on all platforms except Arm. For Arm,
+// it calls getRegMask() instead of genRegMask() to create the mask that is
+// checked.
+bool AllRegsMask::IsRegNumPresent(regNumber reg, var_types type) const
+{
+#ifdef TARGET_ARM
+ return (_combinedRegisters & getRegMask(reg, type)) != RBM_NONE;
+#else
+ return IsRegNumInMask(reg);
+#endif
+}
+
+#ifdef TARGET_ARM
+// ----------------------------------------------------------
+// AddRegNumInMask: Adds `reg` to the mask. It is the same as AddRegNumInMask(reg) except
+// that it takes `type` as an argument and adds `reg` to the mask for that type.
+//
+void AllRegsMask::AddRegNumInMask(regNumber reg, var_types type)
+{
+ _combinedRegisters |= genRegMask(reg, type);
+}
+
+// ----------------------------------------------------------
+// RemoveRegNumFromMask: Removes `reg` from the mask. It is the same as RemoveRegNumFromMask(reg) except
+// that it takes `type` as an argument and removes `reg` from the mask for that type.
+//
+void AllRegsMask::RemoveRegNumFromMask(regNumber reg, var_types type)
+{
+ _combinedRegisters &= ~genRegMask(reg, type);
+}
+
+// ----------------------------------------------------------
+// IsRegNumInMask: Checks whether `reg` is present in the mask. It is the same as IsRegNumInMask(reg) except
+// that it takes `type` as an argument and checks the mask for that type.
+//
+bool AllRegsMask::IsRegNumInMask(regNumber reg, var_types type) const
+{
+ return (_combinedRegisters & genRegMask(reg, type)) != RBM_NONE;
+}
+#endif
+
+void AllRegsMask::RemoveRegTypeFromMask(regMaskOnlyOne regMaskToRemove, var_types type)
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForType(type);
+ _registers[index] &= ~encodeForIndex(index, regMaskToRemove);
+#else
+ _combinedRegisters &= ~regMaskToRemove;
+#endif
+}
+
+bool AllRegsMask::IsGprMaskPresent(regMaskGpr maskToCheck) const
+{
+ return (gprRegs() & maskToCheck) != RBM_NONE;
+}
+
+bool AllRegsMask::IsFloatMaskPresent(Compiler* compiler, regMaskFloat maskToCheck) const
+{
+ return (floatRegs(compiler) & maskToCheck) != RBM_NONE;
+}
+
+regMaskOnlyOne AllRegsMask::GetRegMaskForType(var_types type) const
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ int index = regIndexForType(type);
+ RegBitSet32 value = _registers[index];
+ return decodeForIndex(index, value);
+#else
+ return _combinedRegisters;
+#endif
+}
+
+RegBitSet64 AllRegsMask::GetGprFloatCombinedMask() const
+{
+ return _combinedRegisters;
+}
+
+bool AllRegsMask::IsGprOrFloatPresent() const
+{
+ return GetGprFloatCombinedMask() != RBM_NONE;
+}
+
+#ifndef HAS_MORE_THAN_64_REGISTERS
+RegBitSet64 AllRegsMask::GetAllRegistersMask() const
+{
+ return _combinedRegisters;
+}
+#endif
+
+regMaskGpr AllRegsMask::gprRegs() const
+{
+ return _combinedRegisters & RBM_ALLGPR;
+}
+
+regMaskFloat AllRegsMask::floatRegs(const Compiler* compiler) const
+{
+#ifdef TARGET_AMD64
+ regMaskOnlyOne allFloat = compiler != nullptr ? compiler->get_RBM_ALLFLOAT() : (RBM_HIGHFLOAT | RBM_LOWFLOAT);
+ return _combinedRegisters & allFloat;
+#else
+ return _combinedRegisters & RBM_ALLFLOAT;
+#endif // TARGET_AMD64
+}
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+regMaskPredicate AllRegsMask::predicateRegs(const Compiler* compiler) const
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ return _predicateRegs;
+#else
+ regMaskOnlyOne allMask = compiler != nullptr ? compiler->get_RBM_ALLMASK() : (RBM_ALLMASK_EVEX);
+ return _combinedRegisters & allMask;
+#endif
+}
+#endif // FEATURE_MASKED_HW_INTRINSICS
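// --- Editorial sketch (not part of the patch) -------------------------------
// gprRegs()/floatRegs()/predicateRegs() are views over the shared storage and
// never overlap, which is what lets callers split one AllRegsMask back into
// per-file masks:
//
//   AllRegsMask m = compiler->AllRegsMask_CALLEE_TRASH;  // hypothetical source
//   regMaskGpr   g = m.gprRegs();
//   regMaskFloat f = m.floatRegs(compiler);
//   assert((g & f) == RBM_NONE);
// -----------------------------------------------------------------------------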
+
/*****************************************************************************/
#endif //_COMPILER_HPP_
/*****************************************************************************/
diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp
index f8b320c07d44cd..6e0e326fbde496 100644
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -273,10 +273,8 @@ void emitterStaticStats()
fprintf(fout, "Offset / size of igLoopBackEdge = %3zu / %2zu\n", offsetof(insGroup, igLoopBackEdge),
sizeof(igDummy->igLoopBackEdge));
#endif // FEATURE_LOOP_ALIGN
-#if !(REGMASK_BITS <= 32)
fprintf(fout, "Offset / size of igGCregs = %3zu / %2zu\n", offsetof(insGroup, igGCregs),
sizeof(igDummy->igGCregs));
-#endif // !(REGMASK_BITS <= 32)
fprintf(fout, "Offset / size of igData = %3zu / %2zu\n", offsetof(insGroup, igData),
sizeof(igDummy->igData));
fprintf(fout, "Offset / size of igPhData = %3zu / %2zu\n", offsetof(insGroup, igPhData),
@@ -289,10 +287,8 @@ void emitterStaticStats()
fprintf(fout, "Offset / size of igStkLvl = %3zu / %2zu\n", offsetof(insGroup, igStkLvl),
sizeof(igDummy->igStkLvl));
#endif // EMIT_TRACK_STACK_DEPTH
-#if REGMASK_BITS <= 32
fprintf(fout, "Offset / size of igGCregs = %3zu / %2zu\n", offsetof(insGroup, igGCregs),
sizeof(igDummy->igGCregs));
-#endif // REGMASK_BITS <= 32
fprintf(fout, "Offset / size of igInsCnt = %3zu / %2zu\n", offsetof(insGroup, igInsCnt),
sizeof(igDummy->igInsCnt));
fprintf(fout, "\n");
@@ -751,6 +747,7 @@ void emitter::emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle)
#endif
#if defined(TARGET_AMD64)
+ rbmAllFloat = emitComp->rbmAllFloat;
rbmFltCalleeTrash = emitComp->rbmFltCalleeTrash;
#endif // TARGET_AMD64
@@ -2059,8 +2056,8 @@ void emitter::emitEndProlog()
void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
BasicBlock* igBB,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
bool last)
{
assert(igBB != nullptr);
@@ -2868,9 +2865,9 @@ bool emitter::emitNoGChelper(CORINFO_METHOD_HANDLE methHnd)
* Mark the current spot as having a label.
*/
-void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs DEBUG_ARG(BasicBlock* block))
+void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs DEBUG_ARG(BasicBlock* block))
{
/* Create a new IG if the current one is non-empty */
@@ -2901,10 +2898,10 @@ void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars,
dumpConvertedVarSet(emitComp, GCvars);
printf(", gcrefRegs=");
printRegMaskInt(gcrefRegs);
- emitDispRegSet(gcrefRegs);
+ emitDispGprRegSet(gcrefRegs);
printf(", byrefRegs=");
printRegMaskInt(byrefRegs);
- emitDispRegSet(byrefRegs);
+ emitDispGprRegSet(byrefRegs);
printf("\n");
}
#endif
@@ -3450,21 +3447,21 @@ const char* emitter::emitGetFrameReg()
* Display a register set in a readable form.
*/
-void emitter::emitDispRegSet(regMaskTP regs)
+void emitter::emitDispRegSet(regNumber firstReg, regNumber lastReg, regMaskOnlyOne regs)
{
+ printf(" {");
+
regNumber reg;
bool sp = false;
- printf(" {");
-
- for (reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ for (reg = firstReg; reg <= lastReg; reg = REG_NEXT(reg))
{
if (regs == RBM_NONE)
{
break;
}
- regMaskTP curReg = genRegMask(reg);
+ singleRegMask curReg = genRegMask(reg);
if ((regs & curReg) == 0)
{
continue;
@@ -3483,10 +3480,35 @@ void emitter::emitDispRegSet(regMaskTP regs)
printf("%s", emitRegName(reg));
}
-
printf("}");
}
+void emitter::emitDispGprRegSet(regMaskGpr regs)
+{
+ emitDispRegSet(REG_INT_FIRST, REG_INT_LAST, regs);
+}
+
+void emitter::emitDispFloatRegSet(regMaskFloat regs)
+{
+ emitDispRegSet(REG_FP_FIRST, REG_FP_LAST, regs);
+}
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+void emitter::emitDispMaskRegSet(regMaskPredicate regs)
+{
+ emitDispRegSet(REG_MASK_FIRST, REG_MASK_LAST, regs);
+}
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
+void emitter::emitDispRegSet(CONSTREF_AllRegsMask regs)
+{
+ emitDispGprRegSet(regs.gprRegs());
+ emitDispFloatRegSet(regs.floatRegs(nullptr));
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ emitDispMaskRegSet(regs.predicateRegs(nullptr));
+#endif
+}
+
/*****************************************************************************
*
* Display the current GC ref variable set in a readable form.
@@ -3577,11 +3599,13 @@ void emitter::emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSiz
emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt,
ssize_t disp,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
emitAttr retSizeIn
MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
{
+ assert(emitComp->IsGprRegMask(gcrefRegs));
+ assert(emitComp->IsGprRegMask(byrefRegs));
emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
bool gcRefRegsInScratch = ((gcrefRegs & RBM_CALLEE_TRASH) != 0);
@@ -3660,11 +3684,14 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt,
emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
emitAttr retSizeIn
MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
{
+ assert(emitComp->IsGprRegMask(gcrefRegs));
+ assert(emitComp->IsGprRegMask(byrefRegs));
+
emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
// Allocate a larger descriptor if new GC values need to be saved
@@ -3822,23 +3849,26 @@ void emitter::emitDispGCDeltaTitle(const char* title)
// prevRegs - The live GC registers before the recent instruction.
// curRegs - The live GC registers after the recent instruction.
//
-void emitter::emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskTP curRegs)
+void emitter::emitDispGCRegDelta(const char* title, regMaskGpr prevRegs, regMaskGpr curRegs)
{
+ assert(emitComp->IsGprRegMask(prevRegs));
+ assert(emitComp->IsGprRegMask(curRegs));
+
if (prevRegs != curRegs)
{
emitDispGCDeltaTitle(title);
- regMaskTP sameRegs = prevRegs & curRegs;
- regMaskTP removedRegs = prevRegs - sameRegs;
- regMaskTP addedRegs = curRegs - sameRegs;
+ regMaskGpr sameRegs = prevRegs & curRegs;
+ regMaskGpr removedRegs = prevRegs - sameRegs;
+ regMaskGpr addedRegs = curRegs - sameRegs;
if (removedRegs != RBM_NONE)
{
printf(" -");
- dspRegMask(removedRegs);
+ dspRegMask(removedRegs, RBM_NONE);
}
if (addedRegs != RBM_NONE)
{
printf(" +");
- dspRegMask(addedRegs);
+ dspRegMask(addedRegs, RBM_NONE);
}
printf("\n");
}
@@ -4066,10 +4096,10 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction
dumpConvertedVarSet(emitComp, igPh->igPhData->igPhPrevGCrefVars);
printf(", PrevGCrefRegs=");
printRegMaskInt(igPh->igPhData->igPhPrevGCrefRegs);
- emitDispRegSet(igPh->igPhData->igPhPrevGCrefRegs);
+ emitDispGprRegSet(igPh->igPhData->igPhPrevGCrefRegs);
printf(", PrevByrefRegs=");
printRegMaskInt(igPh->igPhData->igPhPrevByrefRegs);
- emitDispRegSet(igPh->igPhData->igPhPrevByrefRegs);
+ emitDispGprRegSet(igPh->igPhData->igPhPrevByrefRegs);
printf("\n");
printf("%*s; InitGCVars=%s ", strlen(buff), "",
@@ -4077,10 +4107,10 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction
dumpConvertedVarSet(emitComp, igPh->igPhData->igPhInitGCrefVars);
printf(", InitGCrefRegs=");
printRegMaskInt(igPh->igPhData->igPhInitGCrefRegs);
- emitDispRegSet(igPh->igPhData->igPhInitGCrefRegs);
+ emitDispGprRegSet(igPh->igPhData->igPhInitGCrefRegs);
printf(", InitByrefRegs=");
printRegMaskInt(igPh->igPhData->igPhInitByrefRegs);
- emitDispRegSet(igPh->igPhData->igPhInitByrefRegs);
+ emitDispGprRegSet(igPh->igPhData->igPhInitByrefRegs);
printf("\n");
assert(!(ig->igFlags & IGF_GC_VARS));
@@ -4116,7 +4146,7 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction
{
printf("%sgcrefRegs=", separator);
printRegMaskInt(ig->igGCregs);
- emitDispRegSet(ig->igGCregs);
+ emitDispGprRegSet(ig->igGCregs);
separator = ", ";
}
@@ -4124,7 +4154,7 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction
{
printf("%sbyrefRegs=", separator);
printRegMaskInt(ig->igByrefRegs());
- emitDispRegSet(ig->igByrefRegs());
+ emitDispGprRegSet(ig->igByrefRegs());
separator = ", ";
}
@@ -4220,26 +4250,26 @@ void emitter::emitDispGCinfo()
dumpConvertedVarSet(emitComp, emitPrevGCrefVars);
printf("\n emitPrevGCrefRegs(0x%p)=", dspPtr(&emitPrevGCrefRegs));
printRegMaskInt(emitPrevGCrefRegs);
- emitDispRegSet(emitPrevGCrefRegs);
+ emitDispGprRegSet(emitPrevGCrefRegs);
printf("\n emitPrevByrefRegs(0x%p)=", dspPtr(&emitPrevByrefRegs));
printRegMaskInt(emitPrevByrefRegs);
- emitDispRegSet(emitPrevByrefRegs);
+ emitDispGprRegSet(emitPrevByrefRegs);
printf("\n emitInitGCrefVars ");
dumpConvertedVarSet(emitComp, emitInitGCrefVars);
printf("\n emitInitGCrefRegs(0x%p)=", dspPtr(&emitInitGCrefRegs));
printRegMaskInt(emitInitGCrefRegs);
- emitDispRegSet(emitInitGCrefRegs);
+ emitDispGprRegSet(emitInitGCrefRegs);
printf("\n emitInitByrefRegs(0x%p)=", dspPtr(&emitInitByrefRegs));
printRegMaskInt(emitInitByrefRegs);
- emitDispRegSet(emitInitByrefRegs);
+ emitDispGprRegSet(emitInitByrefRegs);
printf("\n emitThisGCrefVars ");
dumpConvertedVarSet(emitComp, emitThisGCrefVars);
printf("\n emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
printRegMaskInt(emitThisGCrefRegs);
- emitDispRegSet(emitThisGCrefRegs);
+ emitDispGprRegSet(emitThisGCrefRegs);
printf("\n emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
printRegMaskInt(emitThisByrefRegs);
- emitDispRegSet(emitThisByrefRegs);
+ emitDispGprRegSet(emitThisByrefRegs);
printf("\n\n");
}
@@ -7261,7 +7291,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
/* Update the set of live GC ref registers */
{
- regMaskTP GCregs = ig->igGCregs;
+ regMaskGpr GCregs = ig->igGCregs;
if (GCregs != emitThisGCrefRegs)
{
@@ -8770,12 +8800,12 @@ void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize)
emitDispVarSet();
printf(", gcrefRegs=");
printRegMaskInt(emitThisGCrefRegs);
- emitDispRegSet(emitThisGCrefRegs);
+ emitDispGprRegSet(emitThisGCrefRegs);
// printRegMaskInt(emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
// emitDispRegSet (emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
printf(", byrefRegs=");
printRegMaskInt(emitThisByrefRegs);
- emitDispRegSet(emitThisByrefRegs);
+ emitDispGprRegSet(emitThisByrefRegs);
// printRegMaskInt(emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
// emitDispRegSet (emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
printf("\n");
@@ -8870,8 +8900,9 @@ void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize)
* Record a new set of live GC ref registers.
*/
-void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
+void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskGpr regs, BYTE* addr)
{
+ assert(emitComp->IsGprRegMask(regs));
assert(emitIssuing);
// Don't track GC changes in epilogs
@@ -8880,14 +8911,14 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
return;
}
- regMaskTP life;
- regMaskTP dead;
- regMaskTP chg;
+ regMaskGpr life;
+ regMaskGpr dead;
+ regMaskGpr chg;
assert(needsGC(gcType));
- regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
- regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
+ regMaskGpr& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
+ regMaskGpr& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
assert(emitThisXXrefRegs != regs);
if (emitFullGCinfo)
@@ -8908,8 +8939,8 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
do
{
- regMaskTP bit = genFindLowestBit(chg);
- regNumber reg = genRegNumFromMask(bit);
+ regMaskGpr bit = genFindLowestBit(chg);
+ regNumber reg = genRegNumFromMask(bit MORE_THAN_64_REG_ARG(TYP_INT));
if (life & bit)
{
@@ -8941,8 +8972,9 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
* Record the fact that the given register now contains a live GC ref.
*/
-void emitter::emitGCregLiveSet(GCtype gcType, regMaskTP regMask, BYTE* addr, bool isThis)
+void emitter::emitGCregLiveSet(GCtype gcType, regMaskGpr regMask, BYTE* addr, bool isThis)
{
+ assert(emitComp->IsGprRegMask(regMask));
assert(emitIssuing);
assert(needsGC(gcType));
@@ -8972,7 +9004,7 @@ void emitter::emitGCregLiveSet(GCtype gcType, regMaskTP regMask, BYTE* addr, boo
* Record the fact that the given register no longer contains a live GC ref.
*/
-void emitter::emitGCregDeadSet(GCtype gcType, regMaskTP regMask, BYTE* addr)
+void emitter::emitGCregDeadSet(GCtype gcType, regMaskGpr regMask, BYTE* addr)
{
assert(emitIssuing);
assert(needsGC(gcType));
@@ -9209,10 +9241,10 @@ void emitter::emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr)
assert(needsGC(gcType));
- regMaskTP regMask = genRegMask(reg);
+ singleRegMask regMask = genRegMask(reg);
- regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
- regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
+ regMaskGpr& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
+ regMaskGpr& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
if ((emitThisXXrefRegs & regMask) == 0)
{
@@ -9249,7 +9281,7 @@ void emitter::emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr)
* Record the fact that the given set of registers no longer contain live GC refs.
*/
-void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr)
+void emitter::emitGCregDeadUpdMask(regMaskGpr regs, BYTE* addr)
{
assert(emitIssuing);
@@ -9261,7 +9293,7 @@ void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr)
// First, handle the gcref regs going dead
- regMaskTP gcrefRegs = emitThisGCrefRegs & regs;
+ regMaskGpr gcrefRegs = emitThisGCrefRegs & regs;
// "this" can never go dead in synchronized methods, except in the epilog
// after the call to CORINFO_HELP_MON_EXIT.
@@ -9281,7 +9313,7 @@ void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr)
// Second, handle the byref regs going dead
- regMaskTP byrefRegs = emitThisByrefRegs & regs;
+ regMaskGpr byrefRegs = emitThisByrefRegs & regs;
if (byrefRegs)
{
@@ -9311,7 +9343,7 @@ void emitter::emitGCregDeadUpd(regNumber reg, BYTE* addr)
return;
}
- regMaskTP regMask = genRegMask(reg);
+ singleRegMask regMask = genRegMask(reg);
if ((emitThisGCrefRegs & regMask) != 0)
{
@@ -10010,7 +10042,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn
// of callee-saved registers only).
for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALLEE_SAVED; calleeSavedRegIdx++)
{
- regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx];
+ regMaskGpr calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx];
if (emitThisGCrefRegs & calleeSavedRbm)
{
gcrefRegs |= (1 << calleeSavedRegIdx);
@@ -10327,7 +10359,7 @@ const char* emitter::emitOffsetToLabel(unsigned offs)
// Return value:
// the saved set of registers.
//
-regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd)
+regMaskGpr emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd)
{
// Is it a helper with a special saved set?
bool isNoGCHelper = emitNoGChelper(methHnd);
@@ -10336,14 +10368,14 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd)
CorInfoHelpFunc helpFunc = Compiler::eeGetHelperNum(methHnd);
// Get the set of registers that this call kills and remove it from the saved set.
- regMaskTP savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helpFunc);
+ regMaskGpr savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helpFunc);
#ifdef DEBUG
if (emitComp->verbose)
{
printf("NoGC Call: savedSet=");
printRegMaskInt(savedSet);
- emitDispRegSet(savedSet);
+ emitDispGprRegSet(savedSet);
printf("\n");
}
#endif
@@ -10352,7 +10384,7 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd)
else
{
// This is the saved set of registers after a normal call.
- return RBM_CALLEE_SAVED;
+ return RBM_INT_CALLEE_SAVED;
}
}
@@ -10373,7 +10405,7 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd)
// Return Value:
// Mask of GC register kills
//
-regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
+RegBitSet64 emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
{
assert(emitNoGChelper(helper));
regMaskTP result;
@@ -10424,7 +10456,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
// compHelperCallKillSet returns a superset of the registers which values are not guaranteed to be the same
// after the call, if a register loses its GC or byref it has to be in the compHelperCallKillSet set as well.
- assert((result & emitComp->compHelperCallKillSet(helper)) == result);
+ assert((result & emitComp->compHelperCallKillSet(helper).GetGprFloatCombinedMask()) == result);
return result;
}
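emitUpdateLiveGCregs above computes the set of registers whose GC-ness changed and then peels that set apart one register at a time with genFindLowestBit/genRegNumFromMask. The same walk, written against plain integers so it can be read in isolation (the register numbering and printouts here are illustrative only):

    #include <cstdint>
    #include <cstdio>

    static void reportGCRegDelta(uint64_t prevLive, uint64_t nowLive)
    {
        uint64_t chg = prevLive ^ nowLive; // registers whose GC-ness changed
        while (chg != 0)
        {
            uint64_t bit = chg & (~chg + 1); // isolate the lowest set bit

            int regNum = 0;                  // recover the register index
            for (uint64_t b = bit; b > 1; b >>= 1)
            {
                regNum++;
            }

            if ((nowLive & bit) != 0)
            {
                printf("reg %d becomes a live GC ref\n", regNum);
            }
            else
            {
                printf("reg %d goes dead\n", regNum);
            }

            chg ^= bit; // done with this register
        }
    }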
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 40a729dd70fee2..d9a4b276118d50 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -57,7 +57,7 @@ void emitterStats(FILE* fout);
void emitterStaticStats(FILE* fout); // Static stats about the emitter (data structure offsets, sizes, etc.)
#endif
-void printRegMaskInt(regMaskTP mask);
+void printRegMaskInt(regMaskGpr mask);
/*****************************************************************************/
/* Forward declarations */
@@ -250,11 +250,11 @@ struct insPlaceholderGroupData
insGroup* igPhNext;
BasicBlock* igPhBB;
VARSET_TP igPhInitGCrefVars;
- regMaskTP igPhInitGCrefRegs;
- regMaskTP igPhInitByrefRegs;
+ regMaskGpr igPhInitGCrefRegs;
+ regMaskGpr igPhInitByrefRegs;
VARSET_TP igPhPrevGCrefVars;
- regMaskTP igPhPrevGCrefRegs;
- regMaskTP igPhPrevByrefRegs;
+ regMaskGpr igPhPrevGCrefRegs;
+ regMaskGpr igPhPrevByrefRegs;
insGroupPlaceholderType igPhType;
}; // end of struct insPlaceholderGroupData
@@ -323,9 +323,7 @@ struct insGroup
// Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits).
-#if !(REGMASK_BITS <= 32)
regMaskSmall igGCregs; // set of registers with live GC refs
-#endif // !(REGMASK_BITS <= 32)
union
{
@@ -343,10 +341,6 @@ struct insGroup
unsigned igStkLvl; // stack level on entry
#endif // EMIT_TRACK_STACK_DEPTH
-#if REGMASK_BITS <= 32
- regMaskSmall igGCregs; // set of registers with live GC refs
-#endif // REGMASK_BITS <= 32
-
unsigned char igInsCnt; // # of instructions in this group
VARSET_VALRET_TP igGCvars() const
@@ -758,7 +752,7 @@ class emitter
// x86: 38 bits
// amd64: 38 bits
// arm: 32 bits
- // arm64: 44 bits
+ // arm64: 46 bits
// loongarch64: 28 bits
// risc-v: 28 bits
@@ -828,7 +822,7 @@ class emitter
// x86: 48 bits
// amd64: 48 bits
// arm: 48 bits
- // arm64: 53 bits
+ // arm64: 55 bits
// loongarch64: 46 bits
// risc-v: 46 bits
@@ -840,7 +834,7 @@ class emitter
#if defined(TARGET_ARM)
#define ID_EXTRA_BITFIELD_BITS (16)
#elif defined(TARGET_ARM64)
-#define ID_EXTRA_BITFIELD_BITS (21)
+#define ID_EXTRA_BITFIELD_BITS (23)
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#define ID_EXTRA_BITFIELD_BITS (14)
#elif defined(TARGET_XARCH)
@@ -881,7 +875,7 @@ class emitter
// x86: 54/50 bits
// amd64: 55/50 bits
// arm: 54/50 bits
- // arm64: 60/55 bits
+ // arm64: 62/57 bits
// loongarch64: 53/48 bits
// risc-v: 53/48 bits
@@ -897,7 +891,7 @@ class emitter
// x86: 10/14 bits
// amd64: 9/14 bits
// arm: 10/14 bits
- // arm64: 4/9 bits
+ // arm64: 2/7 bits
// loongarch64: 11/16 bits
// risc-v: 11/16 bits
@@ -2140,11 +2134,11 @@ class emitter
{
instrDescCGCA() = delete;
- VARSET_TP idcGCvars; // ... updated GC vars or
- ssize_t idcDisp; // ... big addrmode disp
- regMaskTP idcGcrefRegs; // ... gcref registers
- regMaskTP idcByrefRegs; // ... byref registers
- unsigned idcArgCnt; // ... lots of args or (<0 ==> caller pops args)
+ VARSET_TP idcGCvars; // ... updated GC vars or
+ ssize_t idcDisp; // ... big addrmode disp
+ regMaskGpr idcGcrefRegs; // ... gcref registers
+ regMaskGpr idcByrefRegs; // ... byref registers
+ unsigned idcArgCnt; // ... lots of args or (<0 ==> caller pops args)
#if MULTIREG_HAS_SECOND_GC_RET
// This method handle the GC-ness of the second register in a 2 register returned struct on System V.
@@ -2257,11 +2251,11 @@ class emitter
VARSET_TP debugPrevGCrefVars;
VARSET_TP debugThisGCrefVars;
regPtrDsc* debugPrevRegPtrDsc;
- regMaskTP debugPrevGCrefRegs;
- regMaskTP debugPrevByrefRegs;
+ regMaskGpr debugPrevGCrefRegs;
+ regMaskGpr debugPrevByrefRegs;
void emitDispInsIndent();
void emitDispGCDeltaTitle(const char* title);
- void emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskTP curRegs);
+ void emitDispGCRegDelta(const char* title, regMaskGpr prevRegs, regMaskGpr curRegs);
void emitDispGCVarDelta();
void emitDispRegPtrListDelta();
void emitDispGCInfoDelta();
@@ -2476,18 +2470,24 @@ class emitter
private:
#if defined(TARGET_AMD64)
- regMaskTP rbmFltCalleeTrash;
+ regMaskFloat rbmAllFloat;
+ regMaskFloat rbmFltCalleeTrash;
+
+ FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const
+ {
+ return this->rbmAllFloat;
+ }
- FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const
+ FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const
{
return this->rbmFltCalleeTrash;
}
#endif // TARGET_AMD64
#if defined(TARGET_XARCH)
- regMaskTP rbmMskCalleeTrash;
+ regMaskPredicate rbmMskCalleeTrash;
- FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const
+ FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
@@ -2650,13 +2650,13 @@ class emitter
// in that tracking. See emitSavIG(): the important use of ByrefRegs is commented
// out, and GCrefRegs is always saved.
- VARSET_TP emitPrevGCrefVars;
- regMaskTP emitPrevGCrefRegs;
- regMaskTP emitPrevByrefRegs;
+ VARSET_TP emitPrevGCrefVars;
+ regMaskGpr emitPrevGCrefRegs;
+ regMaskGpr emitPrevByrefRegs;
- VARSET_TP emitInitGCrefVars;
- regMaskTP emitInitGCrefRegs;
- regMaskTP emitInitByrefRegs;
+ VARSET_TP emitInitGCrefVars;
+ regMaskGpr emitInitGCrefRegs;
+ regMaskGpr emitInitByrefRegs;
// If this is set, we ignore comparing emitPrev* and emitInit* to determine
// whether to save GC state (to save space in the IG), and always save it.
@@ -2673,9 +2673,9 @@ class emitter
// really the only one used; the others seem to be calculated, but not
// used due to bugs.
- VARSET_TP emitThisGCrefVars;
- regMaskTP emitThisGCrefRegs; // Current set of registers holding GC references
- regMaskTP emitThisByrefRegs; // Current set of registers holding BYREF references
+ VARSET_TP emitThisGCrefVars;
+ regMaskGpr emitThisGCrefRegs; // Current set of registers holding GC references
+ regMaskGpr emitThisByrefRegs; // Current set of registers holding BYREF references
bool emitThisGCrefVset; // Is "emitThisGCrefVars" up to date?
@@ -2685,7 +2685,7 @@ class emitter
void emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize);
#endif // MULTIREG_HAS_SECOND_GC_RET
- static void emitEncodeCallGCregs(regMaskTP regs, instrDesc* id);
+ static void emitEncodeCallGCregs(regMaskGpr regs, instrDesc* id);
static unsigned emitDecodeCallGCregs(instrDesc* id);
unsigned emitNxtIGnum;
@@ -2857,9 +2857,9 @@ class emitter
// Mark this instruction group as having a label; return the new instruction group.
// Sets the emitter's record of the currently live GC variables
// and registers.
- void* emitAddLabel(VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs DEBUG_ARG(BasicBlock* block = nullptr));
+ void* emitAddLabel(VARSET_VALARG_TP GCvars,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs DEBUG_ARG(BasicBlock* block = nullptr));
// Same as above, except the label is added and is conceptually "inline" in
// the current block. Thus it extends the previous block and the emitter
@@ -3140,10 +3140,10 @@ class emitter
bool emitFullGCinfo; // full GC pointer maps?
bool emitFullyInt; // fully interruptible code?
- regMaskTP emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd);
+ regMaskGpr emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd);
// Gets a register mask that represent the kill set for a NoGC helper call.
- regMaskTP emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper);
+ RegBitSet64 emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper);
#if EMIT_TRACK_STACK_DEPTH
unsigned emitCntStackDepth; // 0 in prolog/epilog, One DWORD elsewhere
@@ -3199,19 +3199,25 @@ class emitter
/* Liveness of stack variables, and registers */
void emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE* addr);
- void emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr);
+ void emitUpdateLiveGCregs(GCtype gcType, regMaskGpr regs, BYTE* addr);
#ifdef DEBUG
const char* emitGetFrameReg();
- void emitDispRegSet(regMaskTP regs);
- void emitDispVarSet();
+ void emitDispRegSet(regNumber firstReg, regNumber lastReg, regMaskOnlyOne regs);
+ void emitDispGprRegSet(regMaskGpr regs);
+ void emitDispFloatRegSet(regMaskFloat regs);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ void emitDispMaskRegSet(regMaskPredicate regs);
+#endif // FEATURE_MASKED_HW_INTRINSICS
+ void emitDispRegSet(CONSTREF_AllRegsMask regs);
+ void emitDispVarSet();
#endif
void emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr);
- void emitGCregLiveSet(GCtype gcType, regMaskTP mask, BYTE* addr, bool isThis);
- void emitGCregDeadUpdMask(regMaskTP, BYTE* addr);
+ void emitGCregLiveSet(GCtype gcType, regMaskGpr mask, BYTE* addr, bool isThis);
+ void emitGCregDeadUpdMask(regMaskGpr, BYTE* addr);
void emitGCregDeadUpd(regNumber reg, BYTE* addr);
- void emitGCregDeadSet(GCtype gcType, regMaskTP mask, BYTE* addr);
+ void emitGCregDeadSet(GCtype gcType, regMaskGpr mask, BYTE* addr);
void emitGCvarLiveUpd(int offs, int varNum, GCtype gcType, BYTE* addr DEBUG_ARG(unsigned actualVarNum));
void emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp = -1);
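The comment updates in the emit.h hunk above re-balance the hand-maintained bit budget for instrDesc on arm64 (two more bitfield bits, two fewer spare). Those numbers live only in comments; a hypothetical way to keep such a budget honest is a static_assert over a stand-in struct. The field names and widths below are made up, not the JIT's actual layout.

    #include <cstdint>

    struct SketchInstrDescBits
    {
        uint64_t ins       : 9;  // instruction
        uint64_t fmt       : 8;  // encoding format
        uint64_t opSize    : 3;  // operand size
        uint64_t extraBits : 23; // target-specific payload (cf. ID_EXTRA_BITFIELD_BITS)
    };

    static_assert(sizeof(SketchInstrDescBits) <= sizeof(uint64_t),
                  "instrDesc bitfields must stay within one 64-bit word");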
diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp
index 5a20f8a1f940ad..85b05ad801f3e3 100644
--- a/src/coreclr/jit/emitarm.cpp
+++ b/src/coreclr/jit/emitarm.cpp
@@ -4670,8 +4670,8 @@ void emitter::emitIns_Call(EmitCallType callType,
int argSize,
emitAttr retSize,
VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
const DebugInfo& di /* = DebugInfo() */,
regNumber ireg /* = REG_NA */,
regNumber xreg /* = REG_NA */,
@@ -4680,7 +4680,8 @@ void emitter::emitIns_Call(EmitCallType callType,
bool isJump /* = false */)
{
/* Sanity check the arguments depending on callType */
-
+ assert(emitComp->IsGprRegMask(gcrefRegs));
+ assert(emitComp->IsGprRegMask(byrefRegs));
assert(callType < EC_COUNT);
assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA));
assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT));
@@ -4693,9 +4694,9 @@ void emitter::emitIns_Call(EmitCallType callType,
assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
// Trim out any callee-trashed registers from the live set.
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
- gcrefRegs &= savedSet;
- byrefRegs &= savedSet;
+ AllRegsMask savedSet = emitGetGCRegsSavedOrModified(methHnd);
+ gcrefRegs &= savedSet.gprRegs();
+ byrefRegs &= savedSet.gprRegs();
#ifdef DEBUG
if (EMIT_GC_VERBOSE)
@@ -4704,10 +4705,10 @@ void emitter::emitIns_Call(EmitCallType callType,
dumpConvertedVarSet(emitComp, ptrVars);
printf(", gcrefRegs=");
printRegMaskInt(gcrefRegs);
- emitDispRegSet(gcrefRegs);
+ emitDispGprRegSet(gcrefRegs);
printf(", byrefRegs=");
printRegMaskInt(byrefRegs);
- emitDispRegSet(byrefRegs);
+ emitDispGprRegSet(byrefRegs);
printf("\n");
}
#endif
@@ -5762,10 +5763,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
switch (fmt)
{
- int imm;
- BYTE* addr;
- regMaskTP gcrefRegs;
- regMaskTP byrefRegs;
+ int imm;
+ BYTE* addr;
+ regMaskGpr gcrefRegs;
+ regMaskGpr byrefRegs;
case IF_T1_A: // T1_A ................
sz = SMALL_IDSC_SIZE;
@@ -6667,11 +6668,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
printf(" emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
printRegMaskInt(emitThisGCrefRegs);
- emitDispRegSet(emitThisGCrefRegs);
+ emitDispGprRegSet(emitThisGCrefRegs);
printf("\n");
printf(" emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
printRegMaskInt(emitThisByrefRegs);
- emitDispRegSet(emitThisByrefRegs);
+ emitDispGprRegSet(emitThisByrefRegs);
printf("\n");
}
diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h
index 6ae0c57dea6d26..83541a36427516 100644
--- a/src/coreclr/jit/emitarm.h
+++ b/src/coreclr/jit/emitarm.h
@@ -65,10 +65,10 @@ void emitDispInsHelp(instrDesc* id,
private:
instrDesc* emitNewInstrCallDir(
- int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize);
+ int argCnt, VARSET_VALARG_TP GCvars, regMaskGpr gcrefRegs, regMaskGpr byrefRegs, emitAttr retSize);
instrDesc* emitNewInstrCallInd(
- int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize);
+ int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskGpr gcrefRegs, regMaskGpr byrefRegs, emitAttr retSize);
/************************************************************************/
/* Private helpers for instruction output */
@@ -328,8 +328,8 @@ void emitIns_Call(EmitCallType callType,
int argSize,
emitAttr retSize,
VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
const DebugInfo& di = DebugInfo(),
regNumber ireg = REG_NA,
regNumber xreg = REG_NA,
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 181b9706e41611..b327bccab2972c 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -1309,13 +1309,13 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
// clang-format off
static const char * const xRegNames[] =
{
- #define REGDEF(name, rnum, mask, xname, wname) xname,
+ #define REGDEF(name, rnum, mask, xname, wname, regTypeTag) xname,
#include "register.h"
};
static const char * const wRegNames[] =
{
- #define REGDEF(name, rnum, mask, xname, wname) wname,
+ #define REGDEF(name, rnum, mask, xname, wname, regTypeTag) wname,
#include "register.h"
};
@@ -8944,8 +8944,8 @@ void emitter::emitIns_Call(EmitCallType callType,
emitAttr retSize,
emitAttr secondRetSize,
VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
const DebugInfo& di /* = DebugInfo() */,
regNumber ireg /* = REG_NA */,
regNumber xreg /* = REG_NA */,
@@ -8954,7 +8954,8 @@ void emitter::emitIns_Call(EmitCallType callType,
bool isJump /* = false */)
{
/* Sanity check the arguments depending on callType */
-
+ assert(emitComp->IsGprRegMask(gcrefRegs));
+ assert(emitComp->IsGprRegMask(byrefRegs));
assert(callType < EC_COUNT);
assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA));
assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT));
@@ -8967,7 +8968,7 @@ void emitter::emitIns_Call(EmitCallType callType,
assert((unsigned)std::abs(argSize) <= codeGen->genStackLevel);
// Trim out any callee-trashed registers from the live set.
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
+ regMaskGpr savedSet = emitGetGCRegsSavedOrModified(methHnd);
gcrefRegs &= savedSet;
byrefRegs &= savedSet;
@@ -8978,10 +8979,10 @@ void emitter::emitIns_Call(EmitCallType callType,
dumpConvertedVarSet(emitComp, ptrVars);
printf(", gcrefRegs=");
printRegMaskInt(gcrefRegs);
- emitDispRegSet(gcrefRegs);
+ emitDispGprRegSet(gcrefRegs);
printf(", byrefRegs=");
printRegMaskInt(byrefRegs);
- emitDispRegSet(byrefRegs);
+ emitDispGprRegSet(byrefRegs);
printf("\n");
}
#endif
@@ -10651,8 +10652,8 @@ BYTE* emitter::emitOutputVectorConstant(
unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
{
const unsigned char callInstrSize = sizeof(code_t); // 4 bytes
- regMaskTP gcrefRegs;
- regMaskTP byrefRegs;
+ regMaskGpr gcrefRegs;
+ regMaskGpr byrefRegs;
VARSET_TP GCvars(VarSetOps::UninitVal());
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
index cc3254c06810ab..68ec1621485770 100644
--- a/src/coreclr/jit/emitarm64.h
+++ b/src/coreclr/jit/emitarm64.h
@@ -95,16 +95,16 @@ void emitDispSvePrfop(insSvePrfop prfop, bool addComma);
private:
instrDesc* emitNewInstrCallDir(int argCnt,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
emitAttr retSize,
emitAttr secondRetSize);
instrDesc* emitNewInstrCallInd(int argCnt,
ssize_t disp,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
emitAttr retSize,
emitAttr secondRetSize);
@@ -1175,6 +1175,13 @@ inline static bool isGeneralRegisterOrSP(regNumber reg)
return isGeneralRegister(reg) || (reg == REG_SP);
} // Includes REG_SP, Excludes REG_ZR
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+inline static bool isMaskReg(regNumber reg)
+{
+ return (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST);
+}
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
inline static bool isVectorRegister(regNumber reg)
{
return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
@@ -1738,8 +1745,8 @@ void emitIns_Call(EmitCallType callType,
emitAttr retSize,
emitAttr secondRetSize,
VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
const DebugInfo& di,
regNumber ireg,
regNumber xreg,
diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h
index 66a33b813d58fa..43e68885fac8b9 100644
--- a/src/coreclr/jit/emitinl.h
+++ b/src/coreclr/jit/emitinl.h
@@ -211,7 +211,7 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const
*
* Convert between a register mask and a smaller version for storage.
*/
-/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskTP regmask, instrDesc* id)
+/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskGpr regmask, instrDesc* id)
{
unsigned encodeMask;
diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp
index c69ea7c5a36e6f..0bd387ae5d4ad0 100644
--- a/src/coreclr/jit/emitloongarch64.cpp
+++ b/src/coreclr/jit/emitloongarch64.cpp
@@ -3887,7 +3887,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// clang-format off
static const char* const RegNames[] =
{
- #define REGDEF(name, rnum, mask, sname) sname,
+ #define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
#include "register.h"
};
// clang-format on
diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h
index bf15ba33667cac..46fe21ed7b394d 100644
--- a/src/coreclr/jit/emitpub.h
+++ b/src/coreclr/jit/emitpub.h
@@ -53,8 +53,8 @@ void emitEndProlog();
void emitCreatePlaceholderIG(insGroupPlaceholderType igType,
BasicBlock* igBB,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
bool last);
void emitGeneratePrologEpilog();
diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp
index 71fd3e323d518c..03eea8e8e29bcf 100644
--- a/src/coreclr/jit/emitriscv64.cpp
+++ b/src/coreclr/jit/emitriscv64.cpp
@@ -3413,7 +3413,7 @@ void emitter::emitDispIllegalInstruction(code_t instructionCode)
// clang-format off
static const char* const RegNames[] =
{
- #define REGDEF(name, rnum, mask, sname) sname,
+ #define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
#include "register.h"
};
// clang-format on
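The REGDEF edits in emitarm64.cpp, emitloongarch64.cpp and emitriscv64.cpp above, and in emitxarch.cpp below, all thread one more column (regTypeTag) through the register.h X-macro: every consumer that defines REGDEF must now accept the extra argument, even when it ignores it. A self-contained sketch of the pattern, with made-up registers standing in for register.h:

    // Stand-in for register.h: one REGDEF line per register.
    #define SKETCH_REGISTERS(REGDEF)                                      \
        REGDEF(R0,  0, 0x01, "r0", GPR)                                   \
        REGDEF(R1,  1, 0x02, "r1", GPR)                                   \
        REGDEF(F0, 32, 0x01, "f0", FLT)

    // One consumer: a name table (ignores the new regTypeTag column).
    static const char* const sketchRegNames[] = {
    #define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
        SKETCH_REGISTERS(REGDEF)
    #undef REGDEF
    };

    // Another consumer: a per-register type tag built from the new column.
    enum SketchRegTag { GPR, FLT };

    static const SketchRegTag sketchRegTags[] = {
    #define REGDEF(name, rnum, mask, sname, regTypeTag) regTypeTag,
        SKETCH_REGISTERS(REGDEF)
    #undef REGDEF
    };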
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 6bf148cf2d8883..321b2ffb9dd1bb 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -9488,8 +9488,8 @@ void emitter::emitIns_Call(EmitCallType callType,
emitAttr retSize
MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
const DebugInfo& di,
regNumber ireg,
regNumber xreg,
@@ -9499,6 +9499,8 @@ void emitter::emitIns_Call(EmitCallType callType,
// clang-format on
{
/* Sanity check the arguments depending on callType */
+ assert(emitComp->IsGprRegMask(gcrefRegs));
+ assert(emitComp->IsGprRegMask(byrefRegs));
assert(callType < EC_COUNT);
if (!emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
@@ -9514,7 +9518,7 @@ void emitter::emitIns_Call(EmitCallType callType,
assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
// Trim out any callee-trashed registers from the live set.
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
+ regMaskGpr savedSet = emitGetGCRegsSavedOrModified(methHnd);
gcrefRegs &= savedSet;
byrefRegs &= savedSet;
@@ -9525,10 +9529,10 @@ void emitter::emitIns_Call(EmitCallType callType,
dumpConvertedVarSet(emitComp, ptrVars);
printf(", gcrefRegs=");
printRegMaskInt(gcrefRegs);
- emitDispRegSet(gcrefRegs);
+ emitDispGprRegSet(gcrefRegs);
printf(", byrefRegs=");
printRegMaskInt(byrefRegs);
- emitDispRegSet(byrefRegs);
+ emitDispGprRegSet(byrefRegs);
printf("\n");
}
#endif
@@ -10218,7 +10222,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) con
const char* emitter::emitXMMregName(unsigned reg) const
{
static const char* const regNames[] = {
-#define REGDEF(name, rnum, mask, sname) "x" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "x" sname,
#include "register.h"
};
@@ -10236,7 +10240,7 @@ const char* emitter::emitXMMregName(unsigned reg) const
const char* emitter::emitYMMregName(unsigned reg) const
{
static const char* const regNames[] = {
-#define REGDEF(name, rnum, mask, sname) "y" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "y" sname,
#include "register.h"
};
@@ -10254,7 +10258,7 @@ const char* emitter::emitYMMregName(unsigned reg) const
const char* emitter::emitZMMregName(unsigned reg) const
{
static const char* const regNames[] = {
-#define REGDEF(name, rnum, mask, sname) "z" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "z" sname,
#include "register.h"
};
@@ -14677,7 +14681,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
case IF_RRW:
{
#ifdef DEBUG
- regMaskTP regMask = genRegMask(reg);
+ singleRegMask regMask = genRegMask(reg);
#endif
if (id->idGCref())
{
@@ -15020,8 +15024,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// instruction, if writing a GC ref even through reading a long, will go live here.
// These situations typically occur due to unsafe casting, such as with Span.
- regMaskTP regMask;
+ regMaskGpr regMask;
regMask = genRegMask(reg1) | genRegMask(reg2);
+ assert(emitComp->IsGprRegMask(regMask));
// r1/r2 could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
@@ -15520,7 +15525,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
assert(id->idGCref() == GCT_BYREF);
#ifdef DEBUG
- regMaskTP regMask;
+ regMaskGpr regMask;
regMask = genRegMask(reg);
// FIXNOW review the other places and relax the assert there too
@@ -16343,8 +16348,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
BYTE* addr;
bool recCall;
- regMaskTP gcrefRegs;
- regMaskTP byrefRegs;
+ regMaskGpr gcrefRegs;
+ regMaskGpr byrefRegs;
/********************************************************************/
/* No operands */
@@ -17910,11 +17915,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
printRegMaskInt(emitThisGCrefRegs);
- emitDispRegSet(emitThisGCrefRegs);
+ emitDispGprRegSet(emitThisGCrefRegs);
printf("\n");
printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
printRegMaskInt(emitThisByrefRegs);
- emitDispRegSet(emitThisByrefRegs);
+ emitDispGprRegSet(emitThisByrefRegs);
printf("\n");
}
@@ -17943,7 +17948,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// The target of the 3-operand imul is implicitly encoded. Make sure
// that we detected the implicit register and cleared its GC-status.
- regMaskTP regMask = genRegMask(inst3opImulReg(ins));
+ singleRegMask regMask = genRegMask(inst3opImulReg(ins));
assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
}
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index e32cab66254fe8..fa48eeb53f3a2c 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -23,10 +23,12 @@ inline static bool isDoubleReg(regNumber reg)
return isFloatReg(reg);
}
+#ifdef FEATURE_MASKED_HW_INTRINSICS
inline static bool isMaskReg(regNumber reg)
{
return (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST);
}
+#endif // FEATURE_MASKED_HW_INTRINSICS
inline static bool isHighSimdReg(regNumber reg)
{
@@ -534,15 +536,15 @@ instrDesc* emitNewInstrAmdCns(emitAttr attr, ssize_t dsp, int cns);
instrDesc* emitNewInstrCallDir(int argCnt,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
instrDesc* emitNewInstrCallInd(int argCnt,
ssize_t disp,
VARSET_VALARG_TP GCvars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
void emitGetInsCns(const instrDesc* id, CnsVal* cv) const;
@@ -912,8 +914,8 @@ void emitIns_Call(EmitCallType callType,
emitAttr retSize
MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
+ regMaskGpr gcrefRegs,
+ regMaskGpr byrefRegs,
const DebugInfo& di = DebugInfo(),
regNumber ireg = REG_NA,
regNumber xreg = REG_NA,
diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp
index e21fe864984ef7..25ee06d61b8085 100644
--- a/src/coreclr/jit/gcencode.cpp
+++ b/src/coreclr/jit/gcencode.cpp
@@ -4471,8 +4471,8 @@ void GCInfo::gcMakeRegPtrTable(
assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0);
// Other than that, we just have to deal with the regmasks.
- regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALLEE_SAVED;
- regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALLEE_SAVED;
+ regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_INT_CALLEE_SAVED;
+ regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_INT_CALLEE_SAVED;
assert((gcrefRegMask & byrefRegMask) == 0);
@@ -4620,7 +4620,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
while (regMask)
{
// Get hold of the next register bit.
- regMaskTP tmpMask = genFindLowestBit(regMask);
+ regMaskGpr tmpMask = genFindLowestBit(regMask);
assert(tmpMask);
// Remember the new state of this register.
@@ -4637,7 +4637,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
}
// Figure out which register the next bit corresponds to.
- regNumber regNum = genRegNumFromMask(tmpMask);
+ regNumber regNum = genRegNumFromMask(tmpMask MORE_THAN_64_REG_ARG(TYP_INT));
/* Reserve SP future use */
assert(regNum != REG_SPBASE);
diff --git a/src/coreclr/jit/gcinfo.cpp b/src/coreclr/jit/gcinfo.cpp
index ff534a0afcbf21..e77446952647d5 100644
--- a/src/coreclr/jit/gcinfo.cpp
+++ b/src/coreclr/jit/gcinfo.cpp
@@ -84,7 +84,7 @@ void GCInfo::gcResetForBB()
* Print the changes in the gcRegGCrefSetCur sets.
*/
-void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput))
+void GCInfo::gcDspGCrefSetChanges(regMaskGpr gcRegGCrefSetNew DEBUGARG(bool forceOutput))
{
if (compiler->verbose)
{
@@ -98,11 +98,11 @@ void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool force
else
{
printRegMaskInt(gcRegGCrefSetCur);
- compiler->GetEmitter()->emitDispRegSet(gcRegGCrefSetCur);
+ compiler->GetEmitter()->emitDispGprRegSet(gcRegGCrefSetCur);
printf(" => ");
}
printRegMaskInt(gcRegGCrefSetNew);
- compiler->GetEmitter()->emitDispRegSet(gcRegGCrefSetNew);
+ compiler->GetEmitter()->emitDispGprRegSet(gcRegGCrefSetNew);
printf("\n");
}
}
@@ -113,7 +113,7 @@ void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool force
* Print the changes in the gcRegByrefSetCur sets.
*/
-void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput))
+void GCInfo::gcDspByrefSetChanges(regMaskGpr gcRegByrefSetNew DEBUGARG(bool forceOutput))
{
if (compiler->verbose)
{
@@ -127,11 +127,11 @@ void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool force
else
{
printRegMaskInt(gcRegByrefSetCur);
- compiler->GetEmitter()->emitDispRegSet(gcRegByrefSetCur);
+ compiler->GetEmitter()->emitDispGprRegSet(gcRegByrefSetCur);
printf(" => ");
}
printRegMaskInt(gcRegByrefSetNew);
- compiler->GetEmitter()->emitDispRegSet(gcRegByrefSetNew);
+ compiler->GetEmitter()->emitDispGprRegSet(gcRegByrefSetNew);
printf("\n");
}
}
@@ -145,14 +145,16 @@ void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool force
* GCref pointer values.
*/
-void GCInfo::gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput))
+void GCInfo::gcMarkRegSetGCref(regMaskGpr regMask DEBUGARG(bool forceOutput))
{
+ assert(compiler->IsGprRegMask(regMask));
+
// This set of registers are going to hold REFs.
// Make sure they were not holding BYREFs.
assert((gcRegByrefSetCur & regMask) == 0);
- regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~regMask; // Clear it if set in Byref mask
- regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur | regMask; // Set it in GCref mask
+ regMaskGpr gcRegByrefSetNew = gcRegByrefSetCur & ~regMask; // Clear it if set in Byref mask
+ regMaskGpr gcRegGCrefSetNew = gcRegGCrefSetCur | regMask; // Set it in GCref mask
INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput));
INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew));
@@ -167,10 +169,12 @@ void GCInfo::gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput))
* Byref pointer values.
*/
-void GCInfo::gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput))
+void GCInfo::gcMarkRegSetByref(regMaskGpr regMask DEBUGARG(bool forceOutput))
{
- regMaskTP gcRegByrefSetNew = gcRegByrefSetCur | regMask; // Set it in Byref mask
- regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~regMask; // Clear it if set in GCref mask
+ assert(compiler->IsGprRegMask(regMask));
+
+ regMaskGpr gcRegByrefSetNew = gcRegByrefSetCur | regMask; // Set it in Byref mask
+ regMaskGpr gcRegGCrefSetNew = gcRegGCrefSetCur & ~regMask; // Clear it if set in GCref mask
INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew));
INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput));
@@ -179,18 +183,50 @@ void GCInfo::gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput))
gcRegGCrefSetCur = gcRegGCrefSetNew;
}
+/*****************************************************************************
+ *
+ *  Mark the given GPR register as holding non-pointer values.
+ *
+ */
+
+void GCInfo::gcMarkGprRegNpt(regNumber reg DEBUGARG(bool forceOutput))
+{
+ assert(emitter::isGeneralRegister(reg));
+ gcMarkRegSetNpt(genRegMask(reg) DEBUGARG(forceOutput));
+}
+
+/*****************************************************************************
+ *
+ *  Mark the register as holding non-pointer values; non-GPR registers are ignored.
+ *
+ */
+
+void GCInfo::gcMarkRegNpt(regNumber reg DEBUGARG(bool forceOutput))
+{
+ if (!emitter::isGeneralRegister(reg))
+ {
+ return;
+ }
+
+ gcMarkRegSetNpt(genRegMask(reg) DEBUGARG(forceOutput));
+}
+
/*****************************************************************************
*
* Mark the set of registers given by the specified mask as holding
* non-pointer values.
*/
-void GCInfo::gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput))
+void GCInfo::gcMarkRegSetNpt(regMaskGpr regMask DEBUGARG(bool forceOutput))
{
+    // We only care about GPR registers because those are the only ones that
+    // can hold GC pointers.
+ assert(compiler->IsGprRegMask(regMask));
+
/* NOTE: don't unmark any live register variables */
- regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~(regMask & ~regSet->GetMaskVars());
- regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~(regMask & ~regSet->GetMaskVars());
+ regMaskGpr gcRegByrefSetNew = gcRegByrefSetCur & ~(regMask & ~regSet->GetGprMaskVars());
+ regMaskGpr gcRegGCrefSetNew = gcRegGCrefSetCur & ~(regMask & ~regSet->GetGprMaskVars());
INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput));
INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput));
@@ -206,8 +242,12 @@ void GCInfo::gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput))
void GCInfo::gcMarkRegPtrVal(regNumber reg, var_types type)
{
- regMaskTP regMask = genRegMask(reg);
+ if (!emitter::isGeneralRegister(reg))
+ {
+ return;
+ }
+ singleRegMask regMask = genRegMask(reg);
switch (type)
{
case TYP_REF:
@@ -700,6 +740,7 @@ void GCInfo::gcRegPtrSetInit()
#endif // JIT32_GCENCODER
+#if 0
//------------------------------------------------------------------------
// gcUpdateForRegVarMove: Update the masks when a variable is moved
//
@@ -716,7 +757,7 @@ void GCInfo::gcRegPtrSetInit()
// It is also called by LinearScan::recordVarLocationAtStartOfBB() which is in turn called by
// CodeGen::genCodeForBBList() at the block boundary.
-void GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc)
+void GCInfo::gcUpdateForRegVarMove(regMaskOnlyOne srcMask, regMaskOnlyOne dstMask, LclVarDsc* varDsc)
{
var_types type = varDsc->TypeGet();
bool isGCRef = (type == TYP_REF);
@@ -766,6 +807,6 @@ void GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarD
VarSetOps::AddElemD(compiler, gcVarPtrSetCur, varDsc->lvVarIndex);
}
}
-
+#endif
/*****************************************************************************/
/*****************************************************************************/
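The gcinfo.cpp hunks above introduce two entry points for clearing a register's GC-ness: gcMarkGprRegNpt insists the caller passes a general-purpose register, while gcMarkRegNpt quietly ignores float/mask registers, since only GPRs can ever hold GC pointers. A simplified stand-alone sketch of that split (the register enum, bit helper and set names here are simplified stand-ins, not the JIT's types):

    #include <cassert>
    #include <cstdint>

    enum SketchReg { SK_RAX, SK_RCX, SK_XMM0, SK_REG_COUNT };

    static bool isGeneralRegister(SketchReg reg) { return reg <= SK_RCX; }
    static uint64_t regBit(SketchReg reg) { return 1ull << reg; }

    static uint64_t gcrefRegs; // registers currently holding GC refs

    // Caller promises `reg` is a GPR (mirrors gcMarkGprRegNpt).
    void markGprRegNpt(SketchReg reg)
    {
        assert(isGeneralRegister(reg));
        gcrefRegs &= ~regBit(reg);
    }

    // Caller may pass any register; non-GPRs are a no-op (mirrors gcMarkRegNpt).
    void markRegNpt(SketchReg reg)
    {
        if (!isGeneralRegister(reg))
        {
            return;
        }
        gcrefRegs &= ~regBit(reg);
    }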
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 7e90d26a3d6820..693c051d5f808b 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -1053,14 +1053,14 @@ bool GenTree::NeedsConsecutiveRegisters() const
// Return Value:
// Reg Mask of GenTree node.
//
-regMaskTP GenTree::gtGetContainedRegMask()
+regMaskGpr GenTree::gtGetContainedRegMask()
{
if (!isContained())
{
return isUsedFromReg() ? gtGetRegMask() : RBM_NONE;
}
- regMaskTP mask = 0;
+ regMaskGpr mask = RBM_NONE;
for (GenTree* operand : Operands())
{
mask |= operand->gtGetContainedRegMask();
@@ -1077,14 +1077,14 @@ regMaskTP GenTree::gtGetContainedRegMask()
// Return Value:
// Reg Mask of GenTree node.
//
-regMaskTP GenTree::gtGetRegMask() const
+RegBitSet64 GenTree::gtGetRegMask() const
{
- regMaskTP resultMask;
+ RegBitSet64 resultMask = RBM_NONE;
if (IsMultiRegCall())
{
resultMask = genRegMask(GetRegNum());
- resultMask |= AsCall()->GetOtherRegMask();
+ resultMask |= AsCall()->GetOtherRegMask().GetGprFloatCombinedMask();
}
else if (IsCopyOrReloadOfMultiRegCall())
{
@@ -1096,13 +1096,12 @@ regMaskTP GenTree::gtGetRegMask() const
const GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
const unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
- resultMask = RBM_NONE;
for (unsigned i = 0; i < regCount; ++i)
{
regNumber reg = copyOrReload->GetRegNumByIdx(i);
if (reg != REG_NA)
{
- resultMask |= genRegMask(reg);
+ resultMask |= reg;
}
}
}
@@ -1112,12 +1111,11 @@ regMaskTP GenTree::gtGetRegMask() const
const GenTreePutArgSplit* splitArg = AsPutArgSplit();
const unsigned regCount = splitArg->gtNumRegs;
- resultMask = RBM_NONE;
for (unsigned i = 0; i < regCount; ++i)
{
regNumber reg = splitArg->GetRegNumByIdx(i);
assert(reg != REG_NA);
- resultMask |= genRegMask(reg);
+ resultMask |= reg;
}
}
#endif // FEATURE_ARG_SPLIT
@@ -1129,6 +1127,81 @@ regMaskTP GenTree::gtGetRegMask() const
return resultMask;
}
+//---------------------------------------------------------------
+// gtGetGprRegMask: Get the gpr reg mask of the node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Reg Mask of GenTree node.
+//
+// Note: This method would populate the reg mask with only the GPR registers.
+regMaskGpr GenTree::gtGetGprRegMask() const
+{
+ regMaskGpr resultMask = RBM_NONE;
+
+ if (IsMultiRegCall())
+ {
+ regNumber reg = GetRegNum();
+        resultMask |= -static_cast<regMaskGpr>(!regIndexForRegister(reg)) & genRegMask(reg);
+
+#if FEATURE_MULTIREG_RET
+ const GenTreeCall* call = AsCall();
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ regNumber otherReg = (regNumber)call->gtOtherRegs[i];
+ if (otherReg != REG_NA)
+ {
+                resultMask |= -static_cast<regMaskGpr>(!regIndexForRegister(otherReg)) & genRegMask(otherReg);
+ continue;
+ }
+ break;
+ }
+#endif
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ // A multi-reg copy or reload, will have valid regs for only those
+ // positions that need to be copied or reloaded. Hence we need
+ // to consider only those registers for computing reg mask.
+
+ const GenTreeCopyOrReload* copyOrReload = AsCopyOrReload();
+ const GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ const unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = copyOrReload->GetRegNumByIdx(i);
+ if (reg != REG_NA)
+ {
+                resultMask |= -static_cast<regMaskGpr>(!regIndexForRegister(reg)) & genRegMask(reg);
+ }
+ }
+ }
+#if FEATURE_ARG_SPLIT
+ else if (compFeatureArgSplit() && OperIsPutArgSplit())
+ {
+ const GenTreePutArgSplit* splitArg = AsPutArgSplit();
+ const unsigned regCount = splitArg->gtNumRegs;
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = splitArg->GetRegNumByIdx(i);
+ assert(reg != REG_NA);
+            resultMask |= -static_cast<regMaskGpr>(!regIndexForRegister(reg)) & genRegMask(reg);
+ }
+ }
+#endif // FEATURE_ARG_SPLIT
+ else
+ {
+ regNumber reg = GetRegNum();
+        resultMask |= -static_cast<regMaskGpr>(!regIndexForRegister(reg)) & genRegMask(reg);
+ }
+
+ return resultMask;
+}
+
void GenTreeFieldList::AddField(Compiler* compiler, GenTree* node, unsigned offset, var_types type)
{
m_uses.AddUse(new (compiler, CMK_ASTNode) Use(node, offset, type));
@@ -2155,16 +2228,17 @@ bool GenTreeCall::NeedsVzeroupper(Compiler* comp)
// Return Value:
// Reg mask of gtOtherRegs of call node.
//
-regMaskTP GenTreeCall::GetOtherRegMask() const
+AllRegsMask GenTreeCall::GetOtherRegMask() const
{
- regMaskTP resultMask = RBM_NONE;
+ AllRegsMask resultMask;
#if FEATURE_MULTIREG_RET
for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
{
- if (gtOtherRegs[i] != REG_NA)
+ regNumber otherReg = (regNumber)gtOtherRegs[i];
+ if (otherReg != REG_NA)
{
- resultMask |= genRegMask((regNumber)gtOtherRegs[i]);
+ resultMask |= otherReg;
continue;
}
break;
@@ -27568,14 +27642,15 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx, CorInfoCallConvExtension
// of return registers and wants to know the set of return registers.
//
// static
-regMaskTP ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) const
+AllRegsMask ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) const
{
- regMaskTP resultMask = RBM_NONE;
+ AllRegsMask resultMask;
unsigned count = GetReturnRegCount();
for (unsigned i = 0; i < count; ++i)
{
- resultMask |= genRegMask(GetABIReturnReg(i, callConv));
+ regNumber reg = GetABIReturnReg(i, callConv);
+ resultMask.AddRegNumInMask(reg);
}
return resultMask;
@@ -27595,7 +27670,7 @@ regMaskTP ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) co
// Return Value:
// Count of available temporary registers in given set.
//
-unsigned GenTree::AvailableTempRegCount(regMaskTP mask /* = (regMaskTP)-1 */) const
+unsigned GenTree::AvailableTempRegCount(regMaskOnlyOne mask /* = (regMaskOnlyOne)-1 */) const
{
return genCountBits(gtRsvdRegs & mask);
}
@@ -27612,11 +27687,11 @@ unsigned GenTree::AvailableTempRegCount(regMaskTP mask /* = (regMaskTP)-1 */) co
// Return Value:
// Available temporary register in given mask.
//
-regNumber GenTree::GetSingleTempReg(regMaskTP mask /* = (regMaskTP)-1 */)
+regNumber GenTree::GetSingleTempReg(regMaskOnlyOne mask /* = (regMaskOnlyOne)-1 */)
{
- regMaskTP availableSet = gtRsvdRegs & mask;
+ regMaskOnlyOne availableSet = gtRsvdRegs & mask;
assert(genCountBits(availableSet) == 1);
- regNumber tempReg = genRegNumFromMask(availableSet);
+ regNumber tempReg = genRegNumFromMask(availableSet MORE_THAN_64_REG_ARG(TypeGet()));
INDEBUG(gtRsvdRegs &= ~availableSet;) // Remove the register from the set, so it can't be used again.
return tempReg;
}
@@ -27633,11 +27708,11 @@ regNumber GenTree::GetSingleTempReg(regMaskTP mask /* = (regMaskTP)-1 */)
// Return Value:
// Available temporary register in given mask.
//
-regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */)
+regNumber GenTree::ExtractTempReg(regMaskOnlyOne mask /* = (regMaskOnlyOne)-1 */)
{
- regMaskTP availableSet = gtRsvdRegs & mask;
+ regMaskOnlyOne availableSet = gtRsvdRegs & mask;
assert(genCountBits(availableSet) >= 1);
- regNumber tempReg = genFirstRegNumFromMask(availableSet);
+ regNumber tempReg = genFirstRegNumFromMask(availableSet MORE_THAN_64_REG_ARG(TypeGet()));
gtRsvdRegs ^= genRegMask(tempReg);
return tempReg;
}
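gtGetGprRegMask above filters each register's bit through -static_cast<regMaskGpr>(!regIndexForRegister(reg)): the negated cast yields an all-ones mask when the register lives in bank 0 (taken here to be the GPR bank) and zero otherwise, so the AND keeps GPR bits and drops float/predicate bits without a branch. A standalone illustration, with plain uint64_t standing in for the JIT's mask types and the bank-0-means-GPR convention assumed from context:

    #include <cassert>
    #include <cstdint>

    static uint64_t keepIfGpr(unsigned bankIndex, uint64_t regBit)
    {
        // bankIndex == 0  ->  !bankIndex == 1  ->  0 - 1 == all ones
        // bankIndex != 0  ->  !bankIndex == 0  ->  0 - 0 == zero
        return (0 - static_cast<uint64_t>(!bankIndex)) & regBit;
    }

    int main()
    {
        assert(keepIfGpr(0, 1ull << 3) == (1ull << 3)); // GPR bank: bit survives
        assert(keepIfGpr(1, 1ull << 3) == 0);           // float bank: filtered out
        return 0;
    }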
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index d5dbad500c16d2..65114ca89c2488 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -945,8 +945,9 @@ struct GenTree
int GetRegisterDstCount(Compiler* compiler) const;
- regMaskTP gtGetRegMask() const;
- regMaskTP gtGetContainedRegMask();
+ RegBitSet64 gtGetRegMask() const;
+ regMaskGpr gtGetGprRegMask() const;
+ regMaskGpr gtGetContainedRegMask();
GenTreeFlags gtFlags;
@@ -956,11 +957,11 @@ struct GenTree
ValueNumPair gtVNPair;
- regMaskSmall gtRsvdRegs; // set of fixed trashed registers
+ regMaskOnlyOne gtRsvdRegs; // set of fixed trashed registers
- unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const;
- regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1);
- regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1);
+ unsigned AvailableTempRegCount(regMaskOnlyOne mask = (regMaskOnlyOne)-1) const;
+ regNumber GetSingleTempReg(regMaskOnlyOne mask = (regMaskOnlyOne)-1);
+ regNumber ExtractTempReg(regMaskOnlyOne mask = (regMaskOnlyOne)-1);
void SetVNsFromNode(GenTree* tree)
{
@@ -4360,7 +4361,7 @@ struct ReturnTypeDesc
regNumber GetABIReturnReg(unsigned idx, CorInfoCallConvExtension callConv) const;
// Get reg mask of ABI return registers
- regMaskTP GetABIReturnRegs(CorInfoCallConvExtension callConv) const;
+ AllRegsMask GetABIReturnRegs(CorInfoCallConvExtension callConv) const;
};
class TailCallSiteInfo
@@ -5169,7 +5170,7 @@ struct GenTreeCall final : public GenTree
#endif // TARGET_XARCH
// Get reg mask of all the valid registers of gtOtherRegs array
- regMaskTP GetOtherRegMask() const;
+ AllRegsMask GetOtherRegMask() const;
GenTreeFlags GetRegSpillFlagByIdx(unsigned idx) const
{
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index 79aae2c3345491..8d8a345ec90638 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -2012,13 +2012,20 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType)
return ins_Copy(dstType);
}
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
if (genIsValidMaskReg(srcReg))
{
+#if defined(TARGET_XARCH)
// mask to int
return INS_kmovq_gpr;
+#elif defined(TARGET_ARM64)
+ unreached();
+ return INS_mov; // TODO-SVE: needs testing
+#else
+ unreached();
+#endif
}
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
// float to int
assert(genIsValidFloatReg(srcReg));
@@ -2255,13 +2262,13 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy
return ins_Store(dstType, aligned);
}
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
if (genIsValidMaskReg(srcReg))
{
// mask to int, treat as mask so it works on 32-bit
return ins_Store(TYP_MASK, aligned);
}
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
// float to int, treat as float to float
assert(genIsValidFloatReg(srcReg));
@@ -2617,7 +2624,7 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla
#error "Unknown TARGET"
#endif
- regSet.verifyRegUsed(reg);
+ regSet.verifyGprRegUsed(reg);
}
/*****************************************************************************/
diff --git a/src/coreclr/jit/jitgcinfo.h b/src/coreclr/jit/jitgcinfo.h
index 02fd49cead9cb3..a04223f0c43c74 100644
--- a/src/coreclr/jit/jitgcinfo.h
+++ b/src/coreclr/jit/jitgcinfo.h
@@ -93,14 +93,16 @@ class GCInfo
void gcResetForBB();
- void gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput = false));
- void gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput = false));
- void gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetGCref(regMaskGpr regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetByref(regMaskGpr regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkGprRegNpt(regNumber reg DEBUGARG(bool forceOutput = false));
+ void gcMarkRegNpt(regNumber reg DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetNpt(regMaskGpr regMask DEBUGARG(bool forceOutput = false));
void gcMarkRegPtrVal(regNumber reg, var_types type);
#ifdef DEBUG
- void gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput = false));
- void gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput = false));
+ void gcDspGCrefSetChanges(regMaskGpr gcRegGCrefSetNew DEBUGARG(bool forceOutput = false));
+ void gcDspByrefSetChanges(regMaskGpr gcRegByrefSetNew DEBUGARG(bool forceOutput = false));
#endif // DEBUG
/*****************************************************************************/
@@ -111,8 +113,8 @@ class GCInfo
// values.
//
- regMaskTP gcRegGCrefSetCur; // current regs holding GCrefs
- regMaskTP gcRegByrefSetCur; // current regs holding Byrefs
+ regMaskGpr gcRegGCrefSetCur; // current regs holding GCrefs
+ regMaskGpr gcRegByrefSetCur; // current regs holding Byrefs
VARSET_TP gcTrkStkPtrLcls; // set of tracked stack ptr lcls (GCref and Byref) - no args
VARSET_TP gcVarPtrSetCur; // currently live part of "gcTrkStkPtrLcls"
@@ -390,9 +392,11 @@ class GCInfo
#endif // JIT32_GCENCODER
#endif // DUMP_GC_TABLES
+#if 0
public:
// This method updates the appropriate reg masks when a variable is moved.
- void gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc);
+ void gcUpdateForRegVarMove(regMaskOnlyOne srcMask, regMaskOnlyOne dstMask, LclVarDsc* varDsc);
+#endif
private:
ReturnKind getReturnKind();
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 774eee3fe3cb20..705c57158d242e 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -630,7 +630,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
}
#ifdef TARGET_ARM
- regMaskTP doubleAlignMask = RBM_NONE;
+ regMaskGpr doubleAlignMask = RBM_NONE;
#endif // TARGET_ARM
// Skip skipArgs arguments from the signature.
@@ -830,7 +830,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
{
break;
}
- regMaskTP regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT);
+ regMaskGpr regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT);
if (cAlign == 2)
{
doubleAlignMask |= regMask;
@@ -1750,7 +1750,9 @@ void Compiler::lvaClassifyParameterABI()
SwiftABIClassifier classifier(cInfo);
lvaClassifyParameterABI(classifier);
- regMaskTP argRegs = RBM_NONE;
+ // There is an assumption that args are never passed in predicate registers
+ // on OSX/arm64.
+ RegBitSet64 argRegs = RBM_NONE;
// The calling convention details computed by the old ABI classifier
// are wrong since it does not handle the Swift ABI for structs
@@ -5801,8 +5803,9 @@ void Compiler::lvaFixVirtualFrameOffsets()
}
#ifdef TARGET_ARM
-bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask)
+bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskGpr preSpillMask)
{
+ assert(IsGprRegMask(preSpillMask));
const LclVarDsc& desc = lvaTable[lclNum];
return desc.lvIsRegArg && (preSpillMask & genRegMask(desc.GetArgReg()));
}
@@ -6020,8 +6023,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
unsigned argLcls = 0;
// Take care of pre spill registers first.
- regMaskTP preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false);
- regMaskTP tempMask = RBM_NONE;
+ regMaskGpr preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false);
+ regMaskGpr tempMask = RBM_NONE;
for (unsigned i = 0, preSpillLclNum = lclNum; i < argSigLen; ++i, ++preSpillLclNum)
{
if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
@@ -6254,7 +6257,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
// On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
// in the prolog, so we have to do SetStackOffset() here
//
- regMaskTP regMask = genRegMask(varDsc->GetArgReg());
+ singleRegMask regMask = genRegMask(varDsc->GetArgReg());
if (codeGen->regSet.rsMaskPreSpillRegArg & regMask)
{
// Signature: void foo(struct_8, int, struct_4)
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index f96a2e6a746a7a..055db0d1e443ba 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -234,13 +234,13 @@ weight_t LinearScan::getWeight(RefPosition* refPos)
// allRegs represents a set of registers that can
// be used to allocate the specified type in any point
// in time (more of a 'bank' of registers).
-regMaskTP LinearScan::allRegs(RegisterType rt)
+regMaskOnlyOne LinearScan::allRegs(RegisterType rt)
{
assert((rt != TYP_UNDEF) && (rt != TYP_STRUCT));
return *availableRegs[rt];
}
-regMaskTP LinearScan::allByteRegs()
+regMaskGpr LinearScan::allByteRegs()
{
#ifdef TARGET_X86
return availableIntRegs & RBM_BYTE_REGS;
@@ -249,7 +249,7 @@ regMaskTP LinearScan::allByteRegs()
#endif
}
-regMaskTP LinearScan::allSIMDRegs()
+regMaskFloat LinearScan::allSIMDRegs()
{
return availableFloatRegs;
}
@@ -262,7 +262,7 @@ regMaskTP LinearScan::allSIMDRegs()
// Return Value:
// Register mask of the SSE/VEX-only SIMD registers
//
-regMaskTP LinearScan::lowSIMDRegs()
+regMaskFloat LinearScan::lowSIMDRegs()
{
#if defined(TARGET_AMD64)
return (availableFloatRegs & RBM_LOWFLOAT);
@@ -278,25 +278,27 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo
if (nextRefPosition == nullptr)
{
nextLocation = MaxLocation;
- fixedRegs &= ~genRegMask(regRecord->regNum);
+ fixedRegs.RemoveRegNumFromMask(regRecord->regNum);
}
else
{
nextLocation = nextRefPosition->nodeLocation;
- fixedRegs |= genRegMask(regRecord->regNum);
+ fixedRegs.AddRegNumInMask(regRecord->regNum);
}
nextFixedRef[regRecord->regNum] = nextLocation;
}
-regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition)
+regMaskOnlyOne LinearScan::getMatchingConstants(regMaskOnlyOne mask,
+ Interval* currentInterval,
+ RefPosition* refPosition)
{
assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType));
- regMaskTP candidates = (mask & m_RegistersWithConstants);
- regMaskTP result = RBM_NONE;
+ regMaskOnlyOne candidates = (mask & m_RegistersWithConstants.GetRegMaskForType(currentInterval->registerType));
+ regMaskOnlyOne result = RBM_NONE;
while (candidates != RBM_NONE)
{
- regNumber regNum = genFirstRegNumFromMask(candidates);
- regMaskTP candidateBit = genRegMask(regNum);
+ regNumber regNum = genFirstRegNumFromMask(candidates MORE_THAN_64_REG_ARG(currentInterval->registerType));
+ singleRegMask candidateBit = genRegMask(regNum);
candidates ^= candidateBit;
RegRecord* physRegRecord = getRegisterRecord(regNum);
@@ -384,30 +386,33 @@ void LinearScan::updateSpillCost(regNumber reg, Interval* interval)
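+// regType - Register type of the registers in the regsBusy mask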
// interval - Interval of Refposition.
// assignedReg - Assigned register for this refposition.
//
-void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition,
- regMaskTP regsBusy,
- regMaskTP* regsToFree,
- regMaskTP* delayRegsToFree DEBUG_ARG(Interval* interval)
+void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition,
+ regMaskOnlyOne regsBusy,
+ AllRegsMask* regsToFree,
+ AllRegsMask* delayRegsToFree,
+ RegisterType regType DEBUG_ARG(Interval* interval)
DEBUG_ARG(regNumber assignedReg))
{
- regsInUseThisLocation |= regsBusy;
+ assert(compiler->IsOnlyOneRegMask(regsBusy));
+
+ regsInUseThisLocation.AddRegMaskForType(regsBusy, regType);
if (refPosition.lastUse)
{
if (refPosition.delayRegFree)
{
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, interval, assignedReg));
- *delayRegsToFree |= regsBusy;
- regsInUseNextLocation |= regsBusy;
+ delayRegsToFree->AddRegMaskForType(regsBusy, regType);
+ regsInUseNextLocation.AddRegMaskForType(regsBusy, regType);
}
else
{
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, interval, assignedReg));
- *regsToFree |= regsBusy;
+ regsToFree->AddRegMaskForType(regsBusy, regType);
}
}
else if (refPosition.delayRegFree)
{
- regsInUseNextLocation |= regsBusy;
+ regsInUseNextLocation.AddRegMaskForType(regsBusy, regType);
}
}
@@ -416,7 +421,7 @@ void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition,
// for use as internal float registers.
//
// Return Value:
-// The set of registers (as a regMaskTP).
+// The set of registers (as a regMaskFloat).
//
// Notes:
// compFloatingPointUsed is only required to be set if it is possible that we
@@ -425,7 +430,7 @@ void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition,
// that it will select a callee-save register. But to be safe, we restrict
// the set of candidates if compFloatingPointUsed is not already set.
//
-regMaskTP LinearScan::internalFloatRegCandidates()
+regMaskFloat LinearScan::internalFloatRegCandidates()
{
needNonIntegerRegisters = true;
@@ -470,12 +475,16 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
// New regMask that has minRegCount registers after intersection.
// Otherwise returns regMaskActual.
//
-regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
- regMaskTP regMaskActual,
- regMaskTP regMaskConstraint,
- unsigned minRegCount)
+regMaskOnlyOne LinearScan::getConstrainedRegMask(RefPosition* refPosition,
+ RegisterType regType,
+ regMaskOnlyOne regMaskActual,
+ regMaskOnlyOne regMaskConstraint,
+ unsigned minRegCount)
{
- regMaskTP newMask = regMaskActual & regMaskConstraint;
+ assert(compiler->IsOnlyOneRegMask(regMaskActual));
+ assert(compiler->IsOnlyOneRegMask(regMaskConstraint));
+
+ regMaskOnlyOne newMask = regMaskActual & regMaskConstraint;
if (genCountBits(newMask) < minRegCount)
{
// Constrained mask does not have minimum required registers needed.
@@ -484,7 +493,8 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
if ((refPosition != nullptr) && !refPosition->RegOptional())
{
- regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation;
+ regMaskOnlyOne busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegMaskForType(regType);
if ((newMask & ~busyRegs) == RBM_NONE)
{
// Constrained mask does not have at least one free register to allocate.
@@ -501,7 +511,9 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
// them based on the current stress options.
//
// Arguments:
-// mask - The current mask of register candidates for a node
+// refPosition - The RefPosition for which the candidate registers are being stressed (may be nullptr)
+// mask - The current mask of register candidates for a node
+// regtype - The register type of the candidates in `mask`
//
// Return Value:
// A possibly-modified mask, based on the value of DOTNET_JitStressRegs.
@@ -510,8 +522,10 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
// This is the method used to implement the stress options that limit
// the set of registers considered for allocation.
//
-regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
+regMaskOnlyOne LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskOnlyOne mask, RegisterType regtype)
{
+ assert(compiler->IsOnlyOneRegMask(mask));
+
#ifdef TARGET_ARM64
if ((refPosition != nullptr) && refPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation))
{
@@ -527,31 +541,60 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
{
// The refPosition could be null, for example when called
// by getTempRegForResolution().
- int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1;
+ int minRegCount = 1;
+ if (refPosition != nullptr)
+ {
+ minRegCount = refPosition->minRegCandidateCount;
+ RegisterType currRegType = refPosition->getRegisterType();
+ assert(regtype == currRegType);
+ }
+
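+ // Select the callee-save and callee-trash masks that match the register type being stressed.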
+ regMaskOnlyOne calleeSaved = RBM_NONE;
+ regMaskOnlyOne calleeTrash = RBM_NONE;
+ if (varTypeUsesIntReg(regtype))
+ {
+ calleeSaved = RBM_INT_CALLEE_SAVED;
+ calleeTrash = RBM_INT_CALLEE_TRASH;
+ }
+ else if (varTypeUsesFloatReg(regtype))
+ {
+ calleeSaved = RBM_FLT_CALLEE_SAVED;
+ calleeTrash = RBM_FLT_CALLEE_TRASH;
+ }
+ else
+ {
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ assert(varTypeUsesMaskReg(regtype));
+ calleeSaved = RBM_MSK_CALLEE_SAVED;
+ calleeTrash = RBM_MSK_CALLEE_TRASH;
+#else
+ unreached();
+#endif
+ }
switch (getStressLimitRegs())
{
case LSRA_LIMIT_CALLEE:
if (!compiler->opts.compDbgEnC)
{
- mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_SAVED, minRegCount);
+ mask = getConstrainedRegMask(refPosition, regtype, mask, calleeSaved, minRegCount);
}
break;
case LSRA_LIMIT_CALLER:
{
- mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_TRASH, minRegCount);
+ mask = getConstrainedRegMask(refPosition, regtype, mask, calleeTrash, minRegCount);
}
break;
case LSRA_LIMIT_SMALL_SET:
if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
{
- mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallIntSet, minRegCount);
+ mask = getConstrainedRegMask(refPosition, regtype, mask, LsraLimitSmallIntSet, minRegCount);
}
else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
{
- mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallFPSet, minRegCount);
+ mask = getConstrainedRegMask(refPosition, regtype, mask, LsraLimitSmallFPSet, minRegCount);
}
break;
@@ -559,7 +602,7 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
case LSRA_LIMIT_UPPER_SIMD_SET:
if ((mask & LsraLimitUpperSimdSet) != RBM_NONE)
{
- mask = getConstrainedRegMask(refPosition, mask, LsraLimitUpperSimdSet, minRegCount);
+ mask = getConstrainedRegMask(refPosition, regtype, mask, LsraLimitUpperSimdSet, minRegCount);
}
break;
#endif
@@ -720,12 +763,14 @@ LinearScan::LinearScan(Compiler* theCompiler)
#if defined(TARGET_AMD64)
rbmAllFloat = compiler->rbmAllFloat;
rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
+ assert(compiler->IsFloatRegMask(rbmAllFloat));
+ assert(compiler->IsFloatRegMask(rbmFltCalleeTrash));
#endif // TARGET_AMD64
#if defined(TARGET_XARCH)
rbmAllMask = compiler->rbmAllMask;
rbmMskCalleeTrash = compiler->rbmMskCalleeTrash;
- memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT);
+ memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskOnlyOne) * TYP_COUNT);
if (!compiler->canUseEvexEncoding())
{
@@ -788,7 +833,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
availableFloatRegs = RBM_ALLFLOAT;
availableDoubleRegs = RBM_ALLDOUBLE;
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
availableMaskRegs = RBM_ALLMASK;
#endif
@@ -800,7 +845,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
availableIntRegs &= ~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED;
availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED;
availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED;
-#if defined(TARGET_XARCH)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
availableMaskRegs &= ~RBM_MSK_CALLEE_SAVED;
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
}
@@ -2728,7 +2773,7 @@ void LinearScan::setFrameType()
// If we are using FPBASE as the frame register, we cannot also use it for
// a local var.
- regMaskTP removeMask = RBM_NONE;
+ regMaskGpr removeMask = RBM_NONE;
if (frameType == FT_EBP_FRAME)
{
removeMask |= RBM_FPBASE;
@@ -2810,8 +2855,8 @@ bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc)
RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
{
assert(refPosition->getInterval() == currentInterval);
- RegisterType regType = currentInterval->registerType;
- regMaskTP candidates = refPosition->registerAssignment;
+ RegisterType regType = currentInterval->registerType;
+ regMaskOnlyOne candidates = refPosition->registerAssignment;
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// The LoongArch64's ABI which the float args maybe passed by integer register
// when no float register left but free integer register.
@@ -2927,16 +2972,16 @@ regNumber LinearScan::allocateRegMinimal(Interval* currentInterva
RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
{
assert(!enregisterLocalVars);
- regNumber foundReg;
- regMaskTP foundRegBit;
- RegRecord* availablePhysRegRecord;
+ regNumber foundReg;
+ singleRegMask foundRegBit;
+ RegRecord* availablePhysRegRecord;
foundRegBit = regSelector->selectMinimal(currentInterval, refPosition DEBUG_ARG(registerScore));
if (foundRegBit == RBM_NONE)
{
return REG_NA;
}
- foundReg = genRegNumFromMask(foundRegBit);
+ foundReg = genRegNumFromMask(foundRegBit MORE_THAN_64_REG_ARG(currentInterval->registerType));
availablePhysRegRecord = getRegisterRecord(foundReg);
Interval* assignedInterval = availablePhysRegRecord->assignedInterval;
if ((assignedInterval != currentInterval) &&
@@ -2989,14 +3034,14 @@ template
regNumber LinearScan::allocateReg(Interval* currentInterval,
RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
{
- regMaskTP foundRegBit =
+ singleRegMask foundRegBit =
regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore));
if (foundRegBit == RBM_NONE)
{
return REG_NA;
}
- regNumber foundReg = genRegNumFromMask(foundRegBit);
+ regNumber foundReg = genRegNumFromMask(foundRegBit MORE_THAN_64_REG_ARG(currentInterval->registerType));
RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg);
Interval* assignedInterval = availablePhysRegRecord->assignedInterval;
if ((assignedInterval != currentInterval) &&
@@ -3225,8 +3270,8 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL
//
bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord)
{
- regMaskTP candidateBit = genRegMask(physRegRecord->regNum);
- LsraLocation refLocation = refPosition->nodeLocation;
+ singleRegMask candidateBit = genRegMask(physRegRecord->regNum);
+ LsraLocation refLocation = refPosition->nodeLocation;
// We shouldn't be calling this if we haven't already determined that the register is not
// busy until the next kill.
assert(!isRegBusy(physRegRecord->regNum, current->registerType));
@@ -3443,8 +3488,9 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
// Assign the given physical register interval to the given interval
void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
{
- regMaskTP assignedRegMask = genRegMask(regRec->regNum);
- compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true));
+ regNumber reg = regRec->regNum;
+
+ compiler->codeGen->regSet.rsSetRegModified(reg DEBUGARG(true));
interval->assignedReg = regRec;
checkAndAssignInterval(regRec, interval);
@@ -3454,7 +3500,7 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
if (interval->isLocalVar)
{
// Prefer this register for future references
- interval->updateRegisterPreferences(assignedRegMask);
+ interval->updateRegisterPreferences(genRegMask(reg));
}
}
@@ -3965,11 +4011,13 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition)
{
// For each physical register that can hold a GC type,
// if it is occupied by an interval of a GC type, spill that interval.
- regMaskTP candidateRegs = killRefPosition->registerAssignment;
+ regMaskGpr candidateRegs = killRefPosition->registerAssignment;
+ assert(compiler->IsGprRegMask(candidateRegs));
+
INDEBUG(bool killedRegs = false);
while (candidateRegs != RBM_NONE)
{
- regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs);
+ regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs MORE_THAN_64_REG_ARG(TYP_INT));
RegRecord* regRecord = getRegisterRecord(nextReg);
Interval* assignedInterval = regRecord->assignedInterval;
@@ -4052,18 +4100,25 @@ void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
// The new register to use.
//
#ifdef DEBUG
-regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
+regNumber LinearScan::rotateBlockStartLocation(Interval* interval,
+ regNumber targetReg,
+ CONSTREF_AllRegsMask availableRegs)
{
if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
{
// If we're rotating the register locations at block boundaries, try to use
// the next higher register number of the appropriate register type.
- regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
- regNumber firstReg = REG_NA;
- regNumber newReg = REG_NA;
+
+ regMaskOnlyOne allRegsMask = allRegs(interval->registerType);
+ RegisterType regType = interval->registerType;
+ regMaskOnlyOne candidateRegs = allRegsMask & availableRegs.GetRegMaskForType(regType);
+
+ regNumber firstReg = REG_NA;
+ regNumber newReg = REG_NA;
while (candidateRegs != RBM_NONE)
{
- regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs);
+ regNumber nextReg =
+ genFirstRegNumFromMaskAndToggle(candidateRegs MORE_THAN_64_REG_ARG(interval->registerType));
if (nextReg > targetReg)
{
newReg = nextReg;
@@ -4303,6 +4358,50 @@ void LinearScan::resetAllRegistersState()
}
}
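+//------------------------------------------------------------------------
+// updateDeadCandidatesAtBlockStart: Make each register in `deadRegMask` available at the
+//    start of the current block, unassigning or deactivating any interval it still holds
+//    and mapping unassigned local vars back to REG_STK in `inVarToRegMap`.
+//
+// Arguments:
+//    deadRegMask   - Mask of registers that do not hold live variables at the block start
+//    inVarToRegMap - Incoming variable-to-register map of the block being entered
+//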
+#ifdef HAS_MORE_THAN_64_REGISTERS
+void LinearScan::updateDeadCandidatesAtBlockStart(REF_AllRegsMask deadRegMask, VarToRegMap inVarToRegMap)
+#else
+void LinearScan::updateDeadCandidatesAtBlockStart(RegBitSet64 deadRegMask, VarToRegMap inVarToRegMap)
+#endif // HAS_MORE_THAN_64_REGISTERS
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ while (!deadRegMask.IsEmpty())
+#else
+ while (deadRegMask != RBM_NONE)
+#endif
+ {
+ regNumber reg = genFirstRegNumFromMaskAndToggle(deadRegMask);
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+
+ makeRegAvailable(reg, physRegRecord->registerType);
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector());
+
+ if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
+ {
+ assignedInterval->isActive = false;
+ if (assignedInterval->getNextRefPosition() == nullptr)
+ {
+ unassignPhysReg(physRegRecord, nullptr);
+ }
+ if (!assignedInterval->IsUpperVector())
+ {
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ }
+ else
+ {
+ // This interval may still be active, but was in another register in an
+ // intervening block.
+ clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType));
+ }
+ }
+ }
+}
+
//------------------------------------------------------------------------
// processBlockStartLocations: Update var locations on entry to 'currentBlock' and clear constant
// registers.
@@ -4362,9 +4461,9 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
}
// If we are rotating register assignments at block boundaries, we want to make the
// inactive registers available for the rotation.
- regMaskTP inactiveRegs = RBM_NONE;
+ AllRegsMask inactiveRegs;
#endif // DEBUG
- regMaskTP liveRegs = RBM_NONE;
+ AllRegsMask liveRegs;
VarSetOps::Iter iter(compiler, currentLiveVars);
unsigned varIndex = 0;
while (iter.NextElem(&varIndex))
@@ -4440,7 +4539,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
// Case #1 above.
assert(getVarReg(predVarToRegMap, varIndex) == targetReg ||
getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
- }
+ // Keep the register assignment - if another var has it, it will get unassigned.
+ }
else if (!nextRefPosition->copyReg)
{
// case #2 above.
@@ -4459,7 +4558,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
assert(targetReg != REG_STK);
assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
interval->assignedReg->assignedInterval == interval);
- liveRegs |= getRegMask(targetReg, interval->registerType);
+ liveRegs.AddRegNum(targetReg, interval->registerType);
continue;
}
}
@@ -4489,8 +4588,8 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
// likely to match other assignments this way.
targetReg = interval->physReg;
interval->isActive = true;
- liveRegs |= getRegMask(targetReg, interval->registerType);
- INDEBUG(inactiveRegs |= genRegMask(targetReg));
+ liveRegs.AddRegNum(targetReg, interval->registerType);
+ INDEBUG(inactiveRegs |= targetReg);
setVarReg(inVarToRegMap, varIndex, targetReg);
}
else
@@ -4501,7 +4600,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
if (targetReg != REG_STK)
{
RegRecord* targetRegRecord = getRegisterRecord(targetReg);
- liveRegs |= getRegMask(targetReg, interval->registerType);
+ liveRegs.AddRegNum(targetReg, interval->registerType);
if (!allocationPassComplete)
{
updateNextIntervalRef(targetReg, interval);
@@ -4538,7 +4637,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(targetRegRecord);
// Use TYP_FLOAT to get the regmask of just the half reg.
- liveRegs &= ~getRegMask(anotherHalfRegRec->regNum, TYP_FLOAT);
+ liveRegs.RemoveRegNum(anotherHalfRegRec->regNum, TYP_FLOAT);
}
#endif // TARGET_ARM
@@ -4565,7 +4664,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg))
{
RegRecord* physRegRecord = getRegisterRecord(reg);
- if ((liveRegs & genRegMask(reg)) == 0)
+ if (!liveRegs.IsRegNumInMask(reg))
{
makeRegAvailable(reg, physRegRecord->registerType);
Interval* assignedInterval = physRegRecord->assignedInterval;
@@ -4622,43 +4721,17 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
}
}
#else
- regMaskTP deadCandidates = ~liveRegs;
+ AllRegsMask deadCandidates = ~liveRegs;
// Only focus on actual registers present
deadCandidates &= actualRegistersMask;
- while (deadCandidates != RBM_NONE)
- {
- regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates);
- RegRecord* physRegRecord = getRegisterRecord(reg);
-
- makeRegAvailable(reg, physRegRecord->registerType);
- Interval* assignedInterval = physRegRecord->assignedInterval;
-
- if (assignedInterval != nullptr)
- {
- assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector());
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ updateDeadCandidatesAtBlockStart(deadCandidates, inVarToRegMap);
+#else
+ updateDeadCandidatesAtBlockStart(deadCandidates.GetAllRegistersMask(), inVarToRegMap);
+#endif // HAS_MORE_THAN_64_REGISTERS
- if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
- {
- assignedInterval->isActive = false;
- if (assignedInterval->getNextRefPosition() == nullptr)
- {
- unassignPhysReg(physRegRecord, nullptr);
- }
- if (!assignedInterval->IsUpperVector())
- {
- inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
- }
- }
- else
- {
- // This interval may still be active, but was in another register in an
- // intervening block.
- clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType));
- }
- }
- }
#endif // TARGET_ARM
}
@@ -4754,6 +4827,25 @@ void LinearScan::makeRegisterInactive(RegRecord* physRegRecord)
}
}
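+//------------------------------------------------------------------------
+// inActivateRegisters: Clear the spill cost of every register in `inactiveMask` and make
+//    its RegRecord inactive.
+//
+// Arguments:
+//    inactiveMask - Mask of registers to make inactive
+//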
+#ifdef HAS_MORE_THAN_64_REGISTERS
+void LinearScan::inActivateRegisters(REF_AllRegsMask inactiveMask)
+#else
+void LinearScan::inActivateRegisters(RegBitSet64 inactiveMask)
+#endif // HAS_MORE_THAN_64_REGISTERS
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ while (!inactiveMask.IsEmpty())
+#else
+ while (inactiveMask != RBM_NONE)
+#endif // HAS_MORE_THAN_64_REGISTERS
+ {
+ regNumber nextReg = genFirstRegNumFromMaskAndToggle(inactiveMask);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ clearSpillCost(regRecord->regNum, regRecord->registerType);
+ makeRegisterInactive(regRecord);
+ }
+}
+
//------------------------------------------------------------------------
// LinearScan::freeRegister: Make a register available for use
//
@@ -4814,25 +4906,42 @@ void LinearScan::freeRegister(RegRecord* physRegRecord)
// Arguments:
// regsToFree - the mask of registers to free
//
-void LinearScan::freeRegisters(regMaskTP regsToFree)
+void LinearScan::freeRegisters(REF_AllRegsMask regsToFree)
{
- if (regsToFree == RBM_NONE)
+ if (regsToFree.IsEmpty())
{
return;
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
makeRegsAvailable(regsToFree);
- while (regsToFree != RBM_NONE)
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ freeRegisterMask(regsToFree);
+#else
+ freeRegisterMask(regsToFree.GetAllRegistersMask());
+#endif // HAS_MORE_THAN_64_REGISTERS
+}
+
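+//------------------------------------------------------------------------
+// freeRegisterMask: Free every register present in `freeMask`.
+//
+// Arguments:
+//    freeMask - Mask of registers to free
+//
+// Notes:
+//    On ARM, if a freed register holds a TYP_DOUBLE interval, the second half of the
+//    double register is also dropped from `freeMask`.
+//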
+#ifdef HAS_MORE_THAN_64_REGISTERS
+void LinearScan::freeRegisterMask(REF_AllRegsMask freeMask)
+#else
+void LinearScan::freeRegisterMask(RegBitSet64 freeMask)
+#endif // HAS_MORE_THAN_64_REGISTERS
+{
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ while (!freeMask.IsEmpty())
+#else
+ while (freeMask != RBM_NONE)
+#endif // HAS_MORE_THAN_64_REGISTERS
{
- regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree);
+ regNumber nextReg = genFirstRegNumFromMaskAndToggle(freeMask);
RegRecord* regRecord = getRegisterRecord(nextReg);
#ifdef TARGET_ARM
if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE))
{
assert(genIsValidDoubleReg(nextReg));
- regsToFree ^= genRegMask(regNumber(nextReg + 1));
+ freeMask ^= genRegMask(regNumber(nextReg + 1));
}
#endif
freeRegister(regRecord);
@@ -4879,7 +4988,7 @@ void LinearScan::allocateRegistersMinimal()
"--------------------\n");
// Start with a small set of commonly used registers, so that we don't keep having to print a new title.
// Include all the arg regs, as they may already have values assigned to them.
- registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet | RBM_ARG_REGS;
+ registersToDump = AllRegsMask(LsraLimitSmallIntSet | RBM_ARG_REGS, LsraLimitSmallFPSet, RBM_NONE);
dumpRegRecordHeader();
// Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop.
printf(indentFormat, "");
@@ -4888,14 +4997,14 @@ void LinearScan::allocateRegistersMinimal()
BasicBlock* currentBlock = nullptr;
- LsraLocation prevLocation = MinLocation;
- regMaskTP regsToFree = RBM_NONE;
- regMaskTP delayRegsToFree = RBM_NONE;
- regMaskTP regsToMakeInactive = RBM_NONE;
- regMaskTP delayRegsToMakeInactive = RBM_NONE;
- regMaskTP copyRegsToFree = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
- regsInUseNextLocation = RBM_NONE;
+ LsraLocation prevLocation = MinLocation;
+ AllRegsMask regsToFree;
+ AllRegsMask delayRegsToFree;
+ AllRegsMask regsToMakeInactive;
+ AllRegsMask delayRegsToMakeInactive;
+ AllRegsMask copyRegsToFree;
+ regsInUseThisLocation.Clear();
+ regsInUseNextLocation.Clear();
// This is the most recent RefPosition for which a register was allocated
// - currently only used for DEBUG but maintained in non-debug, for clarity of code
@@ -4911,22 +5020,20 @@ void LinearScan::allocateRegistersMinimal()
// TODO: Can we combine this with the freeing of registers below? It might
// mess with the dump, since this was previously being done before the call below
// to dumpRegRecords.
- regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive);
- while (tempRegsToMakeInactive != RBM_NONE)
- {
- regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive);
- RegRecord* regRecord = getRegisterRecord(nextReg);
- clearSpillCost(regRecord->regNum, regRecord->registerType);
- makeRegisterInactive(regRecord);
- }
+ AllRegsMask tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ inActivateRegisters(tempRegsToMakeInactive);
+#else
+ inActivateRegisters(tempRegsToMakeInactive.GetAllRegistersMask());
+#endif // HAS_MORE_THAN_64_REGISTERS
+
if (currentRefPosition.nodeLocation > prevLocation)
{
makeRegsAvailable(regsToMakeInactive);
// TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want
// to mark them as free yet.
- regsToMakeInactive |= delayRegsToMakeInactive;
- regsToMakeInactive = delayRegsToMakeInactive;
- delayRegsToMakeInactive = RBM_NONE;
+ regsToMakeInactive = delayRegsToMakeInactive;
+ delayRegsToMakeInactive.Clear();
}
#ifdef DEBUG
@@ -4983,24 +5090,24 @@ void LinearScan::allocateRegistersMinimal()
{
// CopyRegs are simply made available - we don't want to make the associated interval inactive.
makeRegsAvailable(copyRegsToFree);
- copyRegsToFree = RBM_NONE;
+ copyRegsToFree.Clear();
regsInUseThisLocation = regsInUseNextLocation;
- regsInUseNextLocation = RBM_NONE;
- if ((regsToFree | delayRegsToFree) != RBM_NONE)
+ regsInUseNextLocation.Clear();
+ if (!((regsToFree | delayRegsToFree).IsEmpty()))
{
freeRegisters(regsToFree);
- if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE))
+ if ((currentLocation > (prevLocation + 1)) && (!delayRegsToFree.IsEmpty()))
{
// We should never see a delayReg that is delayed until a Location that has no RefPosition
// (that would be the RefPosition that it was supposed to interfere with).
assert(!"Found a delayRegFree associated with Location with no reference");
// However, to be cautious for the Release build case, we will free them.
freeRegisters(delayRegsToFree);
- delayRegsToFree = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
+ delayRegsToFree.Clear();
+ regsInUseThisLocation.Clear();
}
- regsToFree = delayRegsToFree;
- delayRegsToFree = RBM_NONE;
+ regsToFree = delayRegsToFree;
+ delayRegsToFree.Clear();
#ifdef DEBUG
verifyFreeRegisters(regsToFree);
@@ -5042,11 +5149,11 @@ void LinearScan::allocateRegistersMinimal()
{
// Free any delayed regs (now in regsToFree) before processing the block boundary
freeRegisters(regsToFree);
- regsToFree = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
- regsInUseNextLocation = RBM_NONE;
- handledBlockEnd = true;
- curBBStartLocation = currentRefPosition.nodeLocation;
+ regsToFree.Clear();
+ regsInUseThisLocation.Clear();
+ regsInUseNextLocation.Clear();
+ handledBlockEnd = true;
+ curBBStartLocation = currentRefPosition.nodeLocation;
if (currentBlock == nullptr)
{
currentBlock = startBlockSequence();
@@ -5100,13 +5207,14 @@ void LinearScan::allocateRegistersMinimal()
}
#endif // TARGET_ARM
}
- regsInUseThisLocation |= currentRefPosition.registerAssignment;
+ regsInUseThisLocation.AddRegMaskForType(currentRefPosition.registerAssignment, regRecord->registerType);
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg()));
#ifdef SWIFT_SUPPORT
if (currentRefPosition.delayRegFree)
{
- regsInUseNextLocation |= currentRefPosition.registerAssignment;
+ regsInUseNextLocation.AddRegMaskForType(currentRefPosition.registerAssignment,
+ regRecord->registerType);
}
#endif // SWIFT_SUPPORT
}
@@ -5168,8 +5276,8 @@ void LinearScan::allocateRegistersMinimal()
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
}
- regMaskTP assignedRegBit = RBM_NONE;
- bool isInRegister = false;
+ singleRegMask assignedRegBit = RBM_NONE;
+ bool isInRegister = false;
if (assignedRegister != REG_NA)
{
isInRegister = true;
@@ -5233,9 +5341,9 @@ void LinearScan::allocateRegistersMinimal()
{
regNumber copyReg = assignCopyRegMinimal(¤tRefPosition);
- lastAllocatedRefPosition = ¤tRefPosition;
- regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType);
- regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
+ lastAllocatedRefPosition = ¤tRefPosition;
+ regMaskOnlyOne copyRegMask = getRegMask(copyReg, currentInterval->registerType);
+ regMaskOnlyOne assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
// For consecutive register, although it shouldn't matter what the assigned register was,
// because we have just assigned it `copyReg` and that's the one in-use, and not the
@@ -5243,10 +5351,12 @@ void LinearScan::allocateRegistersMinimal()
// happened to be restored in assignedReg, we would need assignedReg to stay alive because
// we will copy the entire vector value from it to the `copyReg`.
updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree,
- &delayRegsToFree DEBUG_ARG(currentInterval) DEBUG_ARG(assignedRegister));
+ &delayRegsToFree,
+ currentInterval->registerType DEBUG_ARG(currentInterval)
+ DEBUG_ARG(assignedRegister));
if (!currentRefPosition.lastUse)
{
- copyRegsToFree |= copyRegMask;
+ copyRegsToFree.AddRegNum(copyReg, currentInterval->registerType);
}
// For tree temp (non-localVar) interval, we will need an explicit move.
@@ -5261,7 +5371,7 @@ void LinearScan::allocateRegistersMinimal()
else
{
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
- regsToFree |= getRegMask(assignedRegister, currentInterval->registerType);
+ regsToFree.AddRegNum(assignedRegister, currentInterval->registerType);
// We want a new register, but we don't want this to be considered a spill.
assignedRegister = REG_NA;
if (physRegRecord->assignedInterval == currentInterval)
@@ -5358,17 +5468,19 @@ void LinearScan::allocateRegistersMinimal()
// If we allocated a register, record it
if (assignedRegister != REG_NA)
{
- assignedRegBit = genRegMask(assignedRegister);
- regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType);
- regsInUseThisLocation |= regMask;
+ assignedRegBit = genRegMask(assignedRegister);
+ AllRegsMask assignedRegMask;
+ assignedRegMask.AddRegNum(assignedRegister, currentInterval->registerType);
+
+ regsInUseThisLocation |= assignedRegMask;
if (currentRefPosition.delayRegFree)
{
- regsInUseNextLocation |= regMask;
+ regsInUseNextLocation |= assignedRegMask;
}
currentRefPosition.registerAssignment = assignedRegBit;
currentInterval->physReg = assignedRegister;
- regsToFree &= ~regMask; // we'll set it again later if it's dead
+ regsToFree &= ~assignedRegMask; // we'll set it again later if it's dead
// If this interval is dead, free the register.
// The interval could be dead if this is a user variable, or if the
@@ -5389,11 +5501,11 @@ void LinearScan::allocateRegistersMinimal()
{
if (currentRefPosition.delayRegFree)
{
- delayRegsToMakeInactive |= regMask;
+ delayRegsToMakeInactive |= assignedRegMask;
}
else
{
- regsToMakeInactive |= regMask;
+ regsToMakeInactive |= assignedRegMask;
}
// TODO-Cleanup: this makes things consistent with previous, and will enable preferences
// to be propagated, but it seems less than ideal.
@@ -5412,13 +5524,13 @@ void LinearScan::allocateRegistersMinimal()
{
if (currentRefPosition.delayRegFree)
{
- delayRegsToFree |= regMask;
+ delayRegsToFree |= assignedRegMask;
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
}
else
{
- regsToFree |= regMask;
+ regsToFree |= assignedRegMask;
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
}
@@ -5546,7 +5658,7 @@ void LinearScan::allocateRegisters()
updateNextIntervalRef(reg, interval);
updateSpillCost(reg, interval);
setRegInUse(reg, interval->registerType);
- INDEBUG(registersToDump |= getRegMask(reg, interval->registerType));
+ INDEBUG(registersToDump.AddRegNum(reg, interval->registerType));
}
}
else
@@ -5566,7 +5678,7 @@ void LinearScan::allocateRegisters()
"--------------------\n");
// Start with a small set of commonly used registers, so that we don't keep having to print a new title.
// Include all the arg regs, as they may already have values assigned to them.
- registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet | RBM_ARG_REGS;
+ registersToDump = AllRegsMask(LsraLimitSmallIntSet | RBM_ARG_REGS, LsraLimitSmallFPSet, RBM_NONE);
dumpRegRecordHeader();
// Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop.
printf(indentFormat, "");
@@ -5575,14 +5687,14 @@ void LinearScan::allocateRegisters()
BasicBlock* currentBlock = nullptr;
- LsraLocation prevLocation = MinLocation;
- regMaskTP regsToFree = RBM_NONE;
- regMaskTP delayRegsToFree = RBM_NONE;
- regMaskTP regsToMakeInactive = RBM_NONE;
- regMaskTP delayRegsToMakeInactive = RBM_NONE;
- regMaskTP copyRegsToFree = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
- regsInUseNextLocation = RBM_NONE;
+ LsraLocation prevLocation = MinLocation;
+ AllRegsMask regsToFree;
+ AllRegsMask delayRegsToFree;
+ AllRegsMask regsToMakeInactive;
+ AllRegsMask delayRegsToMakeInactive;
+ AllRegsMask copyRegsToFree;
+ regsInUseThisLocation.Clear();
+ regsInUseNextLocation.Clear();
// This is the most recent RefPosition for which a register was allocated
// - currently only used for DEBUG but maintained in non-debug, for clarity of code
@@ -5598,22 +5710,20 @@ void LinearScan::allocateRegisters()
// TODO: Can we combine this with the freeing of registers below? It might
// mess with the dump, since this was previously being done before the call below
// to dumpRegRecords.
- regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive);
- while (tempRegsToMakeInactive != RBM_NONE)
- {
- regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive);
- RegRecord* regRecord = getRegisterRecord(nextReg);
- clearSpillCost(regRecord->regNum, regRecord->registerType);
- makeRegisterInactive(regRecord);
- }
+ AllRegsMask tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ inActivateRegisters(tempRegsToMakeInactive);
+#else
+ inActivateRegisters(tempRegsToMakeInactive.GetAllRegistersMask());
+#endif // HAS_MORE_THAN_64_REGISTERS
+
if (currentRefPosition.nodeLocation > prevLocation)
{
makeRegsAvailable(regsToMakeInactive);
// TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want
// to mark them as free yet.
- regsToMakeInactive |= delayRegsToMakeInactive;
- regsToMakeInactive = delayRegsToMakeInactive;
- delayRegsToMakeInactive = RBM_NONE;
+ regsToMakeInactive = delayRegsToMakeInactive;
+ delayRegsToMakeInactive.Clear();
}
#ifdef DEBUG
@@ -5668,30 +5778,30 @@ void LinearScan::allocateRegisters()
{
// CopyRegs are simply made available - we don't want to make the associated interval inactive.
makeRegsAvailable(copyRegsToFree);
- copyRegsToFree = RBM_NONE;
+ copyRegsToFree.Clear();
regsInUseThisLocation = regsInUseNextLocation;
- regsInUseNextLocation = RBM_NONE;
+ regsInUseNextLocation.Clear();
#ifdef TARGET_ARM64
if (hasConsecutiveRegister)
{
consecutiveRegsInUseThisLocation = RBM_NONE;
}
#endif
- if ((regsToFree | delayRegsToFree) != RBM_NONE)
+ if (!((regsToFree | delayRegsToFree).IsEmpty()))
{
freeRegisters(regsToFree);
- if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE))
+ if ((currentLocation > (prevLocation + 1)) && !delayRegsToFree.IsEmpty())
{
// We should never see a delayReg that is delayed until a Location that has no RefPosition
// (that would be the RefPosition that it was supposed to interfere with).
assert(!"Found a delayRegFree associated with Location with no reference");
// However, to be cautious for the Release build case, we will free them.
freeRegisters(delayRegsToFree);
- delayRegsToFree = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
+ delayRegsToFree.Clear();
+ regsInUseThisLocation.Clear();
}
- regsToFree = delayRegsToFree;
- delayRegsToFree = RBM_NONE;
+ regsToFree = delayRegsToFree;
+ delayRegsToFree.Clear();
#ifdef DEBUG
verifyFreeRegisters(regsToFree);
#endif
@@ -5750,11 +5860,11 @@ void LinearScan::allocateRegisters()
{
// Free any delayed regs (now in regsToFree) before processing the block boundary
freeRegisters(regsToFree);
- regsToFree = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
- regsInUseNextLocation = RBM_NONE;
- handledBlockEnd = true;
- curBBStartLocation = currentRefPosition.nodeLocation;
+ regsToFree.Clear();
+ regsInUseThisLocation.Clear();
+ regsInUseNextLocation.Clear();
+ handledBlockEnd = true;
+ curBBStartLocation = currentRefPosition.nodeLocation;
if (currentBlock == nullptr)
{
currentBlock = startBlockSequence();
@@ -5815,13 +5925,14 @@ void LinearScan::allocateRegisters()
}
#endif // TARGET_ARM
}
- regsInUseThisLocation |= currentRefPosition.registerAssignment;
+ regsInUseThisLocation.AddRegMaskForType(currentRefPosition.registerAssignment, regRecord->registerType);
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg()));
#ifdef SWIFT_SUPPORT
if (currentRefPosition.delayRegFree)
{
- regsInUseNextLocation |= currentRefPosition.registerAssignment;
+ regsInUseNextLocation.AddRegMaskForType(currentRefPosition.registerAssignment,
+ regRecord->registerType);
}
#endif // SWIFT_SUPPORT
}
@@ -5969,7 +6080,7 @@ void LinearScan::allocateRegisters()
updateSpillCost(assignedRegister, currentInterval);
}
- regsToFree |= getRegMask(assignedRegister, currentInterval->registerType);
+ regsToFree.AddRegNum(assignedRegister, currentInterval->registerType);
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, nullptr, assignedRegister));
currentRefPosition.registerAssignment = RBM_NONE;
@@ -6105,8 +6216,8 @@ void LinearScan::allocateRegisters()
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
}
- regMaskTP assignedRegBit = RBM_NONE;
- bool isInRegister = false;
+ singleRegMask assignedRegBit = RBM_NONE;
+ bool isInRegister = false;
if (assignedRegister != REG_NA)
{
isInRegister = true;
@@ -6152,9 +6263,9 @@ void LinearScan::allocateRegisters()
// it might be beneficial to keep it in this reg for PART of the lifetime
if (currentInterval->isLocalVar)
{
- regMaskTP preferences = currentInterval->registerPreferences;
- bool keepAssignment = true;
- bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
+ regMaskOnlyOne preferences = currentInterval->registerPreferences;
+ bool keepAssignment = true;
+ bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
// Will the assigned register cover the lifetime? If not, does it at least
// meet the preferences for the next RefPosition?
@@ -6260,9 +6371,10 @@ void LinearScan::allocateRegisters()
if (copyReg != assignedRegister)
{
- lastAllocatedRefPosition = ¤tRefPosition;
- regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType);
- regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
+ lastAllocatedRefPosition = ¤tRefPosition;
+ regMaskOnlyOne copyRegMask = getRegMask(copyReg, currentInterval->registerType);
+ regMaskOnlyOne assignedRegMask =
+ getRegMask(assignedRegister, currentInterval->registerType);
if ((consecutiveRegsInUseThisLocation & assignedRegMask) != RBM_NONE)
{
@@ -6281,11 +6393,12 @@ void LinearScan::allocateRegisters()
// we will copy the entire vector value from it to the `copyReg`.
updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree,
- &delayRegsToFree DEBUG_ARG(currentInterval)
+ &delayRegsToFree,
+ currentInterval->registerType DEBUG_ARG(currentInterval)
DEBUG_ARG(assignedRegister));
if (!currentRefPosition.lastUse)
{
- copyRegsToFree |= copyRegMask;
+ copyRegsToFree.AddRegNum(copyReg, currentInterval->registerType);
}
// If this is a tree temp (non-localVar) interval, we will need an explicit move.
@@ -6358,9 +6471,9 @@ void LinearScan::allocateRegisters()
copyReg = assignCopyReg(¤tRefPosition);
}
- lastAllocatedRefPosition = ¤tRefPosition;
- regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType);
- regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
+ lastAllocatedRefPosition = ¤tRefPosition;
+ regMaskOnlyOne copyRegMask = getRegMask(copyReg, currentInterval->registerType);
+ regMaskOnlyOne assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
#ifdef TARGET_ARM64
if (hasConsecutiveRegister && currentRefPosition.needsConsecutive)
@@ -6390,10 +6503,12 @@ void LinearScan::allocateRegisters()
// happened to be restored in assignedReg, we would need assignedReg to stay alive because
// we will copy the entire vector value from it to the `copyReg`.
updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree,
- &delayRegsToFree DEBUG_ARG(currentInterval) DEBUG_ARG(assignedRegister));
+ &delayRegsToFree,
+ currentInterval->registerType DEBUG_ARG(currentInterval)
+ DEBUG_ARG(assignedRegister));
if (!currentRefPosition.lastUse)
{
- copyRegsToFree |= copyRegMask;
+ copyRegsToFree.AddRegNum(copyReg, currentInterval->registerType);
}
// If this is a tree temp (non-localVar) interval, we will need an explicit move.
@@ -6414,7 +6529,7 @@ void LinearScan::allocateRegisters()
else
{
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
- regsToFree |= getRegMask(assignedRegister, currentInterval->registerType);
+ regsToFree.AddRegNum(assignedRegister, currentInterval->registerType);
// We want a new register, but we don't want this to be considered a spill.
assignedRegister = REG_NA;
if (physRegRecord->assignedInterval == currentInterval)
@@ -6581,17 +6696,19 @@ void LinearScan::allocateRegisters()
// If we allocated a register, record it
if (assignedRegister != REG_NA)
{
- assignedRegBit = genRegMask(assignedRegister);
- regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType);
- regsInUseThisLocation |= regMask;
+ assignedRegBit = genRegMask(assignedRegister);
+ AllRegsMask assignedRegMask;
+ assignedRegMask.AddRegNum(assignedRegister, currentInterval->registerType);
+
+ regsInUseThisLocation |= assignedRegMask;
if (currentRefPosition.delayRegFree)
{
- regsInUseNextLocation |= regMask;
+ regsInUseNextLocation |= assignedRegMask;
}
currentRefPosition.registerAssignment = assignedRegBit;
currentInterval->physReg = assignedRegister;
- regsToFree &= ~regMask; // we'll set it again later if it's dead
+ regsToFree &= ~assignedRegMask; // we'll set it again later if it's dead
// If this interval is dead, free the register.
// The interval could be dead if this is a user variable, or if the
@@ -6628,11 +6745,11 @@ void LinearScan::allocateRegisters()
{
if (currentRefPosition.delayRegFree)
{
- delayRegsToMakeInactive |= regMask;
+ delayRegsToMakeInactive |= assignedRegMask;
}
else
{
- regsToMakeInactive |= regMask;
+ regsToMakeInactive |= assignedRegMask;
}
// TODO-Cleanup: this makes things consistent with previous, and will enable preferences
// to be propagated, but it seems less than ideal.
@@ -6651,13 +6768,13 @@ void LinearScan::allocateRegisters()
{
if (currentRefPosition.delayRegFree)
{
- delayRegsToFree |= regMask;
+ delayRegsToFree |= assignedRegMask;
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
}
else
{
- regsToFree |= regMask;
+ regsToFree |= assignedRegMask;
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
}
@@ -8020,7 +8137,8 @@ void LinearScan::resolveRegisters()
{
// If the localVar is in a register, it must be in a register that is not trashed by
// the current node (otherwise it would have already been spilled).
- assert((genRegMask(localVarInterval->physReg) & getKillSetForNode(treeNode)) == RBM_NONE);
+ assert((genRegMask(localVarInterval->physReg) &
+ getKillSetForNode(treeNode).GetRegMaskForType(interval->registerType)) == RBM_NONE);
// If we have allocated a register to spill it to, we will use that; otherwise, we will
// spill it to the stack. We can use as a temp register any non-arg caller-save register.
currentRefPosition->referent->recentRefPosition = currentRefPosition;
@@ -8276,10 +8394,11 @@ void LinearScan::resolveRegisters()
if (varDsc->lvIsParam)
{
- regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
- regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
- ? REG_STK
- : genRegNumFromMask(initialRegMask);
+ regMaskOnlyOne initialRegMask = interval->firstRefPosition->registerAssignment;
+ regNumber initialReg =
+ (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
+ ? REG_STK
+ : genRegNumFromMask(initialRegMask MORE_THAN_64_REG_ARG(interval->registerType));
#ifdef TARGET_ARM
if (varTypeIsMultiReg(varDsc))
@@ -8365,7 +8484,7 @@ void LinearScan::resolveRegisters()
varDsc->lvOnFrame = false;
}
#ifdef DEBUG
- regMaskTP registerAssignment = genRegMask(varDsc->GetRegNum());
+ singleRegMask registerAssignment = genRegMask(varDsc->GetRegNum());
assert(!interval->isSpilled && !interval->isSplit);
RefPosition* refPosition = interval->firstRefPosition;
assert(refPosition != nullptr);
@@ -8604,7 +8723,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock,
BasicBlock* toBlock,
var_types type,
VARSET_VALARG_TP sharedCriticalLiveSet,
- regMaskTP terminatorConsumedRegs)
+ regMaskOnlyOne terminatorConsumedRegs)
{
// TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps
// and they would be more space-efficient as well.
@@ -8612,7 +8731,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock,
VarToRegMap toVarToRegMap = toBlock == nullptr ? nullptr : getInVarToRegMap(toBlock->bbNum);
#ifdef TARGET_ARM
- regMaskTP freeRegs;
+ regMaskOnlyOne freeRegs;
if (type == TYP_DOUBLE)
{
// We have to consider all float registers for TYP_DOUBLE
@@ -8623,7 +8742,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock,
freeRegs = allRegs(type);
}
#else // !TARGET_ARM
- regMaskTP freeRegs = allRegs(type);
+ regMaskOnlyOne freeRegs = allRegs(type);
#endif // !TARGET_ARM
#ifdef DEBUG
@@ -8632,7 +8751,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock,
return REG_NA;
}
#endif // DEBUG
- INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
+ INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs, type));
freeRegs &= ~terminatorConsumedRegs;
@@ -8693,13 +8812,33 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock,
}
else
{
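+ // Pick the callee-trash mask that matches `type` so the preference below stays within that register file.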
+ regMaskOnlyOne calleeTrashMask = RBM_NONE;
+
+ if (varTypeUsesIntReg(type))
+ {
+ calleeTrashMask = RBM_INT_CALLEE_TRASH;
+ assert(compiler->IsGprRegMask(terminatorConsumedRegs));
+ }
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else if (varTypeUsesMaskReg(type))
+ {
+ calleeTrashMask = RBM_MSK_CALLEE_TRASH;
+ assert(compiler->IsPredicateRegMask(terminatorConsumedRegs));
+ }
+#endif
+ else
+ {
+ assert(varTypeUsesFloatReg(type));
+ calleeTrashMask = RBM_FLT_CALLEE_TRASH;
+ assert(compiler->IsFloatRegMask(terminatorConsumedRegs));
+ }
// Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores.
- if ((freeRegs & RBM_CALLEE_TRASH) != 0)
+ if ((freeRegs & calleeTrashMask) != 0)
{
- freeRegs &= RBM_CALLEE_TRASH;
+ freeRegs &= calleeTrashMask;
}
- regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
+ regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs) MORE_THAN_64_REG_ARG(type));
return tempReg;
}
}
@@ -8890,7 +9029,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
// available to copy into.
// Note that for this purpose we use the full live-out set, because we must ensure that
// even the registers that remain the same across the edge are preserved correctly.
- regMaskTP liveOutRegs = RBM_NONE;
+ AllRegsMask liveOutRegs;
VarSetOps::Iter liveOutIter(compiler, block->bbLiveOut);
unsigned liveOutVarIndex = 0;
while (liveOutIter.NextElem(&liveOutVarIndex))
@@ -8898,8 +9037,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
regNumber fromReg = getVarReg(outVarToRegMap, liveOutVarIndex);
if (fromReg != REG_STK)
{
- regMaskTP fromRegMask = genRegMask(fromReg, getIntervalForLocalVar(liveOutVarIndex)->registerType);
- liveOutRegs |= fromRegMask;
+ var_types varType = getIntervalForLocalVar(liveOutVarIndex)->registerType;
+ liveOutRegs.AddRegNumInMask(fromReg ARM_ARG(varType));
}
}
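The ARM_ARG(...) and MORE_THAN_64_REG_ARG(...) wrappers used in these hunks pass an extra argument only on builds that need it. As a rough sketch (the expansions below are assumptions for illustration, not definitions taken from this change), such conditional-argument macros typically look like:

// Illustrative expansions only; the real definitions live outside this diff.
#ifdef TARGET_ARM
#define ARM_ARG(x) , x               // on Arm32 a double spans two float regs, so mask queries also need the type
#else
#define ARM_ARG(x)
#endif

#ifdef HAS_MORE_THAN_64_REGISTERS
#define MORE_THAN_64_REG_ARG(x) , x  // the register type picks which 64-bit chunk of the mask to scan
#else
#define MORE_THAN_64_REG_ARG(x)
#endif

With that reading, a call written as genRegNumFromMask(mask MORE_THAN_64_REG_ARG(interval->registerType)) compiles to genRegNumFromMask(mask, interval->registerType) when predicate registers push the register count past 64, and to the old single-argument genRegNumFromMask(mask) otherwise.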
@@ -8910,7 +9049,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
//
// Note: Only switches and JCMP/JTEST (for Arm64) have input regs (and so can be fed by copies), so those
// are the only block-ending branches that need special handling.
- regMaskTP consumedRegs = RBM_NONE;
+ regMaskGpr consumedRegs = RBM_NONE;
if (block->KindIs(BBJ_SWITCH))
{
// At this point, Lowering has transformed any non-switch-table blocks into
@@ -9004,9 +9143,11 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
}
}
+ assert(compiler->IsGprRegMask(consumedRegs)); // If this fails, then we will have to use AllRegsMask for
+ // consumedRegs
VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap;
- regMaskTP sameWriteRegs = RBM_NONE;
- regMaskTP diffReadRegs = RBM_NONE;
+ AllRegsMask sameWriteRegs;
+ AllRegsMask diffReadRegs;
// For each var that may require resolution, classify them as:
// - in the same register at the end of this block and at each target (no resolution needed)
@@ -9058,17 +9199,20 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
// We only need to check for these cases if sameToReg is an actual register (not REG_STK).
if (sameToReg != REG_NA && sameToReg != REG_STK)
{
+ var_types outVarRegType = getIntervalForLocalVar(outResolutionSetVarIndex)->registerType;
+
// If there's a path on which this var isn't live, it may use the original value in sameToReg.
// In this case, sameToReg will be in the liveOutRegs of this block.
// Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's
// live only at another target), and we can't copy another lclVar into that reg in this block.
- regMaskTP sameToRegMask =
- genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
- if (maybeSameLivePaths &&
- (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE)))
+ regMaskOnlyOne sameToRegMask = genRegMask(sameToReg, outVarRegType);
+
+ if (maybeSameLivePaths && (liveOutRegs.IsRegNumInMask(sameToReg ARM_ARG(outVarRegType)) ||
+ sameWriteRegs.IsRegNumInMask(sameToReg ARM_ARG(outVarRegType))))
{
sameToReg = REG_NA;
}
+
// If this register is busy because it is used by a switch table at the end of the block
// (or for Arm64, it is consumed by JCMP), we can't do the copy in this block since we can't
// insert it after the switch (or for Arm64, can't insert and overwrite the operand/source
@@ -9107,7 +9251,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
VarSetOps::AddElemD(compiler, diffResolutionSet, outResolutionSetVarIndex);
if (fromReg != REG_STK)
{
- diffReadRegs |= genRegMask(fromReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
+ diffReadRegs.AddRegNumInMask(
+ fromReg ARM_ARG(getIntervalForLocalVar(outResolutionSetVarIndex)->registerType));
}
}
else if (sameToReg != fromReg)
@@ -9116,14 +9261,15 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
setVarReg(sameVarToRegMap, outResolutionSetVarIndex, sameToReg);
if (sameToReg != REG_STK)
{
- sameWriteRegs |= genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
+ sameWriteRegs.AddRegNumInMask(
+ sameToReg ARM_ARG(getIntervalForLocalVar(outResolutionSetVarIndex)->registerType));
}
}
}
if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
{
- if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
+ if (!((sameWriteRegs & diffReadRegs).IsEmpty()))
{
// We cannot split the "same" and "diff" regs if the "same" set writes registers
// that must be read by the "diff" set. (Note that when these are done as a "batch"
@@ -9438,8 +9584,10 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
BasicBlock* toBlock,
ResolveType resolveType,
VARSET_VALARG_TP liveSet,
- regMaskTP terminatorConsumedRegs)
+ regMaskGpr terminatorConsumedRegs)
{
+ assert(compiler->IsGprRegMask(terminatorConsumedRegs));
+
VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
VarToRegMap toVarToRegMap;
if (resolveType == ResolveSharedCritical)
@@ -9494,7 +9642,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
{
#ifdef TARGET_ARM
// Try to reserve a double register for TYP_DOUBLE and use it for TYP_FLOAT too if available.
- tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE, liveSet, terminatorConsumedRegs);
+ tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE, liveSet, RBM_NONE);
if (tempRegDbl != REG_NA)
{
tempRegFlt = tempRegDbl;
@@ -9502,13 +9650,13 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
else
#endif // TARGET_ARM
{
- tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT, liveSet, terminatorConsumedRegs);
+ tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT, liveSet, RBM_NONE);
}
}
- regMaskTP targetRegsToDo = RBM_NONE;
- regMaskTP targetRegsReady = RBM_NONE;
- regMaskTP targetRegsFromStack = RBM_NONE;
+ AllRegsMask targetRegsToDo;
+ AllRegsMask targetRegsReady;
+ AllRegsMask targetRegsFromStack;
// The following arrays capture the location of the registers as they are moved:
// - location[reg] gives the current location of the var that was originally in 'reg'.
@@ -9615,7 +9763,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
if (fromReg == REG_STK)
{
stackToRegIntervals[toReg] = interval;
- targetRegsFromStack |= genRegMask(toReg);
+ targetRegsFromStack |= toReg;
}
else if (toReg == REG_STK)
{
@@ -9629,19 +9777,17 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
location[fromReg] = (regNumberSmall)fromReg;
source[toReg] = (regNumberSmall)fromReg;
sourceIntervals[fromReg] = interval;
- targetRegsToDo |= genRegMask(toReg);
+ targetRegsToDo |= toReg;
}
}
// REGISTER to REGISTER MOVES
// First, find all the ones that are ready to move now
- regMaskTP targetCandidates = targetRegsToDo;
- while (targetCandidates != RBM_NONE)
+ AllRegsMask targetCandidates = targetRegsToDo;
+ while (!targetCandidates.IsEmpty())
{
- regNumber targetReg = genFirstRegNumFromMask(targetCandidates);
- regMaskTP targetRegMask = genRegMask(targetReg);
- targetCandidates ^= targetRegMask;
+ regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetCandidates);
if (location[targetReg] == REG_NA)
{
#ifdef TARGET_ARM
@@ -9654,26 +9800,24 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
regNumber anotherHalfRegNum = REG_NEXT(targetReg);
if (location[anotherHalfRegNum] == REG_NA)
{
- targetRegsReady |= targetRegMask;
+ targetRegsReady |= targetReg;
}
}
else
#endif // TARGET_ARM
{
- targetRegsReady |= targetRegMask;
+ targetRegsReady |= targetReg;
}
}
}
// Perform reg to reg moves
- while (targetRegsToDo != RBM_NONE)
+ while (!targetRegsToDo.IsEmpty())
{
- while (targetRegsReady != RBM_NONE)
+ while (!targetRegsReady.IsEmpty())
{
- regNumber targetReg = genFirstRegNumFromMask(targetRegsReady);
- regMaskTP targetRegMask = genRegMask(targetReg);
- targetRegsToDo ^= targetRegMask;
- targetRegsReady ^= targetRegMask;
+ regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetRegsReady);
+ targetRegsToDo ^= targetReg;
assert(location[targetReg] != targetReg);
assert(targetReg < REG_COUNT);
regNumber sourceReg = (regNumber)source[targetReg];
@@ -9687,14 +9831,14 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
fromReg DEBUG_ARG(fromBlock) DEBUG_ARG(toBlock) DEBUG_ARG(resolveTypeName[resolveType]));
sourceIntervals[sourceReg] = nullptr;
location[sourceReg] = REG_NA;
- regMaskTP fromRegMask = genRegMask(fromReg);
+ singleRegMask fromRegMask = genRegMask(fromReg);
// Do we have a free targetReg?
if (fromReg == sourceReg)
{
- if (source[fromReg] != REG_NA && ((targetRegsFromStack & fromRegMask) != fromRegMask))
+ if (source[fromReg] != REG_NA && !targetRegsFromStack.IsRegNumInMask(fromReg))
{
- targetRegsReady |= fromRegMask;
+ targetRegsReady |= fromReg;
#ifdef TARGET_ARM
if (genIsValidDoubleReg(fromReg))
{
@@ -9705,7 +9849,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
regNumber upperHalfReg = REG_NEXT(fromReg);
if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA))
{
- targetRegsReady &= ~fromRegMask;
+ targetRegsReady.RemoveRegNumFromMask(fromReg);
}
}
}
@@ -9713,10 +9857,10 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
{
// We may have freed up the other half of a double where the lower half
// was already free.
- regNumber lowerHalfReg = REG_PREV(fromReg);
- regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg];
- regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg];
- regMaskTP lowerHalfRegMask = genRegMask(lowerHalfReg);
+ regNumber lowerHalfReg = REG_PREV(fromReg);
+ regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg];
+ regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg];
+ singleRegMask lowerHalfRegMask = genRegMask(lowerHalfReg);
// Necessary conditions:
// - There is a source register for this reg (lowerHalfSrcReg != REG_NA)
// - It is currently free (lowerHalfSrcLoc == REG_NA)
@@ -9727,22 +9871,21 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// lowerHalfRegMask)
if ((lowerHalfSrcReg != REG_NA) && (lowerHalfSrcLoc == REG_NA) &&
(sourceIntervals[lowerHalfSrcReg] != nullptr) &&
- ((targetRegsReady & lowerHalfRegMask) == RBM_NONE) &&
- ((targetRegsFromStack & lowerHalfRegMask) != lowerHalfRegMask))
+ !targetRegsReady.IsRegNumInMask(lowerHalfReg) &&
+ !targetRegsFromStack.IsRegNumInMask(lowerHalfReg))
{
// This must be a double interval, otherwise it would be in targetRegsReady, or already
// completed.
assert(sourceIntervals[lowerHalfSrcReg]->registerType == TYP_DOUBLE);
- targetRegsReady |= lowerHalfRegMask;
+ targetRegsReady |= lowerHalfReg;
}
#endif // TARGET_ARM
}
}
}
- if (targetRegsToDo != RBM_NONE)
+ if (!targetRegsToDo.IsEmpty())
{
- regNumber targetReg = genFirstRegNumFromMask(targetRegsToDo);
- regMaskTP targetRegMask = genRegMask(targetReg);
+ regNumber targetReg = genFirstRegNumFromMask(targetRegsToDo);
// Is it already there due to other moves?
// If not, move it to the temp reg, OR swap it with another register
@@ -9750,7 +9893,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
regNumber fromReg = (regNumber)location[sourceReg];
if (targetReg == fromReg)
{
- targetRegsToDo &= ~targetRegMask;
+ targetRegsToDo.RemoveRegNumFromMask(targetReg);
}
else
{
@@ -9793,16 +9936,15 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// Otherwise, we'll spill it to the stack and reload it later.
if (useSwap)
{
- regMaskTP fromRegMask = genRegMask(fromReg);
- targetRegsToDo &= ~fromRegMask;
+ targetRegsToDo.RemoveRegNumFromMask(fromReg);
}
}
else
{
// Look at the remaining registers from targetRegsToDo (which we expect to be relatively
// small at this point) to find out what's currently in targetReg.
- regMaskTP mask = targetRegsToDo;
- while (mask != RBM_NONE && otherTargetReg == REG_NA)
+ AllRegsMask mask = targetRegsToDo;
+ while (!mask.IsEmpty() && otherTargetReg == REG_NA)
{
regNumber nextReg = genFirstRegNumFromMaskAndToggle(mask);
if (location[source[nextReg]] == targetReg)
@@ -9838,10 +9980,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
DEBUG_ARG(resolveTypeName[resolveType]));
location[source[otherTargetReg]] = REG_STK;
- regMaskTP otherTargetRegMask = genRegMask(otherTargetReg);
- targetRegsFromStack |= otherTargetRegMask;
+ targetRegsFromStack |= otherTargetReg;
stackToRegIntervals[otherTargetReg] = otherInterval;
- targetRegsToDo &= ~otherTargetRegMask;
+ targetRegsToDo.RemoveRegNumFromMask(otherTargetReg);
// Now, move the interval that is going to targetReg.
addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg,
@@ -9855,8 +9996,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// value will be retrieved from STK.
if (source[fromReg] != REG_NA && fromReg != otherTargetReg)
{
- regMaskTP fromRegMask = genRegMask(fromReg);
- targetRegsReady |= fromRegMask;
+ targetRegsReady |= fromReg;
#ifdef TARGET_ARM
if (genIsValidDoubleReg(fromReg))
{
@@ -9867,17 +10007,17 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
regNumber upperHalfReg = REG_NEXT(fromReg);
if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA))
{
- targetRegsReady &= ~fromRegMask;
+ targetRegsReady.RemoveRegNumFromMask(fromReg);
}
}
#endif // TARGET_ARM
}
}
- targetRegsToDo &= ~targetRegMask;
+ targetRegsToDo.RemoveRegNumFromMask(targetReg);
}
else
{
- compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(true));
+ compiler->codeGen->regSet.rsSetRegModified(tempReg DEBUGARG(true));
#ifdef TARGET_ARM
if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
{
@@ -9897,7 +10037,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
DEBUG_ARG(resolveTypeName[resolveType]));
location[targetReg] = (regNumberSmall)tempReg;
}
- targetRegsReady |= targetRegMask;
+ targetRegsReady |= targetReg;
}
}
}
@@ -9905,7 +10045,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// Finally, perform stack to reg moves
// All the target regs will be empty at this point
- while (targetRegsFromStack != RBM_NONE)
+ while (!targetRegsFromStack.IsEmpty())
{
regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetRegsFromStack);
@@ -10280,7 +10420,17 @@ void RefPosition::dump(LinearScan* linearScan)
printf(FMT_BB " ", this->bbNum);
printf("regmask=");
- linearScan->compiler->dumpRegMask(registerAssignment);
+ var_types type = TYP_UNKNOWN;
+ if ((refType == RefTypeBB) || (refType == RefTypeKillGCRefs))
+ {
+ // These refTypes do not have intervals
+ type = TYP_INT;
+ }
+ else
+ {
+ type = getRegisterType();
+ }
+ linearScan->compiler->dumpRegMask(registerAssignment, type);
printf(" minReg=%d", minRegCandidateCount);
@@ -10416,10 +10566,10 @@ void Interval::dump(Compiler* compiler)
printf(" physReg:%s", getRegName(physReg));
printf(" Preferences=");
- compiler->dumpRegMask(this->registerPreferences);
+ compiler->dumpRegMask(this->registerPreferences, this->registerType);
printf(" Aversions=");
- compiler->dumpRegMask(this->registerAversion);
+ compiler->dumpRegMask(this->registerAversion, this->registerType);
if (relatedInterval)
{
printf(" RelatedInterval ");
@@ -10974,7 +11124,7 @@ void LinearScan::dumpLsraAllocationEvent(
}
if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK))
{
- registersToDump |= getRegMask(reg, interval->registerType);
+ registersToDump.AddRegNum(reg, interval->registerType);
dumpRegRecordTitleIfNeeded();
}
@@ -11312,7 +11462,7 @@ void LinearScan::dumpRegRecordHeader()
regColumnWidth + 1);
// Print a "title row" including the legend and the reg names.
- lastDumpedRegisters = RBM_NONE;
+ lastDumpedRegisters.Clear();
dumpRegRecordTitleIfNeeded();
}
@@ -11321,10 +11471,14 @@ void LinearScan::dumpRegRecordTitleIfNeeded()
if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
{
lastUsedRegNumIndex = 0;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_MASK_LAST : REG_INT_LAST;
+#else
int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
+#endif
for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
{
- if ((registersToDump & genRegMask((regNumber)regNumIndex)) != 0)
+ if (registersToDump.IsRegNumInMask((regNumber)regNumIndex))
{
lastUsedRegNumIndex = regNumIndex;
}
@@ -11404,7 +11558,7 @@ void LinearScan::dumpRegRecords()
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
printf("%c", activeChar);
}
- else if ((genRegMask(regNum) & regsBusyUntilKill) != RBM_NONE)
+ else if (regsBusyUntilKill.IsRegNumInMask(regNum))
{
printf(columnFormatArray, "Busy");
}
@@ -11634,14 +11788,28 @@ bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node)
// Arguments:
// regsToFree - Registers that were just freed.
//
-void LinearScan::verifyFreeRegisters(regMaskTP regsToFree)
+void LinearScan::verifyFreeRegisters(CONSTREF_AllRegsMask regsToFree)
{
+ regMaskOnlyOne regsMaskToFree = regsToFree.gprRegs();
+ regMaskOnlyOne availableRegsMask = availableIntRegs;
for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg))
{
- regMaskTP regMask = genRegMask(reg);
+ if (reg >= REG_FP_FIRST && reg <= REG_FP_LAST)
+ {
+ regsMaskToFree = regsToFree.floatRegs(compiler);
+ availableRegsMask = availableFloatRegs;
+ }
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else if (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST)
+ {
+ regsMaskToFree = regsToFree.predicateRegs(compiler);
+ availableRegsMask = availableMaskRegs;
+ }
+#endif
+ singleRegMask regMask = genRegMask(reg);
// If this isn't available or if it's still waiting to be freed (i.e. it was in
// delayRegsToFree and so now it's in regsToFree), then skip it.
- if ((regMask & allAvailableRegs & ~regsToFree) == RBM_NONE)
+ if ((regMask & availableRegsMask & ~regsMaskToFree) == RBM_NONE)
{
continue;
}
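The verifyFreeRegisters rewrite above, like much of this file, leans on per-class accessors of the new combined mask (gprRegs(), floatRegs(...), predicateRegs(...), IsRegNumInMask(...), GetRegMaskForType(...), IsEmpty()). A minimal sketch of the shape those calls imply, with field names and signatures that are assumptions rather than the actual type, is:

#include <cstdint>

// Hypothetical stand-ins; the real typedefs and AllRegsMask are defined outside this diff.
typedef uint64_t regMaskGpr;
typedef uint64_t regMaskFloat;
typedef uint64_t regMaskPredicate;

enum class RegClass { Gpr, Float, Predicate };

struct AllRegsMaskSketch
{
    regMaskGpr       gpr  = 0; // rax..r15 / x0..x28
    regMaskFloat     flt  = 0; // xmm0..xmm31 / v0..v31
    regMaskPredicate pred = 0; // k0..k7 / p0..p15

    bool IsEmpty() const
    {
        return (gpr | flt | pred) == 0;
    }

    // Route a type-driven query to the owning 64-bit chunk, in the spirit of GetRegMaskForType.
    uint64_t ForClass(RegClass cls) const
    {
        switch (cls)
        {
            case RegClass::Gpr:
                return gpr;
            case RegClass::Float:
                return flt;
            default:
                return pred;
        }
    }
};

The point of the split is that each class stays a plain 64-bit mask, so the existing bit tricks (genFindLowestBit, &, |, ^) keep working per class even on targets whose total register count exceeds 64.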
@@ -12085,10 +12253,11 @@ void LinearScan::verifyFinalAllocation()
// However, we will assert that, at resolution time, no registers contain GC refs.
{
DBEXEC(VERBOSE, printf(" "));
- regMaskTP candidateRegs = currentRefPosition.registerAssignment;
+ regMaskOnlyOne candidateRegs = currentRefPosition.registerAssignment;
while (candidateRegs != RBM_NONE)
{
- regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs);
+ regNumber nextReg =
+ genFirstRegNumFromMaskAndToggle(candidateRegs MORE_THAN_64_REG_ARG(TYP_INT));
RegRecord* regRecord = getRegisterRecord(nextReg);
Interval* assignedInterval = regRecord->assignedInterval;
@@ -12469,9 +12638,11 @@ void LinearScan::RegisterSelection::reset(Interval* interval, RefPosition* refPo
// Return Values:
// 'true' if there was a single register candidate available after the heuristic is applied.
//
-bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP selectionCandidates)
+bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskOnlyOne selectionCandidates)
{
- regMaskTP newCandidates = candidates & selectionCandidates;
+ assert(linearScan->compiler->IsOnlyOneRegMask(selectionCandidates));
+
+ regMaskOnlyOne newCandidates = candidates & selectionCandidates;
if (newCandidates != RBM_NONE)
{
candidates = newCandidates;
@@ -12490,10 +12661,12 @@ bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP
// Return Values:
// 'true' if there was a single register candidate available after the heuristic is applied.
//
-bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate)
+bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, regMaskOnlyOne selectionCandidate)
{
+ assert(linearScan->compiler->IsOnlyOneRegMask(selectionCandidate));
assert(LinearScan::isSingleRegister(selectionCandidate));
- regMaskTP newCandidates = candidates & selectionCandidate;
+
+ regMaskOnlyOne newCandidates = candidates & selectionCandidate;
if (newCandidates != RBM_NONE)
{
candidates = newCandidates;
@@ -12540,7 +12713,7 @@ void LinearScan::RegisterSelection::try_CONST_AVAILABLE()
if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType))
{
- regMaskTP newCandidates = candidates & matchingConstants;
+ regMaskOnlyOne newCandidates = candidates & matchingConstants;
if (newCandidates != RBM_NONE)
{
candidates = newCandidates;
@@ -12701,7 +12874,7 @@ void LinearScan::RegisterSelection::try_COVERS_FULL()
calculateCoversSets();
#endif
- regMaskTP newCandidates = candidates & coversFullSet & freeCandidates;
+ regMaskOnlyOne newCandidates = candidates & coversFullSet & freeCandidates;
if (newCandidates != RBM_NONE)
{
candidates = newCandidates;
@@ -12725,15 +12898,15 @@ void LinearScan::RegisterSelection::try_BEST_FIT()
}
#endif
- regMaskTP bestFitSet = RBM_NONE;
+ regMaskOnlyOne bestFitSet = RBM_NONE;
// If the best score includes COVERS_FULL, pick the one that's killed soonest.
// If none cover the full range, the BEST_FIT is the one that's killed later.
bool earliestIsBest = coversFullApplied;
LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation;
- for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;)
+ for (regMaskOnlyOne bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;)
{
- regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates);
- regMaskTP bestFitCandidateBit = genRegMask(bestFitCandidateRegNum);
+ regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask bestFitCandidateBit = genRegMask(bestFitCandidateRegNum);
bestFitCandidates ^= bestFitCandidateBit;
// Find the next RefPosition of the register.
@@ -12827,12 +13000,12 @@ void LinearScan::RegisterSelection::try_REG_ORDER()
// This will always result in a single candidate. That is, it is the tie-breaker
// for free candidates, and doesn't make sense as anything other than the last
// heuristic for free registers.
- unsigned lowestRegOrder = UINT_MAX;
- regMaskTP lowestRegOrderBit = RBM_NONE;
- for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;)
+ unsigned lowestRegOrder = UINT_MAX;
+ regMaskOnlyOne lowestRegOrderBit = RBM_NONE;
+ for (regMaskOnlyOne regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;)
{
- regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates);
- regMaskTP regOrderCandidateBit = genRegMask(regOrderCandidateRegNum);
+ regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask regOrderCandidateBit = genRegMask(regOrderCandidateRegNum);
regOrderCandidates ^= regOrderCandidateBit;
unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder;
@@ -12854,7 +13027,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
assert(!found);
// The set of registers with the lowest spill weight.
- regMaskTP lowestCostSpillSet = RBM_NONE;
+ regMaskOnlyOne lowestCostSpillSet = RBM_NONE;
// Apply the SPILL_COST heuristic and eliminate regs that can't be spilled.
// The spill weight for 'refPosition' (the one we're allocating now).
@@ -12865,10 +13038,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
bool foundLowerSpillWeight = false;
LsraLocation thisLocation = refPosition->nodeLocation;
- for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;)
+ for (regMaskOnlyOne spillCandidates = candidates; spillCandidates != RBM_NONE;)
{
- regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates);
- regMaskTP spillCandidateBit = genRegMask(spillCandidateRegNum);
+ regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask spillCandidateBit = genRegMask(spillCandidateRegNum);
spillCandidates ^= spillCandidateBit;
RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum];
@@ -12988,12 +13161,12 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF()
{
assert(!found);
- LsraLocation farthestLocation = MinLocation;
- regMaskTP farthestSet = RBM_NONE;
- for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE;)
+ LsraLocation farthestLocation = MinLocation;
+ regMaskOnlyOne farthestSet = RBM_NONE;
+ for (regMaskOnlyOne farthestCandidates = candidates; farthestCandidates != RBM_NONE;)
{
- regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates);
- regMaskTP farthestCandidateBit = genRegMask(farthestCandidateRegNum);
+ regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask farthestCandidateBit = genRegMask(farthestCandidateRegNum);
farthestCandidates ^= farthestCandidateBit;
// Find the next RefPosition of the register.
@@ -13022,11 +13195,12 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
{
assert(!found);
- regMaskTP prevRegOptSet = RBM_NONE;
- for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;)
+ regMaskOnlyOne prevRegOptSet = RBM_NONE;
+ for (regMaskOnlyOne prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;)
{
- regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates);
- regMaskTP prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum);
+ regNumber prevRegOptCandidateRegNum =
+ genFirstRegNumFromMask(prevRegOptCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum);
prevRegOptCandidates ^= prevRegOptCandidateBit;
Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval;
bool foundPrevRegOptReg = true;
@@ -13125,11 +13299,11 @@ void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate
return;
}
- regMaskTP coversCandidates = candidates;
+ regMaskOnlyOne coversCandidates = candidates;
for (; coversCandidates != RBM_NONE;)
{
- regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates);
- regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum);
+ regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask coversCandidateBit = genRegMask(coversCandidateRegNum);
coversCandidates ^= coversCandidateBit;
// The register is considered unassigned if it has no assignedInterval, OR
@@ -13152,12 +13326,12 @@ void LinearScan::RegisterSelection::calculateCoversSets()
return;
}
- preferenceSet = (candidates & preferences);
- regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet;
+ preferenceSet = (candidates & preferences);
+ regMaskOnlyOne coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet;
for (; coversCandidates != RBM_NONE;)
{
- regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates);
- regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum);
+ regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates MORE_THAN_64_REG_ARG(regType));
+ singleRegMask coversCandidateBit = genRegMask(coversCandidateRegNum);
coversCandidates ^= coversCandidateBit;
// If we have a single candidate we don't need to compute the preference-related sets, but we
@@ -13228,8 +13402,8 @@ void LinearScan::RegisterSelection::calculateCoversSets()
// Register bit selected (a single register) and REG_NA if no register was selected.
//
template <bool needsConsecutiveRegisters>
-regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
- RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
+singleRegMask LinearScan::RegisterSelection::select(Interval* currentInterval,
+ RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
{
#ifdef DEBUG
*registerScore = NONE;
@@ -13287,7 +13461,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
}
#ifdef DEBUG
- candidates = linearScan->stressLimitRegs(refPosition, candidates);
+ candidates = linearScan->stressLimitRegs(refPosition, candidates, regType);
#endif
assert(candidates != RBM_NONE);
@@ -13322,9 +13496,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
nextRelatedInterval = nullptr;
// First, get the preferences for this interval
- regMaskTP thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences();
+ regMaskOnlyOne thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences();
// Now, determine if they are compatible and update the relatedPreferences that we'll consider.
- regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences;
+ regMaskOnlyOne newRelatedPreferences = thisRelatedPreferences & relatedPreferences;
if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS))
{
// TODO-CQ: The following isFree() check doesn't account for the possibility that there's an
@@ -13334,8 +13508,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// to achieve zero diffs.
//
bool thisIsSingleReg = isSingleRegister(newRelatedPreferences);
- if (!thisIsSingleReg ||
- linearScan->isFree(linearScan->getRegisterRecord(genRegNumFromMask(newRelatedPreferences))))
+ if (!thisIsSingleReg || linearScan->isFree(linearScan->getRegisterRecord(
+ genRegNumFromMask(newRelatedPreferences MORE_THAN_64_REG_ARG(regType)))))
{
relatedPreferences = newRelatedPreferences;
// If this Interval has a downstream def without a single-register preference, continue to iterate.
@@ -13392,12 +13566,12 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
if (preferCalleeSave)
{
- regMaskTP calleeSaveCandidates = linearScan->calleeSaveRegs(currentInterval->registerType);
+ regMaskOnlyOne calleeSaveCandidates = linearScan->calleeSaveRegs(regType);
if (currentInterval->isWriteThru)
{
// We'll only prefer a callee-save register if it's already been used.
- regMaskTP unusedCalleeSaves =
- calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask());
+ regMaskOnlyOne unusedCalleeSaves =
+ calleeSaveCandidates & ~linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask(regType);
callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves;
preferences &= ~unusedCalleeSaves;
}
@@ -13421,7 +13595,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
found = false;
// Is this a fixedReg?
- regMaskTP fixedRegMask = RBM_NONE;
+ regMaskOnlyOne fixedRegMask = RBM_NONE;
if (refPosition->isFixedRegRef)
{
assert(genMaxOneBit(refPosition->registerAssignment));
@@ -13429,7 +13603,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
if (candidates == refPosition->registerAssignment)
{
found = true;
- if (linearScan->nextIntervalRef[genRegNumFromMask(candidates)] > lastLocation)
+ if (linearScan->nextIntervalRef[genRegNumFromMask(candidates MORE_THAN_64_REG_ARG(regType))] > lastLocation)
{
unassignedSet = candidates;
}
@@ -13437,7 +13611,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
}
#ifdef DEBUG
- regMaskTP inUseOrBusyRegsMask = RBM_NONE;
+ AllRegsMask inUseOrBusyRegsMask;
#endif
// Eliminate candidates that are in-use or busy.
@@ -13446,8 +13620,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE.
// When we allocate for USE, we see that the register is busy at current location
// and we end up with that candidate is no longer available.
- regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
- candidates &= ~busyRegs;
+ AllRegsMask busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
+ candidates &= ~busyRegs.GetRegMaskForType(regType);
#ifdef TARGET_ARM
// For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half
@@ -13457,7 +13631,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// clause below creates a mask to do this.
if (currentInterval->registerType == TYP_DOUBLE)
{
- candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1);
+ candidates &= ~((busyRegs.floatRegs(linearScan->compiler) & RBM_ALLDOUBLE_HIGH) >> 1);
}
#endif // TARGET_ARM
@@ -13468,11 +13642,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// Also eliminate as busy any register with a conflicting fixed reference at this or
// the next location.
// Note that this will eliminate the fixedReg, if any, but we'll add it back below.
- regMaskTP checkConflictMask = candidates & linearScan->fixedRegs;
+ regMaskOnlyOne checkConflictMask = candidates & linearScan->fixedRegs.GetRegMaskForType(regType);
while (checkConflictMask != RBM_NONE)
{
- regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask);
- regMaskTP checkConflictBit = genRegMask(checkConflictReg);
+ regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask MORE_THAN_64_REG_ARG(regType));
+ singleRegMask checkConflictBit = genRegMask(checkConflictReg);
checkConflictMask ^= checkConflictBit;
LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg];
@@ -13482,7 +13656,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
{
candidates &= ~checkConflictBit;
#ifdef DEBUG
- inUseOrBusyRegsMask |= checkConflictBit;
+ inUseOrBusyRegsMask |= checkConflictReg;
#endif
}
}
@@ -13528,7 +13702,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
if (needsConsecutiveRegisters)
{
#ifdef TARGET_ARM64
- regMaskTP busyConsecutiveCandidates = RBM_NONE;
+ regMaskFloat busyConsecutiveCandidates = RBM_NONE;
if (refPosition->isFirstRefPositionOfConsecutiveRegisters())
{
freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates);
@@ -13547,7 +13721,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// refpositions.
assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1));
- freeCandidates = candidates & linearScan->m_AvailableRegs;
+ freeCandidates = candidates & linearScan->m_AvailableRegs.GetRegMaskForType(currentInterval->registerType);
}
if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE))
@@ -13562,16 +13736,17 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// Remove the `inUseOrBusyRegsMask` from the original candidates list and find one
// such range that is consecutive. Next, append that range to the `candidates`.
//
- regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask;
- regMaskTP overallLimitCandidates;
- regMaskTP limitConsecutiveResult =
+ regMaskFloat limitCandidatesForConsecutive =
+ refPosition->registerAssignment & ~inUseOrBusyRegsMask.floatRegs(linearScan->compiler);
+ regMaskFloat overallLimitCandidates;
+ regMaskFloat limitConsecutiveResult =
linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount,
&overallLimitCandidates);
assert(limitConsecutiveResult != RBM_NONE);
unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult);
- regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1;
+ regMaskFloat registersNeededMask = (1ULL << refPosition->regCount) - 1;
candidates |= (registersNeededMask << startRegister);
}
@@ -13592,7 +13767,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
return RBM_NONE;
}
- freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType));
+ freeCandidates = linearScan->getFreeCandidates(candidates, regType);
}
// If no free candidates, then double check if refPosition is an actual ref.
@@ -13693,8 +13868,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
// select the REG_ORDER heuristics (if there are any free candidates) or REG_NUM (if all registers
// are busy).
//
-regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* currentInterval,
- RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
+singleRegMask LinearScan::RegisterSelection::selectMinimal(
+ Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
{
assert(!linearScan->enregisterLocalVars);
#ifdef DEBUG
@@ -13743,7 +13918,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*
}
#ifdef DEBUG
- candidates = linearScan->stressLimitRegs(refPosition, candidates);
+ candidates = linearScan->stressLimitRegs(refPosition, candidates, regType);
#endif
assert(candidates != RBM_NONE);
@@ -13756,7 +13931,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*
#endif
// Is this a fixedReg?
- regMaskTP fixedRegMask = RBM_NONE;
+ regMaskOnlyOne fixedRegMask = RBM_NONE;
if (refPosition->isFixedRegRef)
{
assert(genMaxOneBit(refPosition->registerAssignment));
@@ -13773,8 +13948,8 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*
// TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE.
// When we allocate for USE, we see that the register is busy at current location
// and we end up with that candidate is no longer available.
- regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
- candidates &= ~busyRegs;
+ AllRegsMask busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
+ candidates &= ~busyRegs.GetRegMaskForType(regType);
#ifdef TARGET_ARM
// For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half
@@ -13784,18 +13959,18 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*
// clause below creates a mask to do this.
if (currentInterval->registerType == TYP_DOUBLE)
{
- candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1);
+ candidates &= ~((busyRegs.floatRegs(linearScan->compiler) & RBM_ALLDOUBLE_HIGH) >> 1);
}
#endif // TARGET_ARM
// Also eliminate as busy any register with a conflicting fixed reference at this or
// the next location.
// Note that this will eliminate the fixedReg, if any, but we'll add it back below.
- regMaskTP checkConflictMask = candidates & linearScan->fixedRegs;
+ regMaskOnlyOne checkConflictMask = candidates & linearScan->fixedRegs.GetRegMaskForType(regType);
while (checkConflictMask != RBM_NONE)
{
- regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask);
- regMaskTP checkConflictBit = genRegMask(checkConflictReg);
+ regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask MORE_THAN_64_REG_ARG(regType));
+ singleRegMask checkConflictBit = genRegMask(checkConflictReg);
checkConflictMask ^= checkConflictBit;
LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg];
@@ -13826,7 +14001,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*
return RBM_NONE;
}
- freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType));
+ freeCandidates = linearScan->getFreeCandidates(candidates, regType);
if (freeCandidates != RBM_NONE)
{
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index 797c9d69c91d8f..f932c68c0f6fd4 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
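For orientation before the header hunks below: a plausible reading of the new mask names (an assumption; the typedefs themselves are not part of this section) is that they all alias the same 64-bit bit set and exist to document intent and back asserts such as IsGprRegMask/IsOnlyOneRegMask, roughly:

// Assumed relationships, for orientation only.
typedef uint64_t    RegBitSet64;      // one 64-bit chunk of registers
typedef RegBitSet64 regMaskGpr;       // general-purpose registers only
typedef RegBitSet64 regMaskFloat;     // float/SIMD registers only
typedef RegBitSet64 regMaskPredicate; // predicate/mask registers only
typedef RegBitSet64 regMaskOnlyOne;   // registers from a single class; which class depends on context
typedef RegBitSet64 singleRegMask;    // exactly one bit set

Under that reading the renames in lsra.h are documentation and assert fodder rather than representation changes: call sites keep doing ordinary 64-bit mask arithmetic, and only AllRegsMask carries more than one class at a time.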
@@ -51,12 +51,12 @@ RegisterType regType(T type)
{
return IntRegisterType;
}
-#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
else if (varTypeUsesMaskReg(type))
{
return MaskRegisterType;
}
-#endif // (TARGET_XARCH || TARGET_ARM64) && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
else
{
assert(varTypeUsesFloatReg(type));
@@ -451,11 +451,12 @@ typedef jitstd::list<RefPosition>::reverse_iterator RefPositionReverseIterator;
class Referenceable
{
public:
- Referenceable()
+ Referenceable(RegisterType _registerType)
{
firstRefPosition = nullptr;
recentRefPosition = nullptr;
lastRefPosition = nullptr;
+ registerType = _registerType;
}
// A linked list of RefPositions. These are only traversed in the forward
@@ -466,6 +467,8 @@ class Referenceable
RefPosition* recentRefPosition;
RefPosition* lastRefPosition;
+ RegisterType registerType;
+
// Get the position of the next reference which is at or greater than
// the current location (relies upon recentRefPosition being updated
// during traversal).
@@ -477,12 +480,12 @@ class RegRecord : public Referenceable
{
public:
RegRecord()
+ : Referenceable(IntRegisterType)
{
assignedInterval = nullptr;
previousInterval = nullptr;
regNum = REG_NA;
isCalleeSave = false;
- registerType = IntRegisterType;
}
void init(regNumber reg)
@@ -499,19 +502,19 @@ class RegRecord : public Referenceable
#endif
if (emitter::isGeneralRegister(reg))
{
- assert(registerType == IntRegisterType);
+ registerType = IntRegisterType;
}
else if (emitter::isFloatReg(reg))
{
registerType = FloatRegisterType;
}
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
else
{
assert(emitter::isMaskReg(reg));
registerType = MaskRegisterType;
}
-#endif
+#endif // FEATURE_MASKED_HW_INTRINSICS
regNum = reg;
isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
}
@@ -622,7 +625,7 @@ class LinearScan : public LinearScanInterface
// This is the main driver
virtual PhaseStatus doLinearScan();
- static bool isSingleRegister(regMaskTP regMask)
+ static bool isSingleRegister(regMaskOnlyOne regMask)
{
return (genExactlyOneBit(regMask));
}
@@ -723,7 +726,7 @@ class LinearScan : public LinearScanInterface
BasicBlock* toBlock,
ResolveType resolveType,
VARSET_VALARG_TP liveSet,
- regMaskTP terminatorConsumedRegs);
+ regMaskGpr terminatorConsumedRegs);
void resolveEdges();
@@ -777,34 +780,34 @@ class LinearScan : public LinearScanInterface
#if defined(TARGET_AMD64)
#ifdef UNIX_AMD64_ABI
// On System V the RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
- static const regMaskTP LsraLimitSmallIntSet =
+ static const regMaskGpr LsraLimitSmallIntSet =
(RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
#else // !UNIX_AMD64_ABI
// On Windows Amd64 use the RDI and RSI as callee saved registers.
- static const regMaskTP LsraLimitSmallIntSet =
+ static const regMaskGpr LsraLimitSmallIntSet =
(RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
#endif // !UNIX_AMD64_ABI
- static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
- static const regMaskTP LsraLimitUpperSimdSet =
+ static const regMaskFloat LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+ static const regMaskFloat LsraLimitUpperSimdSet =
(RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 |
RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31);
#elif defined(TARGET_ARM)
// On ARM, we may need two registers to set up the target register for a virtual call, so we need
// to have at least the maximum number of arg registers, plus 2.
- static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
+ static const regMaskGpr LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
+ static const regMaskFloat LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
#elif defined(TARGET_ARM64)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
+ static const regMaskGpr LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
+ static const regMaskFloat LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
#elif defined(TARGET_X86)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+ static const regMaskGpr LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
+ static const regMaskFloat LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
#elif defined(TARGET_LOONGARCH64)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
+ static const regMaskGpr LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
+ static const regMaskFloat LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#elif defined(TARGET_RISCV64)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
+ static const regMaskGpr LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
+ static const regMaskFloat LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#else
#error Unsupported or unset target architecture
#endif // target
@@ -814,11 +817,12 @@ class LinearScan : public LinearScanInterface
return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
}
- regMaskTP getConstrainedRegMask(RefPosition* refPosition,
- regMaskTP regMaskActual,
- regMaskTP regMaskConstrain,
- unsigned minRegCount);
- regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
+ regMaskOnlyOne getConstrainedRegMask(RefPosition* refPosition,
+ RegisterType regType,
+ regMaskOnlyOne regMaskActual,
+ regMaskOnlyOne regMaskConstrain,
+ unsigned minRegCount);
+ regMaskOnlyOne stressLimitRegs(RefPosition* refPosition, regMaskOnlyOne mask, RegisterType regType);
// This controls the heuristics used to select registers
// These can be combined.
@@ -904,7 +908,7 @@ class LinearScan : public LinearScanInterface
{
return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
}
- regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
+ regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, CONSTREF_AllRegsMask availableRegs);
// This controls whether we always insert a GT_RELOAD instruction after a spill
// Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
@@ -986,7 +990,7 @@ class LinearScan : public LinearScanInterface
static bool IsResolutionMove(GenTree* node);
static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
- void verifyFreeRegisters(regMaskTP regsToFree);
+ void verifyFreeRegisters(CONSTREF_AllRegsMask regsToFree);
void verifyFinalAllocation();
void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
#else // !DEBUG
@@ -1062,6 +1066,11 @@ class LinearScan : public LinearScanInterface
void processBlockStartLocations(BasicBlock* current);
void processBlockEndLocations(BasicBlock* current);
void resetAllRegistersState();
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ FORCEINLINE void updateDeadCandidatesAtBlockStart(REF_AllRegsMask deadRegMask, VarToRegMap inVarToRegMap);
+#else
+ FORCEINLINE void updateDeadCandidatesAtBlockStart(RegBitSet64 deadRegMask, VarToRegMap inVarToRegMap);
+#endif
#ifdef TARGET_ARM
bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
@@ -1084,14 +1093,15 @@ class LinearScan : public LinearScanInterface
void insertZeroInitRefPositions();
// add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
- void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
+ void addRefsForPhysRegMask(CONSTREF_AllRegsMask mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
void buildRefPositionsForNode(GenTree* tree, LsraLocation loc);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet);
+ void buildUpperVectorSaveRefPositions(GenTree* tree,
+ LsraLocation currentLoc DEBUG_ARG(regMaskFloat fpCalleeKillSet));
void buildUpperVectorRestoreRefPosition(
Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node, bool isUse, unsigned multiRegIdx);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -1121,37 +1131,47 @@ class LinearScan : public LinearScanInterface
}
// Helpers for getKillSetForNode().
- regMaskTP getKillSetForStoreInd(GenTreeStoreInd* tree);
- regMaskTP getKillSetForShiftRotate(GenTreeOp* tree);
- regMaskTP getKillSetForMul(GenTreeOp* tree);
- regMaskTP getKillSetForCall(GenTreeCall* call);
- regMaskTP getKillSetForModDiv(GenTreeOp* tree);
- regMaskTP getKillSetForBlockStore(GenTreeBlk* blkNode);
- regMaskTP getKillSetForReturn();
- regMaskTP getKillSetForProfilerHook();
+ CONSTREF_AllRegsMask getKillSetForStoreInd(GenTreeStoreInd* tree);
+ regMaskGpr getKillSetForShiftRotate(GenTreeOp* tree);
+ regMaskGpr getKillSetForMul(GenTreeOp* tree);
+ AllRegsMask getKillSetForCall(GenTreeCall* call);
+ regMaskGpr getKillSetForModDiv(GenTreeOp* tree);
+ AllRegsMask getKillSetForBlockStore(GenTreeBlk* blkNode);
+ CONSTREF_AllRegsMask getKillSetForReturn();
+ CONSTREF_AllRegsMask getKillSetForProfilerHook();
#ifdef FEATURE_HW_INTRINSICS
- regMaskTP getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node);
+ regMaskGpr getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node);
#endif // FEATURE_HW_INTRINSICS
// Return the registers killed by the given tree node.
// This is used only for an assert, and for stress, so it is only defined under DEBUG.
// Otherwise, the Build methods should obtain the killMask from the appropriate method above.
#ifdef DEBUG
- regMaskTP getKillSetForNode(GenTree* tree);
+ AllRegsMask getKillSetForNode(GenTree* tree);
#endif
// Given some tree node add refpositions for all the registers this node kills
- bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask);
+ bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, CONSTREF_AllRegsMask killMask);
- regMaskTP allRegs(RegisterType rt);
- regMaskTP allByteRegs();
- regMaskTP allSIMDRegs();
- regMaskTP lowSIMDRegs();
- regMaskTP internalFloatRegCandidates();
+ regMaskOnlyOne allRegs(RegisterType rt);
+ regMaskGpr allByteRegs();
+ regMaskFloat allSIMDRegs();
+ regMaskFloat lowSIMDRegs();
+ regMaskFloat internalFloatRegCandidates();
void makeRegisterInactive(RegRecord* physRegRecord);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ FORCEINLINE void inActivateRegisters(REF_AllRegsMask inactiveMask);
+#else
+ FORCEINLINE void inActivateRegisters(RegBitSet64 inactiveMask);
+#endif
void freeRegister(RegRecord* physRegRecord);
- void freeRegisters(regMaskTP regsToFree);
+ void freeRegisters(REF_AllRegsMask regsToFree);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ FORCEINLINE void freeRegisterMask(REF_AllRegsMask freeMask);
+#else
+ FORCEINLINE void freeRegisterMask(RegBitSet64 freeMask);
+#endif
// Get the type that this tree defines.
var_types getDefType(GenTree* tree)
@@ -1169,11 +1189,11 @@ class LinearScan : public LinearScanInterface
}
// Managing internal registers during the BuildNode process.
- RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates);
- RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
- RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
-#if defined(FEATURE_SIMD)
- RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
+ RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskOnlyOne candidates);
+ RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskGpr internalCands = RBM_NONE);
+ RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskFloat internalCands = RBM_NONE);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskPredicate internalCands = RBM_NONE);
#endif
void buildInternalRegisterUses();
@@ -1206,15 +1226,15 @@ class LinearScan : public LinearScanInterface
RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
- RefPosition* newRefPosition(Interval* theInterval,
- LsraLocation theLocation,
- RefType theRefType,
- GenTree* theTreeNode,
- regMaskTP mask,
- unsigned multiRegIdx = 0);
+ RefPosition* newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskOnlyOne mask,
+ unsigned multiRegIdx = 0);
RefPosition* newRefPosition(
- regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskOnlyOne mask);
void applyCalleeSaveHeuristics(RefPosition* rp);
@@ -1269,24 +1289,26 @@ class LinearScan : public LinearScanInterface
****************************************************************************/
#if defined(TARGET_ARM64)
- bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
- void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
- regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates);
- regMaskTP filterConsecutiveCandidates(regMaskTP candidates,
- unsigned int registersNeeded,
- regMaskTP* allConsecutiveCandidates);
- regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded);
+ bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
+ void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
+ regMaskFloat getConsecutiveCandidates(regMaskFloat candidates,
+ RefPosition* refPosition,
+ regMaskFloat* busyCandidates);
+ regMaskFloat filterConsecutiveCandidates(regMaskFloat candidates,
+ unsigned int registersNeeded,
+ regMaskFloat* allConsecutiveCandidates);
+ regMaskFloat filterConsecutiveCandidatesForSpill(regMaskFloat consecutiveCandidates, unsigned int registersNeeded);
#endif // TARGET_ARM64
- regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType))
+ regMaskOnlyOne getFreeCandidates(regMaskOnlyOne candidates, var_types regType)
{
- regMaskTP result = candidates & m_AvailableRegs;
+ regMaskOnlyOne result = candidates & m_AvailableRegs.GetRegMaskForType(regType);
#ifdef TARGET_ARM
// For TYP_DOUBLE on ARM, we can only use a register for which the odd half is
// also available.
if (regType == TYP_DOUBLE)
{
- result &= (m_AvailableRegs >> 1);
+ result &= (m_AvailableRegs.floatRegs(compiler) >> 1);
}
#endif // TARGET_ARM
return result;
@@ -1308,11 +1330,11 @@ class LinearScan : public LinearScanInterface
// Perform register selection and update currentInterval or refPosition
template <bool needsConsecutiveRegisters>
- FORCEINLINE regMaskTP select(Interval* currentInterval,
- RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
+ FORCEINLINE singleRegMask select(Interval* currentInterval,
+ RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
- FORCEINLINE regMaskTP selectMinimal(Interval* currentInterval,
- RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
+ FORCEINLINE singleRegMask selectMinimal(Interval* currentInterval,
+ RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
// If the register is from unassigned set such that it was not already
// assigned to the current interval
@@ -1353,39 +1375,39 @@ class LinearScan : public LinearScanInterface
RegisterType regType = RegisterType::TYP_UNKNOWN;
- regMaskTP candidates;
- regMaskTP preferences = RBM_NONE;
- Interval* relatedInterval = nullptr;
+ regMaskOnlyOne candidates;
+ regMaskOnlyOne preferences = RBM_NONE;
+ Interval* relatedInterval = nullptr;
- regMaskTP relatedPreferences = RBM_NONE;
- LsraLocation rangeEndLocation;
- LsraLocation relatedLastLocation;
- bool preferCalleeSave = false;
- RefPosition* rangeEndRefPosition;
- RefPosition* lastRefPosition;
- regMaskTP callerCalleePrefs = RBM_NONE;
- LsraLocation lastLocation;
+ regMaskOnlyOne relatedPreferences = RBM_NONE;
+ LsraLocation rangeEndLocation;
+ LsraLocation relatedLastLocation;
+ bool preferCalleeSave = false;
+ RefPosition* rangeEndRefPosition;
+ RefPosition* lastRefPosition;
+ regMaskOnlyOne callerCalleePrefs = RBM_NONE;
+ LsraLocation lastLocation;
- regMaskTP foundRegBit;
+ singleRegMask foundRegBit;
- regMaskTP prevRegBit = RBM_NONE;
+ singleRegMask prevRegBit = RBM_NONE;
// These are used in the post-selection updates, and must be set for any selection.
- regMaskTP freeCandidates;
- regMaskTP matchingConstants;
- regMaskTP unassignedSet;
+ regMaskOnlyOne freeCandidates;
+ regMaskOnlyOne matchingConstants;
+ regMaskOnlyOne unassignedSet;
// Compute the sets for COVERS, OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED together,
// as they all require similar computation.
- regMaskTP coversSet;
- regMaskTP preferenceSet;
- regMaskTP coversRelatedSet;
- regMaskTP coversFullSet;
- bool coversSetsCalculated = false;
- bool found = false;
- bool skipAllocation = false;
- bool coversFullApplied = false;
- bool constAvailableApplied = false;
+ regMaskOnlyOne coversSet;
+ regMaskOnlyOne preferenceSet;
+ regMaskOnlyOne coversRelatedSet;
+ regMaskOnlyOne coversFullSet;
+ bool coversSetsCalculated = false;
+ bool found = false;
+ bool skipAllocation = false;
+ bool coversFullApplied = false;
+ bool constAvailableApplied = false;
// If the selected register is already assigned to the current interval
FORCEINLINE bool isAlreadyAssigned()
@@ -1394,8 +1416,8 @@ class LinearScan : public LinearScanInterface
return (prevRegBit & preferences) == foundRegBit;
}
- bool applySelection(int selectionScore, regMaskTP selectionCandidates);
- bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate);
+ bool applySelection(int selectionScore, regMaskOnlyOne selectionCandidates);
+ bool applySingleRegSelection(int selectionScore, regMaskOnlyOne selectionCandidate);
FORCEINLINE void calculateCoversSets();
FORCEINLINE void calculateUnassignedSets();
FORCEINLINE void reset(Interval* interval, RefPosition* refPosition);
@@ -1456,7 +1478,7 @@ class LinearScan : public LinearScanInterface
BasicBlock* toBlock,
var_types type,
VARSET_VALARG_TP sharedCriticalLiveSet,
- regMaskTP terminatorConsumedRegs);
+ regMaskOnlyOne terminatorConsumedRegs);
#ifdef TARGET_ARM64
typedef JitHashTable<RefPosition*, JitPtrKeyFuncs<RefPosition>, RefPosition*> NextConsecutiveRefPositionsMap;
@@ -1529,12 +1551,12 @@ class LinearScan : public LinearScanInterface
static const int MAX_ROWS_BETWEEN_TITLES = 50;
int rowCountSinceLastTitle;
// Current mask of registers being printed in the dump.
- regMaskTP lastDumpedRegisters;
- regMaskTP registersToDump;
- int lastUsedRegNumIndex;
- bool shouldDumpReg(regNumber regNum)
+ AllRegsMask lastDumpedRegisters;
+ AllRegsMask registersToDump;
+ int lastUsedRegNumIndex;
+ bool shouldDumpReg(regNumber regNum)
{
- return (registersToDump & genRegMask(regNum)) != 0;
+ return registersToDump.IsRegNumInMask(regNum);
}
void dumpRegRecordHeader();
@@ -1739,17 +1761,17 @@ class LinearScan : public LinearScanInterface
VarToRegMap* outVarToRegMaps;
// A temporary VarToRegMap used during the resolution of critical edges.
- VarToRegMap sharedCriticalVarToRegMap;
- PhasedVar actualRegistersMask;
- PhasedVar availableIntRegs;
- PhasedVar availableFloatRegs;
- PhasedVar availableDoubleRegs;
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
- PhasedVar availableMaskRegs;
+ VarToRegMap sharedCriticalVarToRegMap;
+ PhasedVar actualRegistersMask;
+ PhasedVar availableIntRegs;
+ PhasedVar availableFloatRegs;
+ PhasedVar availableDoubleRegs;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ PhasedVar availableMaskRegs;
#endif
- PhasedVar* availableRegs[TYP_COUNT];
+ PhasedVar* availableRegs[TYP_COUNT];
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
#define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs)
#else
#define allAvailableRegs (availableIntRegs | availableFloatRegs)
@@ -1759,7 +1781,7 @@ class LinearScan : public LinearScanInterface
// PUTARG_REG node. Tracked between the PUTARG_REG and its corresponding
// CALL node and is used to avoid preferring these registers for locals
// which would otherwise force a spill.
- regMaskTP placedArgRegs;
+ AllRegsMask placedArgRegs;
struct PlacedLocal
{
@@ -1808,60 +1830,41 @@ class LinearScan : public LinearScanInterface
// Register status
//-----------------------------------------------------------------------
- regMaskTP m_AvailableRegs;
- regNumber getRegForType(regNumber reg, var_types regType)
- {
-#ifdef TARGET_ARM
- if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg))
- {
- reg = REG_PREV(reg);
- }
-#endif // TARGET_ARM
- return reg;
- }
-
- regMaskTP getRegMask(regNumber reg, var_types regType)
- {
- reg = getRegForType(reg, regType);
- regMaskTP regMask = genRegMask(reg);
-#ifdef TARGET_ARM
- if (regType == TYP_DOUBLE)
- {
- assert(genIsValidDoubleReg(reg));
- regMask |= (regMask << 1);
- }
-#endif // TARGET_ARM
- return regMask;
- }
+ AllRegsMask m_AvailableRegs;
void resetAvailableRegs()
{
- m_AvailableRegs = allAvailableRegs;
- m_RegistersWithConstants = RBM_NONE;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ m_AvailableRegs = AllRegsMask(availableIntRegs, availableFloatRegs, availableMaskRegs);
+#else
+ m_AvailableRegs = AllRegsMask(allAvailableRegs);
+#endif // HAS_MORE_THAN_64_REGISTERS
+ m_RegistersWithConstants.Clear();
}
- bool isRegAvailable(regNumber reg, var_types regType)
+ bool isRegAvailable(regNumber reg, var_types regType) // only used in asserts
{
- regMaskTP regMask = getRegMask(reg, regType);
- return (m_AvailableRegs & regMask) == regMask;
+ return m_AvailableRegs.IsRegNumPresent(reg, regType);
}
- void setRegsInUse(regMaskTP regMask)
+
+ void setRegsInUse(CONSTREF_AllRegsMask regMask)
{
m_AvailableRegs &= ~regMask;
}
+
void setRegInUse(regNumber reg, var_types regType)
{
- regMaskTP regMask = getRegMask(reg, regType);
- setRegsInUse(regMask);
+ m_AvailableRegs.RemoveRegNum(reg, regType);
}
- void makeRegsAvailable(regMaskTP regMask)
+
+ void makeRegsAvailable(CONSTREF_AllRegsMask regMask)
{
m_AvailableRegs |= regMask;
}
+
void makeRegAvailable(regNumber reg, var_types regType)
{
- regMaskTP regMask = getRegMask(reg, regType);
- makeRegsAvailable(regMask);
+ m_AvailableRegs.AddRegNum(reg, regType);
}
void clearAllNextIntervalRef();
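To illustrate the shape of the change, here is a toy per-kind mask wrapper (all names invented for this sketch; the real AllRegsMask API is defined elsewhere in the JIT): operations take the register kind so GPR, float, and predicate registers with the same index never collide in a single 64-bit word.

#include <cassert>
#include <cstdint>

enum class RegKind { Gpr, Float, Predicate };

struct PerKindRegMask
{
    uint64_t gpr  = 0;
    uint64_t flt  = 0;
    uint64_t pred = 0;

    uint64_t& Slot(RegKind kind)
    {
        return (kind == RegKind::Gpr) ? gpr : (kind == RegKind::Float) ? flt : pred;
    }
    uint64_t Slot(RegKind kind) const
    {
        return (kind == RegKind::Gpr) ? gpr : (kind == RegKind::Float) ? flt : pred;
    }
    void AddReg(RegKind kind, unsigned idx)       { Slot(kind) |= (1ull << idx); }
    void RemoveReg(RegKind kind, unsigned idx)    { Slot(kind) &= ~(1ull << idx); }
    bool HasReg(RegKind kind, unsigned idx) const { return ((Slot(kind) >> idx) & 1) != 0; }
    bool IsEmpty() const                          { return (gpr | flt | pred) == 0; }
};

int main()
{
    PerKindRegMask m;
    m.AddReg(RegKind::Gpr, 3);
    m.AddReg(RegKind::Float, 3); // same index, different kind: no collision
    assert(m.HasReg(RegKind::Gpr, 3) && m.HasReg(RegKind::Float, 3));
    m.RemoveReg(RegKind::Gpr, 3);
    assert(!m.HasReg(RegKind::Gpr, 3) && !m.IsEmpty());
    return 0;
}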
@@ -1872,30 +1875,31 @@ class LinearScan : public LinearScanInterface
void clearSpillCost(regNumber reg, var_types regType);
void updateSpillCost(regNumber reg, Interval* interval);
- FORCEINLINE void updateRegsFreeBusyState(RefPosition& refPosition,
- regMaskTP regsBusy,
- regMaskTP* regsToFree,
- regMaskTP* delayRegsToFree DEBUG_ARG(Interval* interval)
+ FORCEINLINE void updateRegsFreeBusyState(RefPosition& refPosition,
+ regMaskOnlyOne regsBusy,
+ AllRegsMask* regsToFree,
+ AllRegsMask* delayRegsToFree,
+ RegisterType regType DEBUG_ARG(Interval* interval)
DEBUG_ARG(regNumber assignedReg));
- regMaskTP m_RegistersWithConstants;
- void clearConstantReg(regNumber reg, var_types regType)
+ AllRegsMask m_RegistersWithConstants;
+ AllRegsMask fixedRegs;
+
+ void clearConstantReg(regNumber reg, var_types regType)
{
- m_RegistersWithConstants &= ~getRegMask(reg, regType);
+ m_RegistersWithConstants.RemoveRegNum(reg, regType);
}
void setConstantReg(regNumber reg, var_types regType)
{
- m_RegistersWithConstants |= getRegMask(reg, regType);
+ m_RegistersWithConstants.AddRegNum(reg, regType);
}
bool isRegConstant(regNumber reg, var_types regType)
{
- reg = getRegForType(reg, regType);
- regMaskTP regMask = getRegMask(reg, regType);
- return (m_RegistersWithConstants & regMask) == regMask;
+ reg = getRegForType(reg, regType);
+ return m_RegistersWithConstants.IsRegNumPresent(reg, regType);
}
- regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition);
+ regMaskOnlyOne getMatchingConstants(regMaskOnlyOne mask, Interval* currentInterval, RefPosition* refPosition);
- regMaskTP fixedRegs;
LsraLocation nextFixedRef[REG_COUNT];
void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition);
LsraLocation getNextFixedRef(regNumber regNum, var_types regType)
@@ -1924,36 +1928,35 @@ class LinearScan : public LinearScanInterface
}
weight_t spillCost[REG_COUNT];
- regMaskTP regsBusyUntilKill;
- regMaskTP regsInUseThisLocation;
- regMaskTP regsInUseNextLocation;
+ AllRegsMask regsBusyUntilKill;
+ AllRegsMask regsInUseThisLocation;
+ AllRegsMask regsInUseNextLocation;
#ifdef TARGET_ARM64
- regMaskTP consecutiveRegsInUseThisLocation;
+ regMaskFloat consecutiveRegsInUseThisLocation;
#endif
bool isRegBusy(regNumber reg, var_types regType)
{
- regMaskTP regMask = getRegMask(reg, regType);
- return (regsBusyUntilKill & regMask) != RBM_NONE;
+ return regsBusyUntilKill.IsRegNumPresent(reg, regType);
}
void setRegBusyUntilKill(regNumber reg, var_types regType)
{
- regsBusyUntilKill |= getRegMask(reg, regType);
+ regsBusyUntilKill.AddRegNum(reg, regType);
}
void clearRegBusyUntilKill(regNumber reg)
{
- regsBusyUntilKill &= ~genRegMask(reg);
+ regsBusyUntilKill.RemoveRegNumFromMask(reg);
}
bool isRegInUse(regNumber reg, var_types regType)
{
- regMaskTP regMask = getRegMask(reg, regType);
- return (regsInUseThisLocation & regMask) != RBM_NONE;
+ regMaskOnlyOne regMask = getRegMask(reg, regType);
+ return regsInUseThisLocation.IsRegNumInMask(reg ARM_ARG(regType));
}
void resetRegState()
{
resetAvailableRegs();
- regsBusyUntilKill = RBM_NONE;
+ regsBusyUntilKill.Clear();
}
bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition);
@@ -2013,13 +2016,13 @@ class LinearScan : public LinearScanInterface
bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode);
bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode);
- RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0);
+ RefPosition* BuildUse(GenTree* operand, regMaskOnlyOne candidates = RBM_NONE, int multiRegIdx = 0);
void setDelayFree(RefPosition* use);
- int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
- int BuildCastUses(GenTreeCast* cast, regMaskTP candidates);
+ int BuildBinaryUses(GenTreeOp* node, regMaskOnlyOne candidates = RBM_NONE);
+ int BuildCastUses(GenTreeCast* cast, regMaskOnlyOne candidates);
#ifdef TARGET_XARCH
- int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE);
- inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree);
+ int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskOnlyOne candidates = RBM_NONE);
+ inline regMaskFloat BuildEvexIncompatibleMask(GenTree* tree);
#endif // !TARGET_XARCH
int BuildSelect(GenTreeOp* select);
// This is the main entry point for building the RefPositions for a node.
@@ -2031,18 +2034,25 @@ class LinearScan : public LinearScanInterface
bool supportsSpecialPutArg();
int BuildSimple(GenTree* tree);
- int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
+ int BuildOperandUses(GenTree* node, regMaskOnlyOne candidates = RBM_NONE);
void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode);
- int BuildDelayFreeUses(GenTree* node,
- GenTree* rmwNode = nullptr,
- regMaskTP candidates = RBM_NONE,
- RefPosition** useRefPosition = nullptr);
- int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
- int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE);
+ int BuildDelayFreeUses(GenTree* node,
+ GenTree* rmwNode = nullptr,
+ regMaskOnlyOne candidates = RBM_NONE,
+ RefPosition** useRefPosition = nullptr);
+ int BuildIndirUses(GenTreeIndir* indirTree, regMaskOnlyOne candidates = RBM_NONE);
+ int BuildAddrUses(GenTree* addr, regMaskOnlyOne candidates = RBM_NONE);
void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
- RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0);
- void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE);
- void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask);
+ RefPosition* BuildDef(GenTree* tree, regMaskOnlyOne dstCandidates = RBM_NONE, int multiRegIdx = 0);
+ void BuildDefs(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates = RBM_NONE);
+ void BuildCallDefs(GenTree* tree, int dstCount, REF_AllRegsMask dstCandidates);
+ void BuildKills(GenTree* tree, CONSTREF_AllRegsMask killMask);
+#ifdef TARGET_ARMARCH
+ void BuildDefWithKills(GenTree* tree, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask);
+#else
+ void BuildDefWithKills(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask);
+#endif
+ void BuildCallDefsWithKills(GenTree* tree, int dstCount, REF_AllRegsMask dstCandidates, REF_AllRegsMask killMask);
int BuildReturn(GenTree* tree);
#ifdef TARGET_XARCH
@@ -2114,29 +2124,29 @@ class LinearScan : public LinearScanInterface
#endif // FEATURE_ARG_SPLIT
int BuildLclHeap(GenTree* tree);
-#if defined(TARGET_AMD64)
- regMaskTP rbmAllFloat;
- regMaskTP rbmFltCalleeTrash;
+ // #if defined(TARGET_AMD64)
+ regMaskFloat rbmAllFloat;
+ regMaskFloat rbmFltCalleeTrash;
- FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const
+ FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const
{
return this->rbmAllFloat;
}
- FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const
+ FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const
{
return this->rbmFltCalleeTrash;
}
-#endif // TARGET_AMD64
+ // #endif // TARGET_AMD64
#if defined(TARGET_XARCH)
- regMaskTP rbmAllMask;
- regMaskTP rbmMskCalleeTrash;
+ regMaskPredicate rbmAllMask;
+ regMaskPredicate rbmMskCalleeTrash;
- FORCEINLINE regMaskTP get_RBM_ALLMASK() const
+ FORCEINLINE regMaskPredicate get_RBM_ALLMASK() const
{
return this->rbmAllMask;
}
- FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const
+ FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
@@ -2155,9 +2165,9 @@ class LinearScan : public LinearScanInterface
// NOTE: we currently don't need a LinearScan `this` pointer for this definition, and some callers
// don't have one available, so make it static.
//
- static FORCEINLINE regMaskTP calleeSaveRegs(RegisterType rt)
+ static FORCEINLINE regMaskOnlyOne calleeSaveRegs(RegisterType rt)
{
- static const regMaskTP varTypeCalleeSaveRegs[] = {
+ static const regMaskOnlyOne varTypeCalleeSaveRegs[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) csr,
#include "typelist.h"
#undef DEF_TP
@@ -2171,16 +2181,16 @@ class LinearScan : public LinearScanInterface
// Not all of the callee trash values are constant, so don't declare this as a method local static
// doing so results in significantly more complex codegen and we'd rather just initialize this once
// as part of initializing LSRA instead
- regMaskTP varTypeCalleeTrashRegs[TYP_COUNT];
+ regMaskOnlyOne varTypeCalleeTrashRegs[TYP_COUNT];
#endif // TARGET_XARCH
//------------------------------------------------------------------------
// callerSaveRegs: Get the set of caller-save registers of the given RegisterType
//
- FORCEINLINE regMaskTP callerSaveRegs(RegisterType rt) const
+ FORCEINLINE regMaskOnlyOne callerSaveRegs(RegisterType rt) const
{
#if !defined(TARGET_XARCH)
- static const regMaskTP varTypeCalleeTrashRegs[] = {
+ static const regMaskOnlyOne varTypeCalleeTrashRegs[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr,
#include "typelist.h"
#undef DEF_TP
@@ -2207,14 +2217,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
class Interval : public Referenceable
{
public:
- Interval(RegisterType registerType, regMaskTP registerPreferences)
- : registerPreferences(registerPreferences)
+ Interval(RegisterType registerType, regMaskOnlyOne registerPreferences)
+ : Referenceable(registerType)
+ , registerPreferences(registerPreferences)
, registerAversion(RBM_NONE)
, relatedInterval(nullptr)
, assignedReg(nullptr)
, varNum(0)
, physReg(REG_COUNT)
- , registerType(registerType)
, isActive(false)
, isLocalVar(false)
, isSplit(false)
@@ -2236,6 +2246,7 @@ class Interval : public Referenceable
#ifdef DEBUG
, intervalIndex(0)
#endif
+
{
}
@@ -2251,10 +2262,10 @@ class Interval : public Referenceable
void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
// Fixed registers for which this Interval has a preference
- regMaskTP registerPreferences;
+ regMaskOnlyOne registerPreferences;
// Registers that should be avoided for this interval
- regMaskTP registerAversion;
+ regMaskOnlyOne registerAversion;
// The relatedInterval is:
// - for any other interval, it is the interval to which this interval
@@ -2271,8 +2282,6 @@ class Interval : public Referenceable
// The register to which it is currently assigned.
regNumber physReg;
- RegisterType registerType;
-
// Is this Interval currently in a register and live?
bool isActive;
@@ -2403,12 +2412,12 @@ class Interval : public Referenceable
// definitions. This method will return the current assigned register if any, or
// the 'registerPreferences' otherwise.
//
- regMaskTP getCurrentPreferences()
+ regMaskOnlyOne getCurrentPreferences()
{
return (assignedReg == nullptr) ? registerPreferences : genRegMask(assignedReg->regNum);
}
- void mergeRegisterPreferences(regMaskTP preferences)
+ void mergeRegisterPreferences(regMaskOnlyOne preferences)
{
// We require registerPreferences to have been initialized.
assert(registerPreferences != RBM_NONE);
@@ -2423,7 +2432,7 @@ class Interval : public Referenceable
return;
}
- regMaskTP commonPreferences = (registerPreferences & preferences);
+ regMaskOnlyOne commonPreferences = (registerPreferences & preferences);
if (commonPreferences != RBM_NONE)
{
registerPreferences = commonPreferences;
@@ -2458,11 +2467,11 @@ class Interval : public Referenceable
// Keep only the callee-save preferences, if not empty.
// Otherwise, take the union of the preferences.
- regMaskTP newPreferences = registerPreferences | preferences;
+ regMaskOnlyOne newPreferences = registerPreferences | preferences;
if (preferCalleeSave)
{
- regMaskTP calleeSaveMask = (LinearScan::calleeSaveRegs(this->registerType) & newPreferences);
+ regMaskOnlyOne calleeSaveMask = (LinearScan::calleeSaveRegs(this->registerType) & newPreferences);
if (calleeSaveMask != RBM_NONE)
{
newPreferences = calleeSaveMask;
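A small worked example of the merge rule above, using made-up mask values rather than real register sets: the union is kept unless restricting it to callee-saved registers still leaves something.

#include <cassert>
#include <cstdint>

int main()
{
    const uint64_t calleeSaved = 0xF0; // pretend r4-r7 are callee-saved
    uint64_t existingPrefs     = 0x0C; // r2, r3
    uint64_t incomingPrefs     = 0x30; // r4, r5
    bool preferCalleeSave      = true;

    uint64_t merged = existingPrefs | incomingPrefs; // 0x3C
    if (preferCalleeSave)
    {
        uint64_t calleeSavedSubset = merged & calleeSaved; // 0x30
        if (calleeSavedSubset != 0)
        {
            merged = calleeSavedSubset; // keep only the callee-saved part
        }
    }

    assert(merged == 0x30);
    return 0;
}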
@@ -2477,7 +2486,7 @@ class Interval : public Referenceable
// An exception is made in the case where either the existing or the new
// preferences are all callee-save, in which case we "prefer" the callee-save
- void updateRegisterPreferences(regMaskTP preferences)
+ void updateRegisterPreferences(regMaskOnlyOne preferences)
{
// If this interval is preferenced, that interval may have already been assigned a
// register, and we want to include that in the preferences.
@@ -2515,7 +2524,9 @@ class RefPosition
// Prior to the allocation pass, registerAssignment captures the valid registers
// for this RefPosition.
// After the allocation pass, this contains the actual assignment
- regMaskTP registerAssignment;
+ // TODO-future: This should really be a union, holding a `mask` before the allocation pass and
+ // a regNumber directly after the allocation pass, to avoid calling assignedReg();
+ regMaskOnlyOne registerAssignment;
RefType refType;
@@ -2686,7 +2697,12 @@ class RefPosition
return REG_NA;
}
- return genRegNumFromMask(registerAssignment);
+ return genRegNumFromMask(registerAssignment MORE_THAN_64_REG_ARG(getRegisterType()));
+ }
+
+ RegisterType getRegisterType()
+ {
+ return referent->registerType;
}
// Returns true if it is a reference on a GenTree node.
@@ -2777,7 +2793,7 @@ class RefPosition
// isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
// specified by the given mask
- bool isFixedRefOfRegMask(regMaskTP regMask)
+ bool isFixedRefOfRegMask(regMaskOnlyOne regMask)
{
assert(genMaxOneBit(regMask));
return (registerAssignment == regMask);
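The genMaxOneBit precondition above relies on the usual single-bit-mask invariant; a standalone sketch with generic helpers (not the JIT's genRegNumFromMask) shows why the fixed-reference check can be a plain equality.

#include <cassert>
#include <cstdint>

static bool MaxOneBit(uint64_t mask)
{
    return (mask & (mask - 1)) == 0; // true for 0 or a single power of two
}

static unsigned RegNumFromMask(uint64_t mask)
{
    assert(mask != 0 && MaxOneBit(mask));
    unsigned index = 0;
    while ((mask & 1) == 0)
    {
        mask >>= 1;
        index++;
    }
    return index;
}

int main()
{
    uint64_t assignment = 1ull << 5;   // "r5" was selected
    assert(MaxOneBit(assignment));
    assert(RegNumFromMask(assignment) == 5);
    assert(assignment == (1ull << 5)); // fixed-ref check is a plain equality
    assert(!MaxOneBit((1ull << 5) | (1ull << 6)));
    return 0;
}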
diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp
index 2192265984d68e..7fbc07adcab4be 100644
--- a/src/coreclr/jit/lsraarm.cpp
+++ b/src/coreclr/jit/lsraarm.cpp
@@ -192,11 +192,9 @@ int LinearScan::BuildShiftLongCarry(GenTree* tree)
int LinearScan::BuildNode(GenTree* tree)
{
assert(!tree->isContained());
- int srcCount;
- int dstCount = 0;
- regMaskTP dstCandidates = RBM_NONE;
- regMaskTP killMask = RBM_NONE;
- bool isLocalDefUse = false;
+ int srcCount;
+ int dstCount = 0;
+ bool isLocalDefUse = false;
// Reset the build-related members of LinearScan.
clearBuildState();
@@ -367,14 +365,15 @@ int LinearScan::BuildNode(GenTree* tree)
break;
case GT_RETURNTRAP:
+ {
// this just turns into a compare of its child with an int
// + a conditional call
srcCount = 1;
assert(dstCount == 0);
BuildUse(tree->gtGetOp1());
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC));
break;
+ }
case GT_MUL:
if (tree->gtOverflow())
@@ -422,7 +421,7 @@ int LinearScan::BuildNode(GenTree* tree)
// This kills GC refs in callee save regs
srcCount = 0;
assert(dstCount == 0);
- BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
+ BuildKills(tree, compiler->AllRegsMask_NONE);
break;
case GT_LONG:
@@ -467,10 +466,11 @@ int LinearScan::BuildNode(GenTree* tree)
break;
case GT_RETURN:
+ {
srcCount = BuildReturn(tree);
- killMask = getKillSetForReturn();
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, getKillSetForReturn());
break;
+ }
case GT_RETFILT:
assert(dstCount == 0);
@@ -666,8 +666,8 @@ int LinearScan::BuildNode(GenTree* tree)
case GT_BITCAST:
{
assert(dstCount == 1);
- regNumber argReg = tree->GetRegNum();
- regMaskTP argMask = RBM_NONE;
+ regNumber argReg = tree->GetRegNum();
+ regMaskOnlyOne argMask = RBM_NONE;
if (argReg != REG_COUNT)
{
argMask = genRegMask(argReg);
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index 1096d7f11701c5..00f6429d6ecd74 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -176,9 +176,9 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition
// From `candidates`, the mask of each series of `registersNeeded` consecutive registers, with just the
// first bit set.
//
-regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
- unsigned int registersNeeded,
- regMaskTP* allConsecutiveCandidates)
+regMaskFloat LinearScan::filterConsecutiveCandidates(regMaskFloat candidates,
+ unsigned int registersNeeded,
+ regMaskFloat* allConsecutiveCandidates)
{
if (BitOperations::PopCount(candidates) < registersNeeded)
{
@@ -187,16 +187,16 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
return RBM_NONE;
}
- regMaskTP currAvailableRegs = candidates;
- regMaskTP overallResult = RBM_NONE;
- regMaskTP consecutiveResult = RBM_NONE;
+ regMaskFloat currAvailableRegs = candidates;
+ regMaskFloat overallResult = RBM_NONE;
+ regMaskFloat consecutiveResult = RBM_NONE;
// At this point, if 'n' registers are needed and Rm, Rm+1, Rm+2, ..., Rm+k-1 are
// available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it
// is safe to start the assignment at any of those registers, but not beyond that.
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \
- regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \
- regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \
+ regMaskFloat selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \
+ regMaskFloat selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \
consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \
overallResult |= availableRegistersMask;
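A worked, standalone instance of the AppendConsecutiveMask arithmetic (register indices and masks invented for the example): with v4-v9 free and three consecutive registers requested, only v4-v7 remain as legal starting points.

#include <cassert>
#include <cstdint>

int main()
{
    const unsigned registersNeeded        = 3;
    const unsigned regAvailableStartIndex = 4;     // v4..v9 are free
    const unsigned regAvailableEndIndex   = 10;    // v10 is the first busy register
    const uint64_t availableRegistersMask = 0x3F0; // bits 4-9

    uint64_t selectionStartMask = (1ull << regAvailableStartIndex) - 1;                       // 0x00F
    uint64_t selectionEndMask   = (1ull << (regAvailableEndIndex - registersNeeded + 1)) - 1; // 0x0FF

    uint64_t consecutiveResult = availableRegistersMask & (selectionEndMask & ~selectionStartMask);

    // Only v4, v5, v6 and v7 can start a run of 3 inside v4..v9.
    assert(consecutiveResult == 0x0F0);
    return 0;
}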
@@ -206,10 +206,10 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
{
// From LSB, find the first available register (bit `1`)
regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs));
- regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1;
+ regMaskFloat startMask = (1ULL << regAvailableStartIndex) - 1;
// Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`.
- regMaskTP maskProcessed = ~(currAvailableRegs | startMask);
+ regMaskFloat maskProcessed = ~(currAvailableRegs | startMask);
// From regAvailableStart, find the first unavailable register (bit `0`).
if (maskProcessed == RBM_NONE)
@@ -225,7 +225,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
{
regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed));
}
- regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1;
+ regMaskFloat endMask = (1ULL << regAvailableEndIndex) - 1;
// Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available.
// If they are equal to or greater than our register requirements, then add all of them to the result.
@@ -236,7 +236,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
currAvailableRegs &= ~endMask;
} while (currAvailableRegs != RBM_NONE);
- regMaskTP v0_v31_mask = RBM_V0 | RBM_V31;
+ regMaskFloat v0_v31_mask = RBM_V0 | RBM_V31;
if ((candidates & v0_v31_mask) == v0_v31_mask)
{
// Finally, check for round robin case where sequence of last register
@@ -260,14 +260,14 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
}
case 3:
{
- regMaskTP v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31;
+ regMaskFloat v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31;
if ((candidates & v0_v30_v31_mask) != RBM_NONE)
{
consecutiveResult |= RBM_V30;
overallResult |= v0_v30_v31_mask;
}
- regMaskTP v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31;
+ regMaskFloat v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31;
if ((candidates & v0_v1_v31_mask) != RBM_NONE)
{
consecutiveResult |= RBM_V31;
@@ -277,21 +277,21 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
}
case 4:
{
- regMaskTP v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
+ regMaskFloat v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
if ((candidates & v0_v29_v30_v31_mask) != RBM_NONE)
{
consecutiveResult |= RBM_V29;
overallResult |= v0_v29_v30_v31_mask;
}
- regMaskTP v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
+ regMaskFloat v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
if ((candidates & v0_v1_v30_v31_mask) != RBM_NONE)
{
consecutiveResult |= RBM_V30;
overallResult |= v0_v1_v30_v31_mask;
}
- regMaskTP v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
+ regMaskFloat v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
if ((candidates & v0_v1_v2_v31_mask) != RBM_NONE)
{
consecutiveResult |= RBM_V31;
@@ -323,23 +323,24 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
// Returns:
// Filtered candidates that need less spilling.
//
-regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded)
+regMaskFloat LinearScan::filterConsecutiveCandidatesForSpill(regMaskFloat consecutiveCandidates,
+ unsigned int registersNeeded)
{
assert(consecutiveCandidates != RBM_NONE);
assert((registersNeeded >= 2) && (registersNeeded <= 4));
- regMaskTP consecutiveResultForBusy = RBM_NONE;
- regMaskTP unprocessedRegs = consecutiveCandidates;
- unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0;
- int maxSpillRegs = registersNeeded;
- regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1;
+ regMaskFloat consecutiveResultForBusy = RBM_NONE;
+ regMaskFloat unprocessedRegs = consecutiveCandidates;
+ unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0;
+ int maxSpillRegs = registersNeeded;
+ regMaskFloat registersNeededMask = (1ULL << registersNeeded) - 1;
do
{
// From LSB, find the first available register (bit `1`)
regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs));
// For the current range, find how many registers are free vs. busy
- regMaskTP maskForCurRange = RBM_NONE;
- bool shouldCheckForRounding = false;
+ regMaskFloat maskForCurRange = RBM_NONE;
+ bool shouldCheckForRounding = false;
switch (registersNeeded)
{
case 2:
@@ -363,7 +364,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC
}
maskForCurRange |= (registersNeededMask << regAvailableStartIndex);
- maskForCurRange &= m_AvailableRegs;
+ maskForCurRange &= m_AvailableRegs.floatRegs(compiler);
if (maskForCurRange != RBM_NONE)
{
@@ -413,13 +414,13 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC
// allCandidates = 0x1C080D0F00000000, the consecutive register mask returned
// will be 0x400000300000000.
//
-regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
- RefPosition* refPosition,
- regMaskTP* busyCandidates)
+regMaskFloat LinearScan::getConsecutiveCandidates(regMaskFloat allCandidates,
+ RefPosition* refPosition,
+ regMaskFloat* busyCandidates)
{
assert(compiler->info.compNeedsConsecutiveRegisters);
assert(refPosition->isFirstRefPositionOfConsecutiveRegisters());
- regMaskTP freeCandidates = allCandidates & m_AvailableRegs;
+ regMaskFloat freeCandidates = allCandidates & m_AvailableRegs.floatRegs(compiler);
#ifdef DEBUG
if (getStressLimitRegs() != LSRA_LIMIT_NONE)
@@ -431,12 +432,12 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
#endif
*busyCandidates = RBM_NONE;
- regMaskTP overallResult;
+ regMaskFloat overallResult;
unsigned int registersNeeded = refPosition->regCount;
if (freeCandidates != RBM_NONE)
{
- regMaskTP consecutiveResultForFree =
+ regMaskFloat consecutiveResultForFree =
filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult);
if (consecutiveResultForFree != RBM_NONE)
@@ -446,10 +447,10 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
// register out of the `consecutiveResult` is available for the first RefPosition, then just use
// that. This will avoid unnecessary copies.
- regNumber firstRegNum = REG_NA;
- regNumber prevRegNum = REG_NA;
- int foundCount = 0;
- regMaskTP foundRegMask = RBM_NONE;
+ regNumber firstRegNum = REG_NA;
+ regNumber prevRegNum = REG_NA;
+ int foundCount = 0;
+ regMaskFloat foundRegMask = RBM_NONE;
RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition);
assert(consecutiveRefPosition != nullptr);
@@ -488,7 +489,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
if (foundCount != 0)
{
assert(firstRegNum != REG_NA);
- regMaskTP remainingRegsMask = ((1ULL << (registersNeeded - foundCount)) - 1) << (firstRegNum - 1);
+ regMaskFloat remainingRegsMask = ((1ULL << (registersNeeded - foundCount)) - 1) << (firstRegNum - 1);
if ((overallResult & remainingRegsMask) != RBM_NONE)
{
@@ -528,33 +529,33 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
// try_FAR_NEXT_REF(), etc. here, which would complicate things. Instead, we just go with option #1 and select
// registers based on which choice requires the fewest registers to be spilled.
//
- regMaskTP overallResultForBusy;
- regMaskTP consecutiveResultForBusy =
+ regMaskFloat overallResultForBusy;
+ regMaskFloat consecutiveResultForBusy =
filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy);
*busyCandidates = consecutiveResultForBusy;
// Check if we can find better registers among consecutiveResultForBusy.
- if ((m_AvailableRegs & overallResultForBusy) != RBM_NONE)
+ if ((m_AvailableRegs.floatRegs(compiler) & overallResultForBusy) != RBM_NONE)
{
// `overallResultForBusy` contains the mask of entire series that can be the consecutive candidates.
// If there is an overlap of that with free registers, then try to find a series that will need least
// registers spilling as mentioned in #1 above.
- regMaskTP optimalConsecutiveResultForBusy =
+ regMaskFloat optimalConsecutiveResultForBusy =
filterConsecutiveCandidatesForSpill(consecutiveResultForBusy, registersNeeded);
if (optimalConsecutiveResultForBusy != RBM_NONE)
{
*busyCandidates = optimalConsecutiveResultForBusy;
}
- else if ((m_AvailableRegs & consecutiveResultForBusy) != RBM_NONE)
+ else if ((m_AvailableRegs.floatRegs(compiler) & consecutiveResultForBusy) != RBM_NONE)
{
+ // We did not find free consecutive candidates, however we found some registers among the
+ // `allCandidates` that are a mix of free and busy. Since `busyCandidates` only has a bit set for the
+ // first register of each such series, return the mask that starts with a free register, if possible.
+ // The busy registers will be spilled during assignment of the subsequent RefPositions.
- *busyCandidates = (m_AvailableRegs & consecutiveResultForBusy);
+ *busyCandidates = (m_AvailableRegs.floatRegs(compiler) & consecutiveResultForBusy);
}
}
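A toy illustration of that fallback (masks invented for the example): among the possible starting registers of partially busy runs, keep the ones that are themselves free so the first assignment needs no spill.

#include <cassert>
#include <cstdint>

int main()
{
    // Bit i set in `starts` means a run of the needed length may begin at v(i)
    // (some of those runs contain busy registers).
    uint64_t starts = (1ull << 2) | (1ull << 6); // runs can start at v2 or v6
    uint64_t free   = (1ull << 2) | (1ull << 3); // only v2 and v3 are currently free

    uint64_t preferred = starts & free;
    if (preferred != 0)
    {
        starts = preferred; // start the sequence on a free register when possible
    }

    assert(starts == (1ull << 2));
    return 0;
}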
@@ -581,10 +582,9 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
int LinearScan::BuildNode(GenTree* tree)
{
assert(!tree->isContained());
- int srcCount;
- int dstCount = 0;
- regMaskTP killMask = RBM_NONE;
- bool isLocalDefUse = false;
+ int srcCount;
+ int dstCount = 0;
+ bool isLocalDefUse = false;
// Reset the build-related members of LinearScan.
clearBuildState();
@@ -663,17 +663,18 @@ int LinearScan::BuildNode(GenTree* tree)
break;
case GT_PROF_HOOK:
+ {
srcCount = 0;
assert(dstCount == 0);
- killMask = getKillSetForProfilerHook();
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, getKillSetForProfilerHook());
break;
+ }
case GT_START_PREEMPTGC:
// This kills GC refs in callee save regs
srcCount = 0;
assert(dstCount == 0);
- BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
+ BuildKills(tree, compiler->AllRegsMask_NONE);
break;
case GT_CNS_DBL:
@@ -736,10 +737,11 @@ int LinearScan::BuildNode(GenTree* tree)
break;
case GT_RETURN:
+ {
srcCount = BuildReturn(tree);
- killMask = getKillSetForReturn();
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, getKillSetForReturn());
break;
+ }
case GT_RETFILT:
assert(dstCount == 0);
@@ -823,14 +825,15 @@ int LinearScan::BuildNode(GenTree* tree)
break;
case GT_RETURNTRAP:
+ {
// this just turns into a compare of its child with an int
// + a conditional call
BuildUse(tree->gtGetOp1());
srcCount = 1;
assert(dstCount == 0);
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC));
break;
+ }
case GT_MOD:
case GT_UMOD:
@@ -1329,9 +1332,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
const HWIntrinsic intrin(intrinsicTree);
- int srcCount = 0;
- int dstCount = 0;
- regMaskTP dstCandidates = RBM_NONE;
+ int srcCount = 0;
+ int dstCount = 0;
if (HWIntrinsicInfo::IsMultiReg(intrin.id))
{
@@ -1547,7 +1549,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id))
{
- regMaskTP predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK;
+ regMaskPredicate predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK;
srcCount += BuildOperandUses(intrin.op1, predMask);
}
else if (intrinsicTree->OperIsMemoryLoadOrStore())
@@ -1804,11 +1806,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
if ((dstCount == 1) || (dstCount == 2))
{
- BuildDef(intrinsicTree, dstCandidates);
+ BuildDef(intrinsicTree);
if (dstCount == 2)
{
- BuildDef(intrinsicTree, dstCandidates, 1);
+ BuildDef(intrinsicTree, RBM_NONE, 1);
}
}
else
diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp
index c2b8b74406584e..249d24d424722e 100644
--- a/src/coreclr/jit/lsraarmarch.cpp
+++ b/src/coreclr/jit/lsraarmarch.cpp
@@ -129,7 +129,6 @@ int LinearScan::BuildCall(GenTreeCall* call)
{
bool hasMultiRegRetVal = false;
const ReturnTypeDesc* retTypeDesc = nullptr;
- regMaskTP dstCandidates = RBM_NONE;
int srcCount = 0;
int dstCount = 0;
@@ -148,8 +147,8 @@ int LinearScan::BuildCall(GenTreeCall* call)
}
}
- GenTree* ctrlExpr = call->gtControlExpr;
- regMaskTP ctrlExprCandidates = RBM_NONE;
+ GenTree* ctrlExpr = call->gtControlExpr;
+ regMaskGpr ctrlExprCandidates = RBM_NONE;
if (call->gtCallType == CT_INDIRECT)
{
// either gtControlExpr != null or gtCallAddr != null.
@@ -183,7 +182,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
{
// For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM
// and will load call address into the temp register from this register.
- regMaskTP candidates = RBM_NONE;
+ regMaskGpr candidates = RBM_NONE;
if (call->IsFastTailCall())
{
candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH;
@@ -205,14 +204,15 @@ int LinearScan::BuildCall(GenTreeCall* call)
// the target. We do not handle these constraints on the same
// refposition too well so we help ourselves a bit here by forcing the
// null check with LR.
- regMaskTP candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE;
+ regMaskGpr candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE;
buildInternalIntRegisterDefForNode(call, candidates);
}
#endif // TARGET_ARM
- RegisterType registerType = call->TypeGet();
-
// Set destination candidates for return value of the call.
+ AllRegsMask dstReturnCandidates;
+ regMaskOnlyOne dstCandidates = RBM_NONE;
+ RegisterType registerType = call->TypeGet();
#ifdef TARGET_ARM
if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
@@ -226,7 +226,8 @@ int LinearScan::BuildCall(GenTreeCall* call)
if (hasMultiRegRetVal)
{
assert(retTypeDesc != nullptr);
- dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
+ dstReturnCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
+ assert((int)dstReturnCandidates.Count() == dstCount);
}
else if (varTypeUsesFloatArgReg(registerType))
{
@@ -390,14 +391,30 @@ int LinearScan::BuildCall(GenTreeCall* call)
buildInternalRegisterUses();
// Now generate defs and kills.
- regMaskTP killMask = getKillSetForCall(call);
- BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
+ AllRegsMask killMask = getKillSetForCall(call);
+ if (dstCount > 0)
+ {
+ if (hasMultiRegRetVal)
+ {
+ assert(dstReturnCandidates.Count() > 0);
+ BuildCallDefsWithKills(call, dstCount, dstReturnCandidates, killMask);
+ }
+ else
+ {
+ assert(dstCount == 1);
+ BuildDefWithKills(call, dstCandidates, killMask);
+ }
+ }
+ else
+ {
+ BuildKills(call, killMask);
+ }
#ifdef SWIFT_SUPPORT
if (call->HasSwiftErrorHandling())
{
// Tree is a Swift call with error handling; error register should have been killed
- assert((killMask & RBM_SWIFT_ERROR) != 0);
+ assert((killMask.gprRegs() & RBM_SWIFT_ERROR) != 0);
// After a Swift call that might throw returns, we expect the error register to be consumed
// by a GT_SWIFT_ERROR node. However, we want to ensure the error register won't be trashed
@@ -417,11 +434,36 @@ int LinearScan::BuildCall(GenTreeCall* call)
#endif // SWIFT_SUPPORT
// No args are placed in registers anymore.
- placedArgRegs = RBM_NONE;
+ placedArgRegs.Clear();
numPlacedArgLocals = 0;
return srcCount;
}
+//------------------------------------------------------------------------
+// BuildDefWithKills: Build one RefTypeDef RefPosition for the given node,
+// as well as kills as specified by the given mask.
+//
+// Arguments:
+// tree - The call node that defines a register
+// dstCandidates - The candidate registers for the definition
+// killMask - The mask of registers killed by this node
+//
+// Notes:
+// Adds the RefInfo for the definitions to the defList.
+// The def and kill functionality is folded into a single method so that the
+// save and restores of upper vector registers can be bracketed around the def.
+//
+void LinearScan::BuildDefWithKills(GenTree* tree, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask)
+{
+ assert(!tree->AsCall()->HasMultiRegRetVal());
+ assert((int)genCountBits(dstCandidates) == 1);
+ assert(compiler->IsOnlyOneRegMask(dstCandidates));
+
+ // Build the kill RefPositions
+ BuildKills(tree, killMask);
+ BuildDef(tree, dstCandidates);
+}
+
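The BuildCall change above routes call results through three paths; a stubbed, standalone sketch of that dispatch (function names and the exact ordering inside each stub are illustrative only):

#include <cassert>
#include <string>
#include <vector>

static std::vector<std::string> trace;

static void BuildKillsStub()             { trace.push_back("kills"); }
static void BuildDefStub()               { trace.push_back("def"); }
static void BuildCallDefsStub(int count) { for (int i = 0; i < count; i++) trace.push_back("def"); }

static void BuildCallResult(int dstCount, bool hasMultiRegRetVal)
{
    if (dstCount > 0)
    {
        if (hasMultiRegRetVal)
        {
            BuildKillsStub();              // per-register defs for a multi-reg return
            BuildCallDefsStub(dstCount);
        }
        else
        {
            assert(dstCount == 1);
            BuildKillsStub();              // single return register: kills folded with one def
            BuildDefStub();
        }
    }
    else
    {
        BuildKillsStub();                  // void call: kills only
    }
}

int main()
{
    BuildCallResult(2, true);
    BuildCallResult(1, false);
    BuildCallResult(0, false);
    assert(trace.size() == 6); // kills,def,def, kills,def, kills
    return 0;
}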
//------------------------------------------------------------------------
// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
//
@@ -528,14 +570,15 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
// Registers for split argument corresponds to source
int dstCount = argNode->gtNumRegs;
- regNumber argReg = argNode->GetRegNum();
- regMaskTP argMask = RBM_NONE;
+ regNumber argReg = argNode->GetRegNum();
+ regMaskGpr argMask = RBM_NONE;
for (unsigned i = 0; i < argNode->gtNumRegs; i++)
{
regNumber thisArgReg = (regNumber)((unsigned)argReg + i);
argMask |= genRegMask(thisArgReg);
argNode->SetRegNumByIdx(thisArgReg, i);
}
+ assert(compiler->IsGprRegMask(argMask));
if (src->OperGet() == GT_FIELD_LIST)
{
@@ -569,7 +612,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
// go into registers.
for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++)
{
- regMaskTP sourceMask = RBM_NONE;
+ regMaskOnlyOne sourceMask = RBM_NONE;
if (sourceRegCount < argNode->gtNumRegs)
{
sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount));
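For reference, the contiguous argument-register mask accumulated for the split argument above boils down to this kind of loop (register numbering invented for the sketch):

#include <cassert>
#include <cstdint>

int main()
{
    unsigned argReg  = 1; // pretend the split arg starts at r1
    unsigned numRegs = 3; // and occupies r1, r2, r3

    uint64_t argMask = 0;
    for (unsigned i = 0; i < numRegs; i++)
    {
        argMask |= (1ull << (argReg + i)); // one bit per consecutive register
    }

    assert(argMask == 0b1110); // bits for r1, r2, r3
    return 0;
}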
@@ -627,9 +670,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
GenTree* srcAddrOrFill = nullptr;
- regMaskTP dstAddrRegMask = RBM_NONE;
- regMaskTP srcRegMask = RBM_NONE;
- regMaskTP sizeRegMask = RBM_NONE;
+ regMaskGpr dstAddrRegMask = RBM_NONE;
+ regMaskGpr srcRegMask = RBM_NONE;
+ regMaskGpr sizeRegMask = RBM_NONE;
if (blkNode->OperIsInitBlkOp())
{
@@ -686,7 +729,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// We don't need to materialize the struct size but we still need
// a temporary register to perform the sequence of loads and stores.
// We can't use the special Write Barrier registers, so exclude them from the mask
- regMaskTP internalIntCandidates =
+ regMaskGpr internalIntCandidates =
allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
@@ -821,6 +864,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
if (!dstAddr->isContained())
{
+ assert(compiler->IsGprRegMask(dstAddrRegMask));
+
useCount++;
BuildUse(dstAddr, dstAddrRegMask);
}
@@ -833,6 +878,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
if (!srcAddrOrFill->isContained())
{
+ assert(compiler->IsGprRegMask(srcRegMask));
+
useCount++;
BuildUse(srcAddrOrFill, srcRegMask);
}
@@ -842,9 +889,10 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
}
}
+ assert(compiler->IsGprRegMask(sizeRegMask));
+
buildInternalRegisterUses();
- regMaskTP killMask = getKillSetForBlockStore(blkNode);
- BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
+ BuildKills(blkNode, getKillSetForBlockStore(blkNode));
return useCount;
}
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 15726e201d8a5e..585badc93eea8e 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -250,15 +250,15 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de
{
assert(!interval->isLocalVar);
- RefPosition* useRefPosition = defRefPosition->nextRefPosition;
- regMaskTP defRegAssignment = defRefPosition->registerAssignment;
- regMaskTP useRegAssignment = useRefPosition->registerAssignment;
- RegRecord* defRegRecord = nullptr;
- RegRecord* useRegRecord = nullptr;
- regNumber defReg = REG_NA;
- regNumber useReg = REG_NA;
- bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE);
- bool useRegConflict = defRegConflict;
+ RefPosition* useRefPosition = defRefPosition->nextRefPosition;
+ regMaskOnlyOne defRegAssignment = defRefPosition->registerAssignment;
+ regMaskOnlyOne useRegAssignment = useRefPosition->registerAssignment;
+ RegRecord* defRegRecord = nullptr;
+ RegRecord* useRegRecord = nullptr;
+ regNumber defReg = REG_NA;
+ regNumber useReg = REG_NA;
+ bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE);
+ bool useRegConflict = defRegConflict;
// If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
// on it, or we will fail to ensure that the fixedReg is busy at the time the target
@@ -352,7 +352,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de
RegisterType regType = interval->registerType;
assert((getRegisterType(interval, defRefPosition) == regType) &&
(getRegisterType(interval, useRefPosition) == regType));
- regMaskTP candidates = allRegs(regType);
+ regMaskOnlyOne candidates = allRegs(regType);
defRefPosition->registerAssignment = candidates;
defRefPosition->isFixedRegRef = false;
return;
@@ -423,8 +423,8 @@ void LinearScan::checkConflictingDefUse(RefPosition* useRP)
// All defs must have a valid treeNode, but we check it below to be conservative.
assert(defRP->treeNode != nullptr);
- regMaskTP prevAssignment = defRP->registerAssignment;
- regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment);
+ regMaskOnlyOne prevAssignment = defRP->registerAssignment;
+ regMaskOnlyOne newAssignment = (prevAssignment & useRP->registerAssignment);
if (newAssignment != RBM_NONE)
{
if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
@@ -519,8 +519,10 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp)
// a new RefPosition
//
RefPosition* LinearScan::newRefPosition(
- regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskOnlyOne mask)
{
+ assert(compiler->IsOnlyOneRegMask(mask));
+
RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
RegRecord* regRecord = getRegisterRecord(reg);
@@ -554,13 +556,15 @@ RefPosition* LinearScan::newRefPosition(
// Return Value:
// a new RefPosition
//
-RefPosition* LinearScan::newRefPosition(Interval* theInterval,
- LsraLocation theLocation,
- RefType theRefType,
- GenTree* theTreeNode,
- regMaskTP mask,
- unsigned multiRegIdx /* = 0 */)
+RefPosition* LinearScan::newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskOnlyOne mask,
+ unsigned multiRegIdx /* = 0 */)
{
+ assert(compiler->IsOnlyOneRegMask(mask));
+
if (theInterval != nullptr)
{
if (mask == RBM_NONE)
@@ -600,7 +604,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
if (insertFixedRef)
{
- regNumber physicalReg = genRegNumFromMask(mask);
+ regNumber physicalReg = genRegNumFromMask(mask MORE_THAN_64_REG_ARG(theInterval->registerType));
RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
assert(theInterval != nullptr);
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
@@ -697,7 +701,10 @@ bool LinearScan::isContainableMemoryOp(GenTree* node)
// refType - the type of refposition
// isLastUse - true IFF this is a last use of the register
//
-void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
+void LinearScan::addRefsForPhysRegMask(CONSTREF_AllRegsMask mask,
+ LsraLocation currentLoc,
+ RefType refType,
+ bool isLastUse)
{
assert(refType == RefTypeKill);
@@ -712,9 +719,10 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc,
// modified until codegen, which is too late.
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true));
- for (regMaskTP candidates = mask; candidates != RBM_NONE;)
+ AllRegsMask maskForRefPositions = mask;
+ while (!maskForRefPositions.IsEmpty())
{
- regNumber reg = genFirstRegNumFromMaskAndToggle(candidates);
+ regNumber reg = genFirstRegNumFromMaskAndToggle(maskForRefPositions);
// This assumes that these are all "special" RefTypes that
// don't need to be recorded on the tree (hence treeNode is nullptr)
RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
@@ -738,12 +746,10 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc,
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
+CONSTREF_AllRegsMask LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
{
assert(tree->OperIs(GT_STOREIND));
- regMaskTP killMask = RBM_NONE;
-
GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree);
if (writeBarrierForm != GCInfo::WBF_NoBarrier)
{
@@ -753,16 +759,16 @@ regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
// the allocated register for the `data` operand. However, all the (x86) optimized
// helpers have the same kill set: EDX. And note that currently, only x86 can return
// `true` for genUseOptimizedWriteBarriers().
- killMask = RBM_CALLEE_TRASH_NOGC;
+ return compiler->AllRegsMask_CALLEE_TRASH_NOGC;
}
else
{
// Figure out which helper we're going to use, and then get the kill set for that helper.
CorInfoHelpFunc helper = compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(writeBarrierForm);
- killMask = compiler->compHelperCallKillSet(helper);
+ return compiler->compHelperCallKillSet(helper);
}
}
- return killMask;
+ return compiler->AllRegsMask_NONE;
}
//------------------------------------------------------------------------
@@ -773,9 +779,9 @@ regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode)
+regMaskGpr LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode)
{
- regMaskTP killMask = RBM_NONE;
+ regMaskGpr killMask = RBM_NONE;
#ifdef TARGET_XARCH
assert(shiftNode->OperIsShiftOrRotate());
GenTree* shiftBy = shiftNode->gtGetOp2();
@@ -795,9 +801,9 @@ regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode)
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode)
+regMaskGpr LinearScan::getKillSetForMul(GenTreeOp* mulNode)
{
- regMaskTP killMask = RBM_NONE;
+ regMaskGpr killMask = RBM_NONE;
#ifdef TARGET_XARCH
assert(mulNode->OperIsMul());
if (!mulNode->OperIs(GT_MUL) || (((mulNode->gtFlags & GTF_UNSIGNED) != 0) && mulNode->gtOverflowEx()))
@@ -816,9 +822,9 @@ regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode)
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node)
+regMaskGpr LinearScan::getKillSetForModDiv(GenTreeOp* node)
{
- regMaskTP killMask = RBM_NONE;
+ regMaskGpr killMask = RBM_NONE;
#ifdef TARGET_XARCH
assert(node->OperIs(GT_MOD, GT_DIV, GT_UMOD, GT_UDIV));
if (varTypeUsesIntReg(node->TypeGet()))
@@ -838,9 +844,10 @@ regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node)
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
+AllRegsMask LinearScan::getKillSetForCall(GenTreeCall* call)
{
- regMaskTP killMask = RBM_CALLEE_TRASH;
+ AllRegsMask killMask = compiler->AllRegsMask_CALLEE_TRASH;
+
#ifdef TARGET_X86
if (compiler->compFloatingPointUsed)
{
@@ -863,24 +870,23 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
// if there is no FP used, we can ignore the FP kills
if (!compiler->compFloatingPointUsed)
{
-#if defined(TARGET_XARCH)
- killMask &= ~(RBM_FLT_CALLEE_TRASH | RBM_MSK_CALLEE_TRASH);
-#else
- killMask &= ~RBM_FLT_CALLEE_TRASH;
-#endif // TARGET_XARCH
+ killMask.RemoveRegTypeFromMask(RBM_FLT_CALLEE_TRASH, TYP_FLOAT);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ killMask.RemoveRegTypeFromMask(RBM_MSK_CALLEE_TRASH, TYP_MASK);
+#endif // FEATURE_MASKED_HW_INTRINSICS
}
#ifdef TARGET_ARM
if (call->IsVirtualStub())
{
- killMask |= compiler->virtualStubParamInfo->GetRegMask();
+ killMask.AddGprRegMask(compiler->virtualStubParamInfo->GetRegMask());
}
#else // !TARGET_ARM
// Verify that the special virtual stub call registers are in the kill mask.
// We don't just add them unconditionally to the killMask because for most architectures
// they are already in the RBM_CALLEE_TRASH set,
// and we don't want to introduce extra checks and calls in this hot function.
- assert(!call->IsVirtualStub() ||
- ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == compiler->virtualStubParamInfo->GetRegMask()));
+ assert(!call->IsVirtualStub() || ((killMask.gprRegs() & compiler->virtualStubParamInfo->GetRegMask()) ==
+ compiler->virtualStubParamInfo->GetRegMask()));
#endif // !TARGET_ARM
#ifdef SWIFT_SUPPORT
@@ -888,7 +894,7 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
// so don't use the register post-call until it is consumed by SwiftError.
if (call->HasSwiftErrorHandling())
{
- killMask |= RBM_SWIFT_ERROR;
+ killMask.AddGprRegMask(RBM_SWIFT_ERROR);
}
#endif // SWIFT_SUPPORT
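A standalone sketch of the trimming logic above (struct layout and mask values invented for the example): when compFloatingPointUsed is false, the float and predicate portions of the call kill set are simply dropped.

#include <cassert>
#include <cstdint>

struct KillSet
{
    uint64_t gpr  = 0;
    uint64_t flt  = 0;
    uint64_t mask = 0;
};

int main()
{
    KillSet kills;
    kills.gpr  = 0x0000FFFF; // pretend caller-trashed GPRs
    kills.flt  = 0x000000FF; // pretend caller-trashed float registers
    kills.mask = 0x0000000F; // pretend caller-trashed predicate registers

    bool compFloatingPointUsed = false;
    if (!compFloatingPointUsed)
    {
        kills.flt  = 0; // the method never touches FP state, so these kills are irrelevant
        kills.mask = 0;
    }

    assert(kills.gpr != 0 && kills.flt == 0 && kills.mask == 0);
    return 0;
}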
@@ -903,10 +909,10 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
+AllRegsMask LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
{
assert(blkNode->OperIsStoreBlk());
- regMaskTP killMask = RBM_NONE;
+ AllRegsMask killMask;
bool isCopyBlk = varTypeIsStruct(blkNode->Data());
switch (blkNode->gtBlkOpKind)
@@ -924,7 +930,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
if (isCopyBlk)
{
// rep movs kills RCX, RDI and RSI
- killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ killMask.AddGprRegMask(RBM_RCX | RBM_RDI | RBM_RSI);
}
else
{
@@ -932,7 +938,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
// (Note that the Data() node, if not constant, will be assigned to
// RCX, but it's fine that this kills it, as the value is not available
// after this node in any case.)
- killMask = RBM_RDI | RBM_RCX;
+ killMask.AddGprRegMask(RBM_RDI | RBM_RCX);
}
break;
#endif
@@ -949,19 +955,16 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
#ifdef FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
-// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_STOREIND node.
-// If the GT_STOREIND will generate a write barrier, determine the specific kill
-// set required by the case-specific, platform-specific write barrier. If no
-// write barrier is required, the kill set will be RBM_NONE.
+// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_HWINTRINSIC node.
//
// Arguments:
-// tree - the GT_STOREIND node
+// tree - the GT_HWINTRINSIC node
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node)
+regMaskGpr LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node)
{
- regMaskTP killMask = RBM_NONE;
+ regMaskGpr killMask = RBM_NONE;
#ifdef TARGET_XARCH
switch (node->GetHWIntrinsicId())
{
@@ -993,10 +996,10 @@ regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node)
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForReturn()
+CONSTREF_AllRegsMask LinearScan::getKillSetForReturn()
{
return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE)
- : RBM_NONE;
+ : compiler->AllRegsMask_NONE;
}
//------------------------------------------------------------------------
@@ -1007,10 +1010,10 @@ regMaskTP LinearScan::getKillSetForReturn()
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForProfilerHook()
+CONSTREF_AllRegsMask LinearScan::getKillSetForProfilerHook()
{
return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL)
- : RBM_NONE;
+ : compiler->AllRegsMask_NONE;
}
#ifdef DEBUG
@@ -1022,9 +1025,9 @@ regMaskTP LinearScan::getKillSetForProfilerHook()
//
// Return Value: a register mask of the registers killed
//
-regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
+AllRegsMask LinearScan::getKillSetForNode(GenTree* tree)
{
- regMaskTP killMask = RBM_NONE;
+ AllRegsMask killMask;
switch (tree->OperGet())
{
case GT_LSH:
@@ -1036,7 +1039,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
case GT_LSH_HI:
case GT_RSH_LO:
#endif
- killMask = getKillSetForShiftRotate(tree->AsOp());
+ killMask = GprRegsMask(getKillSetForShiftRotate(tree->AsOp()));
break;
case GT_MUL:
@@ -1044,14 +1047,14 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
#if !defined(TARGET_64BIT) || defined(TARGET_ARM64)
case GT_MUL_LONG:
#endif
- killMask = getKillSetForMul(tree->AsOp());
+ killMask = GprRegsMask(getKillSetForMul(tree->AsOp()));
break;
case GT_MOD:
case GT_DIV:
case GT_UMOD:
case GT_UDIV:
- killMask = getKillSetForModDiv(tree->AsOp());
+ killMask = GprRegsMask(getKillSetForModDiv(tree->AsOp()));
break;
case GT_STORE_BLK:
@@ -1086,7 +1089,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
#ifdef FEATURE_HW_INTRINSICS
case GT_HWINTRINSIC:
- killMask = getKillSetForHWIntrinsic(tree->AsHWIntrinsic());
+ killMask = GprRegsMask(getKillSetForHWIntrinsic(tree->AsHWIntrinsic()));
break;
#endif // FEATURE_HW_INTRINSICS
@@ -1121,11 +1124,11 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
// This method can add kills even if killMask is RBM_NONE, if this tree is one of the
// special cases that signals that we can't permit callee save registers to hold GC refs.
-bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask)
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, CONSTREF_AllRegsMask killMask)
{
bool insertedKills = false;
- if (killMask != RBM_NONE)
+ if (!killMask.IsEmpty())
{
addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
@@ -1159,8 +1162,10 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
{
continue;
}
- Interval* interval = getIntervalForLocalVar(varIndex);
- const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+ Interval* interval = getIntervalForLocalVar(varIndex);
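+        // Only the slice of the kill mask that matches this interval's register class is
+        // relevant when updating its preferences and aversion below.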
+ regMaskOnlyOne regsKillMask = killMask.GetRegMaskForType(interval->registerType);
+ const bool isCallKill =
+ (killMask.gprRegs() == RBM_INT_CALLEE_TRASH) || (killMask == compiler->AllRegsMask_CALLEE_TRASH);
if (isCallKill)
{
@@ -1173,7 +1178,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
// See the "heuristics for writeThru intervals" in 'buildIntervals()'.
if (!interval->isWriteThru || !isCallKill)
{
- regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
+ regMaskOnlyOne newPreferences = allRegs(interval->registerType) & (~regsKillMask);
if (newPreferences != RBM_NONE)
{
@@ -1181,7 +1186,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
{
// Update the register aversion as long as this is not write-thru vars for
// reason mentioned above.
- interval->registerAversion |= killMask;
+ interval->registerAversion |= regsKillMask;
}
interval->updateRegisterPreferences(newPreferences);
}
@@ -1324,8 +1329,10 @@ bool LinearScan::checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode)
// currentLoc - Location of the temp Def position
// regMask - register mask of candidates for temp
//
-RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask)
+RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskOnlyOne regMask)
{
+ assert(compiler->IsOnlyOneRegMask(regMask));
+
Interval* current = newInterval(regType);
current->isInternal = true;
RefPosition* newDef = newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
@@ -1344,10 +1351,10 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regTy
// Returns:
// The def RefPosition created for this internal temp.
//
-RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
+RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskGpr internalCands)
{
// The candidate set should contain only integer registers.
- assert((internalCands & ~availableIntRegs) == RBM_NONE);
+ assert(compiler->IsGprRegMask(internalCands));
RefPosition* defRefPosition = defineNewInternalTemp(tree, IntRegisterType, internalCands);
return defRefPosition;
@@ -1363,24 +1370,24 @@ RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMa
// Returns:
// The def RefPosition created for this internal temp.
//
-RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
+RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskFloat internalCands)
{
// The candidate set should contain only float registers.
- assert((internalCands & ~availableFloatRegs) == RBM_NONE);
+ assert(compiler->IsFloatRegMask(internalCands));
RefPosition* defRefPosition = defineNewInternalTemp(tree, FloatRegisterType, internalCands);
return defRefPosition;
}
-#if defined(FEATURE_SIMD) && defined(TARGET_XARCH)
-RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskPredicate internalCands)
{
    // The candidate set should contain only mask registers.
- assert((internalCands & ~availableMaskRegs) == RBM_NONE);
+ assert(compiler->IsPredicateRegMask(internalCands));
return defineNewInternalTemp(tree, MaskRegisterType, internalCands);
}
-#endif
+#endif // FEATURE_MASKED_HW_INTRINSICS
//------------------------------------------------------------------------
// buildInternalRegisterUses - adds use positions for internal
@@ -1403,9 +1410,9 @@ void LinearScan::buildInternalRegisterUses()
assert(internalCount <= MaxInternalCount);
for (int i = 0; i < internalCount; i++)
{
- RefPosition* def = internalDefs[i];
- regMaskTP mask = def->registerAssignment;
- RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0);
+ RefPosition* def = internalDefs[i];
+ regMaskOnlyOne mask = def->registerAssignment;
+ RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0);
if (setInternalRegsDelayFree)
{
use->delayRegFree = true;
@@ -1466,14 +1473,17 @@ Interval* LinearScan::getUpperVectorInterval(unsigned varIndex)
// currentLoc - The location of the current node
// fpCalleeKillSet - The set of registers killed by this node.
//
-// Notes: This is called by BuildDefsWithKills for any node that kills registers in the
+// Notes: This is called by BuildKills for any node that kills registers in the
// RBM_FLT_CALLEE_TRASH set. We actually need to find any calls that kill the upper-half
// of the callee-save vector registers.
// But we will use as a proxy any node that kills floating point registers.
// (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
//
-void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet)
+void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree,
+ LsraLocation currentLoc DEBUG_ARG(regMaskFloat fpCalleeKillSet))
{
+ assert(compiler->IsFloatRegMask(fpCalleeKillSet));
+
if ((tree != nullptr) && tree->IsCall())
{
if (tree->AsCall()->IsNoReturn() || compiler->fgIsThrow(tree))
@@ -1860,10 +1870,10 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
// The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
// {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
// the use position of v02 also needs to take into account the kill set of its consuming node.
- regMaskTP killMask = getKillSetForNode(tree);
- if (killMask != RBM_NONE)
+ AllRegsMask killMask = getKillSetForNode(tree);
+ if (!killMask.IsEmpty())
{
- minRegCountForRef += genCountBits(killMask);
+ minRegCountForRef += killMask.Count();
}
}
else if ((newRefPosition->refType) == RefTypeDef && (newRefPosition->getInterval()->isSpecialPutArg))
@@ -1874,9 +1884,9 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
newRefPosition->minRegCandidateCount = minRegCountForRef;
if (newRefPosition->IsActualRef() && doReverseCallerCallee())
{
- Interval* interval = newRefPosition->getInterval();
- regMaskTP oldAssignment = newRefPosition->registerAssignment;
- regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
+ Interval* interval = newRefPosition->getInterval();
+ regMaskOnlyOne oldAssignment = newRefPosition->registerAssignment;
+ regMaskOnlyOne calleeSaveMask = calleeSaveRegs(interval->registerType);
#ifdef TARGET_ARM64
if (newRefPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation))
{
@@ -1890,7 +1900,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
#endif // TARGET_ARM64
{
newRefPosition->registerAssignment =
- getConstrainedRegMask(newRefPosition, oldAssignment, calleeSaveMask, minRegCountForRef);
+ getConstrainedRegMask(newRefPosition, interval->registerType, oldAssignment, calleeSaveMask,
+ minRegCountForRef);
}
if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
@@ -2288,8 +2299,8 @@ void LinearScan::buildIntervals()
RegState* floatRegState = &compiler->codeGen->floatRegState;
intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
- regsInUseThisLocation = RBM_NONE;
- regsInUseNextLocation = RBM_NONE;
+ regsInUseThisLocation.Clear();
+ regsInUseNextLocation.Clear();
#ifdef SWIFT_SUPPORT
if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift)
@@ -2346,7 +2357,7 @@ void LinearScan::buildIntervals()
{
Interval* interval = getIntervalForLocalVar(varIndex);
const var_types regType = argDsc->GetRegisterType();
- regMaskTP mask = allRegs(regType);
+ regMaskOnlyOne mask = allRegs(regType);
if (argDsc->lvIsRegArg && !stressInitialParamReg())
{
// Set this interval as currently assigned to that register
@@ -2354,7 +2365,7 @@ void LinearScan::buildIntervals()
assert(inArgReg < REG_COUNT);
mask = genRegMask(inArgReg);
assignPhysReg(inArgReg, interval);
- INDEBUG(registersToDump |= getRegMask(inArgReg, interval->registerType));
+ INDEBUG(registersToDump.AddRegNum(inArgReg, interval->registerType));
}
RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
pos->setRegOptional(true);
@@ -2428,7 +2439,7 @@ void LinearScan::buildIntervals()
#endif
numPlacedArgLocals = 0;
- placedArgRegs = RBM_NONE;
+ placedArgRegs.Clear();
BasicBlock* predBlock = nullptr;
BasicBlock* prevBlock = nullptr;
@@ -2549,8 +2560,9 @@ void LinearScan::buildIntervals()
// handling clobbers REG_SCRATCH, so kill it here.
if ((block == compiler->fgFirstBB) && compiler->lvaHasAnySwiftStackParamToReassemble())
{
+ AllRegsMask scratchMask = AllRegsMask(genRegMask(REG_SCRATCH));
assert(compiler->fgFirstBBisScratch());
- addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true);
+ addRefsForPhysRegMask(scratchMask, currentLoc + 1, RefTypeKill, true);
currentLoc += 2;
}
@@ -2560,13 +2572,14 @@ void LinearScan::buildIntervals()
if (compiler->compShouldPoisonFrame() && (block == compiler->fgFirstBB))
{
assert(compiler->fgFirstBBisScratch());
- regMaskTP killed;
+ AllRegsMask killed;
#if defined(TARGET_XARCH)
// Poisoning uses EAX for small vars and rep stosd that kills edi, ecx and eax for large vars.
- killed = RBM_EDI | RBM_ECX | RBM_EAX;
+ killed.AddGprRegMask(RBM_EDI | RBM_ECX | RBM_EAX);
#else
// Poisoning uses REG_SCRATCH for small vars and memset helper for big vars.
- killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET);
+ killed = compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET);
+ killed.AddRegNumInMask(REG_SCRATCH);
#endif
addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true);
currentLoc += 2;
@@ -2788,12 +2801,12 @@ void LinearScan::buildIntervals()
{
calleeSaveCount = CNT_CALLEE_ENREG;
}
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
else if (varTypeUsesMaskReg(interval->registerType))
{
calleeSaveCount = CNT_CALLEE_SAVED_MASK;
}
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
else
{
assert(varTypeUsesFloatReg(interval->registerType));
@@ -2846,17 +2859,26 @@ void LinearScan::buildIntervals()
if (!needNonIntegerRegisters)
{
availableRegCount = REG_INT_COUNT;
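+        // Only integer registers are needed, so clear the float/double/mask availability sets.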
+ availableFloatRegs.OverrideAssign(RBM_NONE);
+ availableDoubleRegs.OverrideAssign(RBM_NONE);
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ availableMaskRegs.OverrideAssign(RBM_NONE);
+#endif
}
- if (availableRegCount < (sizeof(regMaskTP) * 8))
+#ifdef HAS_MORE_THAN_64_REGISTERS
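+    // With more than 64 registers, each register class has its own bit set, so the mask is
+    // built directly from the per-class availability sets.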
+ actualRegistersMask = AllRegsMask(availableIntRegs, availableFloatRegs, availableMaskRegs);
+#else
+ if (availableRegCount < (sizeof(RegBitSet64) * 8))
{
        // Mask out the bits between availableRegCount and 63
- actualRegistersMask = (1ULL << availableRegCount) - 1;
+ actualRegistersMask = AllRegsMask((1ULL << availableRegCount) - 1);
}
else
{
- actualRegistersMask = ~RBM_NONE;
+ actualRegistersMask = AllRegsMask(~RBM_NONE);
}
+#endif // HAS_MORE_THAN_64_REGISTERS
#ifdef DEBUG
// Make sure we don't have any blocks that were not visited
@@ -2889,8 +2911,8 @@ void LinearScan::stressSetRandomParameterPreferences()
{
CLRRandom rng;
rng.Init(compiler->info.compMethodHash());
- regMaskTP intRegs = compiler->codeGen->intRegState.rsCalleeRegArgMaskLiveIn;
- regMaskTP floatRegs = compiler->codeGen->floatRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskGpr intRegs = compiler->codeGen->intRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskFloat floatRegs = compiler->codeGen->floatRegState.rsCalleeRegArgMaskLiveIn;
for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
{
@@ -2903,7 +2925,7 @@ void LinearScan::stressSetRandomParameterPreferences()
Interval* interval = getIntervalForLocalVar(varIndex);
- regMaskTP* regs;
+ regMaskOnlyOne* regs;
if (interval->registerType == FloatRegisterType)
{
regs = &floatRegs;
@@ -2921,12 +2943,12 @@ void LinearScan::stressSetRandomParameterPreferences()
continue;
}
- int bitIndex = rng.Next((int)numBits);
- regNumber prefReg = REG_NA;
- regMaskTP regsLeft = *regs;
+ int bitIndex = rng.Next((int)numBits);
+ regNumber prefReg = REG_NA;
+ regMaskOnlyOne regsLeft = *regs;
for (int i = 0; i <= bitIndex; i++)
{
- prefReg = genFirstRegNumFromMaskAndToggle(regsLeft);
+ prefReg = genFirstRegNumFromMaskAndToggle(regsLeft MORE_THAN_64_REG_ARG(interval->registerType));
}
*regs &= ~genRegMask(prefReg);
@@ -3052,9 +3074,10 @@ void setTgtPref(Interval* interval, RefPosition* tgtPrefUse)
// Notes:
// Adds the RefInfo for the definition to the defList.
//
-RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx)
+RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskOnlyOne dstCandidates, int multiRegIdx)
{
assert(!tree->isContained());
+ assert(compiler->IsOnlyOneRegMask(dstCandidates));
if (dstCandidates != RBM_NONE)
{
@@ -3131,7 +3154,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu
}
//------------------------------------------------------------------------
-// BuildDef: Build one or more RefTypeDef RefPositions for the given node
+// BuildCallDefs: Build one or more RefTypeDef RefPositions for the given call node
//
// Arguments:
// tree - The node that defines a register
@@ -3141,68 +3164,80 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu
// Notes:
// Adds the RefInfo for the definitions to the defList.
//
-void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates)
+void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, REF_AllRegsMask dstCandidates)
{
- bool fixedReg = false;
- if ((dstCount > 1) && (dstCandidates != RBM_NONE) && ((int)genCountBits(dstCandidates) == dstCount))
+ assert(dstCount > 0);
+ assert((int)dstCandidates.Count() == dstCount);
+ assert(tree->IsMultiRegCall());
+
+ const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+ assert(retTypeDesc != nullptr);
+
+ for (int i = 0; i < dstCount; i++)
{
- fixedReg = true;
+ // In case of multi-reg call node, we have to query the i'th position return register.
+ // For all other cases of multi-reg definitions, the registers must be in sequential order.
+ regNumber thisReg =
+ tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv());
+
+ assert(dstCandidates.IsRegNumInMask(thisReg));
+ dstCandidates.RemoveRegNumFromMask(thisReg);
+
+ BuildDef(tree, genRegMask(thisReg), i);
}
- const ReturnTypeDesc* retTypeDesc = nullptr;
- if (tree->IsMultiRegCall())
+}
+
+//------------------------------------------------------------------------
+// BuildDefs: Build one or more RefTypeDef RefPositions for the given node
+//
+// Arguments:
+// tree - The node that defines a register
+// dstCount - The number of registers defined by the node
+// dstCandidates - the candidate registers for the definition
+//
+// Notes:
+// Adds the RefInfo for the definitions to the defList.
+// Also, the `dstCandidates` is assumed to be of "onlyOne" type. If there are
+// both gpr and float registers, use `BuildDefs` that takes `AllRegsMask`
+//
+void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates)
+{
+ assert(dstCount > 0);
+ assert(compiler->IsOnlyOneRegMask(dstCandidates));
+
+ if ((dstCandidates == RBM_NONE) || ((int)genCountBits(dstCandidates) != dstCount))
{
- retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+        // This is not the fixedReg case, so just create the definitions based on dstCandidates
+ for (int i = 0; i < dstCount; i++)
+ {
+ BuildDef(tree, dstCandidates, i);
+ }
+ return;
}
+
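+    // fixedReg case: the candidate count matches dstCount, so peel one register off
+    // dstCandidates (lowest bit first) for each definition.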
for (int i = 0; i < dstCount; i++)
{
- regMaskTP thisDstCandidates;
- if (fixedReg)
- {
- // In case of multi-reg call node, we have to query the i'th position return register.
- // For all other cases of multi-reg definitions, the registers must be in sequential order.
- if (retTypeDesc != nullptr)
- {
- thisDstCandidates = genRegMask(
- tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()));
- assert((dstCandidates & thisDstCandidates) != RBM_NONE);
- }
- else
- {
- thisDstCandidates = genFindLowestBit(dstCandidates);
- }
- dstCandidates &= ~thisDstCandidates;
- }
- else
- {
- thisDstCandidates = dstCandidates;
- }
+ regMaskOnlyOne thisDstCandidates = genFindLowestBit(dstCandidates);
BuildDef(tree, thisDstCandidates, i);
+ dstCandidates &= ~thisDstCandidates;
}
}
//------------------------------------------------------------------------
-// BuildDef: Build one or more RefTypeDef RefPositions for the given node,
-// as well as kills as specified by the given mask.
+// BuildKills: Build RefTypeKill RefPositions as specified by the given mask.
//
// Arguments:
// tree - The node that defines a register
-// dstCount - The number of registers defined by the node
-// dstCandidates - The candidate registers for the definition
// killMask - The mask of registers killed by this node
//
-// Notes:
-// Adds the RefInfo for the definitions to the defList.
-// The def and kill functionality is folded into a single method so that the
-// save and restores of upper vector registers can be bracketed around the def.
-//
-void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask)
+void LinearScan::BuildKills(GenTree* tree, CONSTREF_AllRegsMask killMask)
{
assert(killMask == getKillSetForNode(tree));
// Call this even when killMask is RBM_NONE, as we have to check for some special cases
buildKillPositionsForNode(tree, currentLoc + 1, killMask);
- if (killMask != RBM_NONE)
+ if (!killMask.IsEmpty())
{
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Build RefPositions to account for the fact that, even in a callee-save register, the upper half of any large
@@ -3216,15 +3251,83 @@ void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCa
// RefPositions in that case.
// This must be done after the kills, so that we know which large vectors are still live.
//
- if ((killMask & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
+    if (killMask.IsFloatMaskPresent(compiler, RBM_FLT_CALLEE_TRASH))
{
- buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask);
+ buildUpperVectorSaveRefPositions(tree, currentLoc + 1 DEBUG_ARG((killMask.floatRegs(compiler) &
+ RBM_FLT_CALLEE_TRASH)));
}
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
+}
- // Now, create the Def(s)
- BuildDefs(tree, dstCount, dstCandidates);
+#ifndef TARGET_ARMARCH
+//------------------------------------------------------------------------
+// BuildDefWithKills: Build one or two (for 32-bit) RefTypeDef RefPositions for the given node,
+// as well as kills as specified by the given mask.
+//
+// Arguments:
+// tree - The call node that defines a register
+//    dstCount      - The number of registers defined by the node (one, or two on 32-bit targets)
+//    dstCandidates - The candidate registers for the definition
+// killMask - The mask of registers killed by this node
+//
+// Notes:
+// Adds the RefInfo for the definitions to the defList.
+// The def and kill functionality is folded into a single method so that the
+// save and restores of upper vector registers can be bracketed around the def.
+//
+void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask)
+{
+ assert(compiler->IsOnlyOneRegMask(dstCandidates));
+
+ // Build the kill RefPositions
+ BuildKills(tree, killMask);
+
+#ifdef TARGET_64BIT
+    // On 64-bit targets there is always exactly one definition.
+ assert(dstCount == 1);
+ BuildDef(tree, dstCandidates);
+#else
+ if (dstCount == 1)
+ {
+ BuildDef(tree, dstCandidates);
+ }
+ else
+ {
+ assert(dstCount == 2);
+ BuildDefs(tree, 2, dstCandidates);
+ }
+#endif // TARGET_64BIT
+}
+#endif
+
+//------------------------------------------------------------------------
+// BuildCallDefsWithKills: Build one or more RefTypeDef RefPositions for the given node,
+// as well as kills as specified by the given mask.
+//
+// Arguments:
+// tree - The node that defines a register
+// dstCount - The number of registers defined by the node
+// dstCandidates - The candidate registers for the definition
+// killMask - The mask of registers killed by this node
+//
+// Notes:
+// Adds the RefInfo for the definitions to the defList.
+// The def and kill functionality is folded into a single method so that the
+// save and restores of upper vector registers can be bracketed around the def.
+//
+void LinearScan::BuildCallDefsWithKills(GenTree* tree,
+ int dstCount,
+ REF_AllRegsMask dstCandidates,
+ REF_AllRegsMask killMask)
+{
+ assert(dstCount > 0);
+ assert(!dstCandidates.IsEmpty());
+
+ // Build the kill RefPositions
+ BuildKills(tree, killMask);
+
+ // And then the Def(s)
+ BuildCallDefs(tree, dstCount, dstCandidates);
}
//------------------------------------------------------------------------
@@ -3248,7 +3351,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval)
// _after_ the call, then we are going to prefer callee-saved registers for
// such local anyway, so there is no need to look at such local uses.
//
- if (placedArgRegs == RBM_NONE)
+ if (placedArgRegs.IsEmpty())
{
return;
}
@@ -3263,19 +3366,19 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval)
// Find the registers that we should remove from the preference set because
// they are occupied with argument values.
- regMaskTP unpref = placedArgRegs;
- unsigned varIndex = interval->getVarIndex(compiler);
+ AllRegsMask unpref = placedArgRegs;
+ unsigned varIndex = interval->getVarIndex(compiler);
for (size_t i = 0; i < numPlacedArgLocals; i++)
{
if (placedArgLocals[i].VarIndex == varIndex)
{
// This local's value is going to be available in this register so
// keep it in the preferences.
- unpref &= ~genRegMask(placedArgLocals[i].Reg);
+ unpref.RemoveRegNumFromMask(placedArgLocals[i].Reg);
}
}
- if (unpref != RBM_NONE)
+ if (!unpref.IsEmpty())
{
#ifdef DEBUG
if (VERBOSE)
@@ -3287,8 +3390,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval)
}
#endif
- interval->registerAversion |= unpref;
- regMaskTP newPreferences = allRegs(interval->registerType) & ~unpref;
+ regMaskOnlyOne unprefRegMask = unpref.GetRegMaskForType(interval->registerType);
+ interval->registerAversion |= unprefRegMask;
+ regMaskOnlyOne newPreferences = allRegs(interval->registerType) & ~unprefRegMask;
interval->updateRegisterPreferences(newPreferences);
}
}
@@ -3308,8 +3412,10 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval)
// Notes:
// The node must not be contained, and must have been processed by buildRefPositionsForNode().
//
-RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx)
+RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskOnlyOne candidates, int multiRegIdx)
{
+ assert(compiler->IsOnlyOneRegMask(candidates));
+
assert(!operand->isContained());
Interval* interval;
bool regOptional = operand->IsRegOptional();
@@ -3378,12 +3484,12 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu
// Notes:
// This method may only be used if the candidates are the same for all sources.
//
-int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates)
+int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskOnlyOne candidates)
{
return BuildAddrUses(indirTree->Addr(), candidates);
}
-int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
+int LinearScan::BuildAddrUses(GenTree* addr, regMaskOnlyOne candidates)
{
if (!addr->isContained())
{
@@ -3440,7 +3546,7 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
// Return Value:
// The number of source registers used by the *parent* of this node.
//
-int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
+int LinearScan::BuildOperandUses(GenTree* node, regMaskOnlyOne candidates)
{
if (!node->isContained())
{
@@ -3584,10 +3690,10 @@ void LinearScan::AddDelayFreeUses(RefPosition* useRefPosition, GenTree* rmwNode)
// Return Value:
// The number of source registers used by the *parent* of this node.
//
-int LinearScan::BuildDelayFreeUses(GenTree* node,
- GenTree* rmwNode,
- regMaskTP candidates,
- RefPosition** useRefPositionRef)
+int LinearScan::BuildDelayFreeUses(GenTree* node,
+ GenTree* rmwNode,
+ regMaskOnlyOne candidates,
+ RefPosition** useRefPositionRef)
{
RefPosition* use = nullptr;
GenTree* addr = nullptr;
@@ -3683,7 +3789,7 @@ int LinearScan::BuildDelayFreeUses(GenTree* node,
// The operands must already have been processed by buildRefPositionsForNode, and their
// RefInfoListNodes placed in the defList.
//
-int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates)
+int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskOnlyOne candidates)
{
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2IfPresent();
@@ -3717,7 +3823,7 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates)
// Return Value:
// The number of actual register operands.
//
-int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskTP candidates)
+int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskOnlyOne candidates)
{
GenTree* src = cast->CastOp();
@@ -3782,8 +3888,8 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
}
}
- regMaskTP defCandidates = RBM_NONE;
- var_types type = varDsc->GetRegisterType();
+ regMaskOnlyOne defCandidates = RBM_NONE;
+ var_types type = varDsc->GetRegisterType();
#ifdef TARGET_X86
if (varTypeIsByte(type))
@@ -3868,7 +3974,7 @@ int LinearScan::BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc)
if (isMultiRegSrc)
{
- regMaskTP srcCandidates = RBM_NONE;
+ regMaskGpr srcCandidates = RBM_NONE;
#ifdef TARGET_X86
var_types type = fieldVarDsc->TypeGet();
if (varTypeIsByte(type))
@@ -3978,8 +4084,8 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
}
else
{
- srcCount = 1;
- regMaskTP srcCandidates = RBM_NONE;
+ srcCount = 1;
+ regMaskGpr srcCandidates = RBM_NONE;
#ifdef TARGET_X86
var_types type = varDsc->GetRegisterType(storeLoc);
if (varTypeIsByte(type))
@@ -4070,7 +4176,7 @@ int LinearScan::BuildReturn(GenTree* tree)
#endif // !defined(TARGET_64BIT)
if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
{
- regMaskTP useCandidates = RBM_NONE;
+ regMaskOnlyOne useCandidates = RBM_NONE;
#if FEATURE_MULTIREG_RET
#ifdef TARGET_ARM64
@@ -4086,7 +4192,7 @@ int LinearScan::BuildReturn(GenTree* tree)
// op1 has to be either a lclvar or a multi-reg returning call
if ((op1->OperGet() == GT_LCL_VAR) && !op1->IsMultiRegLclVar())
{
- BuildUse(op1, useCandidates);
+ BuildUse(op1, RBM_NONE);
}
else
{
@@ -4108,19 +4214,19 @@ int LinearScan::BuildReturn(GenTree* tree)
if (srcType != dstType)
{
hasMismatchedRegTypes = true;
- regMaskTP dstRegMask =
+ regMaskOnlyOne dstRegMask =
genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv));
if (varTypeUsesIntReg(dstType))
{
buildInternalIntRegisterDefForNode(tree, dstRegMask);
}
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
else if (varTypeUsesMaskReg(dstType))
{
buildInternalMaskRegisterDefForNode(tree, dstRegMask);
}
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
else
{
assert(varTypeUsesFloatReg(dstType));
@@ -4263,11 +4369,11 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
- regMaskTP argMask = genRegMask(argReg);
- RefPosition* use = BuildUse(op1, argMask);
+ singleRegMask argMask = genRegMask(argReg);
+ RefPosition* use = BuildUse(op1, argMask);
// Record that this register is occupied by a register now.
- placedArgRegs |= argMask;
+ placedArgRegs |= argReg;
if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0))
{
@@ -4295,7 +4401,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
if (node->TypeGet() == TYP_LONG)
{
srcCount++;
- regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg));
+ singleRegMask argMaskHi = genRegMask(REG_NEXT(argReg));
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
use = BuildUse(op1, argMaskHi, 1);
BuildDef(node, argMask, 0);
@@ -4360,8 +4466,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree)
// is an indir through an lea, we need to actually instantiate the
// lea in a register
assert(!addr->isContained() && !src->isContained());
- regMaskTP addrCandidates = RBM_WRITE_BARRIER_DST;
- regMaskTP srcCandidates = RBM_WRITE_BARRIER_SRC;
+ regMaskGpr addrCandidates = RBM_WRITE_BARRIER_DST;
+ regMaskGpr srcCandidates = RBM_WRITE_BARRIER_SRC;
#if defined(TARGET_X86) && NOGC_WRITE_BARRIERS
@@ -4380,7 +4486,7 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree)
BuildUse(addr, addrCandidates);
BuildUse(src, srcCandidates);
- regMaskTP killMask = getKillSetForStoreInd(tree->AsStoreInd());
+ AllRegsMask killMask = getKillSetForStoreInd(tree->AsStoreInd());
buildKillPositionsForNode(tree, currentLoc + 1, killMask);
return 2;
}
@@ -4408,7 +4514,7 @@ int LinearScan::BuildCmp(GenTree* tree)
if (!tree->TypeIs(TYP_VOID))
{
- regMaskTP dstCandidates = RBM_NONE;
+ regMaskGpr dstCandidates = RBM_NONE;
#ifdef TARGET_X86
// If the compare is used by a jump, we just need to set the condition codes. If not, then we need
@@ -4432,10 +4538,10 @@ int LinearScan::BuildCmp(GenTree* tree)
//
int LinearScan::BuildCmpOperands(GenTree* tree)
{
- regMaskTP op1Candidates = RBM_NONE;
- regMaskTP op2Candidates = RBM_NONE;
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
+ regMaskGpr op1Candidates = RBM_NONE;
+ regMaskGpr op2Candidates = RBM_NONE;
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
#ifdef TARGET_X86
bool needByteRegs = false;
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 7fe119ccfd165c..d1aee6faa45183 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -45,10 +45,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
int LinearScan::BuildNode(GenTree* tree)
{
assert(!tree->isContained());
- int srcCount;
- int dstCount = 0;
- regMaskTP killMask = RBM_NONE;
- bool isLocalDefUse = false;
+ int srcCount;
+ int dstCount = 0;
+ bool isLocalDefUse = false;
// Reset the build-related members of LinearScan.
clearBuildState();
@@ -138,15 +137,16 @@ int LinearScan::BuildNode(GenTree* tree)
// This kills GC refs in callee save regs
srcCount = 0;
assert(dstCount == 0);
- BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
+ BuildKills(tree, compiler->AllRegsMask_NONE);
break;
case GT_PROF_HOOK:
+ {
srcCount = 0;
assert(dstCount == 0);
- killMask = getKillSetForProfilerHook();
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, getKillSetForProfilerHook());
break;
+ }
case GT_CNS_INT:
case GT_CNS_LNG:
@@ -188,10 +188,11 @@ int LinearScan::BuildNode(GenTree* tree)
break;
case GT_RETURN:
+ {
srcCount = BuildReturn(tree);
- killMask = getKillSetForReturn();
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, getKillSetForReturn());
break;
+ }
case GT_RETFILT:
assert(dstCount == 0);
@@ -295,8 +296,7 @@ int LinearScan::BuildNode(GenTree* tree)
RefPosition* internalDef = buildInternalIntRegisterDefForNode(tree);
srcCount = BuildOperandUses(tree->gtGetOp1());
buildInternalRegisterUses();
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
- BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ BuildKills(tree, compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC));
}
break;
@@ -767,16 +767,16 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
}
// Support for building RefPositions for RMW nodes.
-int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates)
+int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskOnlyOne candidates)
{
- int srcCount = 0;
- regMaskTP op1Candidates = candidates;
- regMaskTP op2Candidates = candidates;
+ int srcCount = 0;
+ regMaskGpr op1Candidates = candidates;
+ regMaskGpr op2Candidates = candidates;
#ifdef TARGET_X86
if (varTypeIsByte(node))
{
- regMaskTP byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs());
+ regMaskGpr byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs());
if (!op1->isContained())
{
assert(byteCandidates != RBM_NONE);
@@ -1028,11 +1028,11 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
// For shift operations, we need that the number
// of bits moved gets stored in CL in case
// the number of bits to shift is not a constant.
- int srcCount = 0;
- GenTree* shiftBy = tree->gtGetOp2();
- GenTree* source = tree->gtGetOp1();
- regMaskTP srcCandidates = RBM_NONE;
- regMaskTP dstCandidates = RBM_NONE;
+ int srcCount = 0;
+ GenTree* shiftBy = tree->gtGetOp2();
+ GenTree* source = tree->gtGetOp1();
+ regMaskGpr srcCandidates = RBM_NONE;
+ regMaskGpr dstCandidates = RBM_NONE;
// x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
// We will allow whatever can be encoded - hope you know what you are doing.
@@ -1046,8 +1046,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
{
// shlx (as opposed to mov+shl) instructions handles all register forms, but it does not handle contained form
// for memory operand. Likewise for sarx and shrx.
- srcCount += BuildOperandUses(source, srcCandidates);
- srcCount += BuildOperandUses(shiftBy, srcCandidates);
+ srcCount += BuildOperandUses(source);
+ srcCount += BuildOperandUses(shiftBy);
BuildDef(tree, dstCandidates);
return srcCount;
}
@@ -1110,7 +1110,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
if (!shiftBy->isContained())
{
srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX);
- buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX);
+ buildKillPositionsForNode(tree, currentLoc + 1, GprRegsMask(RBM_RCX));
}
BuildDef(tree, dstCandidates);
}
@@ -1119,7 +1119,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
if (!shiftBy->isContained())
{
srcCount += BuildOperandUses(shiftBy, RBM_RCX);
- buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX);
+ buildKillPositionsForNode(tree, currentLoc + 1, GprRegsMask(RBM_RCX));
}
}
return srcCount;
@@ -1140,7 +1140,6 @@ int LinearScan::BuildCall(GenTreeCall* call)
const ReturnTypeDesc* retTypeDesc = nullptr;
int srcCount = 0;
int dstCount = 0;
- regMaskTP dstCandidates = RBM_NONE;
assert(!call->isContained());
if (call->TypeGet() != TYP_VOID)
@@ -1164,7 +1163,9 @@ int LinearScan::BuildCall(GenTreeCall* call)
ctrlExpr = call->gtCallAddr;
}
- RegisterType registerType = regType(call);
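+    // Multi-reg return candidates may span register classes, so they are tracked in an
+    // AllRegsMask; a single-reg return keeps a one-class candidate mask.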
+ AllRegsMask dstReturnCandidates;
+ regMaskOnlyOne dstCandidates = RBM_NONE;
+ RegisterType registerType = regType(call);
// Set destination candidates for return value of the call.
@@ -1181,8 +1182,8 @@ int LinearScan::BuildCall(GenTreeCall* call)
if (hasMultiRegRetVal)
{
assert(retTypeDesc != nullptr);
- dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
- assert((int)genCountBits(dstCandidates) == dstCount);
+ dstReturnCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
+ assert((int)dstReturnCandidates.Count() == dstCount);
}
else if (varTypeUsesFloatReg(registerType))
{
@@ -1313,7 +1314,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
// set reg requirements on call target represented as control sequence.
if (ctrlExpr != nullptr)
{
- regMaskTP ctrlExprCandidates = RBM_NONE;
+ regMaskGpr ctrlExprCandidates = RBM_NONE;
// In case of fast tail implemented as jmp, make sure that gtControlExpr is
// computed into appropriate registers.
@@ -1363,15 +1364,34 @@ int LinearScan::BuildCall(GenTreeCall* call)
buildInternalRegisterUses();
// Now generate defs and kills.
- regMaskTP killMask = getKillSetForCall(call);
- BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
+ AllRegsMask killMask = getKillSetForCall(call);
#ifdef SWIFT_SUPPORT
- if (call->HasSwiftErrorHandling())
+    // If this is a Swift call with error handling, the error register must be part of the kill set
+ assert(!call->HasSwiftErrorHandling() || ((killMask.gprRegs() & RBM_SWIFT_ERROR) != 0));
+#endif // SWIFT_SUPPORT
+
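+    // Build the kill RefPositions and then the def(s) for the call's return register(s);
+    // a call with no register definition only gets the kills.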
+ if (dstCount > 0)
{
- // Tree is a Swift call with error handling; error register should have been killed
- assert((killMask & RBM_SWIFT_ERROR) != 0);
+ if (hasMultiRegRetVal)
+ {
+ assert(dstReturnCandidates.Count() > 0);
+ BuildCallDefsWithKills(call, dstCount, dstReturnCandidates, killMask);
+ }
+ else
+ {
+ assert(dstCount == 1);
+ BuildDefWithKills(call, dstCount, dstCandidates, killMask);
+ }
+ }
+ else
+ {
+ BuildKills(call, killMask);
+ }
+#ifdef SWIFT_SUPPORT
+ if (call->HasSwiftErrorHandling())
+ {
// After a Swift call that might throw returns, we expect the error register to be consumed
// by a GT_SWIFT_ERROR node. However, we want to ensure the error register won't be trashed
// before GT_SWIFT_ERROR can consume it.
@@ -1390,7 +1410,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
#endif // SWIFT_SUPPORT
// No args are placed in registers anymore.
- placedArgRegs = RBM_NONE;
+ placedArgRegs.Clear();
numPlacedArgLocals = 0;
return srcCount;
}
@@ -1412,9 +1432,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
GenTree* srcAddrOrFill = nullptr;
- regMaskTP dstAddrRegMask = RBM_NONE;
- regMaskTP srcRegMask = RBM_NONE;
- regMaskTP sizeRegMask = RBM_NONE;
+ regMaskGpr dstAddrRegMask = RBM_NONE;
+ regMaskGpr srcRegMask = RBM_NONE;
+ regMaskGpr sizeRegMask = RBM_NONE;
RefPosition* internalIntDef = nullptr;
#ifdef TARGET_X86
@@ -1512,7 +1532,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// or if are but the remainder is a power of 2 and less than the
// size of a register
- regMaskTP regMask = availableIntRegs;
+ regMaskGpr regMask = availableIntRegs;
#ifdef TARGET_X86
if ((size & 1) != 0)
{
@@ -1608,6 +1628,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
if (!dstAddr->isContained())
{
+ assert(compiler->IsGprRegMask(dstAddrRegMask));
+
useCount++;
BuildUse(dstAddr, dstAddrRegMask);
}
@@ -1620,6 +1642,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
if (!srcAddrOrFill->isContained())
{
+ assert(compiler->IsGprRegMask(srcRegMask));
+
useCount++;
BuildUse(srcAddrOrFill, srcRegMask);
}
@@ -1629,6 +1653,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
}
}
+ assert(compiler->IsGprRegMask(sizeRegMask));
+
#ifdef TARGET_X86
// If we require a byte register on x86, we may run into an over-constrained situation
// if we have BYTE_REG_COUNT or more uses (currently, it can be at most 4, if both the
@@ -1650,8 +1676,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
#endif
buildInternalRegisterUses();
- regMaskTP killMask = getKillSetForBlockStore(blkNode);
- BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
+ BuildKills(blkNode, getKillSetForBlockStore(blkNode));
return useCount;
}
@@ -1776,7 +1801,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
// If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
if ((loadSize % XMM_REGSIZE_BYTES) != 0)
{
- regMaskTP regMask = availableIntRegs;
+ regMaskGpr regMask = availableIntRegs;
#ifdef TARGET_X86
// Storing at byte granularity requires a byteable register.
if ((loadSize & 1) != 0)
@@ -1882,10 +1907,10 @@ int LinearScan::BuildLclHeap(GenTree* tree)
//
int LinearScan::BuildModDiv(GenTree* tree)
{
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
- regMaskTP dstCandidates = RBM_NONE;
- int srcCount = 0;
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+ regMaskGpr dstCandidates = RBM_NONE;
+ int srcCount = 0;
if (varTypeIsFloating(tree->TypeGet()))
{
@@ -1938,12 +1963,13 @@ int LinearScan::BuildModDiv(GenTree* tree)
srcCount = 1;
}
- srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(RBM_RAX | RBM_RDX));
+ assert(compiler->IsGprRegMask(dstCandidates));
+ srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(RBM_RAX | RBM_RDX));
buildInternalRegisterUses();
- regMaskTP killMask = getKillSetForModDiv(tree->AsOp());
- BuildDefsWithKills(tree, 1, dstCandidates, killMask);
+ AllRegsMask killMask(getKillSetForModDiv(tree->AsOp()));
+ BuildDefWithKills(tree, 1, dstCandidates, killMask);
return srcCount;
}
@@ -2100,7 +2126,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
dstCount = 0;
}
- regMaskTP dstCandidates = RBM_NONE;
+ regMaskOnlyOne dstCandidates = RBM_NONE;
if (intrinsicTree->GetOperandCount() == 0)
{
@@ -2690,7 +2716,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
if (buildUses)
{
- regMaskTP op1RegCandidates = RBM_NONE;
+ regMaskFloat op1RegCandidates = RBM_NONE;
#if defined(TARGET_AMD64)
if (!isEvexCompatible)
@@ -2715,7 +2741,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
if (op2 != nullptr)
{
- regMaskTP op2RegCandidates = RBM_NONE;
+ regMaskFloat op2RegCandidates = RBM_NONE;
#if defined(TARGET_AMD64)
if (!isEvexCompatible)
@@ -2761,7 +2787,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
if (op3 != nullptr)
{
- regMaskTP op3RegCandidates = RBM_NONE;
+ regMaskFloat op3RegCandidates = RBM_NONE;
#if defined(TARGET_AMD64)
if (!isEvexCompatible)
@@ -2775,7 +2801,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
if (op4 != nullptr)
{
- regMaskTP op4RegCandidates = RBM_NONE;
+ regMaskFloat op4RegCandidates = RBM_NONE;
#if defined(TARGET_AMD64)
assert(isEvexCompatible);
@@ -2831,7 +2857,7 @@ int LinearScan::BuildCast(GenTreeCast* cast)
const var_types srcType = genActualType(src->TypeGet());
const var_types castType = cast->gtCastType;
- regMaskTP candidates = RBM_NONE;
+ regMaskGpr candidates = RBM_NONE;
#ifdef TARGET_X86
if (varTypeIsByte(castType))
{
@@ -2881,8 +2907,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree)
}
#endif // FEATURE_SIMD
- regMaskTP indirCandidates = RBM_NONE;
- int srcCount = BuildIndirUses(indirTree, indirCandidates);
+ int srcCount = BuildIndirUses(indirTree);
if (indirTree->gtOper == GT_STOREIND)
{
GenTree* source = indirTree->gtGetOp2();
@@ -2898,7 +2923,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree)
}
else
{
- regMaskTP srcCandidates = RBM_NONE;
+ regMaskGpr srcCandidates = RBM_NONE;
#ifdef TARGET_X86
// Determine if we need byte regs for the non-mem source, if any.
@@ -2938,6 +2963,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree)
}
#endif // TARGET_X86
+ assert(compiler->IsGprRegMask(srcCandidates));
srcCount += BuildBinaryUses(source->AsOp(), srcCandidates);
}
}
@@ -3003,9 +3029,9 @@ int LinearScan::BuildMul(GenTree* tree)
return BuildSimple(tree);
}
- int srcCount = BuildBinaryUses(tree->AsOp());
- int dstCount = 1;
- regMaskTP dstCandidates = RBM_NONE;
+ int srcCount = BuildBinaryUses(tree->AsOp());
+ int dstCount = 1;
+ regMaskGpr dstCandidates = RBM_NONE;
bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
bool requiresOverflowCheck = tree->gtOverflowEx();
@@ -3059,8 +3085,11 @@ int LinearScan::BuildMul(GenTree* tree)
{
containedMemOp = op2;
}
- regMaskTP killMask = getKillSetForMul(tree->AsOp());
- BuildDefsWithKills(tree, dstCount, dstCandidates, killMask);
+
+ assert(compiler->IsGprRegMask(dstCandidates));
+
+ AllRegsMask killMask(getKillSetForMul(tree->AsOp()));
+ BuildDefWithKills(tree, dstCount, dstCandidates, killMask);
return srcCount;
}
@@ -3103,7 +3132,7 @@ void LinearScan::SetContainsAVXFlags(unsigned sizeOfSIMDVector /* = 0*/)
// RBM_NONE if compatible with EVEX (or not a floating/SIMD register),
// lowSIMDRegs() (XMM0-XMM16) otherwise.
//
-inline regMaskTP LinearScan::BuildEvexIncompatibleMask(GenTree* tree)
+inline regMaskFloat LinearScan::BuildEvexIncompatibleMask(GenTree* tree)
{
#if defined(TARGET_AMD64)
if (!(varTypeIsFloating(tree->gtType) || varTypeIsSIMD(tree->gtType)))
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 8e9b07e8da6ccf..b8be505076d655 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -1962,8 +1962,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
bool callIsVararg = IsVarArgs();
#ifdef TARGET_ARM
- regMaskTP argSkippedRegMask = RBM_NONE;
- regMaskTP fltArgSkippedRegMask = RBM_NONE;
+ regMaskGpr argSkippedRegMask = RBM_NONE;
+ regMaskFloat fltArgSkippedRegMask = RBM_NONE;
#endif // TARGET_ARM
#if defined(TARGET_X86)
@@ -2539,8 +2539,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
(size == 1)) // The size to back-fill is one float register
{
// Back-fill the register.
- isBackFilled = true;
- regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ isBackFilled = true;
+ regMaskFloat backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
fltArgSkippedRegMask &=
~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 2767686ecfd7cb..9ac37de2d8eb3b 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -4160,7 +4160,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
hoistCtxt->m_hoistedFPExprCount = 0;
}
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
if (!VarSetOps::IsEmpty(this, lvaMaskVars))
{
VARSET_TP loopMskVars(VarSetOps::Intersection(this, loopVars, lvaMaskVars));
@@ -4189,7 +4189,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
{
hoistCtxt->m_loopVarMskCount = 0;
hoistCtxt->m_loopVarInOutMskCount = 0;
+#ifdef FEATURE_MASKED_HW_INTRINSICS
hoistCtxt->m_hoistedMskExprCount = 0;
+#endif
}
#endif // TARGET_XARCH
@@ -4313,9 +4315,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
optHoistLoopBlocks(loop, &defExec, hoistCtxt);
unsigned numHoisted = hoistCtxt->m_hoistedFPExprCount + hoistCtxt->m_hoistedExprCount;
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
numHoisted += hoistCtxt->m_hoistedMskExprCount;
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
return numHoisted > 0;
}
@@ -4347,7 +4349,7 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l
}
#endif
}
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
else if (varTypeUsesMaskReg(tree))
{
hoistedExprCount = hoistCtxt->m_hoistedMskExprCount;
@@ -4360,7 +4362,7 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l
availRegCount += CNT_CALLEE_TRASH_MASK - 1;
}
}
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
else
{
assert(varTypeUsesFloatReg(tree));
@@ -5522,7 +5524,7 @@ void Compiler::optComputeInterestingVarSets()
#ifndef TARGET_64BIT
VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
#endif
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
VarSetOps::AssignNoCopy(this, lvaMaskVars, VarSetOps::MakeEmpty(this));
#endif
@@ -5541,7 +5543,7 @@ void Compiler::optComputeInterestingVarSets()
VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
}
#endif
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
else if (varTypeUsesMaskReg(varDsc->lvType))
{
VarSetOps::AddElemD(this, lvaMaskVars, varDsc->lvVarIndex);
diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp
index ba07086a63c212..516f4ea95d5355 100644
--- a/src/coreclr/jit/regalloc.cpp
+++ b/src/coreclr/jit/regalloc.cpp
@@ -99,8 +99,8 @@ bool Compiler::shouldDoubleAlign(
// by linear scan. (It is not shared for System V AMD64 platform.)
regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
{
- regNumber inArgReg = argDsc->GetArgReg();
- regMaskTP inArgMask = genRegMask(inArgReg);
+ regNumber inArgReg = argDsc->GetArgReg();
+ singleRegMask inArgMask = genRegMask(inArgReg);
if (regState->rsIsFloat)
{
diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h
index 43454e96ec64e4..e784f18a167fba 100644
--- a/src/coreclr/jit/register.h
+++ b/src/coreclr/jit/register.h
@@ -16,15 +16,15 @@
#if defined(TARGET_X86)
/*
-REGDEF(name, rnum, mask, sname) */
-REGDEF(EAX, 0, 0x01, "eax" )
-REGDEF(ECX, 1, 0x02, "ecx" )
-REGDEF(EDX, 2, 0x04, "edx" )
-REGDEF(EBX, 3, 0x08, "ebx" )
-REGDEF(ESP, 4, 0x10, "esp" )
-REGDEF(EBP, 5, 0x20, "ebp" )
-REGDEF(ESI, 6, 0x40, "esi" )
-REGDEF(EDI, 7, 0x80, "edi" )
+REGDEF(name, rnum, mask, sname, regTypeTag) */
+REGDEF(EAX, 0, 0x01, "eax", 0)
+REGDEF(ECX, 1, 0x02, "ecx", 0)
+REGDEF(EDX, 2, 0x04, "edx", 0)
+REGDEF(EBX, 3, 0x08, "ebx", 0)
+REGDEF(ESP, 4, 0x10, "esp", 0)
+REGDEF(EBP, 5, 0x20, "ebp", 0)
+REGDEF(ESI, 6, 0x40, "esi", 0)
+REGDEF(EDI, 7, 0x80, "edi", 0)
REGALIAS(RAX, EAX)
REGALIAS(RCX, ECX)
REGALIAS(RDX, EDX)
@@ -37,23 +37,23 @@ REGALIAS(RDI, EDI)
#else // !defined(TARGET_X86)
/*
-REGDEF(name, rnum, mask, sname) */
-REGDEF(RAX, 0, 0x0001, "rax" )
-REGDEF(RCX, 1, 0x0002, "rcx" )
-REGDEF(RDX, 2, 0x0004, "rdx" )
-REGDEF(RBX, 3, 0x0008, "rbx" )
-REGDEF(RSP, 4, 0x0010, "rsp" )
-REGDEF(RBP, 5, 0x0020, "rbp" )
-REGDEF(RSI, 6, 0x0040, "rsi" )
-REGDEF(RDI, 7, 0x0080, "rdi" )
-REGDEF(R8, 8, 0x0100, "r8" )
-REGDEF(R9, 9, 0x0200, "r9" )
-REGDEF(R10, 10, 0x0400, "r10" )
-REGDEF(R11, 11, 0x0800, "r11" )
-REGDEF(R12, 12, 0x1000, "r12" )
-REGDEF(R13, 13, 0x2000, "r13" )
-REGDEF(R14, 14, 0x4000, "r14" )
-REGDEF(R15, 15, 0x8000, "r15" )
+REGDEF(name, rnum, mask, sname, regTypeTag) */
+REGDEF(RAX, 0, 0x0001, "rax", 0)
+REGDEF(RCX, 1, 0x0002, "rcx", 0)
+REGDEF(RDX, 2, 0x0004, "rdx", 0)
+REGDEF(RBX, 3, 0x0008, "rbx", 0)
+REGDEF(RSP, 4, 0x0010, "rsp", 0)
+REGDEF(RBP, 5, 0x0020, "rbp", 0)
+REGDEF(RSI, 6, 0x0040, "rsi", 0)
+REGDEF(RDI, 7, 0x0080, "rdi", 0)
+REGDEF(R8, 8, 0x0100, "r8" , 0)
+REGDEF(R9, 9, 0x0200, "r9" , 0)
+REGDEF(R10, 10, 0x0400, "r10", 0)
+REGDEF(R11, 11, 0x0800, "r11", 0)
+REGDEF(R12, 12, 0x1000, "r12", 0)
+REGDEF(R13, 13, 0x2000, "r13", 0)
+REGDEF(R14, 14, 0x4000, "r14", 0)
+REGDEF(R15, 15, 0x8000, "r15", 0)
REGALIAS(EAX, RAX)
REGALIAS(ECX, RCX)
@@ -83,55 +83,55 @@ REGALIAS(EDI, RDI)
#endif // !TARGET_AMD64
-REGDEF(XMM0, 0+XMMBASE, XMMMASK(0), "mm0" )
-REGDEF(XMM1, 1+XMMBASE, XMMMASK(1), "mm1" )
-REGDEF(XMM2, 2+XMMBASE, XMMMASK(2), "mm2" )
-REGDEF(XMM3, 3+XMMBASE, XMMMASK(3), "mm3" )
-REGDEF(XMM4, 4+XMMBASE, XMMMASK(4), "mm4" )
-REGDEF(XMM5, 5+XMMBASE, XMMMASK(5), "mm5" )
-REGDEF(XMM6, 6+XMMBASE, XMMMASK(6), "mm6" )
-REGDEF(XMM7, 7+XMMBASE, XMMMASK(7), "mm7" )
+REGDEF(XMM0, 0+XMMBASE, XMMMASK(0), "mm0", 1)
+REGDEF(XMM1, 1+XMMBASE, XMMMASK(1), "mm1", 1)
+REGDEF(XMM2, 2+XMMBASE, XMMMASK(2), "mm2", 1)
+REGDEF(XMM3, 3+XMMBASE, XMMMASK(3), "mm3", 1)
+REGDEF(XMM4, 4+XMMBASE, XMMMASK(4), "mm4", 1)
+REGDEF(XMM5, 5+XMMBASE, XMMMASK(5), "mm5", 1)
+REGDEF(XMM6, 6+XMMBASE, XMMMASK(6), "mm6", 1)
+REGDEF(XMM7, 7+XMMBASE, XMMMASK(7), "mm7", 1)
#ifdef TARGET_AMD64
-REGDEF(XMM8, 8+XMMBASE, XMMMASK(8), "mm8" )
-REGDEF(XMM9, 9+XMMBASE, XMMMASK(9), "mm9" )
-REGDEF(XMM10, 10+XMMBASE, XMMMASK(10), "mm10" )
-REGDEF(XMM11, 11+XMMBASE, XMMMASK(11), "mm11" )
-REGDEF(XMM12, 12+XMMBASE, XMMMASK(12), "mm12" )
-REGDEF(XMM13, 13+XMMBASE, XMMMASK(13), "mm13" )
-REGDEF(XMM14, 14+XMMBASE, XMMMASK(14), "mm14" )
-REGDEF(XMM15, 15+XMMBASE, XMMMASK(15), "mm15" )
-
-REGDEF(XMM16, 16+XMMBASE, XMMMASK(16), "mm16" )
-REGDEF(XMM17, 17+XMMBASE, XMMMASK(17), "mm17" )
-REGDEF(XMM18, 18+XMMBASE, XMMMASK(18), "mm18" )
-REGDEF(XMM19, 19+XMMBASE, XMMMASK(19), "mm19" )
-REGDEF(XMM20, 20+XMMBASE, XMMMASK(20), "mm20" )
-REGDEF(XMM21, 21+XMMBASE, XMMMASK(21), "mm21" )
-REGDEF(XMM22, 22+XMMBASE, XMMMASK(22), "mm22" )
-REGDEF(XMM23, 23+XMMBASE, XMMMASK(23), "mm23" )
-
-REGDEF(XMM24, 24+XMMBASE, XMMMASK(24), "mm24" )
-REGDEF(XMM25, 25+XMMBASE, XMMMASK(25), "mm25" )
-REGDEF(XMM26, 26+XMMBASE, XMMMASK(26), "mm26" )
-REGDEF(XMM27, 27+XMMBASE, XMMMASK(27), "mm27" )
-REGDEF(XMM28, 28+XMMBASE, XMMMASK(28), "mm28" )
-REGDEF(XMM29, 29+XMMBASE, XMMMASK(29), "mm29" )
-REGDEF(XMM30, 30+XMMBASE, XMMMASK(30), "mm30" )
-REGDEF(XMM31, 31+XMMBASE, XMMMASK(31), "mm31" )
+REGDEF(XMM8, 8+XMMBASE, XMMMASK(8), "mm8", 1)
+REGDEF(XMM9, 9+XMMBASE, XMMMASK(9), "mm9", 1)
+REGDEF(XMM10, 10+XMMBASE, XMMMASK(10), "mm10", 1)
+REGDEF(XMM11, 11+XMMBASE, XMMMASK(11), "mm11", 1)
+REGDEF(XMM12, 12+XMMBASE, XMMMASK(12), "mm12", 1)
+REGDEF(XMM13, 13+XMMBASE, XMMMASK(13), "mm13", 1)
+REGDEF(XMM14, 14+XMMBASE, XMMMASK(14), "mm14", 1)
+REGDEF(XMM15, 15+XMMBASE, XMMMASK(15), "mm15", 1)
+
+REGDEF(XMM16, 16+XMMBASE, XMMMASK(16), "mm16", 1)
+REGDEF(XMM17, 17+XMMBASE, XMMMASK(17), "mm17", 1)
+REGDEF(XMM18, 18+XMMBASE, XMMMASK(18), "mm18", 1)
+REGDEF(XMM19, 19+XMMBASE, XMMMASK(19), "mm19", 1)
+REGDEF(XMM20, 20+XMMBASE, XMMMASK(20), "mm20", 1)
+REGDEF(XMM21, 21+XMMBASE, XMMMASK(21), "mm21", 1)
+REGDEF(XMM22, 22+XMMBASE, XMMMASK(22), "mm22", 1)
+REGDEF(XMM23, 23+XMMBASE, XMMMASK(23), "mm23", 1)
+
+REGDEF(XMM24, 24+XMMBASE, XMMMASK(24), "mm24", 1)
+REGDEF(XMM25, 25+XMMBASE, XMMMASK(25), "mm25", 1)
+REGDEF(XMM26, 26+XMMBASE, XMMMASK(26), "mm26", 1)
+REGDEF(XMM27, 27+XMMBASE, XMMMASK(27), "mm27", 1)
+REGDEF(XMM28, 28+XMMBASE, XMMMASK(28), "mm28", 1)
+REGDEF(XMM29, 29+XMMBASE, XMMMASK(29), "mm29", 1)
+REGDEF(XMM30, 30+XMMBASE, XMMMASK(30), "mm30", 1)
+REGDEF(XMM31, 31+XMMBASE, XMMMASK(31), "mm31", 1)
#endif // !TARGET_AMD64
-REGDEF(K0, 0+KBASE, KMASK(0), "k0" )
-REGDEF(K1, 1+KBASE, KMASK(1), "k1" )
-REGDEF(K2, 2+KBASE, KMASK(2), "k2" )
-REGDEF(K3, 3+KBASE, KMASK(3), "k3" )
-REGDEF(K4, 4+KBASE, KMASK(4), "k4" )
-REGDEF(K5, 5+KBASE, KMASK(5), "k5" )
-REGDEF(K6, 6+KBASE, KMASK(6), "k6" )
-REGDEF(K7, 7+KBASE, KMASK(7), "k7" )
+REGDEF(K0, 0+KBASE, KMASK(0), "k0", 2)
+REGDEF(K1, 1+KBASE, KMASK(1), "k1", 2)
+REGDEF(K2, 2+KBASE, KMASK(2), "k2", 2)
+REGDEF(K3, 3+KBASE, KMASK(3), "k3", 2)
+REGDEF(K4, 4+KBASE, KMASK(4), "k4", 2)
+REGDEF(K5, 5+KBASE, KMASK(5), "k5", 2)
+REGDEF(K6, 6+KBASE, KMASK(6), "k6", 2)
+REGDEF(K7, 7+KBASE, KMASK(7), "k7", 2)
-REGDEF(STK, 8+KBASE, 0x0000, "STK" )
+REGDEF(STK, 8+KBASE, 0x0000, "STK", 3)
#elif defined(TARGET_ARM)
#include "registerarm.h"
diff --git a/src/coreclr/jit/registerargconvention.cpp b/src/coreclr/jit/registerargconvention.cpp
index f58388a39672c1..5321483f2739a1 100644
--- a/src/coreclr/jit/registerargconvention.cpp
+++ b/src/coreclr/jit/registerargconvention.cpp
@@ -28,7 +28,7 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */)
// can't create a > 1 register alignment hole to back-fill.
// Back-fill the register
- regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ regMaskFloat backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
resultArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
assert(resultArgNum < MAX_FLOAT_REG_ARG);
diff --git a/src/coreclr/jit/registerargconvention.h b/src/coreclr/jit/registerargconvention.h
index 840f7adc4fcebd..897e019244dffa 100644
--- a/src/coreclr/jit/registerargconvention.h
+++ b/src/coreclr/jit/registerargconvention.h
@@ -21,8 +21,8 @@ struct InitVarDscInfo
#ifdef TARGET_ARM
// Support back-filling of FP parameters. This is similar to code in gtMorphArgs() that
// handles arguments.
- regMaskTP fltArgSkippedRegMask;
- bool anyFloatStackArgs;
+ regMaskFloat fltArgSkippedRegMask;
+ bool anyFloatStackArgs;
#endif // TARGET_ARM
#if defined(TARGET_ARM) || defined(TARGET_RISCV64)
diff --git a/src/coreclr/jit/registerarm.h b/src/coreclr/jit/registerarm.h
index ad70eaa211ccf6..c8e4e06947d4fc 100644
--- a/src/coreclr/jit/registerarm.h
+++ b/src/coreclr/jit/registerarm.h
@@ -13,59 +13,59 @@
#endif
/*
-REGDEF(name, rnum, mask, sname) */
-REGDEF(R0, 0, 0x0001, "r0" )
-REGDEF(R1, 1, 0x0002, "r1" )
-REGDEF(R2, 2, 0x0004, "r2" )
-REGDEF(R3, 3, 0x0008, "r3" )
-REGDEF(R4, 4, 0x0010, "r4" )
-REGDEF(R5, 5, 0x0020, "r5" )
-REGDEF(R6, 6, 0x0040, "r6" )
-REGDEF(R7, 7, 0x0080, "r7" )
-REGDEF(R8, 8, 0x0100, "r8" )
-REGDEF(R9, 9, 0x0200, "r9" )
-REGDEF(R10, 10, 0x0400, "r10" )
-REGDEF(R11, 11, 0x0800, "r11" )
-REGDEF(R12, 12, 0x1000, "r12" )
-REGDEF(SP, 13, 0x2000, "sp" )
-REGDEF(LR, 14, 0x4000, "lr" )
-REGDEF(PC, 15, 0x8000, "pc" )
+REGDEF(name, rnum, mask, sname, regTypeTag) */
+REGDEF(R0, 0, 0x0001, "r0", 0)
+REGDEF(R1, 1, 0x0002, "r1", 0)
+REGDEF(R2, 2, 0x0004, "r2", 0)
+REGDEF(R3, 3, 0x0008, "r3", 0)
+REGDEF(R4, 4, 0x0010, "r4", 0)
+REGDEF(R5, 5, 0x0020, "r5", 0)
+REGDEF(R6, 6, 0x0040, "r6", 0)
+REGDEF(R7, 7, 0x0080, "r7", 0)
+REGDEF(R8, 8, 0x0100, "r8", 0)
+REGDEF(R9, 9, 0x0200, "r9", 0)
+REGDEF(R10, 10, 0x0400, "r10",0)
+REGDEF(R11, 11, 0x0800, "r11",0)
+REGDEF(R12, 12, 0x1000, "r12",0)
+REGDEF(SP, 13, 0x2000, "sp", 0)
+REGDEF(LR, 14, 0x4000, "lr", 0)
+REGDEF(PC, 15, 0x8000, "pc", 0)
#define FPBASE 16
#define VFPMASK(x) (((__int64)1) << (x+FPBASE))
-REGDEF(F0, 0+FPBASE, VFPMASK(0), "f0")
-REGDEF(F1, 1+FPBASE, VFPMASK(1), "f1")
-REGDEF(F2, 2+FPBASE, VFPMASK(2), "f2")
-REGDEF(F3, 3+FPBASE, VFPMASK(3), "f3")
-REGDEF(F4, 4+FPBASE, VFPMASK(4), "f4")
-REGDEF(F5, 5+FPBASE, VFPMASK(5), "f5")
-REGDEF(F6, 6+FPBASE, VFPMASK(6), "f6")
-REGDEF(F7, 7+FPBASE, VFPMASK(7), "f7")
-REGDEF(F8, 8+FPBASE, VFPMASK(8), "f8")
-REGDEF(F9, 9+FPBASE, VFPMASK(9), "f9")
-REGDEF(F10, 10+FPBASE, VFPMASK(10), "f10")
-REGDEF(F11, 11+FPBASE, VFPMASK(11), "f11")
-REGDEF(F12, 12+FPBASE, VFPMASK(12), "f12")
-REGDEF(F13, 13+FPBASE, VFPMASK(13), "f13")
-REGDEF(F14, 14+FPBASE, VFPMASK(14), "f14")
-REGDEF(F15, 15+FPBASE, VFPMASK(15), "f15")
-REGDEF(F16, 16+FPBASE, VFPMASK(16), "f16")
-REGDEF(F17, 17+FPBASE, VFPMASK(17), "f17")
-REGDEF(F18, 18+FPBASE, VFPMASK(18), "f18")
-REGDEF(F19, 19+FPBASE, VFPMASK(19), "f19")
-REGDEF(F20, 20+FPBASE, VFPMASK(20), "f20")
-REGDEF(F21, 21+FPBASE, VFPMASK(21), "f21")
-REGDEF(F22, 22+FPBASE, VFPMASK(22), "f22")
-REGDEF(F23, 23+FPBASE, VFPMASK(23), "f23")
-REGDEF(F24, 24+FPBASE, VFPMASK(24), "f24")
-REGDEF(F25, 25+FPBASE, VFPMASK(25), "f25")
-REGDEF(F26, 26+FPBASE, VFPMASK(26), "f26")
-REGDEF(F27, 27+FPBASE, VFPMASK(27), "f27")
-REGDEF(F28, 28+FPBASE, VFPMASK(28), "f28")
-REGDEF(F29, 29+FPBASE, VFPMASK(29), "f29")
-REGDEF(F30, 30+FPBASE, VFPMASK(30), "f30")
-REGDEF(F31, 31+FPBASE, VFPMASK(31), "f31")
+REGDEF(F0, 0+FPBASE, VFPMASK(0), "f0", 1)
+REGDEF(F1, 1+FPBASE, VFPMASK(1), "f1", 1)
+REGDEF(F2, 2+FPBASE, VFPMASK(2), "f2", 1)
+REGDEF(F3, 3+FPBASE, VFPMASK(3), "f3", 1)
+REGDEF(F4, 4+FPBASE, VFPMASK(4), "f4", 1)
+REGDEF(F5, 5+FPBASE, VFPMASK(5), "f5", 1)
+REGDEF(F6, 6+FPBASE, VFPMASK(6), "f6", 1)
+REGDEF(F7, 7+FPBASE, VFPMASK(7), "f7", 1)
+REGDEF(F8, 8+FPBASE, VFPMASK(8), "f8", 1)
+REGDEF(F9, 9+FPBASE, VFPMASK(9), "f9", 1)
+REGDEF(F10, 10+FPBASE, VFPMASK(10), "f10", 1)
+REGDEF(F11, 11+FPBASE, VFPMASK(11), "f11", 1)
+REGDEF(F12, 12+FPBASE, VFPMASK(12), "f12", 1)
+REGDEF(F13, 13+FPBASE, VFPMASK(13), "f13", 1)
+REGDEF(F14, 14+FPBASE, VFPMASK(14), "f14", 1)
+REGDEF(F15, 15+FPBASE, VFPMASK(15), "f15", 1)
+REGDEF(F16, 16+FPBASE, VFPMASK(16), "f16", 1)
+REGDEF(F17, 17+FPBASE, VFPMASK(17), "f17", 1)
+REGDEF(F18, 18+FPBASE, VFPMASK(18), "f18", 1)
+REGDEF(F19, 19+FPBASE, VFPMASK(19), "f19", 1)
+REGDEF(F20, 20+FPBASE, VFPMASK(20), "f20", 1)
+REGDEF(F21, 21+FPBASE, VFPMASK(21), "f21", 1)
+REGDEF(F22, 22+FPBASE, VFPMASK(22), "f22", 1)
+REGDEF(F23, 23+FPBASE, VFPMASK(23), "f23", 1)
+REGDEF(F24, 24+FPBASE, VFPMASK(24), "f24", 1)
+REGDEF(F25, 25+FPBASE, VFPMASK(25), "f25", 1)
+REGDEF(F26, 26+FPBASE, VFPMASK(26), "f26", 1)
+REGDEF(F27, 27+FPBASE, VFPMASK(27), "f27", 1)
+REGDEF(F28, 28+FPBASE, VFPMASK(28), "f28", 1)
+REGDEF(F29, 29+FPBASE, VFPMASK(29), "f29", 1)
+REGDEF(F30, 30+FPBASE, VFPMASK(30), "f30", 1)
+REGDEF(F31, 31+FPBASE, VFPMASK(31), "f31", 1)
// Allow us to call R11/FP, SP, LR and PC by their register number names
@@ -75,7 +75,7 @@ REGALIAS(R14, LR)
REGALIAS(R15, PC)
// This must be last!
-REGDEF(STK, 32+FPBASE, 0x0000, "STK")
+REGDEF(STK, 32+FPBASE, 0x0000, "STK", 2)
/*****************************************************************************/
#undef REGDEF
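A minimal sketch of how the new trailing regTypeTag column could be consumed, mirroring the way regset.cpp builds its regMasks[] table; the regTypeTags table below is illustrative only (not part of this change) and assumes a target such as arm whose REGDEF takes a single sname:

    // Hypothetical per-register class table built from the new column.
    // On this target the tag values above are 0 = GPR, 1 = float, 2 = the
    // pseudo register STK.
    const unsigned char regTypeTags[] = {
    #define REGDEF(name, rnum, mask, sname, regTypeTag) regTypeTag,
    #include "register.h"
    };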
diff --git a/src/coreclr/jit/registerarm64.h b/src/coreclr/jit/registerarm64.h
index e8c126fac148cb..e754add488404b 100644
--- a/src/coreclr/jit/registerarm64.h
+++ b/src/coreclr/jit/registerarm64.h
@@ -15,39 +15,39 @@
#define RMASK(x) (1ULL << (x))
/*
-REGDEF(name, rnum, mask, xname, wname) */
-REGDEF(R0, 0, 0x0001, "x0" , "w0" )
-REGDEF(R1, 1, 0x0002, "x1" , "w1" )
-REGDEF(R2, 2, 0x0004, "x2" , "w2" )
-REGDEF(R3, 3, 0x0008, "x3" , "w3" )
-REGDEF(R4, 4, 0x0010, "x4" , "w4" )
-REGDEF(R5, 5, 0x0020, "x5" , "w5" )
-REGDEF(R6, 6, 0x0040, "x6" , "w6" )
-REGDEF(R7, 7, 0x0080, "x7" , "w7" )
-REGDEF(R8, 8, 0x0100, "x8" , "w8" )
-REGDEF(R9, 9, 0x0200, "x9" , "w9" )
-REGDEF(R10, 10, 0x0400, "x10", "w10" )
-REGDEF(R11, 11, 0x0800, "x11", "w11" )
-REGDEF(R12, 12, 0x1000, "x12", "w12" )
-REGDEF(R13, 13, 0x2000, "x13", "w13" )
-REGDEF(R14, 14, 0x4000, "x14", "w14" )
-REGDEF(R15, 15, 0x8000, "x15", "w15" )
-REGDEF(IP0, 16, 0x10000, "xip0","wip0" )
-REGDEF(IP1, 17, 0x20000, "xip1","wip1" )
-REGDEF(PR, 18, 0x40000, "xpr", "wpr" )
-REGDEF(R19, 19, 0x80000, "x19", "w19" )
-REGDEF(R20, 20, 0x100000, "x20", "w20" )
-REGDEF(R21, 21, 0x200000, "x21", "w21" )
-REGDEF(R22, 22, 0x400000, "x22", "w22" )
-REGDEF(R23, 23, 0x800000, "x23", "w23" )
-REGDEF(R24, 24, 0x1000000, "x24", "w24" )
-REGDEF(R25, 25, 0x2000000, "x25", "w25" )
-REGDEF(R26, 26, 0x4000000, "x26", "w26" )
-REGDEF(R27, 27, 0x8000000, "x27", "w27" )
-REGDEF(R28, 28, 0x10000000, "x28", "w28" )
-REGDEF(FP, 29, 0x20000000, "fp" , "w29" )
-REGDEF(LR, 30, 0x40000000, "lr" , "w30" )
-REGDEF(ZR, 31, 0x80000000, "xzr", "wzr" )
+REGDEF(name, rnum, mask, xname, wname, regTypeTag) */
+REGDEF(R0, 0, 0x0001, "x0" , "w0", 0)
+REGDEF(R1, 1, 0x0002, "x1" , "w1", 0)
+REGDEF(R2, 2, 0x0004, "x2" , "w2", 0)
+REGDEF(R3, 3, 0x0008, "x3" , "w3", 0)
+REGDEF(R4, 4, 0x0010, "x4" , "w4", 0)
+REGDEF(R5, 5, 0x0020, "x5" , "w5", 0)
+REGDEF(R6, 6, 0x0040, "x6" , "w6", 0)
+REGDEF(R7, 7, 0x0080, "x7" , "w7", 0)
+REGDEF(R8, 8, 0x0100, "x8" , "w8", 0)
+REGDEF(R9, 9, 0x0200, "x9" , "w9", 0)
+REGDEF(R10, 10, 0x0400, "x10", "w10", 0)
+REGDEF(R11, 11, 0x0800, "x11", "w11", 0)
+REGDEF(R12, 12, 0x1000, "x12", "w12", 0)
+REGDEF(R13, 13, 0x2000, "x13", "w13", 0)
+REGDEF(R14, 14, 0x4000, "x14", "w14", 0)
+REGDEF(R15, 15, 0x8000, "x15", "w15", 0)
+REGDEF(IP0, 16, 0x10000, "xip0","wip0",0)
+REGDEF(IP1, 17, 0x20000, "xip1","wip1",0)
+REGDEF(PR, 18, 0x40000, "xpr", "wpr", 0)
+REGDEF(R19, 19, 0x80000, "x19", "w19", 0)
+REGDEF(R20, 20, 0x100000, "x20", "w20", 0)
+REGDEF(R21, 21, 0x200000, "x21", "w21", 0)
+REGDEF(R22, 22, 0x400000, "x22", "w22", 0)
+REGDEF(R23, 23, 0x800000, "x23", "w23", 0)
+REGDEF(R24, 24, 0x1000000, "x24", "w24", 0)
+REGDEF(R25, 25, 0x2000000, "x25", "w25", 0)
+REGDEF(R26, 26, 0x4000000, "x26", "w26", 0)
+REGDEF(R27, 27, 0x8000000, "x27", "w27", 0)
+REGDEF(R28, 28, 0x10000000, "x28", "w28", 0)
+REGDEF(FP, 29, 0x20000000, "fp" , "w29", 0)
+REGDEF(LR, 30, 0x40000000, "lr" , "w30", 0)
+REGDEF(ZR, 31, 0x80000000, "xzr", "wzr", 0)
// Allow us to call IP0,IP1,PR,FP,LR by their register number names
REGALIAS(R16, IP0)
@@ -60,66 +60,69 @@ REGALIAS(R30, LR)
#define VMASK(x) (1ULL << (VBASE+(x)))
/*
-REGDEF(name, rnum, mask, xname, wname) */
-REGDEF(V0, 0+VBASE, VMASK(0), "d0", "s0")
-REGDEF(V1, 1+VBASE, VMASK(1), "d1", "s1")
-REGDEF(V2, 2+VBASE, VMASK(2), "d2", "s2")
-REGDEF(V3, 3+VBASE, VMASK(3), "d3", "s3")
-REGDEF(V4, 4+VBASE, VMASK(4), "d4", "s4")
-REGDEF(V5, 5+VBASE, VMASK(5), "d5", "s5")
-REGDEF(V6, 6+VBASE, VMASK(6), "d6", "s6")
-REGDEF(V7, 7+VBASE, VMASK(7), "d7", "s7")
-REGDEF(V8, 8+VBASE, VMASK(8), "d8", "s8")
-REGDEF(V9, 9+VBASE, VMASK(9), "d9", "s9")
-REGDEF(V10, 10+VBASE, VMASK(10), "d10", "s10")
-REGDEF(V11, 11+VBASE, VMASK(11), "d11", "s11")
-REGDEF(V12, 12+VBASE, VMASK(12), "d12", "s12")
-REGDEF(V13, 13+VBASE, VMASK(13), "d13", "s13")
-REGDEF(V14, 14+VBASE, VMASK(14), "d14", "s14")
-REGDEF(V15, 15+VBASE, VMASK(15), "d15", "s15")
-REGDEF(V16, 16+VBASE, VMASK(16), "d16", "s16")
-REGDEF(V17, 17+VBASE, VMASK(17), "d17", "s17")
-REGDEF(V18, 18+VBASE, VMASK(18), "d18", "s18")
-REGDEF(V19, 19+VBASE, VMASK(19), "d19", "s19")
-REGDEF(V20, 20+VBASE, VMASK(20), "d20", "s20")
-REGDEF(V21, 21+VBASE, VMASK(21), "d21", "s21")
-REGDEF(V22, 22+VBASE, VMASK(22), "d22", "s22")
-REGDEF(V23, 23+VBASE, VMASK(23), "d23", "s23")
-REGDEF(V24, 24+VBASE, VMASK(24), "d24", "s24")
-REGDEF(V25, 25+VBASE, VMASK(25), "d25", "s25")
-REGDEF(V26, 26+VBASE, VMASK(26), "d26", "s26")
-REGDEF(V27, 27+VBASE, VMASK(27), "d27", "s27")
-REGDEF(V28, 28+VBASE, VMASK(28), "d28", "s28")
-REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29")
-REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30")
-REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31")
+REGDEF(name, rnum, mask, xname, wname, regTypeTag) */
+REGDEF(V0, 0+VBASE, VMASK(0), "d0", "s0", 1)
+REGDEF(V1, 1+VBASE, VMASK(1), "d1", "s1", 1)
+REGDEF(V2, 2+VBASE, VMASK(2), "d2", "s2", 1)
+REGDEF(V3, 3+VBASE, VMASK(3), "d3", "s3", 1)
+REGDEF(V4, 4+VBASE, VMASK(4), "d4", "s4", 1)
+REGDEF(V5, 5+VBASE, VMASK(5), "d5", "s5", 1)
+REGDEF(V6, 6+VBASE, VMASK(6), "d6", "s6", 1)
+REGDEF(V7, 7+VBASE, VMASK(7), "d7", "s7", 1)
+REGDEF(V8, 8+VBASE, VMASK(8), "d8", "s8", 1)
+REGDEF(V9, 9+VBASE, VMASK(9), "d9", "s9", 1)
+REGDEF(V10, 10+VBASE, VMASK(10), "d10", "s10", 1)
+REGDEF(V11, 11+VBASE, VMASK(11), "d11", "s11", 1)
+REGDEF(V12, 12+VBASE, VMASK(12), "d12", "s12", 1)
+REGDEF(V13, 13+VBASE, VMASK(13), "d13", "s13", 1)
+REGDEF(V14, 14+VBASE, VMASK(14), "d14", "s14", 1)
+REGDEF(V15, 15+VBASE, VMASK(15), "d15", "s15", 1)
+REGDEF(V16, 16+VBASE, VMASK(16), "d16", "s16", 1)
+REGDEF(V17, 17+VBASE, VMASK(17), "d17", "s17", 1)
+REGDEF(V18, 18+VBASE, VMASK(18), "d18", "s18", 1)
+REGDEF(V19, 19+VBASE, VMASK(19), "d19", "s19", 1)
+REGDEF(V20, 20+VBASE, VMASK(20), "d20", "s20", 1)
+REGDEF(V21, 21+VBASE, VMASK(21), "d21", "s21", 1)
+REGDEF(V22, 22+VBASE, VMASK(22), "d22", "s22", 1)
+REGDEF(V23, 23+VBASE, VMASK(23), "d23", "s23", 1)
+REGDEF(V24, 24+VBASE, VMASK(24), "d24", "s24", 1)
+REGDEF(V25, 25+VBASE, VMASK(25), "d25", "s25", 1)
+REGDEF(V26, 26+VBASE, VMASK(26), "d26", "s26", 1)
+REGDEF(V27, 27+VBASE, VMASK(27), "d27", "s27", 1)
+REGDEF(V28, 28+VBASE, VMASK(28), "d28", "s28", 1)
+REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29", 1)
+REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30", 1)
+REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31", 1)
-// TODO-SVE: Fix once we add predicate registers
-REGALIAS(P0, V0)
-REGALIAS(P1, V1)
-REGALIAS(P2, V2)
-REGALIAS(P3, V3)
-REGALIAS(P4, V4)
-REGALIAS(P5, V5)
-REGALIAS(P6, V6)
-REGALIAS(P7, V7)
-REGALIAS(P8, V8)
-REGALIAS(P9, V9)
-REGALIAS(P10, V10)
-REGALIAS(P11, V11)
-REGALIAS(P12, V12)
-REGALIAS(P13, V13)
-REGALIAS(P14, V14)
-REGALIAS(P15, V15)
+#define PBASE 64
+#define PMASK(x) (1ULL << x)
+/*
+REGDEF(name, rnum, mask, xname, wname, regTypeTag) */
+REGDEF(P0, 0+PBASE, PMASK(0), "p0" , "na", 2)
+REGDEF(P1, 1+PBASE, PMASK(1), "p1" , "na", 2)
+REGDEF(P2, 2+PBASE, PMASK(2), "p2" , "na", 2)
+REGDEF(P3, 3+PBASE, PMASK(3), "p3" , "na", 2)
+REGDEF(P4, 4+PBASE, PMASK(4), "p4" , "na", 2)
+REGDEF(P5, 5+PBASE, PMASK(5), "p5" , "na", 2)
+REGDEF(P6, 6+PBASE, PMASK(6), "p6" , "na", 2)
+REGDEF(P7, 7+PBASE, PMASK(7), "p7" , "na", 2)
+REGDEF(P8, 8+PBASE, PMASK(8), "p8" , "na", 2)
+REGDEF(P9, 9+PBASE, PMASK(9), "p9" , "na", 2)
+REGDEF(P10, 10+PBASE, PMASK(10), "p10", "na", 2)
+REGDEF(P11, 11+PBASE, PMASK(11), "p11", "na", 2)
+REGDEF(P12, 12+PBASE, PMASK(12), "p12", "na", 2)
+REGDEF(P13, 13+PBASE, PMASK(13), "p13", "na", 2)
+REGDEF(P14, 14+PBASE, PMASK(14), "p14", "na", 2)
+REGDEF(P15, 15+PBASE, PMASK(15), "p15", "na", 2)
-// The registers with values 64 (NBASE) and above are not real register numbers
-#define NBASE 64
+// The registers with values 80 (NBASE) and above are not real register numbers
+#define NBASE 80
-REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?")
+REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?", 3)
// This must be last!
-REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK")
+REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK", 3)
/*****************************************************************************/
#undef RMASK
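With this change the arm64 predicate registers p0-p15 become real registers numbered 64-79 (PBASE is 64, pushing NBASE to 80), so their masks cannot share the single 64-bit GPR/float word; PMASK deliberately wraps back into the low bit positions so the predicate masks can live in a separate 32-bit word of the AllRegsMask type introduced in target.h below. A small illustrative check, assuming the arm64 expansion above (not part of the change itself):

    // Illustrative only: predicate register numbers follow V31, while their
    // mask bits overlap the GPR bits and so must be kept in their own word
    // (_predicateRegs in AllRegsMask) rather than in the combined 64-bit mask.
    static_assert(REG_P0 == 64 && REG_P15 == 79, "p0-p15 follow V31");
    static_assert(RBM_P0 == RBM_R0, "PMASK bits overlap the GPR bits");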
diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h
index 8f3cd157016bb2..288bb020a93ad0 100644
--- a/src/coreclr/jit/registerloongarch64.h
+++ b/src/coreclr/jit/registerloongarch64.h
@@ -15,39 +15,39 @@
#define RMASK(x) (1ULL << (x))
/*
-REGDEF(name, rnum, mask, sname) */
-REGDEF(R0, 0, 0x0001, "zero")
-REGDEF(RA, 1, 0x0002, "ra" )
-REGDEF(TP, 2, 0x0004, "tp" )
-REGDEF(SP, 3, 0x0008, "sp" )
-REGDEF(A0, 4, 0x0010, "a0" )
-REGDEF(A1, 5, 0x0020, "a1" )
-REGDEF(A2, 6, 0x0040, "a2" )
-REGDEF(A3, 7, 0x0080, "a3" )
-REGDEF(A4, 8, 0x0100, "a4" )
-REGDEF(A5, 9, 0x0200, "a5" )
-REGDEF(A6, 10, 0x0400, "a6" )
-REGDEF(A7, 11, 0x0800, "a7" )
-REGDEF(T0, 12, 0x1000, "t0" )
-REGDEF(T1, 13, 0x2000, "t1" )
-REGDEF(T2, 14, 0x4000, "t2" )
-REGDEF(T3, 15, 0x8000, "t3" )
-REGDEF(T4, 16, 0x10000, "t4" )
-REGDEF(T5, 17, 0x20000, "t5" )
-REGDEF(T6, 18, 0x40000, "t6" )
-REGDEF(T7, 19, 0x80000, "t7" )
-REGDEF(T8, 20, 0x100000, "t8" )
-REGDEF(X0, 21, 0x200000, "x0" )
-REGDEF(FP, 22, 0x400000, "fp" )
-REGDEF(S0, 23, 0x800000, "s0" )
-REGDEF(S1, 24, 0x1000000, "s1" )
-REGDEF(S2, 25, 0x2000000, "s2" )
-REGDEF(S3, 26, 0x4000000, "s3" )
-REGDEF(S4, 27, 0x8000000, "s4" )
-REGDEF(S5, 28, 0x10000000, "s5" )
-REGDEF(S6, 29, 0x20000000, "s6" )
-REGDEF(S7, 30, 0x40000000, "s7" )
-REGDEF(S8, 31, 0x80000000, "s8" )
+REGDEF(name, rnum, mask, sname, regTypeTag) */
+REGDEF(R0, 0, 0x0001, "zero", 0)
+REGDEF(RA, 1, 0x0002, "ra" , 0)
+REGDEF(TP, 2, 0x0004, "tp" , 0)
+REGDEF(SP, 3, 0x0008, "sp" , 0)
+REGDEF(A0, 4, 0x0010, "a0" , 0)
+REGDEF(A1, 5, 0x0020, "a1" , 0)
+REGDEF(A2, 6, 0x0040, "a2" , 0)
+REGDEF(A3, 7, 0x0080, "a3" , 0)
+REGDEF(A4, 8, 0x0100, "a4" , 0)
+REGDEF(A5, 9, 0x0200, "a5" , 0)
+REGDEF(A6, 10, 0x0400, "a6" , 0)
+REGDEF(A7, 11, 0x0800, "a7" , 0)
+REGDEF(T0, 12, 0x1000, "t0" , 0)
+REGDEF(T1, 13, 0x2000, "t1" , 0)
+REGDEF(T2, 14, 0x4000, "t2" , 0)
+REGDEF(T3, 15, 0x8000, "t3" , 0)
+REGDEF(T4, 16, 0x10000, "t4" , 0)
+REGDEF(T5, 17, 0x20000, "t5" , 0)
+REGDEF(T6, 18, 0x40000, "t6" , 0)
+REGDEF(T7, 19, 0x80000, "t7" , 0)
+REGDEF(T8, 20, 0x100000, "t8" , 0)
+REGDEF(X0, 21, 0x200000, "x0" , 0)
+REGDEF(FP, 22, 0x400000, "fp" , 0)
+REGDEF(S0, 23, 0x800000, "s0" , 0)
+REGDEF(S1, 24, 0x1000000, "s1" , 0)
+REGDEF(S2, 25, 0x2000000, "s2" , 0)
+REGDEF(S3, 26, 0x4000000, "s3" , 0)
+REGDEF(S4, 27, 0x8000000, "s4" , 0)
+REGDEF(S5, 28, 0x10000000, "s5" , 0)
+REGDEF(S6, 29, 0x20000000, "s6" , 0)
+REGDEF(S7, 30, 0x40000000, "s7" , 0)
+REGDEF(S8, 31, 0x80000000, "s8" , 0)
//NOTE for LoongArch64:
// The `REG_R21` which alias `REG_X0` is specially reserved !!!
@@ -64,44 +64,44 @@ REGALIAS(R21, X0)
/*
REGDEF(name, rnum, mask, sname) */
-REGDEF(F0, 0+FBASE, FMASK(0), "f0")
-REGDEF(F1, 1+FBASE, FMASK(1), "f1")
-REGDEF(F2, 2+FBASE, FMASK(2), "f2")
-REGDEF(F3, 3+FBASE, FMASK(3), "f3")
-REGDEF(F4, 4+FBASE, FMASK(4), "f4")
-REGDEF(F5, 5+FBASE, FMASK(5), "f5")
-REGDEF(F6, 6+FBASE, FMASK(6), "f6")
-REGDEF(F7, 7+FBASE, FMASK(7), "f7")
-REGDEF(F8, 8+FBASE, FMASK(8), "f8")
-REGDEF(F9, 9+FBASE, FMASK(9), "f9")
-REGDEF(F10, 10+FBASE, FMASK(10), "f10")
-REGDEF(F11, 11+FBASE, FMASK(11), "f11")
-REGDEF(F12, 12+FBASE, FMASK(12), "f12")
-REGDEF(F13, 13+FBASE, FMASK(13), "f13")
-REGDEF(F14, 14+FBASE, FMASK(14), "f14")
-REGDEF(F15, 15+FBASE, FMASK(15), "f15")
-REGDEF(F16, 16+FBASE, FMASK(16), "f16")
-REGDEF(F17, 17+FBASE, FMASK(17), "f17")
-REGDEF(F18, 18+FBASE, FMASK(18), "f18")
-REGDEF(F19, 19+FBASE, FMASK(19), "f19")
-REGDEF(F20, 20+FBASE, FMASK(20), "f20")
-REGDEF(F21, 21+FBASE, FMASK(21), "f21")
-REGDEF(F22, 22+FBASE, FMASK(22), "f22")
-REGDEF(F23, 23+FBASE, FMASK(23), "f23")
-REGDEF(F24, 24+FBASE, FMASK(24), "f24")
-REGDEF(F25, 25+FBASE, FMASK(25), "f25")
-REGDEF(F26, 26+FBASE, FMASK(26), "f26")
-REGDEF(F27, 27+FBASE, FMASK(27), "f27")
-REGDEF(F28, 28+FBASE, FMASK(28), "f28")
-REGDEF(F29, 29+FBASE, FMASK(29), "f29")
-REGDEF(F30, 30+FBASE, FMASK(30), "f30")
-REGDEF(F31, 31+FBASE, FMASK(31), "f31")
+REGDEF(F0, 0+FBASE, FMASK(0), "f0", 1)
+REGDEF(F1, 1+FBASE, FMASK(1), "f1", 1)
+REGDEF(F2, 2+FBASE, FMASK(2), "f2", 1)
+REGDEF(F3, 3+FBASE, FMASK(3), "f3", 1)
+REGDEF(F4, 4+FBASE, FMASK(4), "f4", 1)
+REGDEF(F5, 5+FBASE, FMASK(5), "f5", 1)
+REGDEF(F6, 6+FBASE, FMASK(6), "f6", 1)
+REGDEF(F7, 7+FBASE, FMASK(7), "f7", 1)
+REGDEF(F8, 8+FBASE, FMASK(8), "f8", 1)
+REGDEF(F9, 9+FBASE, FMASK(9), "f9", 1)
+REGDEF(F10, 10+FBASE, FMASK(10), "f10", 1)
+REGDEF(F11, 11+FBASE, FMASK(11), "f11", 1)
+REGDEF(F12, 12+FBASE, FMASK(12), "f12", 1)
+REGDEF(F13, 13+FBASE, FMASK(13), "f13", 1)
+REGDEF(F14, 14+FBASE, FMASK(14), "f14", 1)
+REGDEF(F15, 15+FBASE, FMASK(15), "f15", 1)
+REGDEF(F16, 16+FBASE, FMASK(16), "f16", 1)
+REGDEF(F17, 17+FBASE, FMASK(17), "f17", 1)
+REGDEF(F18, 18+FBASE, FMASK(18), "f18", 1)
+REGDEF(F19, 19+FBASE, FMASK(19), "f19", 1)
+REGDEF(F20, 20+FBASE, FMASK(20), "f20", 1)
+REGDEF(F21, 21+FBASE, FMASK(21), "f21", 1)
+REGDEF(F22, 22+FBASE, FMASK(22), "f22", 1)
+REGDEF(F23, 23+FBASE, FMASK(23), "f23", 1)
+REGDEF(F24, 24+FBASE, FMASK(24), "f24", 1)
+REGDEF(F25, 25+FBASE, FMASK(25), "f25", 1)
+REGDEF(F26, 26+FBASE, FMASK(26), "f26", 1)
+REGDEF(F27, 27+FBASE, FMASK(27), "f27", 1)
+REGDEF(F28, 28+FBASE, FMASK(28), "f28", 1)
+REGDEF(F29, 29+FBASE, FMASK(29), "f29", 1)
+REGDEF(F30, 30+FBASE, FMASK(30), "f30", 1)
+REGDEF(F31, 31+FBASE, FMASK(31), "f31", 1)
// The registers with values 64 (NBASE) and above are not real register numbers
#define NBASE 64
// This must be last!
-REGDEF(STK, 0+NBASE, 0x0000, "STK")
+REGDEF(STK, 0+NBASE, 0x0000, "STK", 2)
/*****************************************************************************/
#undef RMASK
diff --git a/src/coreclr/jit/registerriscv64.h b/src/coreclr/jit/registerriscv64.h
index fe6d3cf8ece424..2522fd3f5f9e91 100644
--- a/src/coreclr/jit/registerriscv64.h
+++ b/src/coreclr/jit/registerriscv64.h
@@ -15,39 +15,39 @@
#define RMASK(x) (1ULL << (x))
/*
-REGDEF(name, rnum, mask, sname) */
-REGDEF(R0, 0, 0x0001, "zero")
-REGDEF(RA, 1, 0x0002, "ra" )
-REGDEF(SP, 2, 0x0004, "sp" )
-REGDEF(GP, 3, 0x0008, "gp" )
-REGDEF(TP, 4, 0x0010, "tp" )
-REGDEF(T0, 5, 0x0020, "t0" )
-REGDEF(T1, 6, 0x0040, "t1" )
-REGDEF(T2, 7, 0x0080, "t2" )
-REGDEF(FP, 8, 0x0100, "fp" )
-REGDEF(S1, 9, 0x0200, "s1" )
-REGDEF(A0, 10, 0x0400, "a0" )
-REGDEF(A1, 11, 0x0800, "a1" )
-REGDEF(A2, 12, 0x1000, "a2" )
-REGDEF(A3, 13, 0x2000, "a3" )
-REGDEF(A4, 14, 0x4000, "a4" )
-REGDEF(A5, 15, 0x8000, "a5" )
-REGDEF(A6, 16, 0x10000, "a6" )
-REGDEF(A7, 17, 0x20000, "a7" )
-REGDEF(S2, 18, 0x40000, "s2" )
-REGDEF(S3, 19, 0x80000, "s3" )
-REGDEF(S4, 20, 0x100000, "s4" )
-REGDEF(S5, 21, 0x200000, "s5" )
-REGDEF(S6, 22, 0x400000, "s6" )
-REGDEF(S7, 23, 0x800000, "s7" )
-REGDEF(S8, 24, 0x1000000, "s8" )
-REGDEF(S9, 25, 0x2000000, "s9" )
-REGDEF(S10, 26, 0x4000000, "s10" )
-REGDEF(S11, 27, 0x8000000, "s11" )
-REGDEF(T3, 28, 0x10000000, "t3" )
-REGDEF(T4, 29, 0x20000000, "t4" )
-REGDEF(T5, 30, 0x40000000, "t5" )
-REGDEF(T6, 31, 0x80000000, "t6" )
+REGDEF(name, rnum, mask, sname, regTypeTag) */
+REGDEF(R0, 0, 0x0001, "zero", 0)
+REGDEF(RA, 1, 0x0002, "ra" , 0)
+REGDEF(SP, 2, 0x0004, "sp" , 0)
+REGDEF(GP, 3, 0x0008, "gp" , 0)
+REGDEF(TP, 4, 0x0010, "tp" , 0)
+REGDEF(T0, 5, 0x0020, "t0" , 0)
+REGDEF(T1, 6, 0x0040, "t1" , 0)
+REGDEF(T2, 7, 0x0080, "t2", 0)
+REGDEF(FP, 8, 0x0100, "fp", 0)
+REGDEF(S1, 9, 0x0200, "s1", 0)
+REGDEF(A0, 10, 0x0400, "a0", 0)
+REGDEF(A1, 11, 0x0800, "a1", 0)
+REGDEF(A2, 12, 0x1000, "a2", 0)
+REGDEF(A3, 13, 0x2000, "a3", 0)
+REGDEF(A4, 14, 0x4000, "a4", 0)
+REGDEF(A5, 15, 0x8000, "a5", 0)
+REGDEF(A6, 16, 0x10000, "a6", 0)
+REGDEF(A7, 17, 0x20000, "a7", 0)
+REGDEF(S2, 18, 0x40000, "s2", 0)
+REGDEF(S3, 19, 0x80000, "s3", 0)
+REGDEF(S4, 20, 0x100000, "s4", 0)
+REGDEF(S5, 21, 0x200000, "s5", 0)
+REGDEF(S6, 22, 0x400000, "s6", 0)
+REGDEF(S7, 23, 0x800000, "s7", 0)
+REGDEF(S8, 24, 0x1000000, "s8", 0)
+REGDEF(S9, 25, 0x2000000, "s9", 0)
+REGDEF(S10, 26, 0x4000000, "s10", 0)
+REGDEF(S11, 27, 0x8000000, "s11", 0)
+REGDEF(T3, 28, 0x10000000, "t3", 0)
+REGDEF(T4, 29, 0x20000000, "t4", 0)
+REGDEF(T5, 30, 0x40000000, "t5", 0)
+REGDEF(T6, 31, 0x80000000, "t6", 0)
REGALIAS(R8, FP)
REGALIAS(ZERO, R0)
@@ -57,43 +57,43 @@ REGALIAS(ZERO, R0)
/*
REGDEF(name, rnum, mask, sname) */
-REGDEF(F0, 0+FBASE, FMASK(0), "f0")
-REGDEF(F1, 1+FBASE, FMASK(1), "f1")
-REGDEF(F2, 2+FBASE, FMASK(2), "f2")
-REGDEF(F3, 3+FBASE, FMASK(3), "f3")
-REGDEF(F4, 4+FBASE, FMASK(4), "f4")
-REGDEF(F5, 5+FBASE, FMASK(5), "f5")
-REGDEF(F6, 6+FBASE, FMASK(6), "f6")
-REGDEF(F7, 7+FBASE, FMASK(7), "f7")
-REGDEF(F8, 8+FBASE, FMASK(8), "f8")
-REGDEF(F9, 9+FBASE, FMASK(9), "f9")
-REGDEF(F10, 10+FBASE, FMASK(10), "f10")
-REGDEF(F11, 11+FBASE, FMASK(11), "f11")
-REGDEF(F12, 12+FBASE, FMASK(12), "f12")
-REGDEF(F13, 13+FBASE, FMASK(13), "f13")
-REGDEF(F14, 14+FBASE, FMASK(14), "f14")
-REGDEF(F15, 15+FBASE, FMASK(15), "f15")
-REGDEF(F16, 16+FBASE, FMASK(16), "f16")
-REGDEF(F17, 17+FBASE, FMASK(17), "f17")
-REGDEF(F18, 18+FBASE, FMASK(18), "f18")
-REGDEF(F19, 19+FBASE, FMASK(19), "f19")
-REGDEF(F20, 20+FBASE, FMASK(20), "f20")
-REGDEF(F21, 21+FBASE, FMASK(21), "f21")
-REGDEF(F22, 22+FBASE, FMASK(22), "f22")
-REGDEF(F23, 23+FBASE, FMASK(23), "f23")
-REGDEF(F24, 24+FBASE, FMASK(24), "f24")
-REGDEF(F25, 25+FBASE, FMASK(25), "f25")
-REGDEF(F26, 26+FBASE, FMASK(26), "f26")
-REGDEF(F27, 27+FBASE, FMASK(27), "f27")
-REGDEF(F28, 28+FBASE, FMASK(28), "f28")
-REGDEF(F29, 29+FBASE, FMASK(29), "f29")
-REGDEF(F30, 30+FBASE, FMASK(30), "f30")
-REGDEF(F31, 31+FBASE, FMASK(31), "f31")
+REGDEF(F0, 0+FBASE, FMASK(0), "f0", 1)
+REGDEF(F1, 1+FBASE, FMASK(1), "f1", 1)
+REGDEF(F2, 2+FBASE, FMASK(2), "f2", 1)
+REGDEF(F3, 3+FBASE, FMASK(3), "f3", 1)
+REGDEF(F4, 4+FBASE, FMASK(4), "f4", 1)
+REGDEF(F5, 5+FBASE, FMASK(5), "f5", 1)
+REGDEF(F6, 6+FBASE, FMASK(6), "f6", 1)
+REGDEF(F7, 7+FBASE, FMASK(7), "f7", 1)
+REGDEF(F8, 8+FBASE, FMASK(8), "f8", 1)
+REGDEF(F9, 9+FBASE, FMASK(9), "f9", 1)
+REGDEF(F10, 10+FBASE, FMASK(10), "f10", 1)
+REGDEF(F11, 11+FBASE, FMASK(11), "f11", 1)
+REGDEF(F12, 12+FBASE, FMASK(12), "f12", 1)
+REGDEF(F13, 13+FBASE, FMASK(13), "f13", 1)
+REGDEF(F14, 14+FBASE, FMASK(14), "f14", 1)
+REGDEF(F15, 15+FBASE, FMASK(15), "f15", 1)
+REGDEF(F16, 16+FBASE, FMASK(16), "f16", 1)
+REGDEF(F17, 17+FBASE, FMASK(17), "f17", 1)
+REGDEF(F18, 18+FBASE, FMASK(18), "f18", 1)
+REGDEF(F19, 19+FBASE, FMASK(19), "f19", 1)
+REGDEF(F20, 20+FBASE, FMASK(20), "f20", 1)
+REGDEF(F21, 21+FBASE, FMASK(21), "f21", 1)
+REGDEF(F22, 22+FBASE, FMASK(22), "f22", 1)
+REGDEF(F23, 23+FBASE, FMASK(23), "f23", 1)
+REGDEF(F24, 24+FBASE, FMASK(24), "f24", 1)
+REGDEF(F25, 25+FBASE, FMASK(25), "f25", 1)
+REGDEF(F26, 26+FBASE, FMASK(26), "f26", 1)
+REGDEF(F27, 27+FBASE, FMASK(27), "f27", 1)
+REGDEF(F28, 28+FBASE, FMASK(28), "f28", 1)
+REGDEF(F29, 29+FBASE, FMASK(29), "f29", 1)
+REGDEF(F30, 30+FBASE, FMASK(30), "f30", 1)
+REGDEF(F31, 31+FBASE, FMASK(31), "f31", 1)
// The registers with values 64 (NBASE) and above are not real register numbers
#define NBASE 64
-REGDEF(STK, 0+NBASE, 0x0000, "STK")
+REGDEF(STK, 0+NBASE, 0x0000, "STK", 2)
/*****************************************************************************/
#undef RMASK
diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp
index 12975850a404ba..09b892377ae61f 100644
--- a/src/coreclr/jit/regset.cpp
+++ b/src/coreclr/jit/regset.cpp
@@ -25,12 +25,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#if defined(TARGET_ARM64)
const regMaskSmall regMasks[] = {
-#define REGDEF(name, rnum, mask, xname, wname) mask,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) mask,
#include "register.h"
};
#else // !TARGET_ARM64
const regMaskSmall regMasks[] = {
-#define REGDEF(name, rnum, mask, sname) mask,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) mask,
#include "register.h"
};
#endif
@@ -45,7 +45,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
//------------------------------------------------------------------------
-// verifyRegUsed: verify that the register is marked as used.
+// verifyGprRegUsed: verify that the GPR register is marked as used.
//
// Arguments:
// reg - The register to verify.
@@ -61,12 +61,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// This method is intended to be called during code generation, and
// should simply validate that the register (or registers) have
// already been added to the modified set.
+//
+void RegSet::verifyGprRegUsed(regNumber reg)
+{
+ assert(emitter::isGeneralRegister(reg));
+ rsSetGprRegsModified(genRegMask(reg));
+}
+//------------------------------------------------------------------------
+// verifyRegUsed: verify that the register is marked as used.
+//
+// Arguments:
+// reg - The register to verify.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The caller must have ensured that the register is already marked
+// as used.
+//
+// Notes:
+// This method is intended to be called during code generation, and
+// should simply validate that the register (or registers) have
+// already been added to the modified set.
void RegSet::verifyRegUsed(regNumber reg)
{
- // TODO-Cleanup: we need to identify the places where the register
- // is not marked as used when this is called.
- rsSetRegsModified(genRegMask(reg));
+ rsSetRegModified(reg);
}
//------------------------------------------------------------------------
@@ -87,21 +108,27 @@ void RegSet::verifyRegUsed(regNumber reg)
// should simply validate that the register (or registers) have
// already been added to the modified set.
-void RegSet::verifyRegistersUsed(regMaskTP regMask)
+void RegSet::verifyRegistersUsed(CONSTREF_AllRegsMask regs)
{
if (m_rsCompiler->opts.OptimizationDisabled())
{
return;
}
- if (regMask == RBM_NONE)
+ if (regs.IsEmpty())
{
return;
}
- // TODO-Cleanup: we need to identify the places where the registers
- // are not marked as used when this is called.
- rsSetRegsModified(regMask);
+ // TODO-Cleanup:
+ // We need to identify the places where the registers
+ // are not marked as used when this is called.
+ //
+ // See https://github.com/dotnet/runtime/issues/10411 and
+ // https://github.com/dotnet/coreclr/pull/18230 on why we call
+ // rsSetRegsModified() instead of assert(rsRegsModified())
+
+ rsSetRegsModified(regs);
}
void RegSet::rsClearRegsModified()
@@ -116,23 +143,24 @@ void RegSet::rsClearRegsModified()
rsModifiedRegsMaskInitialized = true;
#endif // DEBUG
- rsModifiedRegsMask = RBM_NONE;
+ rsModifiedRegsMask.Clear();
#ifdef SWIFT_SUPPORT
// If this method has a SwiftError* parameter, we will return SwiftError::Value in REG_SWIFT_ERROR,
// so don't treat it as callee-save.
if (m_rsCompiler->lvaSwiftErrorArg != BAD_VAR_NUM)
{
- rsAllCalleeSavedMask &= ~RBM_SWIFT_ERROR;
rsIntCalleeSavedMask &= ~RBM_SWIFT_ERROR;
}
#endif // SWIFT_SUPPORT
}
-void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump))
+#ifdef DEBUG
+void RegSet::printModifiedRegsMask(regMaskOnlyOne currentMask,
+ regMaskOnlyOne modifiedMask DEBUGARG(bool suppressDump)
+ DEBUGARG(regMaskOnlyOne calleeSaveMask)) const
{
- assert(mask != RBM_NONE);
- assert(rsModifiedRegsMaskInitialized);
+ regMaskOnlyOne newMask = (currentMask | modifiedMask);
// We can't update the modified registers set after final frame layout (that is, during code
// generation and after). Ignore prolog and epilog generation: they call register tracking to
@@ -141,90 +169,181 @@ void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump))
// Frame layout is only affected by callee-saved registers, so only ensure that callee-saved
// registers aren't modified after final frame layout.
assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog ||
- m_rsCompiler->compGeneratingEpilog ||
- (((rsModifiedRegsMask | mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED)));
+ m_rsCompiler->compGeneratingEpilog || ((newMask & calleeSaveMask) == (currentMask & calleeSaveMask)));
-#ifdef DEBUG
if (m_rsCompiler->verbose && !suppressDump)
{
- if (rsModifiedRegsMask != (rsModifiedRegsMask | mask))
+ if (currentMask != newMask)
{
printf("Marking regs modified: ");
- dspRegMask(mask);
+ dspRegMask(modifiedMask);
printf(" (");
- dspRegMask(rsModifiedRegsMask);
+ dspRegMask(currentMask);
printf(" => ");
- dspRegMask(rsModifiedRegsMask | mask);
+ dspRegMask(newMask);
printf(")\n");
}
}
+}
+#endif
+
+AllRegsMask RegSet::rsGetModifiedCalleeSavedRegsMask() const
+{
+ assert(rsModifiedRegsMaskInitialized);
+ AllRegsMask allCalleeSavedMask = m_rsCompiler->AllRegsMask_CALLEE_SAVED;
+#ifdef SWIFT_SUPPORT
+ // If this method has a SwiftError* parameter, we will return SwiftError::Value in REG_SWIFT_ERROR,
+ // so don't treat it as callee-save.
+ if (m_rsCompiler->lvaSwiftErrorArg != BAD_VAR_NUM)
+ {
+ allCalleeSavedMask.RemoveRegNum(REG_SWIFT_ERROR, TYP_INT);
+ }
+#endif // SWIFT_SUPPORT
+ return (rsModifiedRegsMask & allCalleeSavedMask);
+}
+
+void RegSet::rsSetGprRegsModified(regMaskGpr mask DEBUGARG(bool suppressDump))
+{
+ assert(m_rsCompiler->IsGprRegMask(mask));
+ assert(rsModifiedRegsMaskInitialized);
+#ifdef DEBUG
+ printModifiedRegsMask(rsModifiedRegsMask.gprRegs(), mask DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_INT_CALLEE_SAVED));
+#endif
+
+ rsModifiedRegsMask.AddGprRegMask(mask);
+}
+
+void RegSet::rsSetFloatRegsModified(regMaskFloat mask DEBUGARG(bool suppressDump))
+{
+ assert(m_rsCompiler->IsFloatRegMask(mask));
+ assert(rsModifiedRegsMaskInitialized);
+#ifdef DEBUG
+ printModifiedRegsMask(rsModifiedRegsMask.floatRegs(m_rsCompiler),
+ mask DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_FLT_CALLEE_SAVED));
+#endif
+
+ rsModifiedRegsMask.AddFloatRegMask(mask);
+}
+
+void RegSet::rsSetRegModified(regNumber reg DEBUGARG(bool suppressDump))
+{
+ assert(rsModifiedRegsMaskInitialized);
+
+#ifdef DEBUG
+ if (genIsValidIntReg(reg))
+ {
+ printModifiedRegsMask(rsModifiedRegsMask.gprRegs(),
+ genRegMask(reg) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_INT_CALLEE_SAVED));
+ }
+ else if (genIsValidFloatReg(reg))
+ {
+ printModifiedRegsMask(rsModifiedRegsMask.floatRegs(m_rsCompiler),
+ genRegMask(reg) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_FLT_CALLEE_SAVED));
+ }
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else
+ {
+ assert(genIsValidMaskReg(reg));
+ printModifiedRegsMask(rsModifiedRegsMask.predicateRegs(m_rsCompiler),
+ genRegMask(reg) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_MSK_CALLEE_SAVED));
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
#endif // DEBUG
- rsModifiedRegsMask |= mask;
+ rsModifiedRegsMask.AddRegNumInMask(reg);
}
-void RegSet::rsRemoveRegsModified(regMaskTP mask)
+void RegSet::rsSetRegsModified(CONSTREF_AllRegsMask modifiedMask DEBUGARG(bool suppressDump))
+{
+ // TODO: This assert is commented out so that callers don't have to check whether modifiedMask is RBM_NONE.
+ // Passing RBM_NONE is harmless, as it will not modify the tracking mask.
+ // assert(modifiedMask != RBM_NONE);
+ assert(rsModifiedRegsMaskInitialized);
+
+#ifdef DEBUG
+ printModifiedRegsMask(rsModifiedRegsMask.gprRegs(),
+ modifiedMask.gprRegs() DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_INT_CALLEE_SAVED));
+ printModifiedRegsMask(rsModifiedRegsMask.floatRegs(m_rsCompiler),
+ modifiedMask.floatRegs(m_rsCompiler) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_FLT_CALLEE_SAVED));
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ printModifiedRegsMask(rsModifiedRegsMask.predicateRegs(m_rsCompiler), modifiedMask.predicateRegs(m_rsCompiler)
+ DEBUG_ARG(suppressDump)
+ DEBUG_ARG(RBM_MSK_CALLEE_SAVED));
+#endif // FEATURE_MASKED_HW_INTRINSICS
+#endif // DEBUG
+
+ rsModifiedRegsMask |= modifiedMask;
+}
+
+void RegSet::rsRemoveRegsModified(regMaskGpr mask)
{
assert(mask != RBM_NONE);
assert(rsModifiedRegsMaskInitialized);
+ assert(m_rsCompiler->IsGprRegMask(mask));
+#ifdef DEBUG
+ regMaskGpr rsModifiedGprRegsMask = rsGetModifiedGprRegsMask();
// See comment in rsSetRegsModified().
assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog ||
m_rsCompiler->compGeneratingEpilog ||
- (((rsModifiedRegsMask & ~mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED)));
+ (((rsModifiedGprRegsMask & ~mask) & RBM_CALLEE_SAVED) == (rsModifiedGprRegsMask & RBM_CALLEE_SAVED)));
-#ifdef DEBUG
if (m_rsCompiler->verbose)
{
printf("Removing modified regs: ");
- dspRegMask(mask);
- if (rsModifiedRegsMask == (rsModifiedRegsMask & ~mask))
+ dspRegMask(mask, RBM_NONE);
+ if (rsModifiedGprRegsMask == (rsModifiedGprRegsMask & ~mask))
{
printf(" (unchanged)");
}
else
{
printf(" (");
- dspRegMask(rsModifiedRegsMask);
+ dspRegMask(rsModifiedGprRegsMask, RBM_NONE);
printf(" => ");
- dspRegMask(rsModifiedRegsMask & ~mask);
+ dspRegMask(rsModifiedGprRegsMask & ~mask, RBM_NONE);
printf(")");
}
printf("\n");
}
#endif // DEBUG
- rsModifiedRegsMask &= ~mask;
+ rsModifiedRegsMask.RemoveRegTypeFromMask(mask, TYP_INT);
}
-void RegSet::SetMaskVars(regMaskTP newMaskVars)
+void RegSet::ClearMaskVars() // Like SetMaskVars(AllRegsMask()), but without any debug output.
+{
+ _rsAllMaskVars.Clear();
+}
+
+void RegSet::SetMaskVars(CONSTREF_AllRegsMask newMaskVars)
{
#ifdef DEBUG
if (m_rsCompiler->verbose)
{
printf("\t\t\t\t\t\t\tLive regs: ");
- if (_rsMaskVars == newMaskVars)
+ if (_rsAllMaskVars == newMaskVars)
{
printf("(unchanged) ");
}
else
{
- printRegMask(_rsMaskVars);
- m_rsCompiler->GetEmitter()->emitDispRegSet(_rsMaskVars);
+ printRegMask(_rsAllMaskVars);
+ m_rsCompiler->GetEmitter()->emitDispRegSet(_rsAllMaskVars);
// deadSet = old - new
- regMaskTP deadSet = _rsMaskVars & ~newMaskVars;
+ AllRegsMask deadSet = _rsAllMaskVars & ~newMaskVars;
// bornSet = new - old
- regMaskTP bornSet = newMaskVars & ~_rsMaskVars;
+ AllRegsMask bornSet = newMaskVars & ~_rsAllMaskVars;
- if (deadSet != RBM_NONE)
+ if (!deadSet.IsEmpty())
{
printf(" -");
m_rsCompiler->GetEmitter()->emitDispRegSet(deadSet);
}
- if (bornSet != RBM_NONE)
+ if (!bornSet.IsEmpty())
{
printf(" +");
m_rsCompiler->GetEmitter()->emitDispRegSet(bornSet);
@@ -238,7 +357,7 @@ void RegSet::SetMaskVars(regMaskTP newMaskVars)
}
#endif // DEBUG
- _rsMaskVars = newMaskVars;
+ _rsAllMaskVars = newMaskVars;
}
/*****************************************************************************/
@@ -259,19 +378,23 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo)
rsMaskResvd = RBM_NONE;
-#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARMARCH)
+ rsGprMaskCalleeSaved = RBM_NONE;
+ rsFloatMaskCalleeSaved = RBM_NONE;
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
rsMaskCalleeSaved = RBM_NONE;
-#endif // TARGET_ARMARCH || TARGET_LOONGARCH64
+#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ rsPredicateMaskCalleeSaved = RBM_NONE;
+#endif // FEATURE_MASKED_HW_INTRINSICS
#ifdef TARGET_ARM
rsMaskPreSpillRegArg = RBM_NONE;
rsMaskPreSpillAlign = RBM_NONE;
#endif
-#ifdef SWIFT_SUPPORT
- rsAllCalleeSavedMask = RBM_CALLEE_SAVED;
rsIntCalleeSavedMask = RBM_INT_CALLEE_SAVED;
-#endif // SWIFT_SUPPORT
#ifdef DEBUG
rsModifiedRegsMaskInitialized = false;
@@ -347,18 +470,12 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */)
treeType = tree->TypeGet();
}
- var_types tempType = RegSet::tmpNormalizeType(treeType);
- regMaskTP mask;
+ var_types tempType = RegSet::tmpNormalizeType(treeType);
bool floatSpill = false;
if (isFloatRegType(treeType))
{
floatSpill = true;
- mask = genRegMaskFloat(reg ARM_ARG(treeType));
- }
- else
- {
- mask = genRegMask(reg);
}
rsNeededSpillReg = true;
@@ -952,15 +1069,15 @@ regNumber genRegArgNext(regNumber argReg)
* register numbers and corresponding bitmaps.
*/
-const regNumber raRegCalleeSaveOrder[] = {REG_CALLEE_SAVED_ORDER};
-const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALLEE_SAVED_ORDER};
+const regNumber raRegCalleeSaveOrder[] = {REG_CALLEE_SAVED_ORDER};
+const regMaskGpr raRbmCalleeSaveOrder[] = {RBM_CALLEE_SAVED_ORDER};
regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
{
regMaskSmall res = 0;
for (int i = 0; i < CNT_CALLEE_SAVED; i++)
{
- if ((calleeSaveMask & ((regMaskTP)1 << i)) != 0)
+ if ((calleeSaveMask & ((regMaskOnlyOne)1 << i)) != 0)
{
res |= raRbmCalleeSaveOrder[i];
}
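The net effect on call sites is that the single rsSetRegsModified(regMaskTP) entry point is split into per-class setters plus a regNumber-based one. A hedged sketch of how callers would choose between them (the call sites below are illustrative, not taken from this change; only the RegSet method names come from the diff):

    // Register class statically known: use the typed setter, which asserts
    // that the mask really is a GPR (or float) mask.
    regSet.rsSetGprRegsModified(RBM_RAX | RBM_RCX);
    regSet.rsSetFloatRegsModified(RBM_XMM6);

    // Only a regNumber at hand: rsSetRegModified dispatches on the register
    // class (GPR / float / predicate) internally.
    regSet.rsSetRegModified(reg);

    // Merging a whole kill set: pass an AllRegsMask by const reference.
    regSet.rsSetRegsModified(killMask);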
diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h
index dae93baebad306..2709e3898ecaaa 100644
--- a/src/coreclr/jit/regset.h
+++ b/src/coreclr/jit/regset.h
@@ -43,7 +43,7 @@ class RegSet
RegSet(Compiler* compiler, GCInfo& gcInfo);
#ifdef TARGET_ARM
- regMaskTP rsMaskPreSpillRegs(bool includeAlignment) const
+ regMaskGpr rsMaskPreSpillRegs(bool includeAlignment) const
{
return includeAlignment ? (rsMaskPreSpillRegArg | rsMaskPreSpillAlign) : rsMaskPreSpillRegArg;
}
@@ -67,111 +67,141 @@ class RegSet
//
private:
- bool rsNeededSpillReg; // true if this method needed to spill any registers
- regMaskTP rsModifiedRegsMask; // mask of the registers modified by the current function.
+ bool rsNeededSpillReg; // true if this method needed to spill any registers
+ AllRegsMask rsModifiedRegsMask; // mask of the registers modified by the current function.
#ifdef DEBUG
bool rsModifiedRegsMaskInitialized; // Has rsModifiedRegsMask been initialized? Guards against illegal use.
-#endif // DEBUG
+ void printModifiedRegsMask(regMaskOnlyOne currentMask,
+ regMaskOnlyOne modifiedMask DEBUGARG(bool suppressDump = false)
+ DEBUGARG(regMaskOnlyOne calleeSaveMask = RBM_NONE)) const;
+#endif // DEBUG
-#ifdef SWIFT_SUPPORT
- regMaskTP rsAllCalleeSavedMask;
- regMaskTP rsIntCalleeSavedMask;
-#else // !SWIFT_SUPPORT
- static constexpr regMaskTP rsAllCalleeSavedMask = RBM_CALLEE_SAVED;
- static constexpr regMaskTP rsIntCalleeSavedMask = RBM_INT_CALLEE_SAVED;
-#endif // !SWIFT_SUPPORT
+ regMaskGpr rsIntCalleeSavedMask;
public:
- regMaskTP rsGetModifiedRegsMask() const
+ void rsSetRegsModified(CONSTREF_AllRegsMask modifiedMask DEBUGARG(bool suppressDump = false));
+ void rsSetRegModified(regNumber reg DEBUGARG(bool suppressDump = false));
+
+ CONSTREF_AllRegsMask rsGetModifiedRegsMask() const
{
assert(rsModifiedRegsMaskInitialized);
return rsModifiedRegsMask;
}
- regMaskTP rsGetModifiedCalleeSavedRegsMask() const
+ AllRegsMask rsGetModifiedCalleeSavedRegsMask() const;
+
+ regMaskGpr rsGetModifiedIntCalleeSavedRegsMask() const
{
assert(rsModifiedRegsMaskInitialized);
- return (rsModifiedRegsMask & rsAllCalleeSavedMask);
+ return (rsGetModifiedGprRegsMask() & rsIntCalleeSavedMask);
}
- regMaskTP rsGetModifiedIntCalleeSavedRegsMask() const
+ regMaskGpr rsGetModifiedGprRegsMask() const
{
assert(rsModifiedRegsMaskInitialized);
- return (rsModifiedRegsMask & rsIntCalleeSavedMask);
+ return rsModifiedRegsMask.gprRegs();
}
-#ifdef TARGET_AMD64
- regMaskTP rsGetModifiedOsrIntCalleeSavedRegsMask() const
+ regMaskFloat rsGetModifiedFloatRegsMask() const
{
assert(rsModifiedRegsMaskInitialized);
- return (rsModifiedRegsMask & (rsIntCalleeSavedMask | RBM_EBP));
+ return rsModifiedRegsMask.floatRegs(m_rsCompiler);
}
-#endif // TARGET_AMD64
- regMaskTP rsGetModifiedFltCalleeSavedRegsMask() const
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate rsGetModifiedPredicateRegsMask() const
{
assert(rsModifiedRegsMaskInitialized);
- return (rsModifiedRegsMask & RBM_FLT_CALLEE_SAVED);
+ return rsModifiedRegsMask.predicateRegs(m_rsCompiler);
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
+ regMaskGpr rsGetModifiedRegsMask(var_types type) const
+ {
+ return rsModifiedRegsMask.GetRegMaskForType(type);
}
void rsClearRegsModified();
+ void rsSetGprRegsModified(regMaskGpr mask DEBUGARG(bool suppressDump = false));
+ void rsSetFloatRegsModified(regMaskFloat mask DEBUGARG(bool suppressDump = false));
- void rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump = false));
+ void rsRemoveRegsModified(regMaskGpr mask);
- void rsRemoveRegsModified(regMaskTP mask);
+ bool rsRegsModified(regMaskGpr mask) const
+ {
+ assert(rsModifiedRegsMaskInitialized);
+ return (rsModifiedRegsMask.gprRegs() & mask) != 0;
+ }
- bool rsRegsModified(regMaskTP mask) const
+ bool rsRegsModified(CONSTREF_AllRegsMask mask) const
{
assert(rsModifiedRegsMaskInitialized);
- return (rsModifiedRegsMask & mask) != 0;
+ return !((rsModifiedRegsMask & mask).IsEmpty());
}
void verifyRegUsed(regNumber reg);
+ void verifyGprRegUsed(regNumber reg);
- void verifyRegistersUsed(regMaskTP regMask);
+ void verifyRegistersUsed(CONSTREF_AllRegsMask mask);
public:
- regMaskTP GetMaskVars() const // 'get' property function for rsMaskVars property
+ regMaskOnlyOne GetMaskVars(var_types type) const // 'get' property function for rsMaskVars property
{
- return _rsMaskVars;
+ return _rsAllMaskVars.GetRegMaskForType(type);
}
- void SetMaskVars(regMaskTP newMaskVars); // 'put' property function for rsMaskVars property
-
- void AddMaskVars(regMaskTP addMaskVars) // union 'addMaskVars' with the rsMaskVars set
+ regMaskGpr GetGprMaskVars() const // 'get' property function for rsMaskVars property
{
- SetMaskVars(_rsMaskVars | addMaskVars);
+ return _rsAllMaskVars.gprRegs();
}
- void RemoveMaskVars(regMaskTP removeMaskVars) // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD)
+ void SetMaskVars(CONSTREF_AllRegsMask newMaskVars); // 'put' property function for rsMaskVars property
+
+ void AddMaskVars(var_types type, regMaskOnlyOne addMaskVars) // union 'addMaskVars' with the rsMaskVars set
{
- SetMaskVars(_rsMaskVars & ~removeMaskVars);
+ AllRegsMask newMask = _rsAllMaskVars;
+ newMask.AddRegMaskForType(addMaskVars, type);
+ SetMaskVars(newMask);
}
- void ClearMaskVars() // Like SetMaskVars(RBM_NONE), but without any debug output.
+ // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD)
+ void RemoveMaskVars(var_types type, regMaskOnlyOne removeMaskVars)
{
- _rsMaskVars = RBM_NONE;
+ // TODO: Skip assigning to newMask, just update _rsAllMaskVars directly. The only thing remaining
+ // would be to print the change if (newMask != _rsAllMaskVars).
+ AllRegsMask newMask = _rsAllMaskVars;
+ newMask.RemoveRegTypeFromMask(removeMaskVars, type);
+ SetMaskVars(newMask);
}
+ void ClearMaskVars(); // Like SetMaskVars(AllRegsMask()), but without any debug output.
+
private:
- regMaskTP _rsMaskVars; // backing store for rsMaskVars property
+ AllRegsMask _rsAllMaskVars; // backing store for rsMaskVars property
+
+#if defined(TARGET_ARMARCH)
+ regMaskGpr rsGprMaskCalleeSaved;
+ regMaskFloat rsFloatMaskCalleeSaved;
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+ regMaskMixed rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog
+#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
-#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
- regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog
-#endif // TARGET_ARMARCH || TARGET_LOONGARCH64
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ regMaskPredicate rsPredicateMaskCalleeSaved;
+#endif
-public: // TODO-Cleanup: Should be private, but Compiler uses it
- regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty)
+public: // TODO-Cleanup: Should be private, but Compiler uses it
+ regMaskGpr rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty)
public: // The PreSpill masks are used in LclVars.cpp
#ifdef TARGET_ARM
- regMaskTP rsMaskPreSpillAlign; // Mask of alignment padding added to prespill to keep double aligned args
- // at aligned stack addresses.
- regMaskTP rsMaskPreSpillRegArg; // mask of incoming registers that are spilled at the start of the prolog
- // This includes registers used to pass a struct (or part of a struct)
- // and all enregistered user arguments in a varargs call
-#endif // TARGET_ARM
+ regMaskGpr rsMaskPreSpillAlign; // Mask of alignment padding added to prespill to keep double aligned args
+ // at aligned stack addresses.
+ regMaskGpr rsMaskPreSpillRegArg; // mask of incoming registers that are spilled at the start of the prolog
+ // This includes registers used to pass a struct (or part of a struct)
+ // and all enregistered user arguments in a varargs call
+#endif // TARGET_ARM
private:
//-------------------------------------------------------------------------
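The live-variable mask follows the same pattern: _rsMaskVars becomes the AllRegsMask-backed _rsAllMaskVars, and callers name the register class explicitly. A sketch of the new shapes (the call sites below are illustrative; the signatures are the ones declared above):

    // Adding / removing a variable's register from the live set now takes the
    // variable's type so the correct word of AllRegsMask is updated.
    regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(varDsc->GetRegNum()));
    regSet.RemoveMaskVars(varDsc->TypeGet(), genRegMask(varDsc->GetRegNum()));

    // Reads are per register class:
    regMaskGpr liveGprs = regSet.GetGprMaskVars();
    regMaskOnlyOne liveOfType = regSet.GetMaskVars(varDsc->TypeGet());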
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index 06777fa9d5f709..a8773eb818c80e 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -109,8 +109,8 @@ inline bool compUnixX86Abi()
#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
enum _regNumber_enum : unsigned
{
-#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
#include "register.h"
REG_COUNT,
@@ -121,8 +121,8 @@ enum _regNumber_enum : unsigned
enum _regMask_enum : unsigned __int64
{
RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
#include "register.h"
};
@@ -130,8 +130,8 @@ enum _regMask_enum : unsigned __int64
enum _regNumber_enum : unsigned
{
-#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
#include "register.h"
REG_COUNT,
@@ -142,8 +142,8 @@ enum _regNumber_enum : unsigned
enum _regMask_enum : unsigned __int64
{
RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
#include "register.h"
};
@@ -151,8 +151,8 @@ enum _regMask_enum : unsigned __int64
enum _regNumber_enum : unsigned
{
-#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
#include "register.h"
REG_COUNT,
@@ -164,8 +164,8 @@ enum _regMask_enum : uint64_t
{
RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
#include "register.h"
};
@@ -173,8 +173,8 @@ enum _regMask_enum : uint64_t
enum _regNumber_enum : unsigned
{
-#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
#include "register.h"
REG_COUNT,
@@ -186,8 +186,8 @@ enum _regMask_enum : unsigned
{
RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
#include "register.h"
};
@@ -197,6 +197,13 @@ enum _regMask_enum : unsigned
#define AVAILABLE_REG_COUNT get_AVAILABLE_REG_COUNT()
+#ifdef TARGET_ARM64
+#define HAS_MORE_THAN_64_REGISTERS 1
+#define MORE_THAN_64_REG_ARG(x) , x
+#else
+#define MORE_THAN_64_REG_ARG(x)
+#endif
+
/*****************************************************************************/
// TODO-Cleanup: The types defined below are mildly confusing: why are there both?
@@ -208,21 +215,186 @@ enum _regMask_enum : unsigned
// In any case, we believe that is OK to freely cast between these types; no information will
// be lost.
+typedef unsigned __int32 RegBitSet32;
+
#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
typedef unsigned __int64 regMaskTP;
+typedef unsigned __int64 regMaskGpr;
+typedef unsigned __int64 regMaskFloat;
+typedef unsigned __int64 regMaskPredicate;
+typedef unsigned __int64 RegBitSet64;
+
+// Represents a mask drawn from exactly one register class - gpr, float, or predicate -
+// but never from more than one.
+typedef unsigned __int64 regMaskOnlyOne;
+typedef unsigned __int64 singleRegMask;
+
#else
+// x86 and arm
typedef unsigned regMaskTP;
+typedef unsigned RegBitSet64;
+
+#define regMaskGpr regMaskTP
+#define regMaskFloat regMaskTP
+#define regMaskPredicate regMaskTP
+#define regMaskOnlyOne regMaskTP
+#define regMaskMixed regMaskTP
+#define singleRegMask regMaskTP
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+
+unsigned genCountBits(uint64_t bits);
+
+typedef _regNumber_enum regNumber;
+typedef unsigned char regNumberSmall;
+
+typedef struct _regMaskAll
+{
+private:
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ union
+ {
+ RegBitSet32 _registers[3];
+ struct
+ {
+ union
+ {
+ // Represents the combined register bitset, including gpr/float
+ RegBitSet64 _combinedRegisters;
+ struct
+ {
+ RegBitSet32 _gprRegs;
+ RegBitSet32 _floatRegs;
+ };
+ };
+ RegBitSet32 _predicateRegs;
+ };
+ };
+#else
+ // Represents the combined register bitset, including gpr/float and, on some
+ // platforms, mask or predicate registers
+ RegBitSet64 _combinedRegisters;
+#endif
+ // TODO: Come up with a variable name such that:
+ // 1. If HAS_MORE_THAN_64_REGISTERS==1, it represents the combined float+gpr registers
+ // 2. If HAS_MORE_THAN_64_REGISTERS==0, it represents all possible registers - gpr/float/predicate - in one place
+ // Once we have that, we can use it directly and remove some of the #ifdef HAS_MORE_THAN_64_REGISTERS blocks
+
+ // This method shifts the high 32 bits of the float mask down into the low 32 bits and returns them.
+ // For gpr and predicate registers, it returns the same value.
+ FORCEINLINE static RegBitSet32 encodeForIndex(int index, RegBitSet64 value)
+ {
+ int shiftAmount = 32 * (index == 1);
+ return (RegBitSet32)(value >> shiftAmount);
+ }
+
+ FORCEINLINE static RegBitSet64 decodeForIndex(int index, RegBitSet32 value)
+ {
+ int shiftAmount = 32 * (index == 1);
+ return ((RegBitSet64)value << shiftAmount);
+ }
+
+public:
+ FORCEINLINE regMaskGpr gprRegs() const;
+ FORCEINLINE regMaskFloat floatRegs(const Compiler* compiler) const;
+
+ // #ifdef DEBUG
+
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ FORCEINLINE regMaskPredicate predicateRegs(const Compiler* compiler) const;
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
+ _regMaskAll(RegBitSet64 gprRegMask, RegBitSet64 floatRegMask)
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ : _combinedRegisters(floatRegMask | gprRegMask)
+ , _predicateRegs(RBM_NONE)
+#else
+ : _combinedRegisters(floatRegMask | gprRegMask)
+#endif
+ {
+ }
+
+ // TODO: See if we can avoid the '|' operation here.
+ _regMaskAll(RegBitSet64 gprRegMask, RegBitSet64 floatRegMask, RegBitSet64 predicateRegs)
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ : _combinedRegisters(floatRegMask | gprRegMask)
+ , _predicateRegs((RegBitSet32)predicateRegs)
+#else
+ : _combinedRegisters(predicateRegs | floatRegMask | gprRegMask)
+#endif
+ {
+ }
+
+ _regMaskAll()
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ : _combinedRegisters(RBM_NONE)
+ , _predicateRegs(RBM_NONE)
+#else
+ : _combinedRegisters(RBM_NONE)
+#endif
+ {
+ }
+
+ _regMaskAll(RegBitSet64 allRegistersMask)
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ : _combinedRegisters(allRegistersMask)
+ , _predicateRegs(RBM_NONE)
+#else
+ : _combinedRegisters(allRegistersMask)
+#endif
+ {
+ }
+
+ FORCEINLINE void Clear();
+ FORCEINLINE bool IsEmpty() const;
+ FORCEINLINE unsigned Count() const;
+ FORCEINLINE void AddGprRegInMask(regNumber reg);
+ FORCEINLINE void AddRegMaskForType(regMaskOnlyOne maskToAdd, var_types type);
+ FORCEINLINE void AddGprRegMask(regMaskGpr maskToAdd);
+ FORCEINLINE void AddFloatRegMask(regMaskFloat maskToAdd);
+
+ FORCEINLINE void AddRegNumInMask(regNumber reg);
+ FORCEINLINE void AddRegNum(regNumber reg, var_types type);
+ FORCEINLINE void RemoveRegNumFromMask(regNumber reg);
+ FORCEINLINE void RemoveRegNum(regNumber reg, var_types type);
+ FORCEINLINE bool IsRegNumInMask(regNumber reg) const;
+ FORCEINLINE bool IsRegNumPresent(regNumber reg, var_types type) const;
+
+#ifdef TARGET_ARM
+ FORCEINLINE void AddRegNumInMask(regNumber reg, var_types type);
+ FORCEINLINE void RemoveRegNumFromMask(regNumber reg, var_types type);
+ FORCEINLINE bool IsRegNumInMask(regNumber reg, var_types type) const;
#endif
-#if REGMASK_BITS == 8
-typedef unsigned char regMaskSmall;
-#define REG_MASK_INT_FMT "%02X"
-#define REG_MASK_ALL_FMT "%02X"
-#elif REGMASK_BITS == 16
-typedef unsigned short regMaskSmall;
-#define REG_MASK_INT_FMT "%04X"
-#define REG_MASK_ALL_FMT "%04X"
-#elif REGMASK_BITS == 32
+ FORCEINLINE void RemoveRegTypeFromMask(regMaskOnlyOne regMaskToRemove, var_types type);
+ FORCEINLINE bool IsGprMaskPresent(regMaskGpr maskToCheck) const;
+ FORCEINLINE bool IsFloatMaskPresent(Compiler* compiler, regMaskFloat maskToCheck) const;
+ FORCEINLINE regMaskOnlyOne GetRegMaskForType(var_types type) const;
+
+ FORCEINLINE bool IsGprOrFloatPresent() const;
+ FORCEINLINE RegBitSet64 GetGprFloatCombinedMask() const;
+#ifndef HAS_MORE_THAN_64_REGISTERS
+ FORCEINLINE RegBitSet64 GetAllRegistersMask() const;
+#endif // !HAS_MORE_THAN_64_REGISTERS
+
+ FORCEINLINE regMaskOnlyOne operator[](int index) const;
+ FORCEINLINE void operator|=(const _regMaskAll& other);
+ FORCEINLINE void operator&=(const _regMaskAll& other);
+ FORCEINLINE void operator|=(const regNumber reg);
+ FORCEINLINE void operator^=(const regNumber reg);
+ FORCEINLINE _regMaskAll operator~() const;
+ FORCEINLINE bool operator==(const _regMaskAll& other) const;
+ FORCEINLINE bool operator!=(const _regMaskAll& other) const;
+ FORCEINLINE _regMaskAll operator&(const _regMaskAll& other) const;
+ FORCEINLINE _regMaskAll operator|(const _regMaskAll& other) const;
+
+} AllRegsMask;
+
+#define CONSTREF_AllRegsMask const AllRegsMask&
+#define REF_AllRegsMask AllRegsMask
+
+#define GprRegsMask(gprRegs) AllRegsMask(gprRegs)
+
+#if REGMASK_BITS == 32
typedef unsigned regMaskSmall;
#define REG_MASK_INT_FMT "%08X"
#define REG_MASK_ALL_FMT "%08X"
@@ -232,9 +404,6 @@ typedef unsigned __int64 regMaskSmall;
#define REG_MASK_ALL_FMT "%016llX"
#endif
-typedef _regNumber_enum regNumber;
-typedef unsigned char regNumberSmall;
-
/*****************************************************************************/
#ifdef DEBUG
@@ -337,7 +506,7 @@ const char* getRegName(regNumber reg);
#ifdef DEBUG
const char* getRegNameFloat(regNumber reg, var_types type);
-extern void dspRegMask(regMaskTP regMask, size_t minSiz = 0);
+extern void dspRegMask(AllRegsMask mask, size_t minSiz = 0);
#endif
#if CPU_HAS_BYTE_REGS
@@ -352,8 +521,8 @@ inline bool isByteReg(regNumber reg)
}
#endif
-inline regMaskTP genRegMask(regNumber reg);
-inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE));
+inline singleRegMask genRegMask(regNumber reg);
+inline regMaskFloat genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE));
/*****************************************************************************
* Return true if the register number is valid
@@ -451,7 +620,7 @@ inline regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv)
// theFixedRetBuffMask:
// Returns the regNumber to use for the fixed return buffer
//
-inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv)
+inline regMaskGpr theFixedRetBuffMask(CorInfoCallConvExtension callConv)
{
assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method
#if defined(TARGET_ARM64)
@@ -486,9 +655,9 @@ inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv)
// Returns the full mask of all possible integer registers
// Note this includes the fixed return buffer register on Arm64
//
-inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv)
+inline regMaskGpr fullIntArgRegMask(CorInfoCallConvExtension callConv)
{
- regMaskTP result = RBM_ARG_REGS;
+ regMaskGpr result = RBM_ARG_REGS;
if (hasFixedRetBuffReg(callConv))
{
result |= theFixedRetBuffMask(callConv);
@@ -583,7 +752,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type)
extern const regMaskSmall regMasks[REG_COUNT];
-inline regMaskTP genRegMask(regNumber reg)
+inline singleRegMask genRegMask(regNumber reg)
{
assert((unsigned)reg < ArrLen(regMasks));
#ifdef TARGET_AMD64
@@ -591,7 +760,7 @@ inline regMaskTP genRegMask(regNumber reg)
// (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] )
// the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK
// and the result needs to be zero.
- regMaskTP result = 1ULL << reg;
+ singleRegMask result = 1ULL << reg;
assert(result == regMasks[reg]);
return result;
#else
@@ -604,7 +773,7 @@ inline regMaskTP genRegMask(regNumber reg)
* Map a register number to a floating-point register mask.
*/
-inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */))
+inline regMaskFloat genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */))
{
#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \
defined(TARGET_RISCV64)
@@ -628,6 +797,36 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D
#endif
}
+inline regNumber getRegForType(regNumber reg, var_types regType)
+{
+#ifdef TARGET_ARM
+ if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg))
+ {
+ reg = REG_PREV(reg);
+ }
+#endif // TARGET_ARM
+ return reg;
+}
+
+// This is the same as genRegMask(reg, regType) on all platforms except
+// Arm. On Arm, if regType is TYP_DOUBLE and reg is a valid double
+// register, it also matches genRegMask(reg, regType); otherwise it
+// returns the mask for the even/odd register pair corresponding to `reg`.
+inline regMaskOnlyOne getRegMask(regNumber reg, var_types regType)
+{
+ reg = getRegForType(reg, regType);
+ singleRegMask regMask = genRegMask(reg);
+#ifdef TARGET_ARM
+ if (regType == TYP_DOUBLE)
+ {
+ assert(genIsValidDoubleReg(reg));
+ regMask |= (regMask << 1);
+ }
+#endif // TARGET_ARM
+ return regMask;
+}
+
//------------------------------------------------------------------------
// genRegMask: Given a register, and its type, generate the appropriate regMask
//
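
The Arm-only pairing above is easiest to see with a concrete case: for TYP_DOUBLE, getRegForType backs an odd float register up to the even member of its pair, and getRegMask then sets both bits. A self-contained sketch with toy register numbering (plain uint64_t masks; toyRegMask/toyDoubleMask are illustrative stand-ins, not the JIT's genRegMask/REG_PREV):

    #include <cassert>
    #include <cstdint>

    // Toy model: float registers F0..F31 numbered 0..31, one bit per register.
    constexpr uint64_t toyRegMask(unsigned reg)
    {
        return 1ull << reg;
    }

    // For TYP_DOUBLE, back up to the even register of the pair and then set
    // both bits, mirroring getRegForType + getRegMask on Arm.
    constexpr uint64_t toyDoubleMask(unsigned reg)
    {
        unsigned evenReg = (reg & 1) ? (reg - 1) : reg; // REG_PREV for the odd half
        uint64_t mask    = toyRegMask(evenReg);
        return mask | (mask << 1); // even and odd halves of the pair
    }

    int main()
    {
        assert(toyDoubleMask(2) == (toyRegMask(2) | toyRegMask(3))); // F2/F3 pair
        assert(toyDoubleMask(3) == (toyRegMask(2) | toyRegMask(3))); // same pair either way
        return 0;
    }
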
@@ -647,22 +846,20 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D
// For registers that are used in pairs, the caller will be handling
// each member of the pair separately.
//
-inline regMaskTP genRegMask(regNumber regNum, var_types type)
+inline regMaskOnlyOne genRegMask(regNumber regNum, var_types type)
{
#if defined(TARGET_ARM)
- regMaskTP regMask = RBM_NONE;
-
if (varTypeUsesIntReg(type))
{
- regMask = genRegMask(regNum);
+ return genRegMask(regNum);
}
else
{
assert(varTypeUsesFloatReg(type));
- regMask = genRegMaskFloat(regNum, type);
+ return genRegMaskFloat(regNum, type);
}
- return regMask;
+ return RBM_NONE;
#else
return genRegMask(regNum);
#endif
@@ -673,8 +870,8 @@ inline regMaskTP genRegMask(regNumber regNum, var_types type)
* These arrays list the callee-saved register numbers (and bitmaps, respectively) for
* the current architecture.
*/
-extern const regNumber raRegCalleeSaveOrder[CNT_CALLEE_SAVED];
-extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALLEE_SAVED];
+extern const regNumber raRegCalleeSaveOrder[CNT_CALLEE_SAVED];
+extern const regMaskGpr raRbmCalleeSaveOrder[CNT_CALLEE_SAVED];
// This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask.
regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short);
diff --git a/src/coreclr/jit/targetamd64.cpp b/src/coreclr/jit/targetamd64.cpp
index 85b1ba6ef19a23..8e47e286263146 100644
--- a/src/coreclr/jit/targetamd64.cpp
+++ b/src/coreclr/jit/targetamd64.cpp
@@ -19,14 +19,14 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L;
// clang-format off
#ifdef UNIX_AMD64_ABI
const regNumber intArgRegs [] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 };
-const regMaskTP intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 };
+const regMaskGpr intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 };
const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 };
-const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 };
+const regMaskFloat fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 };
#else // !UNIX_AMD64_ABI
const regNumber intArgRegs [] = { REG_ECX, REG_EDX, REG_R8, REG_R9 };
-const regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 };
+const regMaskGpr intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 };
const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 };
-const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 };
+const regMaskFloat fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 };
#endif // !UNIX_AMD64_ABI
// clang-format on
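
These parallel tables are expected to stay in lockstep: intArgMasks[i] (now typed regMaskGpr) is just the mask form of intArgRegs[i], and likewise for the float tables. A tiny self-contained sketch of that invariant with made-up register numbers (the toy names and values below are assumptions for illustration, not the real REG_*/RBM_* definitions):

    #include <cassert>
    #include <cstdint>

    // Toy stand-ins for the parallel argument-register tables.
    enum ToyReg : unsigned { TOY_ECX = 1, TOY_EDX = 2, TOY_R8 = 8, TOY_R9 = 9 };

    constexpr uint64_t toyRegMask(unsigned reg)
    {
        return 1ull << reg;
    }

    const unsigned toyIntArgRegs[]  = { TOY_ECX, TOY_EDX, TOY_R8, TOY_R9 };
    const uint64_t toyIntArgMasks[] = { toyRegMask(TOY_ECX), toyRegMask(TOY_EDX),
                                        toyRegMask(TOY_R8),  toyRegMask(TOY_R9) };

    int main()
    {
        // The mask table stays in lockstep with the register table.
        for (unsigned i = 0; i < 4; i++)
        {
            assert(toyIntArgMasks[i] == toyRegMask(toyIntArgRegs[i]));
        }
        return 0;
    }
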
diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h
index 7e72da9cf2ccdc..6fbc6a81d0eecd 100644
--- a/src/coreclr/jit/targetamd64.h
+++ b/src/coreclr/jit/targetamd64.h
@@ -86,10 +86,12 @@
#define RBM_ALLDOUBLE RBM_ALLFLOAT
#define REG_FP_FIRST REG_XMM0
#define REG_FP_LAST REG_XMM31
+ #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1)
#define FIRST_FP_ARGREG REG_XMM0
#define REG_MASK_FIRST REG_K0
#define REG_MASK_LAST REG_K7
+ #define REG_MASK_COUNT (REG_MASK_LAST - REG_MASK_FIRST + 1)
#define RBM_ALLMASK_INIT (0)
#define RBM_ALLMASK_EVEX (RBM_K1 | RBM_K2 | RBM_K3 | RBM_K4 | RBM_K5 | RBM_K6 | RBM_K7)
@@ -172,6 +174,7 @@
#define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED | RBM_MSK_CALLEE_SAVED)
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLGPR (RBM_ALLINT)
// AMD64 write barrier ABI (see vm\amd64\JitHelpers_Fast.asm, vm\amd64\JitHelpers_Fast.S):
// CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
@@ -449,9 +452,9 @@
#define REG_ARG_5 REG_R9
extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG];
- extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG];
+ extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG];
#define RBM_ARG_0 RBM_RDI
#define RBM_ARG_1 RBM_RSI
@@ -472,9 +475,9 @@
#define REG_ARG_3 REG_R9
extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG];
- extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG];
+ extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG];
#define RBM_ARG_0 RBM_ECX
#define RBM_ARG_1 RBM_EDX
@@ -534,9 +537,6 @@
#define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH
#endif
- // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
- #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
-
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX))
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX
#define REG_DISPATCH_INDIRECT_CALL_ADDR REG_RAX
diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp
index 037578fa67b85c..4ce22dc05ebba8 100644
--- a/src/coreclr/jit/targetarm.cpp
+++ b/src/coreclr/jit/targetarm.cpp
@@ -18,10 +18,10 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L;
// clang-format off
const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3};
-const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3};
+const regMaskGpr intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3};
const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 };
-const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 };
+const regMaskFloat fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 };
// clang-format on
static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1));
diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h
index 0f56ebe1ce989a..f9d486a5e7f6c5 100644
--- a/src/coreclr/jit/targetarm.h
+++ b/src/coreclr/jit/targetarm.h
@@ -45,6 +45,7 @@
#define REG_FP_FIRST REG_F0
#define REG_FP_LAST REG_F31
+ #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1)
#define FIRST_FP_ARGREG REG_F0
#define LAST_FP_ARGREG REG_F15
@@ -89,6 +90,7 @@
#define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7)
#define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC)
+ #define RBM_ALLGPR (RBM_LOW_REGS | RBM_HIGH_REGS)
#define REG_CALLEE_SAVED_ORDER REG_R4,REG_R5,REG_R6,REG_R7,REG_R8,REG_R9,REG_R10,REG_R11
#define RBM_CALLEE_SAVED_ORDER RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11
@@ -241,13 +243,6 @@
#define RBM_FLOATRET RBM_F0
#define RBM_DOUBLERET (RBM_F0|RBM_F1)
- // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper (JIT_RareDisableHelper).
- // See vm\arm\amshelpers.asm for more details.
- #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_LNGRET|RBM_R7|RBM_R8|RBM_R11|RBM_DOUBLERET|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7))
-
- // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
- #define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_CALLEE_TRASH | RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH)
-
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH)
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R0
@@ -276,7 +271,7 @@
#define REG_ARG_3 REG_R3
extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
#define RBM_ARG_0 RBM_R0
#define RBM_ARG_1 RBM_R1
@@ -287,7 +282,7 @@
#define RBM_FLTARG_REGS (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15)
extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG];
- extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG];
+ extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG];
#define LBL_DIST_SMALL_MAX_NEG (0)
#define LBL_DIST_SMALL_MAX_POS (+1020)
diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp
index f95663202456b8..9cbc615f955680 100644
--- a/src/coreclr/jit/targetarm64.cpp
+++ b/src/coreclr/jit/targetarm64.cpp
@@ -18,10 +18,10 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L;
// clang-format off
const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7};
-const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7};
+const regMaskGpr intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7};
const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 };
-const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
+const regMaskFloat fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
// clang-format on
//-----------------------------------------------------------------------------
diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h
index 6d33d378bcd96e..1cde57bc59ab8a 100644
--- a/src/coreclr/jit/targetarm64.h
+++ b/src/coreclr/jit/targetarm64.h
@@ -47,10 +47,12 @@
#define REG_FP_FIRST REG_V0
#define REG_FP_LAST REG_V31
+ #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1)
#define FIRST_FP_ARGREG REG_V0
#define LAST_FP_ARGREG REG_V15
#define REG_PREDICATE_FIRST REG_P0
#define REG_PREDICATE_LAST REG_P15
+ #define REG_MASK_COUNT (REG_PREDICATE_LAST - REG_PREDICATE_FIRST + 1)
#define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers.
#define REG_PREDICATE_HIGH_FIRST REG_P8 // Similarly, some instructions can only use the second half of the predicate registers.
#define REG_PREDICATE_HIGH_LAST REG_P15
@@ -60,7 +62,7 @@
static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST);
- #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define REGNUM_BITS 7 // number of bits in a REG_*
#define REGSIZE_BYTES 8 // number of bytes in one general purpose register
#define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
#define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers
@@ -84,6 +86,7 @@
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
#define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
#define RBM_ALLDOUBLE RBM_ALLFLOAT
+ #define RBM_ALLGPR (RBM_ALLINT | RBM_FP | RBM_LR | RBM_ZR)
// REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED
#define REG_VAR_ORDER REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, \
@@ -110,6 +113,8 @@
#define CNT_CALLEE_SAVED_FLOAT (8)
#define CNT_CALLEE_TRASH_FLOAT (24)
+ #define CNT_CALLEE_SAVED_MASK (4)
+ #define CNT_CALLEE_TRASH_MASK (8)
#define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES)
#define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES)
@@ -147,8 +152,8 @@
#define RBM_ALLMASK (RBM_LOWMASK | RBM_HIGHMASK)
// TODO-SVE: Fix when adding predicate register allocation
- #define RBM_MSK_CALLEE_SAVED (0)
- #define RBM_MSK_CALLEE_TRASH (0)
+ #define RBM_MSK_CALLEE_SAVED (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3)
+ #define RBM_MSK_CALLEE_TRASH (RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7 | RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15)
// ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S):
// CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
@@ -186,7 +191,7 @@
#define REG_WRITE_BARRIER_SRC_BYREF REG_R13
#define RBM_WRITE_BARRIER_SRC_BYREF RBM_R13
- #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R15|RBM_IP0|RBM_IP1|RBM_DEFAULT_HELPER_CALL_TARGET)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R15|RBM_IP0|RBM_IP1|RBM_DEFAULT_HELPER_CALL_TARGET)
// Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
#define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_R14|RBM_CALLEE_TRASH_NOGC)
@@ -263,12 +268,6 @@
#define RBM_FLOATRET RBM_V0
#define RBM_DOUBLERET RBM_V0
- // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
- #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH
-
- // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
- #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
-
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15
#define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9
@@ -310,7 +309,7 @@
#define REG_ARG_7 REG_R7
extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
#define RBM_ARG_0 RBM_R0
#define RBM_ARG_1 RBM_R1
@@ -343,7 +342,7 @@
#define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7)
extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG];
- extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG];
+ extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG];
#define LBL_DIST_SMALL_MAX_NEG (-1048576)
#define LBL_DIST_SMALL_MAX_POS (+1048575)
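
The REGNUM_BITS bump from 6 to 7 in this file falls out of the register count: with 32 GPRs, 32 FP/SIMD registers, and 16 predicate registers all carrying register numbers, the total exceeds 64, so 6 bits no longer suffice. A self-contained sanity check of that arithmetic (architectural counts only; the JIT's actual regNumber enum also includes a few pseudo registers):

    #include <cassert>

    int main()
    {
        const int gprCount       = 32; // general purpose registers
        const int fpSimdCount    = 32; // v0..v31
        const int predicateCount = 16; // p0..p15
        const int total          = gprCount + fpSimdCount + predicateCount;

        assert(total > 64);        // a register number no longer fits in 6 bits
        assert(total <= (1 << 7)); // but 7 bits are enough
        return 0;
    }
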
diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp
index 2a7c906962b0cb..36347c15331d5a 100644
--- a/src/coreclr/jit/targetx86.cpp
+++ b/src/coreclr/jit/targetx86.cpp
@@ -18,7 +18,7 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L;
// clang-format off
const regNumber intArgRegs [] = {REG_ECX, REG_EDX};
-const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX};
+const regMaskGpr intArgMasks[] = {RBM_ECX, RBM_EDX};
// clang-format on
//-----------------------------------------------------------------------------
diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h
index dfeb96ae9e977c..8a19781ed67e4a 100644
--- a/src/coreclr/jit/targetx86.h
+++ b/src/coreclr/jit/targetx86.h
@@ -70,9 +70,11 @@
#define REG_FP_FIRST REG_XMM0
#define REG_FP_LAST REG_XMM7
+ #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1)
#define REG_MASK_FIRST REG_K0
#define REG_MASK_LAST REG_K7
+  #define REG_MASK_COUNT           (REG_MASK_LAST - REG_MASK_FIRST + 1)
#define CNT_MASK_REGS 8
#define FIRST_FP_ARGREG REG_XMM0
@@ -140,6 +142,7 @@
#define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH | RBM_MSK_CALLEE_TRASH)
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLGPR (RBM_ALLINT)
#define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX
#define MAX_VAR_ORDER_SIZE 6
@@ -319,7 +322,7 @@
#define REG_ARG_1 REG_EDX
extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
#define RBM_ARG_0 RBM_ECX
#define RBM_ARG_1 RBM_EDX
diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h
index bf5acb5ee014a5..2f121ae84f8cc2 100644
--- a/src/coreclr/jit/typelist.h
+++ b/src/coreclr/jit/typelist.h
@@ -64,9 +64,9 @@ DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, available
DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
#endif // TARGET_XARCH
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S)
-#endif // TARGET_XARCH || TARGET_ARM64
+#endif // FEATURE_MASKED_HW_INTRINSICS
#endif // FEATURE_SIMD
DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, 0, 0, 0, 0, 0, VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_ANY)
diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp
index a51a52ab21d640..f084bc3ab73713 100644
--- a/src/coreclr/jit/unwind.cpp
+++ b/src/coreclr/jit/unwind.cpp
@@ -150,18 +150,7 @@ void Compiler::unwindPushPopCFI(regNumber reg)
FuncInfoDsc* func = funCurrentFunc();
UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func);
- regMaskTP relOffsetMask = RBM_CALLEE_SAVED
-#if defined(UNIX_AMD64_ABI) && ETW_EBP_FRAMED
- // In case of ETW_EBP_FRAMED defined the REG_FPBASE (RBP)
- // is excluded from the callee-save register list.
- // Make sure the register gets PUSH unwind info in this case,
- // since it is pushed as a frame register.
- | RBM_FPBASE
-#endif
-#if defined(TARGET_ARM)
- | RBM_R11 | RBM_LR | RBM_PC
-#endif
- ;
+ regMaskOnlyOne mask = genRegMask(reg);
#if defined(TARGET_ARM)
createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL,
@@ -170,7 +159,35 @@ void Compiler::unwindPushPopCFI(regNumber reg)
assert(reg < REG_FP_FIRST);
createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, REGSIZE_BYTES);
#endif
- if (relOffsetMask & genRegMask(reg))
+
+ bool shouldCreateCfiCode = false;
+ if (emitter::isGeneralRegister(reg))
+ {
+#if defined(UNIX_AMD64_ABI) && ETW_EBP_FRAMED
+    // When ETW_EBP_FRAMED is defined, REG_FPBASE (RBP) is excluded
+    // from the callee-saved register list.
+    // Make sure the register gets PUSH unwind info in this case,
+    // since it is pushed as a frame register.
+ mask |= RBM_FPBASE;
+#endif
+#if defined(TARGET_ARM)
+ mask |= RBM_R11 | RBM_LR | RBM_PC;
+#endif
+ shouldCreateCfiCode = (RBM_INT_CALLEE_SAVED & mask);
+ }
+ else if (emitter::isFloatReg(reg))
+ {
+ shouldCreateCfiCode = (RBM_FLT_CALLEE_SAVED & mask);
+ }
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+    else if (emitter::isMaskReg(reg))
+ {
+ shouldCreateCfiCode = (RBM_MSK_CALLEE_SAVED & mask);
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
+ if (shouldCreateCfiCode)
{
createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg));
}
@@ -200,14 +217,16 @@ void Compiler::unwindBegPrologCFI()
}
}
-void Compiler::unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat)
+void Compiler::unwindPushPopMaskCFI(regMaskOnlyOne regMask, bool isFloat)
{
+ assert(IsOnlyOneRegMask(regMask));
+
#if TARGET_ARM
- regNumber regNum = isFloat ? REG_PREV(REG_FP_LAST) : REG_INT_LAST;
- regMaskTP regBit = isFloat ? genRegMask(regNum) | genRegMask(REG_NEXT(regNum)) : genRegMask(regNum);
+ regNumber regNum = isFloat ? REG_PREV(REG_FP_LAST) : REG_INT_LAST;
+ regMaskOnlyOne regBit = isFloat ? genRegMask(regNum) | genRegMask(REG_NEXT(regNum)) : genRegMask(regNum);
#else
- regNumber regNum = isFloat ? REG_FP_LAST : REG_INT_LAST;
- regMaskTP regBit = genRegMask(regNum);
+ regNumber regNum = isFloat ? REG_FP_LAST : REG_INT_LAST;
+ regMaskOnlyOne regBit = genRegMask(regNum);
#endif
for (; regMask != 0 && regBit != RBM_NONE;)
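
The rewritten unwindPushPopCFI above no longer tests one combined callee-saved mask; it first classifies the pushed register by register file and then checks it against that file's callee-saved set. A simplified, self-contained sketch of the dispatch (toy masks and a toy classification enum; the real code uses emitter::isGeneralRegister/isFloatReg/isMaskReg and the RBM_*_CALLEE_SAVED masks):

    #include <cstdint>

    enum class ToyRegKind { Gpr, Float, Predicate };

    // Toy callee-saved sets, one per register file (values are illustrative).
    const uint64_t kToyGprCalleeSaved = 0x0000F000;
    const uint64_t kToyFltCalleeSaved = 0x000000FF;
    const uint64_t kToyMskCalleeSaved = 0x0000000F;

    // Decide whether a pushed register needs a CFI_REL_OFFSET record by first
    // picking the register file and then testing that file's callee-saved set.
    bool toyNeedsCfiRelOffset(ToyRegKind kind, uint64_t regBit)
    {
        switch (kind)
        {
            case ToyRegKind::Gpr:
                return (kToyGprCalleeSaved & regBit) != 0;
            case ToyRegKind::Float:
                return (kToyFltCalleeSaved & regBit) != 0;
            case ToyRegKind::Predicate:
                return (kToyMskCalleeSaved & regBit) != 0;
        }
        return false;
    }

    int main()
    {
        return toyNeedsCfiRelOffset(ToyRegKind::Gpr, 0x1000) ? 0 : 1;
    }
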
diff --git a/src/coreclr/jit/unwindarmarch.cpp b/src/coreclr/jit/unwindarmarch.cpp
index 51af7f24889d1b..ccbdb4350aeabd 100644
--- a/src/coreclr/jit/unwindarmarch.cpp
+++ b/src/coreclr/jit/unwindarmarch.cpp
@@ -196,10 +196,10 @@ void Compiler::unwindEndEpilog()
#if defined(TARGET_ARM)
-void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
+void Compiler::unwindPushPopMaskInt(regMaskGpr maskInt, bool useOpsize16)
{
// floating point registers cannot be specified in 'maskInt'
- assert((maskInt & RBM_ALLFLOAT) == 0);
+ assert(IsGprRegMask(maskInt));
UnwindInfo* pu = &funCurrentFunc()->uwi;
@@ -213,8 +213,8 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
if ((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0)
{
- regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7);
- regMaskTP valMask = RBM_R4;
+ regMaskGpr matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7);
+ regMaskGpr valMask = RBM_R4;
while (val < 4)
{
if (matchMask == valMask)
@@ -252,8 +252,8 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
if (((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0) &&
((maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)) == (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)))
{
- regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11);
- regMaskTP valMask = RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8;
+ regMaskGpr matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11);
+ regMaskGpr valMask = RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8;
while (val < 4)
{
if (matchMask == valMask)
@@ -282,10 +282,10 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
}
}
-void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
+void Compiler::unwindPushPopMaskFloat(regMaskFloat maskFloat)
{
// Only floating pointer registers can be specified in 'maskFloat'
- assert((maskFloat & ~RBM_ALLFLOAT) == 0);
+ assert(IsFloatRegMask(maskFloat));
// If the maskFloat is zero there is no unwind code to emit
//
@@ -296,8 +296,8 @@ void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
UnwindInfo* pu = &funCurrentFunc()->uwi;
- BYTE val = 0;
- regMaskTP valMask = (RBM_F16 | RBM_F17);
+ BYTE val = 0;
+ regMaskFloat valMask = (RBM_F16 | RBM_F17);
while (maskFloat != valMask)
{
@@ -317,8 +317,10 @@ void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
pu->AddCode(0xE0 | val);
}
-void Compiler::unwindPushMaskInt(regMaskTP maskInt)
+void Compiler::unwindPushMaskInt(regMaskGpr maskInt)
{
+ assert(IsGprRegMask(maskInt));
+
// Only r0-r12 and lr are supported
assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 |
RBM_R10 | RBM_R11 | RBM_R12 | RBM_LR)) == 0);
@@ -336,8 +338,10 @@ void Compiler::unwindPushMaskInt(regMaskTP maskInt)
unwindPushPopMaskInt(maskInt, useOpsize16);
}
-void Compiler::unwindPushMaskFloat(regMaskTP maskFloat)
+void Compiler::unwindPushMaskFloat(regMaskFloat maskFloat)
{
+ assert(IsFloatRegMask(maskFloat));
+
// Only floating point registers should be in maskFloat
assert((maskFloat & RBM_ALLFLOAT) == maskFloat);
@@ -352,8 +356,10 @@ void Compiler::unwindPushMaskFloat(regMaskTP maskFloat)
unwindPushPopMaskFloat(maskFloat);
}
-void Compiler::unwindPopMaskInt(regMaskTP maskInt)
+void Compiler::unwindPopMaskInt(regMaskGpr maskInt)
{
+ assert(IsGprRegMask(maskInt));
+
#if defined(FEATURE_CFI_SUPPORT)
if (generateCFIUnwindCodes())
{
@@ -379,8 +385,10 @@ void Compiler::unwindPopMaskInt(regMaskTP maskInt)
unwindPushPopMaskInt(maskInt, useOpsize16);
}
-void Compiler::unwindPopMaskFloat(regMaskTP maskFloat)
+void Compiler::unwindPopMaskFloat(regMaskFloat maskFloat)
{
+ assert(IsFloatRegMask(maskFloat));
+
#if defined(FEATURE_CFI_SUPPORT)
if (generateCFIUnwindCodes())
{
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index ea33f1d14fb035..373532702d6346 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -111,6 +111,20 @@ extern const BYTE opcodeArgKinds[] = {
/*****************************************************************************/
+const int regIndexForRegister(regNumber reg)
+{
+ static const BYTE _registerTypeIndex[] = {
+#ifdef TARGET_ARM64
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) regTypeTag,
+#else
+#define REGDEF(name, rnum, mask, sname, regTypeTag) regTypeTag,
+#endif
+#include "register.h"
+ };
+
+ return _registerTypeIndex[reg];
+}
+
const char* varTypeName(var_types vt)
{
static const char* const varTypeNames[] = {
@@ -139,9 +153,9 @@ const char* getRegName(regNumber reg)
static const char* const regNames[] = {
#if defined(TARGET_ARM64)
-#define REGDEF(name, rnum, mask, xname, wname) xname,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) xname,
#else
-#define REGDEF(name, rnum, mask, sname) sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
#endif
#include "register.h"
};
@@ -227,7 +241,7 @@ const char* getRegNameFloat(regNumber reg, var_types type)
#elif defined(TARGET_ARM64)
static const char* regNamesFloat[] = {
-#define REGDEF(name, rnum, mask, xname, wname) xname,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) xname,
#include "register.h"
};
assert((unsigned)reg < ArrLen(regNamesFloat));
@@ -237,7 +251,7 @@ const char* getRegNameFloat(regNumber reg, var_types type)
#elif defined(TARGET_LOONGARCH64)
static const char* regNamesFloat[] = {
-#define REGDEF(name, rnum, mask, sname) sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
#include "register.h"
};
@@ -247,16 +261,16 @@ const char* getRegNameFloat(regNumber reg, var_types type)
#else
static const char* regNamesFloat[] = {
-#define REGDEF(name, rnum, mask, sname) "x" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "x" sname,
#include "register.h"
};
#ifdef FEATURE_SIMD
static const char* regNamesYMM[] = {
-#define REGDEF(name, rnum, mask, sname) "y" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "y" sname,
#include "register.h"
};
static const char* regNamesZMM[] = {
-#define REGDEF(name, rnum, mask, sname) "z" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "z" sname,
#include "register.h"
};
#endif // FEATURE_SIMD
@@ -282,9 +296,9 @@ const char* getRegNameFloat(regNumber reg, var_types type)
* Displays a range of registers
* -- This is a helper used by dspRegMask
*/
-const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regNumber regFirst, regNumber regLast)
+const char* dspRegRange(regMaskOnlyOne regMask, size_t& minSiz, const char* sep, regNumber regFirst, regNumber regLast)
{
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
assert(((regFirst == REG_INT_FIRST) && (regLast == REG_INT_LAST)) ||
((regFirst == REG_FP_FIRST) && (regLast == REG_FP_LAST)) ||
((regFirst == REG_MASK_FIRST) && (regLast == REG_MASK_LAST)));
@@ -306,7 +320,7 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
for (regNumber regNum = regFirst; regNum <= regLast; regNum = REG_NEXT(regNum))
{
- regMaskTP regBit = genRegMask(regNum);
+ singleRegMask regBit = genRegMask(regNum);
if ((regMask & regBit) != 0)
{
@@ -433,18 +447,18 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
* Displays a register set.
* TODO-ARM64-Cleanup: don't allow ip0, ip1 as part of a range.
*/
-void dspRegMask(regMaskTP regMask, size_t minSiz)
+void dspRegMask(AllRegsMask mask, size_t minSiz)
{
const char* sep = "";
printf("[");
- sep = dspRegRange(regMask, minSiz, sep, REG_INT_FIRST, REG_INT_LAST);
- sep = dspRegRange(regMask, minSiz, sep, REG_FP_FIRST, REG_FP_LAST);
+ sep = dspRegRange(mask.gprRegs(), minSiz, sep, REG_INT_FIRST, REG_INT_LAST);
+ sep = dspRegRange(mask.floatRegs(nullptr), minSiz, sep, REG_FP_FIRST, REG_FP_LAST);
-#ifdef TARGET_XARCH
- sep = dspRegRange(regMask, minSiz, sep, REG_MASK_FIRST, REG_MASK_LAST);
-#endif // TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ sep = dspRegRange(mask.predicateRegs(nullptr), minSiz, sep, REG_MASK_FIRST, REG_MASK_LAST);
+#endif // FEATURE_MASKED_HW_INTRINSICS
printf("]");