diff --git a/src/coreclr/inc/patchpointinfo.h b/src/coreclr/inc/patchpointinfo.h index 02b9fd89f338ae..27346a3c4aeea3 100644 --- a/src/coreclr/inc/patchpointinfo.h +++ b/src/coreclr/inc/patchpointinfo.h @@ -37,7 +37,7 @@ struct PatchpointInfo // Initialize void Initialize(unsigned localCount, int totalFrameSize) { - m_calleeSaveRegisters = 0; + m_calleeSaveGprRegisters = 0; m_totalFrameSize = totalFrameSize; m_numberOfLocals = localCount; m_genericContextArgOffset = -1; @@ -49,7 +49,7 @@ struct PatchpointInfo // Copy void Copy(const PatchpointInfo* original) { - m_calleeSaveRegisters = original->m_calleeSaveRegisters; + m_calleeSaveGprRegisters = original->m_calleeSaveGprRegisters; m_genericContextArgOffset = original->m_genericContextArgOffset; m_keptAliveThisOffset = original->m_keptAliveThisOffset; m_securityCookieOffset = original->m_securityCookieOffset; @@ -163,14 +163,14 @@ struct PatchpointInfo // Callee save registers saved by the original method. // Includes all saves that must be restored (eg includes pushed RBP on x64). // - uint64_t CalleeSaveRegisters() const + uint64_t CalleeSaveGprRegisters() const { - return m_calleeSaveRegisters; + return m_calleeSaveGprRegisters; } - void SetCalleeSaveRegisters(uint64_t registerMask) + void SetCalleeSaveGprRegisters(uint64_t gprRegs) { - m_calleeSaveRegisters = registerMask; + m_calleeSaveGprRegisters = gprRegs; } private: @@ -180,7 +180,7 @@ struct PatchpointInfo EXPOSURE_MASK = 0x1 }; - uint64_t m_calleeSaveRegisters; + uint64_t m_calleeSaveGprRegisters; unsigned m_numberOfLocals; int m_totalFrameSize; int m_genericContextArgOffset; diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index e80533fc783a06..958a1a73fee96c 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -48,10 +48,10 @@ regNumber ABIPassingSegment::GetRegister() const // Return Value: // The register mask. // -regMaskTP ABIPassingSegment::GetRegisterMask() const +regMaskOnlyOne ABIPassingSegment::GetRegisterMask() const { assert(IsPassedInRegister()); - regMaskTP reg = genRegMask(m_register); + regMaskOnlyOne reg = genRegMask(m_register); #ifdef TARGET_ARM if (genIsValidFloatReg(m_register) && (Size == 8)) diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 1e51a14d9c09a5..10d2fa3672f757 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -25,7 +25,7 @@ class ABIPassingSegment // If this segment is passed in a register, return the particular register. regNumber GetRegister() const; - regMaskTP GetRegisterMask() const; + regMaskOnlyOne GetRegisterMask() const; // If this segment is passed on the stack then return the particular stack // offset, relative to the base of stack arguments. diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 500b5274b6f41c..5c19022df38a2f 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -1601,8 +1601,8 @@ struct BasicBlock : private LIR::Range // is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET. #ifdef DEBUG - void dspBlockILRange() const; // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for - // BAD_IL_OFFSET. + void dspBlockILRange() const; // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" + // for BAD_IL_OFFSET. 
#endif // DEBUG VARSET_TP bbVarUse; // variables used by block (before a definition) @@ -1642,8 +1642,8 @@ struct BasicBlock : private LIR::Range }; static MemoryPhiArg* EmptyMemoryPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list // for Heap. - MemoryPhiArg* bbMemorySsaPhiFunc[MemoryKindCount]; // If the "in" Heap SSA var is not a phi definition, this value - // is NULL. + MemoryPhiArg* bbMemorySsaPhiFunc[MemoryKindCount]; // If the "in" Heap SSA var is not a phi definition, this + // value is NULL. // Otherwise, it is either the special value EmptyMemoryPhiDefn, to indicate // that Heap needs a phi definition on entry, or else it is the linked list // of the phi arguments. @@ -1677,7 +1677,8 @@ struct BasicBlock : private LIR::Range { EXPSET_TP bbCseOut; // CSEs available on exit ASSERT_TP bbAssertionOut; // assertions available on exit (global prop, local prop & !BBJ_COND) - ASSERT_TP bbAssertionOutIfFalse; // assertions available on exit along false/next edge (BBJ_COND, local prop) + ASSERT_TP bbAssertionOutIfFalse; // assertions available on exit along false/next edge (BBJ_COND, local + // prop) }; void* bbEmitCookie; @@ -1981,8 +1982,8 @@ struct BasicBlock : private LIR::Range } }; - // BBCompilerSuccEdgeList: adapter class for forward iteration of block successors edges, using range-based `for`, - // normally used via BasicBlock::SuccEdges(), e.g.: + // BBCompilerSuccEdgeList: adapter class for forward iteration of block successors edges, using range-based + // `for`, normally used via BasicBlock::SuccEdges(), e.g.: // for (FlowEdge* const succEdge : block->SuccEdges(compiler)) ... // // This version uses NumSucc(Compiler*)/GetSucc(Compiler*). See the documentation there for the explanation @@ -2448,7 +2449,8 @@ inline BasicBlock* BBArrayIterator::operator*() const return edgeTarget->getDestinationBlock(); } -// Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and FlowEdge) +// Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and +// FlowEdge) inline PredEdgeList::iterator::iterator(FlowEdge* pred) : m_pred(pred) diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis index cfbc6a181e9743..1b751541aec7e7 100644 --- a/src/coreclr/jit/clrjit.natvis +++ b/src/coreclr/jit/clrjit.natvis @@ -139,26 +139,26 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/en- this->m_AvailableRegs - - - - ((regNumber)regIndex),en - regIndex++ - reg = reg >> 1 - + + + + ((regNumber)regIndex),en + regIndex++ + reg = reg >> 1 + this->m_RegistersWithConstants - - - - ((regNumber)regIndex),en - regIndex++ - reg = reg >> 1 - + + + + ((regNumber)regIndex),en + regIndex++ + reg = reg >> 1 + - + [#{rpNum,d} - {refType,en}] @@ -177,6 +177,21 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/en- + + + + + + + + ((regNumber)regIndex),en + regIndex++ + reg = reg >> 1 + + + + + [U{this->relatedInterval->varNum,d}, #{this->intervalIndex, d}, reg={(regNumber)physReg, en}] [V{this->varNum,d}, #{this->intervalIndex, d}, reg={(regNumber)physReg, en}] diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b5b0d19402f0a3..433af3b2fe89fa 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -261,9 +261,9 @@ class CodeGen final : public CodeGenInterface // Prolog functions and data (there are a few exceptions for more generally used things) // - void 
genEstablishFramePointer(int delta, bool reportUnwindData); - void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); - regMaskTP genGetParameterHomingTempRegisterCandidates(); + void genEstablishFramePointer(int delta, bool reportUnwindData); + void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); + RegBitSet64 genGetParameterHomingTempRegisterCandidates(); var_types genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg); void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph); @@ -335,16 +335,23 @@ class CodeGen final : public CodeGenInterface } }; - static void genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack); + static void genBuildRegPairsStack(regMaskOnlyOne regsMask, + ArrayStack* regStack MORE_THAN_64_REG_ARG(var_types type)); static void genSetUseSaveNextPairs(ArrayStack* regStack); - static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); + static int genGetSlotSizeForRegsInMask(regMaskOnlyOne regsMask); - void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + void genSaveCalleeSavedRegisterGroup(regMaskOnlyOne regsMask, + int spDelta, + int spOffset MORE_THAN_64_REG_ARG(var_types type)); + void genRestoreCalleeSavedRegisterGroup(regMaskOnlyOne regsMask, + int spDelta, + int spOffset MORE_THAN_64_REG_ARG(var_types type)); - void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genSaveCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); + void genRestoreCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToRestoreMask, + int lowestCalleeSavedOffset, + int spDelta); void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); @@ -361,9 +368,9 @@ class CodeGen final : public CodeGenInterface void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize); #endif - void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); + void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskGpr maskArgRegsLiveIn); - void genPoisonFrame(regMaskTP bbRegLiveIn); + void genPoisonFrame(regMaskGpr bbRegLiveIn); #if defined(TARGET_ARM) @@ -372,11 +379,9 @@ class CodeGen final : public CodeGenInterface bool genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg); - void genPushFltRegs(regMaskTP regMask); - void genPopFltRegs(regMaskTP regMask); - regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat); - - regMaskTP genJmpCallArgMask(); + void genPushFltRegs(regMaskFloat regMask); + void genPopFltRegs(regMaskFloat regMask); + regMaskGpr genStackAllocRegisterMask(unsigned frameSize, regMaskFloat maskCalleeSavedFloat); void genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted); @@ -393,11 +398,12 @@ class CodeGen final : public CodeGenInterface // same. 
struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) - unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer - unsigned fiSpDelta; // Stack pointer delta - unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP - int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP + regMaskGpr fiSaveGprRegs; // Set of GPR registers saved in the funclet prolog (includes LR) + regMaskFloat fiSaveFloatRegs; // Set of Float registers saved in the funclet prolog (includes LR) + unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer + unsigned fiSpDelta; // Stack pointer delta + unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP + int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP }; FuncletFrameInfoDsc genFuncletInfo; @@ -409,7 +415,12 @@ class CodeGen final : public CodeGenInterface // same. struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR) + regMaskGpr fiSaveGprRegs; // Set of callee-saved GPR registers saved in the funclet prolog (includes LR) + regMaskFloat fiSaveFloatRegs; // Set of callee-saved float registers saved in the funclet prolog (includes LR) +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate fiSavePredicateRegs; // Set of callee-saved predicate registers saved in the funclet prolog + // (includes LR) +#endif int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) @@ -444,7 +455,7 @@ class CodeGen final : public CodeGenInterface // and used by all funclet prologs and epilogs, which must all be the same. struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + regMaskMixed fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) @@ -463,7 +474,7 @@ class CodeGen final : public CodeGenInterface // and used by all funclet prologs and epilogs, which must all be the same. 
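// --- Editorial aside (illustration only, not part of this patch) ---
// The FuncletFrameInfoDsc changes above rely on the per-register-class mask typedefs introduced by
// this refactoring. Their real definitions are not shown in this diff; a minimal sketch of the
// assumed shape, with the underlying integer types guessed, is:
typedef uint64_t regMaskGpr;       // bits for general-purpose registers only
typedef uint64_t regMaskFloat;     // bits for floating-point/SIMD registers only
typedef uint64_t regMaskPredicate; // bits for predicate/mask registers only
typedef uint64_t regMaskOnlyOne;   // a mask known to hold registers of a single class
typedef uint64_t regMaskMixed;     // legacy combined mask, still used below for LoongArch64/RISC-V64
// ARM/ARM64 split the funclet save set into fiSaveGprRegs/fiSaveFloatRegs (plus fiSavePredicateRegs
// under FEATURE_MASKED_HW_INTRINSICS), while LoongArch64/RISC-V64 keep a single regMaskMixed fiSaveRegs.
// --- End editorial aside ---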
struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + regMaskMixed fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) @@ -485,7 +496,7 @@ class CodeGen final : public CodeGenInterface #endif // TARGET_XARCH - void genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg); + void genZeroInitFltRegs(const regMaskFloat& initFltRegs, const regMaskFloat& initDblRegs, const regNumber& initReg); regNumber genGetZeroReg(regNumber initReg, bool* pInitRegZeroed); @@ -533,7 +544,7 @@ class CodeGen final : public CodeGenInterface // #if defined(TARGET_ARM) - bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog); + bool genCanUsePopToReturn(bool jmpEpilog); #endif #if defined(TARGET_ARM64) @@ -545,7 +556,7 @@ class CodeGen final : public CodeGenInterface void genPopCalleeSavedRegisters(bool jmpEpilog = false); #if defined(TARGET_XARCH) - unsigned genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs); + unsigned genPopCalleeSavedRegistersFromMask(regMaskGpr rsPopRegs); #endif // !defined(TARGET_XARCH) #endif // !defined(TARGET_ARM64) @@ -671,10 +682,10 @@ class CodeGen final : public CodeGenInterface // //------------------------------------------------------------------------- - void genSinglePush(); - void genSinglePop(); - regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs); - void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs); + void genSinglePush(); + void genSinglePop(); + regMaskGpr genPushRegs(regMaskGpr regs, regMaskGpr* byrefRegs, regMaskGpr* noRefRegs); + void genPopRegs(regMaskGpr regs, regMaskGpr byrefRegs, regMaskGpr noRefRegs); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 2c010f116a2657..63287bacebcba8 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1641,7 +1641,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, else { GetEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr); - regSet.verifyRegUsed(callTargetReg); + regSet.verifyGprRegUsed(callTargetReg); } GetEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper), @@ -1663,7 +1663,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, ); } - regSet.verifyRegistersUsed(RBM_CALLEE_TRASH); + regSet.verifyRegistersUsed(compiler->AllRegsMask_CALLEE_TRASH); } #ifdef PROFILING_SUPPORTED @@ -1692,14 +1692,14 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) // On Arm arguments are prespilled on stack, which frees r0-r3. // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle. // The call target register could be any free register. 
- regNumber argReg = REG_PROFILER_ENTER_ARG; - regMaskTP argRegMask = genRegMask(argReg); + regNumber argReg = REG_PROFILER_ENTER_ARG; + regMaskGpr argRegMask = genRegMask(argReg); assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0); if (compiler->compProfilerMethHndIndirected) { GetEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd); - regSet.verifyRegUsed(argReg); + regSet.verifyGprRegUsed(argReg); } else { @@ -1801,7 +1801,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract. GetEmitter()->emitIns_Mov(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0, /* canSkip */ false); genTransferRegGCState(REG_PROFILER_RET_SCRATCH, REG_R0); - regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH); + regSet.verifyGprRegUsed(REG_PROFILER_RET_SCRATCH); } if (compiler->compProfilerMethHndIndirected) @@ -1813,8 +1813,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R0, (ssize_t)compiler->compProfilerMethHnd); } - gcInfo.gcMarkRegSetNpt(RBM_R0); - regSet.verifyRegUsed(REG_R0); + gcInfo.gcMarkGprRegNpt(REG_R0); + regSet.verifyGprRegUsed(REG_R0); genEmitHelperCall(helper, 0, // argSize @@ -1825,7 +1825,9 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) { GetEmitter()->emitIns_Mov(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH, /* canSkip */ false); genTransferRegGCState(REG_R0, REG_PROFILER_RET_SCRATCH); - gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH); + assert(compiler->IsGprRegMask(compiler->AllRegsMask_PROFILER_RET_SCRATCH.gprRegs())); + assert(compiler->AllRegsMask_PROFILER_RET_SCRATCH.floatRegs(compiler) == RBM_NONE); + gcInfo.gcMarkRegSetNpt(compiler->AllRegsMask_PROFILER_RET_SCRATCH.gprRegs()); } } @@ -1872,7 +1874,10 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) // Return value: // None // -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +void CodeGen::genAllocLclFrame(unsigned frameSize, + regNumber initReg, + bool* pInitRegZeroed, + regMaskGpr maskArgRegsLiveIn) { assert(compiler->compGeneratingProlog); @@ -1908,9 +1913,9 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG); - regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); + regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_ARG); genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET); - regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_CALL_TARGET); + regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_CALL_TARGET); compiler->unwindPadding(); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); @@ -1924,15 +1929,15 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni compiler->unwindAllocStack(frameSize); } -void CodeGen::genPushFltRegs(regMaskTP regMask) +void CodeGen::genPushFltRegs(regMaskFloat regMask) { - assert(regMask != 0); // Don't call uness we have some registers to push - assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask + assert(regMask != 0); // Don't call unless we have some registers to push + assert(compiler->IsFloatRegMask(regMask)); // Only floating point registers should be in regMask 
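// --- Editorial aside (illustration only, not part of this patch) ---
// The IsGprRegMask/IsFloatRegMask checks added by this change are assumed to be simple
// class-membership tests over the split mask types; their real definitions live outside this diff.
// A sketch of what the new asserts are presumed to verify:
//
//     bool Compiler::IsGprRegMask(regMaskGpr mask)     { return (mask & ~RBM_ALLINT) == RBM_NONE; }
//     bool Compiler::IsFloatRegMask(regMaskFloat mask) { return (mask & ~RBM_ALLFLOAT) == RBM_NONE; }
//
// i.e. the old inline check `(regMask & RBM_ALLFLOAT) == regMask` is folded into a named helper.
// --- End editorial aside ---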
regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask)); int slots = genCountBits(regMask); // regMask should be contiguously set - regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set + regMaskFloat tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set assert((tmpMask & (tmpMask - 1)) == 0); assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes GetEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2); } -void CodeGen::genPopFltRegs(regMaskTP regMask) +void CodeGen::genPopFltRegs(regMaskFloat regMask) { - assert(regMask != 0); // Don't call uness we have some registers to pop - assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask + assert(regMask != 0); // Don't call unless we have some registers to pop + assert(compiler->IsFloatRegMask(regMask)); // Only floating point registers should be in regMask regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask)); int slots = genCountBits(regMask); // regMask should be contiguously set - regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set + regMaskFloat tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set assert((tmpMask & (tmpMask - 1)) == 0); // Our calling convention requires that we only use vpop for TYP_DOUBLE registers @@ -2081,7 +2086,7 @@ void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size * is zero, or if we should use "sub sp" / "add sp" instead of push/pop. */ -regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat) +regMaskGpr CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskFloat maskCalleeSavedFloat) { assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog); @@ -2090,6 +2095,8 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC if (maskCalleeSavedFloat != RBM_NONE) return RBM_NONE; + assert(compiler->IsFloatRegMask(maskCalleeSavedFloat)); + // Allocate space for small frames by pushing extra registers. It generates smaller and faster code // that extra sub sp,XXX/add sp,XXX. // R0 and R1 may be used by return value. 
Keep things simple and just skip the optimization @@ -2138,7 +2145,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) } } -bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog) +bool CodeGen::genCanUsePopToReturn(bool jmpEpilog) { assert(compiler->compGeneratingEpilog); @@ -2152,9 +2159,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) { assert(compiler->compGeneratingEpilog); - regMaskTP maskPopRegs = regSet.rsGetModifiedCalleeSavedRegsMask(); - regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT; - regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat; + regMaskFloat maskPopRegsFloat = regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_ALLFLOAT; + regMaskGpr maskPopRegsInt = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs(); // First, pop float registers @@ -2168,7 +2174,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) if (!jmpEpilog) { - regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat); + regMaskGpr maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat); maskPopRegsInt |= maskStackAlloc; } @@ -2178,7 +2184,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) maskPopRegsInt |= RBM_FPBASE; } - if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog)) + if (genCanUsePopToReturn(jmpEpilog)) { maskPopRegsInt |= RBM_PC; // Record the fact that we use a pop to the PC to perform the return @@ -2313,10 +2319,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block) compiler->unwindBegProlog(); - regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; - regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat; + regMaskFloat maskPushRegsFloat = genFuncletInfo.fiSaveFloatRegs; + regMaskGpr maskPushRegsInt = genFuncletInfo.fiSaveGprRegs; - regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat); + regMaskGpr maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat); maskPushRegsInt |= maskStackAlloc; assert(FitsIn(maskPushRegsInt)); @@ -2331,7 +2337,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) bool isFilter = (block->bbCatchTyp == BBCT_FILTER); - regMaskTP maskArgRegsLiveIn; + regMaskGpr maskArgRegsLiveIn; if (isFilter) { maskArgRegsLiveIn = RBM_R0 | RBM_R1; @@ -2367,7 +2373,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the first block of a filter GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiPSP_slot_CallerSP_offset); - regSet.verifyRegUsed(REG_R1); + regSet.verifyGprRegUsed(REG_R1); GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunctionCallerSPtoFPdelta); @@ -2377,7 +2383,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is a non-filter funclet GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, genFuncletInfo.fiFunctionCallerSPtoFPdelta); - regSet.verifyRegUsed(REG_R3); + regSet.verifyGprRegUsed(REG_R3); GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); } } @@ -2407,12 +2413,12 @@ void CodeGen::genFuncletEpilog() bool unwindStarted = false; /* The saved regs info saves the LR register. 
We need to pop the PC register to return */ - assert(genFuncletInfo.fiSaveRegs & RBM_LR); + assert(genFuncletInfo.fiSaveGprRegs & RBM_LR); - regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; - regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat; + regMaskFloat maskPopRegsFloat = genFuncletInfo.fiSaveFloatRegs; + regMaskGpr maskPopRegsInt = genFuncletInfo.fiSaveGprRegs; - regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat); + regMaskGpr maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat); maskPopRegsInt |= maskStackAlloc; if (maskStackAlloc == RBM_NONE) @@ -2465,14 +2471,15 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11 // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved // (also assumed in genFnProlog()). - assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0); + assert((regSet.rsGprMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0); unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES; - regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; - unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); - unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving - unsigned saveSizeWithPSP = saveRegsSize + REGSIZE_BYTES /* PSP sym */; + regMaskGpr rsGprMaskSaveRegs = regSet.rsGprMaskCalleeSaved; + regMaskFloat rsFloatMaskSaveRegs = regSet.rsFloatMaskCalleeSaved; + unsigned saveRegsCount = genCountBits(rsGprMaskSaveRegs) + genCountBits(rsFloatMaskSaveRegs); + unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving + unsigned saveSizeWithPSP = saveRegsSize + REGSIZE_BYTES /* PSP sym */; if (compiler->lvaMonAcquired != BAD_VAR_NUM) { saveSizeWithPSP += TARGET_POINTER_SIZE; @@ -2490,7 +2497,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSaveGprRegs = rsGprMaskSaveRegs; + genFuncletInfo.fiSaveFloatRegs = rsFloatMaskSaveRegs; genFuncletInfo.fiSpDelta = spDelta; genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset; genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset; @@ -2502,7 +2510,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf("Funclet prolog / epilog info\n"); printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta); printf(" Save regs: "); - dspRegMask(rsMaskSaveRegs); + dspRegMask(AllRegsMask(rsGprMaskSaveRegs, rsFloatMaskSaveRegs)); printf("\n"); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset); @@ -2623,11 +2631,11 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // // str rZero1,[rAddr] // When cnt is odd - regNumber rAddr; - regNumber rCnt = REG_NA; // Invalid - regMaskTP regMask; + regNumber rAddr; + regNumber rCnt = REG_NA; // Invalid + regMaskGpr regMask; - regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers + regMaskGpr availMask = regSet.rsGetModifiedGprRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are // 
currently live availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index cd1b1558d93e64..11c468fb6f3322 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -36,16 +36,19 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { assert(compiler->compGeneratingEpilog); - regMaskTP rsRestoreRegs = regSet.rsGetModifiedCalleeSavedRegsMask(); + regMaskFloat rsRestoreFloatRegs = + regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_ALLFLOAT; + regMaskGpr rsRestoreGprRegs = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs(); +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate rsRestorePredicateRegs = regSet.rsGetModifiedPredicateRegsMask() & RBM_MSK_CALLEE_SAVED; +#endif if (isFramePointerUsed()) { - rsRestoreRegs |= RBM_FPBASE; + rsRestoreGprRegs |= RBM_FPBASE; } - rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register) - - regMaskTP regsToRestoreMask = rsRestoreRegs; + rsRestoreGprRegs |= RBM_LR; // We must save/restore the return address (in the LR register) const int totalFrameSize = genTotalFrameSize(); @@ -71,7 +74,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) compiler->unwindSetFrameReg(REG_FPBASE, 0); } - regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. + rsRestoreGprRegs &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. break; } @@ -91,7 +94,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. + rsRestoreGprRegs &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. break; } @@ -104,7 +107,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) JITDUMP(" calleeSaveSpDelta=%d\n", calleeSaveSpDelta); - regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP. + rsRestoreGprRegs &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP. int remainingFrameSz = totalFrameSize - calleeSaveSpDelta; assert(remainingFrameSz > 0); @@ -203,7 +206,13 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } JITDUMP(" calleeSaveSpOffset=%d, calleeSaveSpDelta=%d\n", calleeSaveSpOffset, calleeSaveSpDelta); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSpOffset, calleeSaveSpDelta); + genRestoreCalleeSavedRegistersHelp(AllRegsMask(rsRestoreGprRegs, rsRestoreFloatRegs +#ifdef FEATURE_MASKED_HW_INTRINSICS + , + rsRestorePredicateRegs +#endif + ), + calleeSaveSpOffset, calleeSaveSpDelta); switch (frameType) { @@ -718,7 +727,8 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg // no return value; the regStack argument is modified. 
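// --- Editorial aside (illustration only, not part of this patch) ---
// MORE_THAN_64_REG_ARG, used in the new signatures below, is assumed to be a conditional parameter
// macro: on targets whose register file no longer fits in a single 64-bit mask (arm64 with
// predicate registers), it forwards an extra var_types argument that tells the callee which
// register class the regMaskOnlyOne refers to; elsewhere it expands to nothing. A plausible
// definition (the guard name is a guess):
//
//     #ifdef HAS_MORE_THAN_64_REGISTERS
//     #define MORE_THAN_64_REG_ARG(x) , x
//     #else
//     #define MORE_THAN_64_REG_ARG(x)
//     #endif
//
// With that shape, genBuildRegPairsStack(regsMask, &regStack MORE_THAN_64_REG_ARG(type)) expands to
// either a two-argument or a three-argument call.
// --- End editorial aside ---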
// // static -void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack) +void CodeGen::genBuildRegPairsStack(regMaskOnlyOne regsMask, + ArrayStack* regStack MORE_THAN_64_REG_ARG(var_types type)) { assert(regStack != nullptr); assert(regStack->Height() == 0); @@ -727,13 +737,13 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* reg while (regsMask != RBM_NONE) { - regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask); + regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask MORE_THAN_64_REG_ARG(type)); regsCount -= 1; bool isPairSave = false; if (regsCount > 0) { - regNumber reg2 = genFirstRegNumFromMask(regsMask); + regNumber reg2 = genFirstRegNumFromMask(regsMask MORE_THAN_64_REG_ARG(type)); if (reg2 == REG_NEXT(reg1)) { // The JIT doesn't allow saving pair (R28,FP), even though the @@ -820,7 +830,7 @@ void CodeGen::genSetUseSaveNextPairs(ArrayStack* regStack) // Note: Because int and float register type sizes match we can call this function with a mask that includes both. // // static -int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) +int CodeGen::genGetSlotSizeForRegsInMask(regMaskOnlyOne regsMask) { assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_LR)) == regsMask); // Do not expect anything else. @@ -835,13 +845,18 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) // regsMask - a mask of registers for prolog generation; // spDelta - if non-zero, the amount to add to SP before the first register save (or together with it); // spOffset - the offset from SP that is the beginning of the callee-saved register area; +// type - The type of `regsMask` we are operating on. // -void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskOnlyOne regsMask, + int spDelta, + int spOffset MORE_THAN_64_REG_ARG(var_types type)) { + assert(compiler->IsOnlyOneRegMask(regsMask)); + const int slotSize = genGetSlotSizeForRegsInMask(regsMask); ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); - genBuildRegPairsStack(regsMask, ®Stack); + genBuildRegPairsStack(regsMask, ®Stack MORE_THAN_64_REG_ARG(type)); for (int i = 0; i < regStack.Height(); ++i) { @@ -902,12 +917,27 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // The save set can contain LR in which case LR is saved along with the other callee-saved registers. // But currently Jit doesn't use frames without frame pointer on arm64. 
// -void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) +void CodeGen::genSaveCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToSaveMask, + int lowestCalleeSavedOffset, + int spDelta) { assert(spDelta <= 0); assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); - unsigned regsToSaveCount = genCountBits(regsToSaveMask); + regMaskGpr maskSaveRegsInt = regsToSaveMask.gprRegs(); + regMaskFloat maskSaveRegsFloat = regsToSaveMask.floatRegs(compiler); + + assert(compiler->IsGprRegMask(maskSaveRegsInt)); + assert(compiler->IsFloatRegMask(maskSaveRegsFloat)); + + unsigned regsToSaveCount = genCountBits(maskSaveRegsFloat) + genCountBits(maskSaveRegsInt); + +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate maskSaveRegsPredicate = regsToSaveMask.predicateRegs(compiler); + assert(compiler->IsPredicateRegMask(maskSaveRegsPredicate)); + regsToSaveCount += genCountBits(maskSaveRegsPredicate); +#endif + if (regsToSaveCount == 0) { if (spDelta != 0) @@ -924,21 +954,29 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // We also can save FP and LR, even though they are not in RBM_CALLEE_SAVED. assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_LR)); - // Save integer registers at higher addresses than floating-point registers. - - regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; - regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; +#ifdef FEATURE_MASKED_HW_INTRINSICS + if (maskSaveRegsPredicate != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsPredicate, spDelta, + lowestCalleeSavedOffset MORE_THAN_64_REG_ARG(TYP_MASK)); + spDelta = 0; + lowestCalleeSavedOffset += genCountBits(maskSaveRegsPredicate) * FPSAVE_REGSIZE_BYTES; + } +#endif // FEATURE_MASKED_HW_INTRINSICS + // Save integer registers at higher addresses than floating-point registers. if (maskSaveRegsFloat != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, + lowestCalleeSavedOffset MORE_THAN_64_REG_ARG(TYP_FLOAT)); spDelta = 0; lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsInt != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, + lowestCalleeSavedOffset MORE_THAN_64_REG_ARG(TYP_INT)); // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. } } @@ -949,14 +987,17 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Arguments: // regsMask - a mask of registers for epilog generation; // spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); -// spOffset - the offset from SP that is the beginning of the callee-saved register area; +// spOffset - the offset from SP that is the beginning of the callee-saved register area;. +// type - The type of `regsMask` we are operating on. 
// -void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskOnlyOne regsMask, + int spDelta, + int spOffset MORE_THAN_64_REG_ARG(var_types type)) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); - genBuildRegPairsStack(regsMask, ®Stack); + genBuildRegPairsStack(regsMask, ®Stack MORE_THAN_64_REG_ARG(type)); int stackDelta = 0; for (int i = 0; i < regStack.Height(); ++i) @@ -1017,10 +1058,26 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +void CodeGen::genRestoreCalleeSavedRegistersHelp(CONSTREF_AllRegsMask regsToRestoreMask, + int lowestCalleeSavedOffset, + int spDelta) { assert(spDelta >= 0); - unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + + regMaskGpr maskRestoreRegsInt = regsToRestoreMask.gprRegs(); + regMaskFloat maskRestoreRegsFloat = regsToRestoreMask.floatRegs(compiler); + + assert(compiler->IsGprRegMask(maskRestoreRegsInt)); + assert(compiler->IsFloatRegMask(maskRestoreRegsFloat)); + + unsigned regsToRestoreCount = genCountBits(maskRestoreRegsInt) + genCountBits(maskRestoreRegsFloat); + +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate maskRestoreRegsPredicate = regsToRestoreMask.predicateRegs(compiler); + assert(compiler->IsPredicateRegMask(maskRestoreRegsPredicate)); + regsToRestoreCount += genCountBits(maskRestoreRegsPredicate); +#endif + if (regsToRestoreCount == 0) { if (spDelta != 0) @@ -1043,24 +1100,28 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in // Save integer registers at higher addresses than floating-point registers. - regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; - regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; - // Restore in the opposite order of saving. - if (maskRestoreRegsInt != RBM_NONE) { int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset MORE_THAN_64_REG_ARG(TYP_INT)); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); - // No need to update spOffset since it's not used after this. + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset MORE_THAN_64_REG_ARG(TYP_FLOAT)); + spOffset -= genCountBits(maskRestoreRegsInt) * FPSAVE_REGSIZE_BYTES; + } + +#ifdef FEATURE_MASKED_HW_INTRINSICS + if (maskRestoreRegsPredicate != RBM_NONE) + { + // TODO: Do we need to adjust spDelta? 
+ genRestoreCalleeSavedRegisterGroup(maskRestoreRegsPredicate, spDelta, spOffset MORE_THAN_64_REG_ARG(TYP_MASK)); } +#endif } // clang-format off @@ -1368,8 +1429,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block) compiler->unwindBegProlog(); - regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; - regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; + regMaskFloat maskSaveRegsFloat = genFuncletInfo.fiSaveFloatRegs; + regMaskGpr maskSaveRegsInt = genFuncletInfo.fiSaveGprRegs; + +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate maskSaveRegsPredicate = genFuncletInfo.fiSavePredicateRegs; +#endif // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. assert((maskSaveRegsInt & RBM_LR) != 0); @@ -1377,7 +1442,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) bool isFilter = (block->bbCatchTyp == BBCT_FILTER); - regMaskTP maskArgRegsLiveIn; + regMaskGpr maskArgRegsLiveIn; if (isFilter) { maskArgRegsLiveIn = RBM_R0 | RBM_R1; @@ -1486,7 +1551,14 @@ void CodeGen::genFuncletProlog(BasicBlock* block) int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet (if any) - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0); + + genSaveCalleeSavedRegistersHelp(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat +#ifdef FEATURE_MASKED_HW_INTRINSICS + , + maskSaveRegsPredicate +#endif + ), + lowestCalleeSavedOffset, 0); if ((genFuncletInfo.fiFrameType == 3) || (genFuncletInfo.fiFrameType == 5)) { @@ -1522,7 +1594,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // function) genInstrWithConstant(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false); - regSet.verifyRegUsed(REG_R1); + regSet.verifyGprRegUsed(REG_R1); // Store the PSP value (aka CallerSP) genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, @@ -1539,7 +1611,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // compute the CallerSP, given the frame pointer. x3 is scratch. genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); - regSet.verifyRegUsed(REG_R3); + regSet.verifyGprRegUsed(REG_R3); genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); @@ -1572,8 +1644,12 @@ void CodeGen::genFuncletEpilog() unwindStarted = true; } - regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; - regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; + regMaskFloat maskRestoreRegsFloat = genFuncletInfo.fiSaveFloatRegs; + regMaskGpr maskRestoreRegsInt = genFuncletInfo.fiSaveGprRegs; + +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate maskRestoreRegsPredicate = genFuncletInfo.fiSavePredicateRegs; +#endif // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. 
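// --- Editorial aside (illustration only, not part of this patch) ---
// AllRegsMask is the aggregate these prolog/epilog changes use to carry one mask per register
// class. Only the members exercised in this file are sketched here, with signatures inferred from
// the call sites; the real definition is elsewhere:
//
//     struct AllRegsMask
//     {
//         AllRegsMask(regMaskGpr gpr, regMaskFloat flt);                        // no predicate regs
//         AllRegsMask(regMaskGpr gpr, regMaskFloat flt, regMaskPredicate pred); // FEATURE_MASKED_HW_INTRINSICS
//         regMaskGpr       gprRegs() const;
//         regMaskFloat     floatRegs(const Compiler* comp) const;
//         regMaskPredicate predicateRegs(const Compiler* comp) const;
//     };
//
// genSaveCalleeSavedRegistersHelp/genRestoreCalleeSavedRegistersHelp now take CONSTREF_AllRegsMask
// and split it back into per-class masks before building the save/restore groups.
// --- End editorial aside ---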
assert((maskRestoreRegsInt & RBM_LR) != 0); @@ -1596,13 +1672,18 @@ void CodeGen::genFuncletEpilog() } } - regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; if ((genFuncletInfo.fiFrameType == 1) || (genFuncletInfo.fiFrameType == 2) || (genFuncletInfo.fiFrameType == 3)) { - regsToRestoreMask &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end + maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end } int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2; - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0); + genRestoreCalleeSavedRegistersHelp(AllRegsMask(maskRestoreRegsInt, maskRestoreRegsFloat +#ifdef FEATURE_MASKED_HW_INTRINSICS + , + maskRestoreRegsPredicate +#endif + ), + lowestCalleeSavedOffset, 0); if (genFuncletInfo.fiFrameType == 1) { @@ -1733,11 +1814,18 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad; - regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; - assert((rsMaskSaveRegs & RBM_LR) != 0); - assert((rsMaskSaveRegs & RBM_FP) != 0); + regMaskGpr rsMaskSaveGprRegs = regSet.rsGprMaskCalleeSaved; + regMaskFloat rsMaskSaveFloatRegs = regSet.rsFloatMaskCalleeSaved; + regMaskPredicate rsMaskSavePredicateRegs = RBM_NONE; +#ifdef FEATURE_MASKED_HW_INTRINSICS + rsMaskSavePredicateRegs = regSet.rsPredicateMaskCalleeSaved; +#endif + + assert((rsMaskSaveGprRegs & RBM_LR) != 0); + assert((rsMaskSaveGprRegs & RBM_FP) != 0); - unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + unsigned saveRegsCount = + genCountBits(rsMaskSaveGprRegs) + genCountBits(rsMaskSaveFloatRegs) + genCountBits(rsMaskSavePredicateRegs); unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize; if (compiler->info.compIsVarArgs) { @@ -1856,7 +1944,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSaveGprRegs = rsMaskSaveGprRegs; + genFuncletInfo.fiSaveFloatRegs = rsMaskSaveFloatRegs; +#ifdef FEATURE_MASKED_HW_INTRINSICS + genFuncletInfo.fiSavePredicateRegs = rsMaskSavePredicateRegs; +#endif genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + PSPSize; @@ -1868,7 +1960,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf("\n"); printf("Funclet prolog / epilog info\n"); printf(" Save regs: "); - dspRegMask(genFuncletInfo.fiSaveRegs); + dspRegMask(AllRegsMask(genFuncletInfo.fiSaveGprRegs, genFuncletInfo.fiSaveFloatRegs +#ifdef FEATURE_MASKED_HW_INTRINSICS + , + genFuncletInfo.fiSavePredicateRegs +#endif + )); printf("\n"); if (compiler->opts.IsOSR()) { @@ -3951,7 +4048,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) instGen_MemoryBarrier(); - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(addr->gtGetGprRegMask()); } if (targetReg != REG_NA) { @@ -4101,7 +4198,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) instGen_MemoryBarrier(); - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(addr->gtGetGprRegMask()); } if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) @@ -4345,10 +4442,14 @@ void CodeGen::genCodeForSwap(GenTreeOp* tree) // FP swap is not yet implemented (and should have NYI'd in LSRA) assert(!varTypeIsFloating(type1)); - regNumber oldOp1Reg = 
lcl1->GetRegNum(); - regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); - regNumber oldOp2Reg = lcl2->GetRegNum(); - regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); + regNumber oldOp1Reg = lcl1->GetRegNum(); + regNumber oldOp2Reg = lcl2->GetRegNum(); + + regMaskGpr oldOp1RegMask = genRegMask(oldOp1Reg); + regMaskGpr oldOp2RegMask = genRegMask(oldOp2Reg); + + assert(compiler->IsGprRegMask(oldOp1RegMask)); + assert(compiler->IsGprRegMask(oldOp2RegMask)); // We don't call genUpdateVarReg because we don't have a tree node with the new register. varDsc1->SetRegNum(oldOp2Reg); @@ -5109,11 +5210,11 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; } - regMaskTP callTargetMask = genRegMask(callTargetReg); - regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + regMaskGpr callTargetMask = genRegMask(callTargetReg); + CONSTREF_AllRegsMask callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); // assert that all registers in callTargetMask are in the callKillSet - noway_assert((callTargetMask & callKillSet) == callTargetMask); + noway_assert((callTargetMask & callKillSet.gprRegs()) == callTargetMask); callTarget = callTargetReg; @@ -5132,7 +5233,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, false /* isJump */ ); - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + CONSTREF_AllRegsMask killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); regSet.verifyRegistersUsed(killMask); } @@ -5432,6 +5533,7 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode) void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); + assert(genIsValidIntReg(initReg)); if (!compiler->compIsProfilerHookNeeded()) { @@ -5458,8 +5560,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) // If initReg is trashed, either because it was an arg to the enter // callback, or because the enter callback itself trashes it, then it needs // to be zero'ed again before using. - if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP) & - genRegMask(initReg)) != RBM_NONE) + AllRegsMask profileEnterTrash = compiler->AllRegsMask_PROFILER_ENTER_TRASH; + profileEnterTrash.AddRegMaskForType(RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP, TYP_INT); + if (profileEnterTrash.IsRegNumInMask(initReg)) { *pInitRegZeroed = false; } @@ -5559,9 +5662,13 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) // Return value: // None // -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +void CodeGen::genAllocLclFrame(unsigned frameSize, + regNumber initReg, + bool* pInitRegZeroed, + regMaskGpr maskArgRegsLiveIn) { assert(compiler->compGeneratingProlog); + assert(compiler->IsGprRegMask(maskArgRegsLiveIn)); if (frameSize == 0) { @@ -5617,18 +5724,18 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl // the stack afterward (which means the stack pointer needs to be known). 
- regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED); + regMaskGpr availMask = regSet.rsGetModifiedGprRegsMask() | ~RBM_INT_CALLEE_SAVED; availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg - regNumber rOffset = initReg; - regNumber rLimit; - regMaskTP tempMask; + regNumber rOffset = initReg; + regNumber rLimit; + regMaskGpr tempMask; // We pick the next lowest register number for rLimit noway_assert(availMask != RBM_NONE); tempMask = genFindLowestBit(availMask); - rLimit = genRegNumFromMask(tempMask); + rLimit = genRegNumFromMask(tempMask MORE_THAN_64_REG_ARG(TYP_INT)); // Generate: // diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index a9e2a41f73f945..2dfc9d791a6a99 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1754,7 +1754,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) GetEmitter()->emitIns_R_R_I(INS_add, emitActualTypeSize(node), node->GetRegNum(), node->GetRegNum(), node->gtElemOffset); - gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(base->gtGetGprRegMask()); genProduceReg(node); } @@ -3266,7 +3266,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); - gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); + gcInfo.gcMarkGprRegNpt(dstReg); } } @@ -3394,15 +3394,16 @@ void CodeGen::genCall(GenTreeCall* call) // We should not have GC pointers in killed registers live around the call. // GC info for arg registers were cleared when consuming arg nodes above // and LSRA should ensure it for other trashed registers. - regMaskTP killMask = RBM_CALLEE_TRASH; + AllRegsMask killMask = compiler->AllRegsMask_CALLEE_TRASH; + if (call->IsHelperCall()) { CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); killMask = compiler->compHelperCallKillSet(helpFunc); } - assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); - assert((gcInfo.gcRegByrefSetCur & killMask) == 0); + assert(!killMask.IsGprMaskPresent(gcInfo.gcRegGCrefSetCur)); + assert(!killMask.IsGprMaskPresent(gcInfo.gcRegByrefSetCur)); #endif var_types returnType = call->TypeGet(); @@ -3477,7 +3478,7 @@ void CodeGen::genCall(GenTreeCall* call) // However, for minopts or debuggable code, we keep it live to support managed return value debugging. if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); + gcInfo.gcMarkGprRegNpt(REG_INTRET); } } @@ -3537,28 +3538,49 @@ void CodeGen::genCallInstruction(GenTreeCall* call) if (call->IsFastTailCall()) { - regMaskTP trashedByEpilog = RBM_CALLEE_SAVED; + regMaskGpr trashedGprByEpilog = RBM_INT_CALLEE_SAVED; + regMaskFloat trashedFloatByEpilog = RBM_FLT_CALLEE_SAVED; +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate trashedPredicateByEpilog = RBM_MSK_CALLEE_SAVED; +#endif // FEATURE_MASKED_HW_INTRINSICS // The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no // non-standard args that may be trash if this is a tailcall. 
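// --- Editorial aside (illustration only, not part of this patch) ---
// With split masks, "is this argument register trashed by the epilog?" must be answered against
// the mask of the register's own class, which is what the per-class checks added below do. A
// condensed helper with the same logic (the helper name is hypothetical):
//
//     bool IsArgRegTrashedByEpilog(var_types argType, regNumber reg,
//                                  regMaskGpr trashedGpr, regMaskFloat trashedFloat)
//     {
//         if (varTypeUsesIntReg(argType))
//             return (trashedGpr & genRegMask(reg)) != 0;
//         assert(varTypeUsesFloatArgReg(argType));
//         return (trashedFloat & genRegMask(reg)) != 0;
//     }
//
// The actual code below also checks mask/predicate argument registers under FEATURE_MASKED_HW_INTRINSICS.
// --- End editorial aside ---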
if (compiler->getNeedsGSSecurityCookie()) { - trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); - trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); + trashedGprByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); + trashedGprByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); } for (CallArg& arg : call->gtArgs.Args()) { for (unsigned j = 0; j < arg.AbiInfo.NumRegs; j++) { - regNumber reg = arg.AbiInfo.GetRegNum(j); - if ((trashedByEpilog & genRegMask(reg)) != 0) + regNumber reg = arg.AbiInfo.GetRegNum(j); + var_types argType = arg.AbiInfo.ArgType; + if (varTypeUsesIntReg(argType) && ((trashedGprByEpilog & genRegMask(reg)) != 0)) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Gpr Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } + else if (varTypeUsesFloatArgReg(argType) && ((trashedFloatByEpilog & genRegMask(reg)) != 0)) { JITDUMP("Tail call node:\n"); DISPTREE(call); - JITDUMP("Register used: %s\n", getRegName(reg)); + JITDUMP("Float Register used: %s\n", getRegName(reg)); assert(!"Argument to tailcall may be trashed by epilog"); } +#ifdef FEATURE_MASKED_HW_INTRINSICS + else if (varTypeUsesMaskReg(argType) && ((trashedPredicateByEpilog & genRegMask(reg)) != 0)) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Mask Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } +#endif // FEATURE_MASKED_HW_INTRINSICS } } } @@ -3786,7 +3808,8 @@ void CodeGen::genJmpMethod(GenTree* jmp) // are not frequent. for (varNum = 0; varNum < compiler->info.compArgsCount; varNum++) { - varDsc = compiler->lvaGetDesc(varNum); + varDsc = compiler->lvaGetDesc(varNum); + regNumber varReg = varDsc->GetRegNum(); if (varDsc->lvPromoted) { @@ -3797,17 +3820,17 @@ void CodeGen::genJmpMethod(GenTree* jmp) } noway_assert(varDsc->lvIsParam); - if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK)) + if (varDsc->lvIsRegArg && (varReg != REG_STK)) { // Skip reg args which are already in its right register for jmp call. // If not, we will spill such args to their stack locations. // // If we need to generate a tail call profiler hook, then spill all // arg regs to free them up for the callback. - if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) + if (!compiler->compIsProfilerHookNeeded() && (varReg == varDsc->GetArgReg())) continue; } - else if (varDsc->GetRegNum() == REG_STK) + else if (varReg == REG_STK) { // Skip args which are currently living in stack. continue; @@ -3816,7 +3839,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) // If we came here it means either a reg argument not in the right register or // a stack argument currently living in a register. In either case the following // assert should hold. - assert(varDsc->GetRegNum() != REG_STK); + assert(varReg != REG_STK); assert(varDsc->IsEnregisterableLcl()); var_types storeType = varDsc->GetStackSlotHomeType(); emitAttr storeSize = emitActualTypeSize(storeType); @@ -3841,9 +3864,8 @@ void CodeGen::genJmpMethod(GenTree* jmp) // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. // Therefore manually update life of varDsc->GetRegNum(). 
- regMaskTP tempMask = genRegMask(varDsc->GetRegNum()); - regSet.RemoveMaskVars(tempMask); - gcInfo.gcMarkRegSetNpt(tempMask); + regSet.RemoveMaskVars(varDsc->TypeGet(), genRegMask(varReg)); + gcInfo.gcMarkRegNpt(varReg); if (compiler->lvaIsGCTracked(varDsc)) { VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); @@ -3857,8 +3879,8 @@ void CodeGen::genJmpMethod(GenTree* jmp) #endif // Next move any un-enregistered register arguments back to their register. - regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. - unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + regMaskGpr fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. for (varNum = 0; varNum < compiler->info.compArgsCount; varNum++) { varDsc = compiler->lvaGetDesc(varNum); @@ -3930,7 +3952,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) // expecting it. Therefore manually update life of argReg. Note that GT_JMP marks the end of // the basic block and after which reg life and gc info will be recomputed for the new block // in genCodeForBBList(). - regSet.AddMaskVars(genRegMask(argReg)); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) @@ -3942,7 +3964,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) loadSize = emitActualTypeSize(loadType); GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE); - regSet.AddMaskVars(genRegMask(argRegNext)); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argRegNext)); gcInfo.gcMarkRegPtrVal(argRegNext, loadType); } @@ -4040,7 +4062,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, slotReg, varNum, ofs); } - regSet.AddMaskVars(genRegMask(slotReg)); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(slotReg)); gcInfo.gcMarkRegPtrVal(slotReg, loadType); if (genIsValidIntReg(slotReg) && compiler->info.compIsVarArgs) { @@ -4059,7 +4081,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) GetEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); } - regSet.AddMaskVars(genRegMask(argReg)); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); if (genIsValidIntReg(argReg) && compiler->info.compIsVarArgs) @@ -4089,14 +4111,14 @@ void CodeGen::genJmpMethod(GenTree* jmp) assert(compiler->info.compIsVarArgs); assert(firstArgVarNum != BAD_VAR_NUM); - regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; + regMaskGpr remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; if (remainingIntArgMask != RBM_NONE) { GetEmitter()->emitDisableGC(); for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) { - regNumber argReg = intArgRegs[argNum]; - regMaskTP argRegMask = genRegMask(argReg); + regNumber argReg = intArgRegs[argNum]; + regMaskGpr argRegMask = genRegMask(argReg); if ((remainingIntArgMask & argRegMask) != 0) { @@ -4893,7 +4915,12 @@ void CodeGen::genPushCalleeSavedRegisters() intRegState.rsCalleeRegArgMaskLiveIn); #endif - regMaskTP rsPushRegs = regSet.rsGetModifiedCalleeSavedRegsMask(); + regMaskGpr rsPushGprRegs = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs() & RBM_INT_CALLEE_SAVED; + regMaskFloat rsPushFloatRegs = + 
regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_FLT_CALLEE_SAVED; +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate rsPushPredicateRegs = regSet.rsGetModifiedPredicateRegsMask() & RBM_MSK_CALLEE_SAVED; +#endif #if ETW_EBP_FRAMED if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) @@ -4904,7 +4931,7 @@ void CodeGen::genPushCalleeSavedRegisters() // On ARM we push the FP (frame-pointer) here along with all other callee saved registers if (isFramePointerUsed()) - rsPushRegs |= RBM_FPBASE; + rsPushGprRegs |= RBM_FPBASE; // // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require @@ -4925,24 +4952,38 @@ void CodeGen::genPushCalleeSavedRegisters() // Given the limited benefit from this optimization (<10k for CoreLib NGen image), the extra complexity // is not worth it. // - rsPushRegs |= RBM_LR; // We must save the return address (in the LR register) + rsPushGprRegs |= RBM_LR; // We must save the return address (in the LR register) - regSet.rsMaskCalleeSaved = rsPushRegs; + regSet.rsGprMaskCalleeSaved = rsPushGprRegs; + regSet.rsFloatMaskCalleeSaved = rsPushFloatRegs; +#ifdef FEATURE_MASKED_HW_INTRINSICS + regSet.rsPredicateMaskCalleeSaved = rsPushPredicateRegs; +#endif #ifdef DEBUG - if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + unsigned pushRegsCnt = genCountBits(rsPushGprRegs) + genCountBits(rsPushFloatRegs); +#ifdef FEATURE_MASKED_HW_INTRINSICS + pushRegsCnt += genCountBits(rsPushPredicateRegs); +#endif + + if (compiler->compCalleeRegsPushed != pushRegsCnt) { printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", - compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); - dspRegMask(rsPushRegs); + compiler->compCalleeRegsPushed, pushRegsCnt); + dspRegMask(AllRegsMask(rsPushGprRegs, rsPushFloatRegs +#ifdef FEATURE_MASKED_HW_INTRINSICS + , + rsPushPredicateRegs +#endif + )); printf("\n"); - assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + assert(compiler->compCalleeRegsPushed == pushRegsCnt); } #endif // DEBUG #if defined(TARGET_ARM) - regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT; - regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat; + regMaskFloat maskPushRegsFloat = rsPushFloatRegs; + regMaskGpr maskPushRegsInt = rsPushGprRegs; maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat); @@ -5048,17 +5089,13 @@ void CodeGen::genPushCalleeSavedRegisters() int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. 
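// Note: rsPushRegs is now carried as separate gpr/float masks (rsPushGprRegs/rsPushFloatRegs, plus predicate regs under
// FEATURE_MASKED_HW_INTRINSICS); an aggregate is rebuilt only where one is actually needed, e.g. (matching the
// AllRegsMask constructor used below):
//   dspRegMask(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat));
//   genSaveCalleeSavedRegistersHelp(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat), offset, -calleeSaveSpDelta);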
- regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; - regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; + regMaskFloat maskSaveRegsFloat = rsPushFloatRegs; + regMaskGpr maskSaveRegsInt = rsPushGprRegs; #ifdef DEBUG if (verbose) { - printf("Save float regs: "); - dspRegMask(maskSaveRegsFloat); - printf("\n"); - printf("Save int regs: "); - dspRegMask(maskSaveRegsInt); + dspRegMask(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat)); printf("\n"); } #endif // DEBUG @@ -5303,7 +5340,8 @@ void CodeGen::genPushCalleeSavedRegisters() const int calleeSaveSpOffset = offset; JITDUMP(" offset=%d, calleeSaveSpDelta=%d\n", offset, calleeSaveSpDelta); - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSpDelta); + + genSaveCalleeSavedRegistersHelp(AllRegsMask(maskSaveRegsInt, maskSaveRegsFloat), offset, -calleeSaveSpDelta); offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES; @@ -5486,10 +5524,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); printf(", gcRegGCrefSetCur="); printRegMaskInt(gcInfo.gcRegGCrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispGprRegSet(gcInfo.gcRegGCrefSetCur); printf(", gcRegByrefSetCur="); printRegMaskInt(gcInfo.gcRegByrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispGprRegSet(gcInfo.gcRegByrefSetCur); printf("\n"); } #endif // DEBUG @@ -5540,8 +5578,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0); } - if (jmpEpilog || - genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedFltCalleeSavedRegsMask()) == RBM_NONE) + if (jmpEpilog || genStackAllocRegisterMask(compiler->compLclFrameSize, + regSet.rsGetModifiedFloatRegsMask() & RBM_FLT_CALLEE_SAVED) == RBM_NONE) { genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted); } @@ -5661,7 +5699,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (addrInfo.accessType == IAT_PVALUE) { GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); - regSet.verifyRegUsed(indCallReg); + regSet.verifyGprRegUsed(indCallReg); } break; @@ -5675,7 +5713,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) indCallReg = REG_R12; addr = NULL; - regSet.verifyRegUsed(indCallReg); + regSet.verifyGprRegUsed(indCallReg); break; } diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 1edfb2ea124a72..231e7332d53fcf 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -130,7 +130,7 @@ CodeGen::CodeGen(Compiler* theCompiler) #if defined(TARGET_XARCH) // Shouldn't be used before it is set in genFnProlog() - compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1; + compiler->compCalleeFPRegsSavedMask = (regMaskFloat)-1; #endif // defined(TARGET_XARCH) #endif // DEBUG @@ -484,9 +484,9 @@ void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife) // Return the register mask for the given register variable // inline -regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc) +regMaskOnlyOne CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc) { - regMaskTP regMask = RBM_NONE; + regMaskOnlyOne regMask = RBM_NONE; assert(varDsc->lvIsInReg()); @@ -504,11 +504,11 @@ regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc) // Return the register mask for the given lclVar or regVar tree node // inline -regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree) 
+regMaskOnlyOne CodeGenInterface::genGetRegMask(GenTree* tree) { assert(tree->gtOper == GT_LCL_VAR); - regMaskTP regMask = RBM_NONE; + regMaskOnlyOne regMask = RBM_NONE; const LclVarDsc* varDsc = compiler->lvaGetDesc(tree->AsLclVarCommon()); if (varDsc->lvPromoted) { @@ -535,7 +535,8 @@ regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree) // inline void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree)) { - regMaskTP regMask = genGetRegMask(varDsc); + regMaskOnlyOne regMask = genGetRegMask(varDsc); + assert(compiler->IsOnlyOneRegMask(regMask)); #ifdef DEBUG if (compiler->verbose) @@ -554,15 +555,15 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo // We'd like to be able to assert the following, however if we are walking // through a qmark/colon tree, we may encounter multiple last-use nodes. // assert((regSet.GetMaskVars() & regMask) == regMask); - regSet.RemoveMaskVars(regMask); + regSet.RemoveMaskVars(varDsc->TypeGet(), regMask); } else { // If this is going live, the register must not have a variable in it, except // in the case of an exception or "spill at single-def" variable, which may be already treated // as live in the register. - assert(varDsc->IsAlwaysAliveInMemory() || ((regSet.GetMaskVars() & regMask) == 0)); - regSet.AddMaskVars(regMask); + assert(varDsc->IsAlwaysAliveInMemory() || ((regSet.GetMaskVars(varDsc->TypeGet()) & regMask) == 0)); + regSet.AddMaskVars(varDsc->TypeGet(), regMask); } } @@ -576,7 +577,7 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo // Return Value: // Mask of register kills -- registers whose values are no longer guaranteed to be the same. // -regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) +CONSTREF_AllRegsMask Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) { switch (helper) { @@ -588,19 +589,19 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) // case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: - return RBM_CALLEE_TRASH_WRITEBARRIER; + return AllRegsMask_CALLEE_TRASH_WRITEBARRIER; case CORINFO_HELP_ASSIGN_BYREF: - return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; + return AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF; case CORINFO_HELP_PROF_FCN_ENTER: - return RBM_PROFILER_ENTER_TRASH; + return AllRegsMask_PROFILER_ENTER_TRASH; case CORINFO_HELP_PROF_FCN_LEAVE: - return RBM_PROFILER_LEAVE_TRASH; + return AllRegsMask_PROFILER_LEAVE_TRASH; case CORINFO_HELP_PROF_FCN_TAILCALL: - return RBM_PROFILER_TAILCALL_TRASH; + return AllRegsMask_PROFILER_TAILCALL_TRASH; #ifdef TARGET_X86 case CORINFO_HELP_ASSIGN_REF_EAX: @@ -616,20 +617,20 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP: case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI: case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI: - return RBM_EDX; + return AllRegsMask_EDX; #endif case CORINFO_HELP_STOP_FOR_GC: - return RBM_STOP_FOR_GC_TRASH; + return AllRegsMask_STOP_FOR_GC_TRASH; case CORINFO_HELP_INIT_PINVOKE_FRAME: - return RBM_INIT_PINVOKE_FRAME_TRASH; + return AllRegsMask_INIT_PINVOKE_FRAME_TRASH; case CORINFO_HELP_VALIDATE_INDIRECT_CALL: - return RBM_VALIDATE_INDIRECT_CALL_TRASH; + return AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH; default: - return RBM_CALLEE_TRASH; + return AllRegsMask_CALLEE_TRASH; } } @@ -707,7 +708,7 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) { // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates 
the // gc sets - regMaskTP regMask = varDsc->lvRegMask(); + regMaskOnlyOne regMask = varDsc->lvRegMask(); if (isGCRef) { codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask; @@ -752,7 +753,7 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); } codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr)); - regMaskTP regMask = varDsc->lvRegMask(); + regMaskOnlyOne regMask = varDsc->lvRegMask(); if (isGCRef) { codeGen->gcInfo.gcRegGCrefSetCur |= regMask; @@ -3270,17 +3271,17 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA)) { - var_types copyType = node->outgoing->type; - regMaskTP tempRegCandidates = genGetParameterHomingTempRegisterCandidates(); + var_types copyType = node->outgoing->type; + RegBitSet64 tempRegCandidates = genGetParameterHomingTempRegisterCandidates(); tempRegCandidates &= ~busyRegs; - regMaskTP regTypeMask = varTypeUsesFloatReg(copyType) ? RBM_ALLFLOAT : RBM_ALLINT; - regMaskTP availRegs = tempRegCandidates & regTypeMask; + regMaskOnlyOne regTypeMask = varTypeUsesFloatReg(copyType) ? RBM_ALLFLOAT : RBM_ALLINT; + regMaskOnlyOne availRegs = tempRegCandidates & regTypeMask; // We should have ensured temporary registers are available in // genFinalizeFrame. noway_assert(availRegs != RBM_NONE); - node->copiedReg = genFirstRegNumFromMask(availRegs); + node->copiedReg = genFirstRegNumFromMask(availRegs MORE_THAN_64_REG_ARG(copyType)); busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); @@ -3359,10 +3360,10 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // destination register of a parameter, or because a value passed in one of // these registers is still needed. // -regMaskTP CodeGen::genGetParameterHomingTempRegisterCandidates() +RegBitSet64 CodeGen::genGetParameterHomingTempRegisterCandidates() { return RBM_CALLEE_TRASH | intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn | - regSet.rsGetModifiedRegsMask(); + regSet.rsGetModifiedRegsMask().GetGprFloatCombinedMask(); } /***************************************************************************** @@ -3679,7 +3680,7 @@ void CodeGen::genCheckUseBlockInit() if (genUseBlockInit) { - regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn; + regMaskGpr maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn; // If there is a secret stub param, don't count it, as it will no longer // be live when we do block init. @@ -3696,11 +3697,11 @@ void CodeGen::genCheckUseBlockInit() // int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1; if (forceSpillRegCount > 0) - regSet.rsSetRegsModified(RBM_R4); + regSet.rsSetGprRegsModified(RBM_R4); if (forceSpillRegCount > 1) - regSet.rsSetRegsModified(RBM_R5); + regSet.rsSetGprRegsModified(RBM_R5); if (forceSpillRegCount > 2) - regSet.rsSetRegsModified(RBM_R6); + regSet.rsSetGprRegsModified(RBM_R6); #endif // TARGET_ARM } } @@ -3715,9 +3716,13 @@ void CodeGen::genCheckUseBlockInit() * initialized to 0. (Arm Only) Else copies from the integer register which * is slower. 
*/ -void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg) +void CodeGen::genZeroInitFltRegs(const regMaskFloat& initFltRegs, + const regMaskFloat& initDblRegs, + const regNumber& initReg) { assert(compiler->compGeneratingProlog); + assert(compiler->IsFloatRegMask(initFltRegs)); + assert(compiler->IsFloatRegMask(initDblRegs)); // The first float/double reg that is initialized to 0. So they can be used to // initialize the remaining registers. @@ -3726,7 +3731,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& // Iterate through float/double registers and initialize them to 0 or // copy from already initialized register of the same type. - regMaskTP regMask = genRegMask(REG_FP_FIRST); + regMaskFloat regMask = genRegMask(REG_FP_FIRST); for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1) { if (regMask & initFltRegs) @@ -4162,8 +4167,8 @@ void CodeGen::genHomeSwiftStructParameters(bool handleStack) if (seg.IsPassedInRegister()) { - RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState; - regMaskTP regs = seg.GetRegisterMask(); + RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState; + regMaskOnlyOne regs = seg.GetRegisterMask(); if ((regState->rsCalleeRegArgMaskLiveIn & regs) != RBM_NONE) { @@ -4462,8 +4467,10 @@ void CodeGen::genReserveProlog(BasicBlock* block) void CodeGen::genReserveEpilog(BasicBlock* block) { - regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur; - regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur; + regMaskGpr gcrefRegsArg = gcInfo.gcRegGCrefSetCur; + regMaskGpr byrefRegsArg = gcInfo.gcRegByrefSetCur; + assert(compiler->IsGprRegMask(gcrefRegsArg)); + assert(compiler->IsGprRegMask(byrefRegsArg)); /* The return value is special-cased: make sure it goes live for the epilog */ @@ -4577,7 +4584,7 @@ void CodeGen::genFinalizeFrame() // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers // actually get saved. - regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED); + regSet.rsSetGprRegsModified(RBM_INT_CALLEE_SAVED); } #endif // TARGET_X86 @@ -4585,14 +4592,14 @@ void CodeGen::genFinalizeFrame() // Make sure that callee-saved registers used by call to a stack probing helper generated are pushed on stack. if (compiler->compLclFrameSize >= compiler->eeGetPageSize()) { - regSet.rsSetRegsModified(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET | - RBM_STACK_PROBE_HELPER_TRASH); + regSet.rsSetGprRegsModified(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET | + RBM_STACK_PROBE_HELPER_TRASH); } // If there are any reserved registers, add them to the modified set. if (regSet.rsMaskResvd != RBM_NONE) { - regSet.rsSetRegsModified(regSet.rsMaskResvd); + regSet.rsSetGprRegsModified(regSet.rsMaskResvd); } #endif // TARGET_ARM @@ -4611,15 +4618,19 @@ void CodeGen::genFinalizeFrame() // We always save FP. 
noway_assert(isFramePointerUsed()); #if defined(TARGET_AMD64) || defined(TARGET_ARM64) - regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_ENC_CALLEE_SAVED); if (RBM_ENC_CALLEE_SAVED != 0) { - regSet.rsSetRegsModified(RBM_ENC_CALLEE_SAVED); + regSet.rsSetGprRegsModified(RBM_ENC_CALLEE_SAVED); } - noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0); + noway_assert( + (regSet.rsGetModifiedGprRegsMask() & ~(RBM_INT_CALLEE_TRASH | RBM_FPBASE | RBM_ENC_CALLEE_SAVED)) == 0); + noway_assert((regSet.rsGetModifiedFloatRegsMask() & ~RBM_FLT_CALLEE_TRASH) == 0); +#ifdef FEATURE_MASKED_HW_INTRINSICS + noway_assert((regSet.rsGetModifiedPredicateRegsMask() & ~RBM_MSK_CALLEE_TRASH) == 0); +#endif // FEATURE_MASKED_HW_INTRINSICS #else // !TARGET_AMD64 && !TARGET_ARM64 // On x86 we save all callee saved regs so the saved reg area size is consistent - regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + regSet.rsSetGprRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); #endif // !TARGET_AMD64 && !TARGET_ARM64 } @@ -4627,26 +4638,26 @@ void CodeGen::genFinalizeFrame() if (compiler->compMethodRequiresPInvokeFrame()) { noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame - regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + regSet.rsSetGprRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); } // Parameter homing may need an additional register to handle conflicts if // all callee trash registers are used by parameters. - regMaskTP homingCandidates = genGetParameterHomingTempRegisterCandidates(); + RegBitSet64 homingCandidates = genGetParameterHomingTempRegisterCandidates(); if (((homingCandidates & ~intRegState.rsCalleeRegArgMaskLiveIn) & RBM_ALLINT) == RBM_NONE) { - regMaskTP extraRegMask = RBM_ALLINT & ~homingCandidates; + regMaskGpr extraRegMask = RBM_ALLINT & ~homingCandidates; assert(extraRegMask != RBM_NONE); - regNumber extraReg = genFirstRegNumFromMask(extraRegMask); + regNumber extraReg = genFirstRegNumFromMask(extraRegMask MORE_THAN_64_REG_ARG(TYP_INT)); JITDUMP("No temporary registers are available for integer parameter homing. Adding %s\n", getRegName(extraReg)); regSet.rsSetRegsModified(genRegMask(extraReg)); } if (((homingCandidates & ~floatRegState.rsCalleeRegArgMaskLiveIn) & RBM_ALLFLOAT) == RBM_NONE) { - regMaskTP extraRegMask = RBM_ALLFLOAT & ~homingCandidates; + regMaskFloat extraRegMask = RBM_ALLFLOAT & ~homingCandidates; assert(extraRegMask != RBM_NONE); - regNumber extraReg = genFirstRegNumFromMask(extraRegMask); + regNumber extraReg = genFirstRegNumFromMask(extraRegMask MORE_THAN_64_REG_ARG(TYP_FLOAT)); JITDUMP("No temporary registers are available for float parameter homing. Adding %s\n", getRegName(extraReg)); regSet.rsSetRegsModified(genRegMask(extraReg)); } @@ -4655,7 +4666,7 @@ void CodeGen::genFinalizeFrame() // On Unix x64 we also save R14 and R15 for ELT profiler hook generation. 
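// Note: most rsSetRegsModified calls in this function now go through the per-kind variants
// (rsSetGprRegsModified / rsSetFloatRegsModified), so only the register file that was actually touched
// is marked as modified.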
if (compiler->compIsProfilerHookNeeded()) { - regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1); + regSet.rsSetGprRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1); } #endif @@ -4669,14 +4680,19 @@ noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); #endif - regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedCalleeSavedRegsMask(); + regMaskGpr maskPushRegsInt = regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs(); + regMaskFloat maskPushRegsFloat = + regSet.rsGetModifiedCalleeSavedRegsMask().GetGprFloatCombinedMask() & RBM_FLT_CALLEE_SAVED; +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate maskPushRegsPredicate = regSet.rsGetModifiedPredicateRegsMask() & RBM_MSK_CALLEE_SAVED; +#endif #ifdef TARGET_ARMARCH if (isFramePointerUsed()) { // For a FP based frame we have to push/pop the FP register // - maskCalleeRegsPushed |= RBM_FPBASE; + maskPushRegsInt |= RBM_FPBASE; // This assert check that we are not using REG_FP // as both the frame pointer and as a codegen register @@ -4686,15 +4702,12 @@ // we always push LR. See genPushCalleeSavedRegisters // - maskCalleeRegsPushed |= RBM_LR; + maskPushRegsInt |= RBM_LR; #if defined(TARGET_ARM) // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64? - regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT; - regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat; - if ((maskPushRegsFloat != RBM_NONE) || - (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD))) + (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskPushRegsInt & RBM_OPT_RSVD))) { // Here we try to keep stack double-aligned before the vpush if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0) @@ -4707,10 +4720,9 @@ if (extraPushedReg < REG_R11) { maskPushRegsInt |= genRegMask(extraPushedReg); - regSet.rsSetRegsModified(genRegMask(extraPushedReg)); + regSet.rsSetGprRegsModified(genRegMask(extraPushedReg)); } } - maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat; } // We currently only expect to push/pop consecutive FP registers // if (maskPushRegsFloat != RBM_NONE) { - regMaskTP contiguousMask = genRegMaskFloat(REG_F16); + regMaskFloat contiguousMask = genRegMaskFloat(REG_F16); while (maskPushRegsFloat > contiguousMask) { contiguousMask <<= 2; @@ -4727,10 +4739,10 @@ } if (maskPushRegsFloat != contiguousMask) { - regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat; + regMaskFloat maskExtraRegs = contiguousMask - maskPushRegsFloat; + regSet.rsSetFloatRegsModified(maskExtraRegs); maskPushRegsFloat |= maskExtraRegs; - regSet.rsSetRegsModified(maskExtraRegs); - maskCalleeRegsPushed |= maskExtraRegs; } } #endif // TARGET_ARM @@ -4740,8 +4752,8 @@ // Compute the count of callee saved float regs saved on stack. // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm31) // regs are stack allocated and preserved in their stack locations.
- compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED; - maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; + compiler->compCalleeFPRegsSavedMask = maskPushRegsFloat & RBM_FLT_CALLEE_SAVED; + maskPushRegsFloat &= ~RBM_FLT_CALLEE_SAVED; #endif // defined(TARGET_XARCH) #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) @@ -4749,7 +4761,7 @@ void CodeGen::genFinalizeFrame() { // For a FP based frame we have to push/pop the FP register // - maskCalleeRegsPushed |= RBM_FPBASE; + maskPushRegsInt |= RBM_FPBASE; // This assert check that we are not using REG_FP // as both the frame pointer and as a codegen register @@ -4758,16 +4770,24 @@ void CodeGen::genFinalizeFrame() } // we always push RA. See genPushCalleeSavedRegisters - maskCalleeRegsPushed |= RBM_RA; + maskPushRegsInt |= RBM_RA; #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 - compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); + compiler->compCalleeRegsPushed = genCountBits(maskPushRegsInt) + genCountBits(maskPushRegsFloat); +#ifdef FEATURE_MASKED_HW_INTRINSICS + compiler->compCalleeRegsPushed += genCountBits(maskPushRegsPredicate); +#endif #ifdef DEBUG if (verbose) { printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed); - dspRegMask(maskCalleeRegsPushed); + dspRegMask(AllRegsMask(maskPushRegsInt, maskPushRegsFloat +#ifdef FEATURE_MASKED_HW_INTRINSICS + , + maskPushRegsPredicate +#endif + )); printf("\n"); } #endif // DEBUG @@ -4917,9 +4937,9 @@ void CodeGen::genFnProlog() int GCrefHi = -INT_MAX; bool hasGCRef = false; - regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed. - regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed. - regMaskTP initDblRegs = RBM_NONE; + regMaskGpr initRegs = RBM_NONE; // Registers which must be init'ed. + regMaskFloat initFltRegs = RBM_NONE; // FP registers which must be init'ed. + regMaskFloat initDblRegs = RBM_NONE; unsigned varNum; LclVarDsc* varDsc; @@ -4992,8 +5012,8 @@ void CodeGen::genFnProlog() if (isInReg) { - regNumber regForVar = varDsc->GetRegNum(); - regMaskTP regMask = genRegMask(regForVar); + regNumber regForVar = varDsc->GetRegNum(); + singleRegMask regMask = genRegMask(regForVar); if (!genIsValidFloatReg(regForVar)) { initRegs |= regMask; @@ -5105,9 +5125,9 @@ void CodeGen::genFnProlog() // Track if initReg holds non-zero value. Start conservative and assume it has non-zero value. // If initReg is ever set to zero, this variable is set to true and zero initializing initReg // will be skipped. - bool initRegZeroed = false; - regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn; - regMaskTP tempMask; + bool initRegZeroed = false; + regMaskGpr excludeMask = intRegState.rsCalleeRegArgMaskLiveIn; + regMaskGpr tempMask; // We should not use the special PINVOKE registers as the initReg // since they are trashed by the jithelper call to setup the PINVOKE frame @@ -5154,18 +5174,18 @@ void CodeGen::genFnProlog() // We will use one of the registers that we were planning to zero init anyway. // We pick the lowest register number. tempMask = genFindLowestBit(tempMask); - initReg = genRegNumFromMask(tempMask); + initReg = genRegNumFromMask(tempMask MORE_THAN_64_REG_ARG(TYP_INT)); } // Next we prefer to use one of the unused argument registers. // If they aren't available we use one of the caller-saved integer registers. 
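// Note: genFirstRegNumFromMask / genRegNumFromMask now take an extra MORE_THAN_64_REG_ARG(type) argument
// (TYP_INT for gpr masks, TYP_FLOAT for float masks); the macro presumably forwards the type only on targets
// whose register masks no longer fit in a single 64-bit word, so the right register bank can be selected.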
else { - tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; + tempMask = regSet.rsGetModifiedGprRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; if (tempMask != RBM_NONE) { // We pick the lowest register number tempMask = genFindLowestBit(tempMask); - initReg = genRegNumFromMask(tempMask); + initReg = genRegNumFromMask(tempMask MORE_THAN_64_REG_ARG(TYP_INT)); } } @@ -5343,11 +5363,11 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - regMaskTP maskStackAlloc = RBM_NONE; + regMaskGpr maskStackAlloc = RBM_NONE; #ifdef TARGET_ARM maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize + extraFrameSize, - regSet.rsGetModifiedFltCalleeSavedRegsMask()); + regSet.rsGetModifiedFloatRegsMask() & RBM_FLT_CALLEE_SAVED); #endif // TARGET_ARM if (maskStackAlloc == RBM_NONE) @@ -5372,7 +5392,7 @@ void CodeGen::genFnProlog() if (compiler->compLocallocUsed) { GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE, /* canSkip */ false); - regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP); + regSet.verifyGprRegUsed(REG_SAVED_LOCALLOC_SP); compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0); } #endif // TARGET_ARMARCH @@ -5564,7 +5584,7 @@ void CodeGen::genFnProlog() if (initRegs) { - regMaskTP regMask = 0x1; + regMaskGpr regMask = 0x1; for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1) { @@ -5658,7 +5678,7 @@ void CodeGen::genFnProlog() // MOV EAX, assert(compiler->lvaVarargsHandleArg == compiler->info.compArgsCount - 1); GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->lvaVarargsHandleArg, 0); - regSet.verifyRegUsed(REG_EAX); + regSet.verifyGprRegUsed(REG_EAX); // MOV EAX, [EAX] GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0); @@ -6086,7 +6106,7 @@ void CodeGen::genSinglePop() // Notes: // This function does not check if the register is marked as used, etc. 
// -regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs) +regMaskGpr CodeGen::genPushRegs(regMaskGpr regs, regMaskGpr* byrefRegs, regMaskGpr* noRefRegs) { *byrefRegs = RBM_NONE; *noRefRegs = RBM_NONE; @@ -6096,6 +6116,8 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* return RBM_NONE; } + assert(compiler->IsGprRegMask(regs)); + #if FEATURE_FIXED_OUT_ARGS NYI("Don't call genPushRegs with real regs!"); @@ -6106,11 +6128,11 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL)); noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL)); - regMaskTP pushedRegs = regs; + regMaskGpr pushedRegs = regs; for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) { - regMaskTP regBit = regMaskTP(1) << reg; + regMaskGpr regBit = regMaskGpr(1) << reg; if ((regBit & regs) == RBM_NONE) continue; @@ -6159,13 +6181,14 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* // Return Value: // None // -void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs) +void CodeGen::genPopRegs(regMaskGpr regs, regMaskGpr byrefRegs, regMaskGpr noRefRegs) { if (regs == RBM_NONE) { return; } + assert(compiler->IsGprRegMask(regs)); #if FEATURE_FIXED_OUT_ARGS NYI("Don't call genPopRegs with real regs!"); @@ -6182,7 +6205,7 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg // Walk the registers in the reverse order as genPushRegs() for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) { - regMaskTP regBit = regMaskTP(1) << reg; + regMaskGpr regBit = regMaskGpr(1) << reg; if ((regBit & regs) == RBM_NONE) continue; @@ -7035,7 +7058,7 @@ void CodeGen::genReturn(GenTree* treeNode) if (compiler->compMethodReturnsRetBufAddr()) { - gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET)); + gcInfo.gcMarkGprRegNpt(REG_INTRET); } else { @@ -7043,7 +7066,7 @@ void CodeGen::genReturn(GenTree* treeNode) { if (varTypeIsGC(retTypeDesc.GetReturnRegType(i))) { - gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv))); + gcInfo.gcMarkRegNpt(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)); } } } @@ -7478,30 +7501,37 @@ void CodeGen::genRegCopy(GenTree* treeNode) unsigned regCount = op1->GetMultiRegCount(compiler); assert(regCount <= MAX_MULTIREG_COUNT); - // First set the source registers as busy if they haven't been spilled. - // (Note that this is just for verification that we don't have circular dependencies.) - regMaskTP busyRegs = RBM_NONE; +// First set the source registers as busy if they haven't been spilled. +// (Note that this is just for verification that we don't have circular dependencies.) +#ifdef DEBUG + AllRegsMask busyRegs; for (unsigned i = 0; i < regCount; ++i) { if ((op1->GetRegSpillFlagByIdx(i) & GTF_SPILLED) == 0) { - busyRegs |= genRegMask(op1->GetRegByIndex(i)); + regNumber reg = op1->GetRegByIndex(i); + busyRegs.AddRegNumInMask(reg); } } +#endif // DEBUG + for (unsigned i = 0; i < regCount; ++i) { regNumber sourceReg = op1->GetRegByIndex(i); // genRegCopy will consume the source register, perform any required reloads, // and will return either the register copied to, or the original register if there's no copy. 
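// Note: the circular-dependency verification below is compiled only under DEBUG and now tracks the busy
// registers in an AllRegsMask (AddRegNumInMask / IsRegNumInMask / RemoveRegNumFromMask), which presumably
// lets one structure cover gpr, float and predicate registers alike.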
regNumber targetReg = genRegCopy(treeNode, i); + +#ifdef DEBUG + if (targetReg != sourceReg) { - regMaskTP targetRegMask = genRegMask(targetReg); - assert((busyRegs & targetRegMask) == 0); - // Clear sourceReg from the busyRegs, and add targetReg. - busyRegs &= ~genRegMask(sourceReg); + singleRegMask targetRegMask = genRegMask(targetReg); + assert(!busyRegs.IsRegNumInMask(targetReg)); + busyRegs.RemoveRegNumFromMask(sourceReg); } - busyRegs |= genRegMask(targetReg); + busyRegs.AddRegNumInMask(targetReg); +#endif // DEBUG } return; } @@ -7540,7 +7570,7 @@ void CodeGen::genRegCopy(GenTree* treeNode) // The old location is dying genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); - gcInfo.gcMarkRegSetNpt(genRegMask(op1->GetRegNum())); + gcInfo.gcMarkRegNpt(op1->GetRegNum()); genUpdateVarReg(varDsc, treeNode); @@ -7607,7 +7637,7 @@ regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex) { // The old location is dying genUpdateRegLife(fieldVarDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); - gcInfo.gcMarkRegSetNpt(genRegMask(sourceReg)); + gcInfo.gcMarkRegNpt(sourceReg); genUpdateVarReg(fieldVarDsc, treeNode); // Report the home change for this variable @@ -7693,7 +7723,7 @@ unsigned CodeGenInterface::getCurrentStackLevel() const // This function emits code to poison address exposed non-zero-inited local variables. We expect this function // to be called when emitting code for the scratch BB that comes right after the prolog. // The variables are poisoned using 0xcdcdcdcd. -void CodeGen::genPoisonFrame(regMaskTP regLiveIn) +void CodeGen::genPoisonFrame(regMaskGpr regLiveIn) { assert(compiler->compShouldPoisonFrame()); #if defined(TARGET_XARCH) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index ef87ccca858702..b774c1d41c715a 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -36,9 +36,9 @@ class emitter; struct RegState { - regMaskTP rsCalleeRegArgMaskLiveIn; // mask of register arguments (live on entry to method) - unsigned rsCalleeRegArgCount; // total number of incoming register arguments of this kind (int or float) - bool rsIsFloat; // true for float argument registers, false for integer argument registers + regMaskOnlyOne rsCalleeRegArgMaskLiveIn; // mask of register arguments (live on entry to method) + unsigned rsCalleeRegArgCount; // total number of incoming register arguments of this kind (int or float) + bool rsIsFloat; // true for float argument registers, false for integer argument registers }; //-------------------- CodeGenInterface --------------------------------- @@ -60,31 +60,31 @@ class CodeGenInterface } #if defined(TARGET_AMD64) - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + regMaskFloat rbmAllFloat; + regMaskFloat rbmFltCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + regMaskPredicate rbmAllMask; + regMaskPredicate rbmMskCalleeTrash; // Call this function after the equivalent fields in Compiler have been initialized. 
void CopyRegisterInfo(); - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE regMaskPredicate get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -156,8 +156,8 @@ class CodeGenInterface VARSET_TP genLastLiveSet; // A one element map (genLastLiveSet-> genLastLiveMask) regMaskTP genLastLiveMask; // these two are used in genLiveMask - regMaskTP genGetRegMask(const LclVarDsc* varDsc); - regMaskTP genGetRegMask(GenTree* tree); + regMaskOnlyOne genGetRegMask(const LclVarDsc* varDsc); + regMaskOnlyOne genGetRegMask(GenTree* tree); void genUpdateLife(GenTree* tree); void genUpdateLife(VARSET_VALARG_TP newLife); diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 351ca14942838b..efd21811e81add 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -60,7 +60,7 @@ void CodeGen::genInitializeRegisterState() regNumber reg = varDsc->GetRegNum(); if (genIsValidIntReg(reg)) { - regSet.verifyRegUsed(reg); + regSet.verifyGprRegUsed(reg); } } } @@ -202,9 +202,9 @@ void CodeGen::genCodeForBBlist() // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change // here. That would require handling the changes in recordVarLocationsAtStartOfBB(). - regMaskTP newLiveRegSet = RBM_NONE; - regMaskTP newRegGCrefSet = RBM_NONE; - regMaskTP newRegByrefSet = RBM_NONE; + AllRegsMask newLiveRegSet; + regMaskGpr newRegGCrefSet = RBM_NONE; + regMaskGpr newRegByrefSet = RBM_NONE; #ifdef DEBUG VARSET_TP removedGCVars(VarSetOps::MakeEmpty(compiler)); VARSET_TP addedGCVars(VarSetOps::MakeEmpty(compiler)); @@ -217,14 +217,18 @@ void CodeGen::genCodeForBBlist() if (varDsc->lvIsInReg()) { - newLiveRegSet |= varDsc->lvRegMask(); + regMaskOnlyOne varRegMask = varDsc->lvRegMask(); + assert(compiler->IsOnlyOneRegMask(varRegMask)); + + newLiveRegSet.AddRegMaskForType(varRegMask, varDsc->TypeGet()); + if (varDsc->lvType == TYP_REF) { - newRegGCrefSet |= varDsc->lvRegMask(); + newRegGCrefSet |= varRegMask; } else if (varDsc->lvType == TYP_BYREF) { - newRegByrefSet |= varDsc->lvRegMask(); + newRegByrefSet |= varRegMask; } if (!varDsc->IsAlwaysAliveInMemory()) { @@ -400,7 +404,7 @@ void CodeGen::genCodeForBBlist() // We cannot emit this code in the prolog as it might make the prolog too large. if (compiler->compShouldPoisonFrame() && compiler->fgBBisScratch(block)) { - genPoisonFrame(newLiveRegSet); + genPoisonFrame(newLiveRegSet.gprRegs()); } // Traverse the block in linear order, generating code for each node as we @@ -489,8 +493,8 @@ void CodeGen::genCodeForBBlist() /* Make sure we didn't bungle pointer register tracking */ - regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; - regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.GetMaskVars(); + regMaskGpr ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; + regMaskGpr nonVarPtrRegs = ptrRegs & ~regSet.GetGprMaskVars(); // If return is a GC-type, clear it. 
Note that if a common // epilog is generated (genReturnBB) it has a void return @@ -508,14 +512,14 @@ void CodeGen::genCodeForBBlist() if (nonVarPtrRegs) { printf("Regset after " FMT_BB " gcr=", block->bbNum); - printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.GetMaskVars()); - compiler->GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.GetMaskVars()); + printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.GetGprMaskVars()); + compiler->GetEmitter()->emitDispGprRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.GetGprMaskVars()); printf(", byr="); - printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.GetMaskVars()); - compiler->GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.GetMaskVars()); + printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.GetGprMaskVars()); + compiler->GetEmitter()->emitDispGprRegSet(gcInfo.gcRegByrefSetCur & ~regSet.GetGprMaskVars()); printf(", regVars="); - printRegMaskInt(regSet.GetMaskVars()); - compiler->GetEmitter()->emitDispRegSet(regSet.GetMaskVars()); + printRegMaskInt(regSet.GetGprMaskVars()); + compiler->GetEmitter()->emitDispGprRegSet(regSet.GetGprMaskVars()); printf("\n"); } @@ -955,7 +959,14 @@ void CodeGen::genSpillVar(GenTree* tree) // Remove the live var from the register. genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); - gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); + if (varTypeUsesIntReg(varDsc)) + { + // TYP_STRUCT are also VTR_INT and can return vector registers. + // Make sure that we pass the register, so Npt will be called + // only if the `reg` is Gpr. + regNumber reg = varDsc->GetRegNum(); + gcInfo.gcMarkRegNpt(reg); + } if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) { @@ -1102,7 +1113,7 @@ void CodeGen::genUnspillLocal( } #endif // DEBUG - regSet.AddMaskVars(genGetRegMask(varDsc)); + regSet.AddMaskVars(varDsc->TypeGet(), genGetRegMask(varDsc)); } gcInfo.gcMarkRegPtrVal(regNum, type); @@ -1433,11 +1444,11 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex) if (fldVarDsc->GetRegNum() == REG_STK) { // We have loaded this into a register only temporarily - gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + gcInfo.gcMarkRegNpt(reg); } else if (lcl->IsLastUse(multiRegIndex)) { - gcInfo.gcMarkRegSetNpt(genRegMask(fldVarDsc->GetRegNum())); + gcInfo.gcMarkRegNpt(fldVarDsc->GetRegNum()); } } else @@ -1445,7 +1456,7 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex) regNumber regAtIndex = tree->GetRegByIndex(multiRegIndex); if (regAtIndex != REG_NA) { - gcInfo.gcMarkRegSetNpt(genRegMask(regAtIndex)); + gcInfo.gcMarkRegNpt(regAtIndex); } } return reg; @@ -1512,11 +1523,11 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) if (varDsc->GetRegNum() == REG_STK) { // We have loaded this into a register only temporarily - gcInfo.gcMarkRegSetNpt(genRegMask(tree->GetRegNum())); + gcInfo.gcMarkRegNpt(tree->GetRegNum()); } else if ((tree->gtFlags & GTF_VAR_DEATH) != 0) { - gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->GetRegNum())); + gcInfo.gcMarkRegNpt(varDsc->GetRegNum()); } } else if (tree->gtSkipReloadOrCopy()->IsMultiRegLclVar()) @@ -1542,17 +1553,17 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) if (fldVarDsc->GetRegNum() == REG_STK) { // We have loaded this into a register only temporarily - gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + gcInfo.gcMarkRegNpt(reg); } else if (lcl->IsLastUse(i)) { - gcInfo.gcMarkRegSetNpt(genRegMask(fldVarDsc->GetRegNum())); + gcInfo.gcMarkRegNpt(fldVarDsc->GetRegNum()); } } } else { - 
gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(tree->gtGetGprRegMask()); } genCheckConsumeNode(tree); @@ -1835,7 +1846,7 @@ void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode) genUnspillRegIfNeeded(putArgNode); - gcInfo.gcMarkRegSetNpt(putArgNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(putArgNode->gtGetGprRegMask()); genCheckConsumeNode(putArgNode); } @@ -2133,14 +2144,14 @@ void CodeGen::genProduceReg(GenTree* tree) { regNumber reg = tree->GetRegByIndex(i); regSet.rsSpillTree(reg, tree, i); - gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + gcInfo.gcMarkRegNpt(reg); } } } else { regSet.rsSpillTree(tree->GetRegNum(), tree); - gcInfo.gcMarkRegSetNpt(genRegMask(tree->GetRegNum())); + gcInfo.gcMarkRegNpt(tree->GetRegNum()); } tree->gtFlags |= GTF_SPILLED; @@ -2234,8 +2245,8 @@ void CodeGen::genProduceReg(GenTree* tree) // transfer gc/byref status of src reg to dst reg void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) { - regMaskTP srcMask = genRegMask(src); - regMaskTP dstMask = genRegMask(dst); + regMaskOnlyOne srcMask = genRegMask(src); + regMaskOnlyOne dstMask = genRegMask(dst); if (gcInfo.gcRegGCrefSetCur & srcMask) { @@ -2247,7 +2258,7 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) } else { - gcInfo.gcMarkRegSetNpt(dstMask); + gcInfo.gcMarkRegNpt(dst); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 3e5f1a4b38a691..dda69cff0f874c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -75,7 +75,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) // mov eax, dword ptr [compiler->gsGlobalSecurityCookieAddr] // mov dword ptr [frame.GSSecurityCookie], eax GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_EAX, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - regSet.verifyRegUsed(REG_EAX); + regSet.verifyGprRegUsed(REG_EAX); GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, REG_EAX, compiler->lvaGSSecurityCookie, 0); if (initReg == REG_EAX) { @@ -127,8 +127,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } } - regNumber regGSCheck; - regMaskTP regMaskGSCheck = RBM_NONE; + regNumber regGSCheck; + regMaskGpr regMaskGSCheck = RBM_NONE; if (!pushReg) { @@ -163,9 +163,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) #endif // !TARGET_X86 } - regMaskTP byrefPushedRegs = RBM_NONE; - regMaskTP norefPushedRegs = RBM_NONE; - regMaskTP pushedRegs = RBM_NONE; + regMaskGpr byrefPushedRegs = RBM_NONE; + regMaskGpr norefPushedRegs = RBM_NONE; + regMaskGpr pushedRegs = RBM_NONE; if (compiler->gsGlobalSecurityCookieAddr == nullptr) { @@ -979,7 +979,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) { emit->emitIns(INS_cdq, size); // the cdq instruction writes RDX, So clear the gcInfo for RDX - gcInfo.gcMarkRegSetNpt(RBM_RDX); + gcInfo.gcMarkGprRegNpt(REG_RDX); } // Perform the 'targetType' (64-bit or 32-bit) divide instruction @@ -2382,8 +2382,12 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) // Return value: // None // -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +void CodeGen::genAllocLclFrame(unsigned frameSize, + regNumber initReg, + bool* pInitRegZeroed, + regMaskGpr maskArgRegsLiveIn) { + assert(compiler->IsGprRegMask(maskArgRegsLiveIn)); assert(compiler->compGeneratingProlog); if (frameSize == 0) @@ -2427,7 +2431,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* 
pIni } GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, spOffset); - regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); + regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_ARG); genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN); @@ -2445,7 +2449,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni RBM_NONE); GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); - regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); + regSet.verifyGprRegUsed(REG_STACK_PROBE_HELPER_ARG); genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN); @@ -3394,7 +3398,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); + gcInfo.gcMarkGprRegNpt(dstReg); } } @@ -4281,8 +4285,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // Clear the gcInfo for RSI and RDI. // While we normally update GC info prior to the last instruction that uses them, // these actually live into the helper call. - gcInfo.gcMarkRegSetNpt(RBM_RSI); - gcInfo.gcMarkRegSetNpt(RBM_RDI); + gcInfo.gcMarkGprRegNpt(REG_RSI); + gcInfo.gcMarkGprRegNpt(REG_RDI); } // generate code do a switch statement based on a table of ip-relative offsets @@ -4429,7 +4433,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* node) GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, tmpReg, addr->GetRegNum(), 0); inst_JMP(EJ_jne, loop); - gcInfo.gcMarkRegSetNpt(genRegMask(addr->GetRegNum())); + gcInfo.gcMarkGprRegNpt(addr->GetRegNum()); genProduceReg(node); } return; @@ -5360,7 +5364,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) GetEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node->TypeGet()), dstReg, baseReg, tmpReg, scale, static_cast(node->gtElemOffset)); - gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(base->gtGetGprRegMask()); genProduceReg(node); } @@ -5726,10 +5730,10 @@ void CodeGen::genCodeForSwap(GenTreeOp* tree) // FP swap is not yet implemented (and should have NYI'd in LSRA) assert(varTypeUsesIntReg(type1)); - regNumber oldOp1Reg = lcl1->GetRegNum(); - regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); - regNumber oldOp2Reg = lcl2->GetRegNum(); - regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); + regNumber oldOp1Reg = lcl1->GetRegNum(); + regMaskGpr oldOp1RegMask = genRegMask(oldOp1Reg); + regNumber oldOp2Reg = lcl2->GetRegNum(); + regMaskGpr oldOp2RegMask = genRegMask(oldOp2Reg); // We don't call genUpdateVarReg because we don't have a tree node with the new register. varDsc1->SetRegNum(oldOp2Reg); @@ -6026,15 +6030,16 @@ void CodeGen::genCall(GenTreeCall* call) // We should not have GC pointers in killed registers live around the call. // GC info for arg registers were cleared when consuming arg nodes above // and LSRA should ensure it for other trashed registers. 
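// Note: killMask is an AllRegsMask here, so the GC liveness checks below use
// killMask.IsGprMaskPresent(gcInfo.gcRegGCrefSetCur) / IsGprMaskPresent(gcInfo.gcRegByrefSetCur)
// rather than a raw bitwise AND; the gcref/byref sets themselves stay gpr-only masks.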
- regMaskTP killMask = RBM_CALLEE_TRASH; + AllRegsMask killMask = compiler->AllRegsMask_CALLEE_TRASH; + if (call->IsHelperCall()) { CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); killMask = compiler->compHelperCallKillSet(helpFunc); } - assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); - assert((gcInfo.gcRegByrefSetCur & killMask) == 0); + assert(!killMask.IsGprMaskPresent(gcInfo.gcRegGCrefSetCur)); + assert(!killMask.IsGprMaskPresent(gcInfo.gcRegByrefSetCur)); #endif var_types returnType = call->TypeGet(); @@ -6115,7 +6120,7 @@ void CodeGen::genCall(GenTreeCall* call) // However, for minopts or debuggable code, we keep it live to support managed return value debugging. if ((call->gtNext == nullptr) && compiler->opts.OptimizationEnabled()) { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); + gcInfo.gcMarkGprRegNpt(REG_INTRET); } #if defined(DEBUG) && defined(TARGET_X86) @@ -6548,9 +6553,12 @@ void CodeGen::genJmpMethod(GenTree* jmp) // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. // Therefore manually update life of varDsc->GetRegNum(). - regMaskTP tempMask = varDsc->lvRegMask(); - regSet.RemoveMaskVars(tempMask); - gcInfo.gcMarkRegSetNpt(tempMask); + regMaskOnlyOne tempMask = varDsc->lvRegMask(); + regSet.RemoveMaskVars(varDsc->TypeGet(), tempMask); + if (varTypeUsesIntReg(varDsc)) + { + gcInfo.gcMarkRegSetNpt(tempMask); + } if (compiler->lvaIsGCTracked(varDsc)) { #ifdef DEBUG @@ -6575,8 +6583,8 @@ void CodeGen::genJmpMethod(GenTree* jmp) #endif // Next move any un-enregistered register arguments back to their register. - regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. - unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + regMaskGpr fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. for (varNum = 0; varNum < compiler->info.compArgsCount; varNum++) { varDsc = compiler->lvaGetDesc(varNum); @@ -6625,7 +6633,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (type0 != TYP_UNKNOWN) { GetEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->GetArgReg(), varNum, offset0); - regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetArgReg())); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(varDsc->GetArgReg())); gcInfo.gcMarkRegPtrVal(varDsc->GetArgReg(), type0); } @@ -6633,7 +6641,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) { GetEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->GetOtherArgReg(), varNum, offset1); - regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetOtherArgReg())); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(varDsc->GetOtherArgReg())); gcInfo.gcMarkRegPtrVal(varDsc->GetOtherArgReg(), type1); } @@ -6679,7 +6687,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) // expecting it. Therefore manually update life of argReg. Note that GT_JMP marks the end of the // basic block and after which reg life and gc info will be recomputed for the new block in // genCodeForBBList(). 
- regSet.AddMaskVars(genRegMask(argReg)); + regSet.AddMaskVars(varDsc->TypeGet(), genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); if (compiler->lvaIsGCTracked(varDsc)) { @@ -6749,14 +6757,14 @@ void CodeGen::genJmpMethod(GenTree* jmp) assert(compiler->info.compIsVarArgs); assert(firstArgVarNum != BAD_VAR_NUM); - regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; + regMaskGpr remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; if (remainingIntArgMask != RBM_NONE) { GetEmitter()->emitDisableGC(); for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) { - regNumber argReg = intArgRegs[argNum]; - regMaskTP argRegMask = genRegMask(argReg); + regNumber argReg = intArgRegs[argNum]; + regMaskGpr argRegMask = genRegMask(argReg); if ((remainingIntArgMask & argRegMask) != 0) { @@ -8351,7 +8359,9 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) regNumber simdTmpReg = REG_NA; if (putArgStk->AvailableTempRegCount() != 0) { - regMaskTP rsvdRegs = putArgStk->gtRsvdRegs; + regMaskOnlyOne rsvdRegs = putArgStk->gtRsvdRegs; + assert(compiler->IsOnlyOneRegMask(rsvdRegs)); + if ((rsvdRegs & RBM_ALLINT) != 0) { intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT); @@ -9050,10 +9060,10 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, void* addr = nullptr; void* pAddr = nullptr; - emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; - addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); - regNumber callTarget = REG_NA; - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + CONSTREF_AllRegsMask killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); if (!addr) { @@ -9082,16 +9092,16 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, { // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. - callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; - regMaskTP callTargetMask = genRegMask(callTargetReg); - noway_assert((callTargetMask & killMask) == callTargetMask); + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + regMaskGpr callTargetMask = genRegMask(callTargetReg); + noway_assert((callTargetMask & killMask.gprRegs()) == callTargetMask); } else { // The call target must not overwrite any live variable, though it may not be in the // kill set for the call. - regMaskTP callTargetMask = genRegMask(callTargetReg); - noway_assert((callTargetMask & regSet.GetMaskVars()) == RBM_NONE); + regMaskGpr callTargetMask = genRegMask(callTargetReg); + noway_assert((callTargetMask & regSet.GetGprMaskVars()) == RBM_NONE); } #endif @@ -9505,7 +9515,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) // If initReg is trashed, either because it was an arg to the enter // callback, or because the enter callback itself trashes it, then it needs // to be zero'ed again before using. 
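// Note: the check below builds the trash set as an AllRegsMask: it starts from
// AllRegsMask_PROFILER_ENTER_TRASH, folds the gpr argument registers in via
// AddRegMaskForType(..., TYP_INT), and then tests initReg with IsRegNumInMask.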
- if (((RBM_PROFILER_ENTER_TRASH | RBM_ARG_0 | RBM_ARG_1) & genRegMask(initReg)) != 0) + AllRegsMask profileEnterTrash = compiler->AllRegsMask_PROFILER_ENTER_TRASH; + profileEnterTrash.AddRegMaskForType((RBM_ARG_0 | RBM_ARG_1), TYP_INT); + if (profileEnterTrash.IsRegNumInMask(initReg)) { *pInitRegZeroed = false; } @@ -9544,7 +9556,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) // If initReg is trashed, either because it was an arg to the enter // callback, or because the enter callback itself trashes it, then it needs // to be zero'ed again before using. - if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1) & genRegMask(initReg)) != 0) + AllRegsMask profileEnterTrash = compiler->AllRegsMask_PROFILER_ENTER_TRASH; + profileEnterTrash.AddRegMaskForType((RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1), TYP_INT); + if (profileEnterTrash.IsRegNumInMask(initReg)) { *pInitRegZeroed = false; } @@ -9584,8 +9598,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // registers that profiler callback kills. if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaGetDesc(compiler->info.compThisArg)->lvIsInReg()) { - regMaskTP thisPtrMask = genRegMask(compiler->lvaGetDesc(compiler->info.compThisArg)->GetRegNum()); - noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0); + regNumber thisPtrReg = compiler->lvaGetDesc(compiler->info.compThisArg)->GetRegNum(); + noway_assert(!compiler->AllRegsMask_PROFILER_LEAVE_TRASH.IsRegNumInMask(thisPtrReg)); } // At this point return value is computed and stored in RAX or XMM0. @@ -9699,13 +9713,12 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() // Figure out which set of int callee saves was already saved by Tier0. // Emit appropriate unwind. // - PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - regMaskTP tier0IntCalleeSaves = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED; + PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo; + regMaskGpr tier0IntCalleeSaves = patchpointInfo->CalleeSaveGprRegisters() & RBM_OSR_INT_CALLEE_SAVED; int const tier0IntCalleeSaveUsedSize = genCountBits(tier0IntCalleeSaves) * REGSIZE_BYTES; JITDUMP("--OSR--- tier0 has already saved "); - JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves)); + JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves, RBM_NONE)); JITDUMP("\n"); // We must account for the Tier0 callee saves. @@ -9723,7 +9736,7 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() // for (regNumber reg = REG_INT_LAST; tier0IntCalleeSaves != RBM_NONE; reg = REG_PREV(reg)) { - regMaskTP regBit = genRegMask(reg); + regMaskGpr regBit = genRegMask(reg); if ((regBit & tier0IntCalleeSaves) != 0) { @@ -9768,7 +9781,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not // here. - regMaskTP rsPushRegs = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask(); + regMaskGpr rsPushRegs = regSet.rsGetModifiedGprRegsMask() & RBM_OSR_INT_CALLEE_SAVED; #if ETW_EBP_FRAMED if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) @@ -9779,19 +9792,18 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() // Figure out which set of int callee saves still needs saving. 
// - PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - regMaskTP tier0IntCalleeSaves = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED; + PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo; + regMaskGpr tier0IntCalleeSaves = patchpointInfo->CalleeSaveGprRegisters() & RBM_OSR_INT_CALLEE_SAVED; unsigned const tier0IntCalleeSaveUsedSize = genCountBits(tier0IntCalleeSaves) * REGSIZE_BYTES; - regMaskTP const osrIntCalleeSaves = rsPushRegs & RBM_OSR_INT_CALLEE_SAVED; - regMaskTP osrAdditionalIntCalleeSaves = osrIntCalleeSaves & ~tier0IntCalleeSaves; + regMaskGpr const osrIntCalleeSaves = rsPushRegs & RBM_OSR_INT_CALLEE_SAVED; + regMaskGpr osrAdditionalIntCalleeSaves = osrIntCalleeSaves & ~tier0IntCalleeSaves; JITDUMP("---OSR--- int callee saves are "); - JITDUMPEXEC(dspRegMask(osrIntCalleeSaves)); + JITDUMPEXEC(dspRegMask(osrIntCalleeSaves, RBM_NONE)); JITDUMP("; tier0 already saved "); - JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves)); + JITDUMPEXEC(dspRegMask(tier0IntCalleeSaves, RBM_NONE)); JITDUMP("; so only saving "); - JITDUMPEXEC(dspRegMask(osrAdditionalIntCalleeSaves)); + JITDUMPEXEC(dspRegMask(osrAdditionalIntCalleeSaves, RBM_NONE)); JITDUMP("\n"); // These remaining callee saves will be stored in the Tier0 callee save area @@ -9807,14 +9819,14 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() // The tier0 frame is always an RBP frame, so the OSR method should never need to save RBP. // - assert((tier0CalleeSaves & RBM_FPBASE) == RBM_FPBASE); + assert((tier0IntCalleeSaves & RBM_FPBASE) == RBM_FPBASE); assert((osrAdditionalIntCalleeSaves & RBM_FPBASE) == RBM_NONE); // The OSR method must use MOVs to save additional callee saves. // for (regNumber reg = REG_INT_LAST; osrAdditionalIntCalleeSaves != RBM_NONE; reg = REG_PREV(reg)) { - regMaskTP regBit = genRegMask(reg); + regMaskGpr regBit = genRegMask(reg); if ((regBit & osrAdditionalIntCalleeSaves) != 0) { @@ -9849,7 +9861,7 @@ void CodeGen::genPushCalleeSavedRegisters() // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not // here. - regMaskTP rsPushRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask(); + regMaskGpr rsPushRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask(); #if ETW_EBP_FRAMED if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) @@ -9869,7 +9881,7 @@ void CodeGen::genPushCalleeSavedRegisters() { printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); - dspRegMask(rsPushRegs); + dspRegMask(rsPushRegs, RBM_NONE); printf("\n"); assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); } @@ -9879,7 +9891,7 @@ void CodeGen::genPushCalleeSavedRegisters() // and all the other code that expects it to be in this order. 
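For the remaining-saves computation above, the key step is a mask difference: the callee-saved GPRs the OSR method modifies, minus what Tier0 already pushed. A hedged sketch with plain uint64_t masks standing in for regMaskGpr:

#include <cstdint>
#include <cstdio>

int main()
{
    // Illustrative bit assignments only.
    uint64_t osrIntCalleeSaves   = 0xE0; // callee-saved GPRs modified by the OSR method
    uint64_t tier0IntCalleeSaves = 0x60; // already saved by the Tier0 prolog

    // AND-NOT gives the registers the OSR prolog must still store itself.
    uint64_t osrAdditionalIntCalleeSaves = osrIntCalleeSaves & ~tier0IntCalleeSaves;

    printf("still to save: 0x%llx\n", (unsigned long long)osrAdditionalIntCalleeSaves); // 0x80
    return 0;
}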
for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg)) { - regMaskTP regBit = genRegMask(reg); + regMaskGpr regBit = genRegMask(reg); if ((regBit & rsPushRegs) != 0) { @@ -9907,10 +9919,10 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) // if (doesSupersetOfNormalPops) { - regMaskTP rsPopRegs = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask(); - regMaskTP tier0CalleeSaves = - ((regMaskTP)compiler->info.compPatchpointInfo->CalleeSaveRegisters()) & RBM_OSR_INT_CALLEE_SAVED; - regMaskTP additionalCalleeSaves = rsPopRegs & ~tier0CalleeSaves; + regMaskGpr rsPopRegs = regSet.rsGetModifiedGprRegsMask() & RBM_OSR_INT_CALLEE_SAVED; + regMaskGpr tier0CalleeSaves = + ((regMaskGpr)compiler->info.compPatchpointInfo->CalleeSaveGprRegisters()) & RBM_OSR_INT_CALLEE_SAVED; + regMaskGpr additionalCalleeSaves = rsPopRegs & ~tier0CalleeSaves; // Registers saved by the OSR prolog. // @@ -9927,7 +9939,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) // Registers saved by a normal prolog // - regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask(); + regMaskGpr rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask(); const unsigned popCount = genPopCalleeSavedRegistersFromMask(rsPopRegs); noway_assert(compiler->compCalleeRegsPushed == popCount); } @@ -9936,8 +9948,10 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) // genPopCalleeSavedRegistersFromMask: pop specified set of callee saves // in the "standard" order // -unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs) +unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskGpr rsPopRegs) { + assert(compiler->IsGprRegMask(rsPopRegs)); + unsigned popCount = 0; if ((rsPopRegs & RBM_EBX) != 0) { @@ -10040,10 +10054,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); printf(", gcRegGCrefSetCur="); printRegMaskInt(gcInfo.gcRegGCrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispGprRegSet(gcInfo.gcRegGCrefSetCur); printf(", gcRegByrefSetCur="); printRegMaskInt(gcInfo.gcRegByrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispGprRegSet(gcInfo.gcRegByrefSetCur); printf("\n"); } #endif @@ -10112,15 +10126,14 @@ void CodeGen::genFnEpilog(BasicBlock* block) // PatchpointInfo* const patchpointInfo = compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - regMaskTP const tier0IntCalleeSaves = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED; - regMaskTP const osrIntCalleeSaves = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask(); - regMaskTP const allIntCalleeSaves = osrIntCalleeSaves | tier0IntCalleeSaves; - unsigned const tier0FrameSize = patchpointInfo->TotalFrameSize() + REGSIZE_BYTES; - unsigned const tier0IntCalleeSaveUsedSize = genCountBits(allIntCalleeSaves) * REGSIZE_BYTES; - unsigned const osrCalleeSaveSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES; - unsigned const osrFramePointerSize = isFramePointerUsed() ? 
REGSIZE_BYTES : 0; - unsigned const osrAdjust = + regMaskGpr const tier0IntCalleeSaves = patchpointInfo->CalleeSaveGprRegisters() & RBM_OSR_INT_CALLEE_SAVED; + regMaskGpr const osrIntCalleeSaves = regSet.rsGetModifiedGprRegsMask() & RBM_OSR_INT_CALLEE_SAVED; + regMaskGpr const allIntCalleeSaves = osrIntCalleeSaves | tier0IntCalleeSaves; + unsigned const tier0FrameSize = patchpointInfo->TotalFrameSize() + REGSIZE_BYTES; + unsigned const tier0IntCalleeSaveUsedSize = genCountBits(allIntCalleeSaves) * REGSIZE_BYTES; + unsigned const osrCalleeSaveSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES; + unsigned const osrFramePointerSize = isFramePointerUsed() ? REGSIZE_BYTES : 0; + unsigned const osrAdjust = tier0FrameSize - tier0IntCalleeSaveUsedSize + osrCalleeSaveSize + osrFramePointerSize; JITDUMP("OSR epilog adjust factors: tier0 frame %u, tier0 callee saves -%u, osr callee saves %u, osr " @@ -10141,7 +10154,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed) { inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); - regSet.verifyRegUsed(REG_ECX); + regSet.verifyGprRegUsed(REG_ECX); } else #endif // TARGET_X86 @@ -10206,7 +10219,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES] needLea = true; } - else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED)) + else if (!regSet.rsRegsModified(compiler->AllRegsMask_CALLEE_SAVED)) { if (compiler->compLclFrameSize != 0) { @@ -10229,7 +10242,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) { // "pop ecx" will make ESP point to the callee-saved registers inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); - regSet.verifyRegUsed(REG_ECX); + regSet.verifyGprRegUsed(REG_ECX); } #endif // TARGET_X86 else @@ -10368,7 +10381,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) indCallReg = REG_RAX; addr = nullptr; instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); - regSet.verifyRegUsed(indCallReg); + regSet.verifyGprRegUsed(indCallReg); } } else @@ -10553,7 +10566,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // Callee saved int registers are pushed to stack. genPushCalleeSavedRegisters(); - regMaskTP maskArgRegsLiveIn; + regMaskGpr maskArgRegsLiveIn; if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) { maskArgRegsLiveIn = RBM_ARG_0; @@ -10583,7 +10596,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) GetEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset); - regSet.verifyRegUsed(REG_FPBASE); + regSet.verifyGprRegUsed(REG_FPBASE); GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset); @@ -10642,7 +10655,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert(isFramePointerUsed()); assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be // finalized - assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized + assert(compiler->compCalleeFPRegsSavedMask != (regMaskFloat)-1); // The float registers to be preserved is finalized // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize, // that's ok, because we're figuring out an offset in the parent frame. @@ -11100,10 +11113,10 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // funclet frames: this will be FuncletInfo.fiSpDelta. 
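For the OSR epilog above, the stack adjustment combines the Tier0 frame size, the callee-save space Tier0 already consumed, the OSR method's own callee-save area, and an optional frame-pointer slot. A worked example of that formula with made-up sizes; only the shape of the computation comes from the source.

#include <cstdio>

int main()
{
    const unsigned REGSIZE_BYTES = 8;

    unsigned tier0FrameSize      = 0x90 + REGSIZE_BYTES; // TotalFrameSize() plus the return address slot
    unsigned allIntCalleeSaves   = 4;                     // union of Tier0 and OSR int callee saves
    unsigned tier0CalleeSaveUsed = allIntCalleeSaves * REGSIZE_BYTES;
    unsigned osrCalleeSaveSize   = 3 * REGSIZE_BYTES;     // compCalleeRegsPushed * REGSIZE_BYTES
    unsigned osrFramePointerSize = REGSIZE_BYTES;         // frame pointer in use

    unsigned osrAdjust = tier0FrameSize - tier0CalleeSaveUsed + osrCalleeSaveSize + osrFramePointerSize;
    printf("SP adjustment for the OSR epilog: %u bytes\n", osrAdjust); // 152
    return 0;
}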
void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) { - regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; + regMaskFloat regMask = compiler->compCalleeFPRegsSavedMask; // Only callee saved floating point registers should be in regMask - assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask); + assert(compiler->IsFloatRegMask(regMask)); if (GetEmitter()->ContainsCallNeedingVzeroupper() && !GetEmitter()->Contains256bitOrMoreAVX()) { @@ -11141,7 +11154,7 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg)) { - regMaskTP regBit = genRegMask(reg); + regMaskFloat regBit = genRegMask(reg); if ((regBit & regMask) != 0) { // ABI requires us to preserve lower 128-bits of YMM register. @@ -11163,10 +11176,10 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) // funclet frames: this will be FuncletInfo.fiSpDelta. void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) { - regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; + regMaskFloat regMask = compiler->compCalleeFPRegsSavedMask; // Only callee saved floating point registers should be in regMask - assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask); + assert(compiler->IsFloatRegMask(regMask)); if (GetEmitter()->Contains256bitOrMoreAVX()) { @@ -11215,7 +11228,7 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg)) { - regMaskTP regBit = genRegMask(reg); + regMaskFloat regBit = genRegMask(reg); if ((regBit & regMask) != 0) { // ABI requires us to restore lower 128-bits of YMM register. diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index c834a4f6f27810..9a850538649b07 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2019,6 +2019,201 @@ void Compiler::compInit(ArenaAllocator* pAlloc, new (&Metrics, jitstd::placement_t()) JitMetrics(); } +void Compiler::compInitAllRegsMask() +{ +#if defined(TARGET_AMD64) + rbmAllFloat = RBM_ALLFLOAT_INIT; + rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT; + cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT; + + if (canUseEvexEncoding()) + { + rbmAllFloat |= RBM_HIGHFLOAT; + rbmFltCalleeTrash |= RBM_HIGHFLOAT; + cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT; + } +#endif // TARGET_AMD64 + +#if defined(TARGET_XARCH) + rbmAllMask = RBM_ALLMASK_INIT; + rbmMskCalleeTrash = RBM_MSK_CALLEE_TRASH_INIT; + cntCalleeTrashMask = CNT_CALLEE_TRASH_MASK_INIT; + + if (canUseEvexEncoding()) + { + rbmAllMask |= RBM_ALLMASK_EVEX; + rbmMskCalleeTrash |= RBM_MSK_CALLEE_TRASH_EVEX; + cntCalleeTrashMask += CNT_CALLEE_TRASH_MASK_EVEX; + } +#endif // TARGET_XARCH + + AllRegsMask_NONE = AllRegsMask(); +#ifdef HAS_MORE_THAN_64_REGISTERS + AllRegsMask_CALLEE_SAVED = AllRegsMask(RBM_INT_CALLEE_SAVED, RBM_FLT_CALLEE_SAVED, RBM_MSK_CALLEE_SAVED); + AllRegsMask_CALLEE_TRASH = AllRegsMask(RBM_INT_CALLEE_TRASH, RBM_FLT_CALLEE_TRASH, RBM_MSK_CALLEE_TRASH); +#else + AllRegsMask_CALLEE_SAVED = AllRegsMask(RBM_CALLEE_SAVED); + AllRegsMask_CALLEE_TRASH = AllRegsMask(RBM_CALLEE_TRASH); +#endif + +#if defined(TARGET_X86) + + AllRegsMask_CALLEE_TRASH_NOGC = GprRegsMask(RBM_EDX); + // The registers trashed by profiler enter/leave/tailcall hook + // See vm\i386\asmhelpers.asm for more details. 
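The compInitAllRegsMask logic above widens the float masks when EVEX encoding is available. A minimal sketch of that pattern; the RBM_* values and counts below are illustrative placeholders, not the real target definitions.

#include <cstdint>

struct FloatRegConfig
{
    uint64_t rbmAllFloat;
    uint64_t rbmFltCalleeTrash;
    unsigned cntCalleeTrashFloat;
};

FloatRegConfig initFloatRegs(bool canUseEvex)
{
    const uint64_t RBM_ALLFLOAT_INIT           = 0x0000FFFF; // xmm0-xmm15
    const uint64_t RBM_HIGHFLOAT               = 0xFFFF0000; // xmm16-xmm31
    const uint64_t RBM_FLT_CALLEE_TRASH_INIT   = 0x0000003F;
    const unsigned CNT_CALLEE_TRASH_FLOAT_INIT = 6;
    const unsigned CNT_CALLEE_TRASH_HIGHFLOAT  = 16;

    FloatRegConfig cfg{RBM_ALLFLOAT_INIT, RBM_FLT_CALLEE_TRASH_INIT, CNT_CALLEE_TRASH_FLOAT_INIT};
    if (canUseEvex)
    {
        // The upper registers become allocatable and are all caller-saved.
        cfg.rbmAllFloat |= RBM_HIGHFLOAT;
        cfg.rbmFltCalleeTrash |= RBM_HIGHFLOAT;
        cfg.cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
    }
    return cfg;
}

int main()
{
    return initFloatRegs(true).cntCalleeTrashFloat == 22 ? 0 : 1;
}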
+ AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask_NONE; + AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask(); + AllRegsMask_PROFILER_TAILCALL_TRASH = (AllRegsMask_CALLEE_TRASH & GprRegsMask(~RBM_ARG_REGS)); + +#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS + AllRegsMask_CALLEE_TRASH_WRITEBARRIER = GprRegsMask(RBM_EAX | RBM_EDX); +#else // !FEATURE_USE_ASM_GC_WRITE_BARRIERS + AllRegsMask_CALLEE_TRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH; +#endif // !FEATURE_USE_ASM_GC_WRITE_BARRIERS + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = GprRegsMask(RBM_EDX); + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_ESI | RBM_EDI | RBM_ECX); + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that RDI and RSI are still valid byref pointers after this helper call, despite their value being changed. + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_ECX); + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + AllRegsMask_STOP_FOR_GC_TRASH = AllRegsMask_CALLEE_TRASH; + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. On x86, this helper has a custom calling + // convention that takes EDI as argument (but doesn't trash it), trashes EAX, and returns ESI. + AllRegsMask_INIT_PINVOKE_FRAME_TRASH = GprRegsMask(RBM_PINVOKE_SCRATCH | RBM_PINVOKE_TCB); + + AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_INT_CALLEE_TRASH & ~RBM_ECX); + AllRegsMask_EDX = GprRegsMask(RBM_EDX); + +#elif defined(TARGET_AMD64) + AllRegsMask_CALLEE_TRASH_NOGC = AllRegsMask_CALLEE_TRASH; + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_TRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_NOGC; + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_NOGC; + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF); + AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF = GprRegsMask(RBM_CALLEE_TRASH_WRITEBARRIER_BYREF); + +#ifdef UNIX_AMD64_ABI + + // See vm\amd64\unixasmhelpers.S for more details. + // + // On Unix a struct of size >=9 and <=16 bytes in size is returned in two return registers. + // The return registers could be any two from the set { RAX, RDX, XMM0, XMM1 }. + // STOP_FOR_GC helper preserves all the 4 possible return registers. + AllRegsMask_STOP_FOR_GC_TRASH = + AllRegsMask(RBM_INT_CALLEE_TRASH & ~(RBM_INTRET | RBM_INTRET_1), + (RBM_FLT_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_FLOATRET_1)), RBM_MSK_CALLEE_TRASH); + AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask((RBM_INT_CALLEE_TRASH & ~RBM_ARG_REGS), + (RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS), RBM_MSK_CALLEE_TRASH); +#else + // See vm\amd64\asmhelpers.asm for more details. + AllRegsMask_STOP_FOR_GC_TRASH = + AllRegsMask((RBM_INT_CALLEE_TRASH & ~RBM_INTRET), (RBM_FLT_CALLEE_TRASH & ~RBM_FLOATRET), RBM_MSK_CALLEE_TRASH); + AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask_CALLEE_TRASH; +#endif // UNIX_AMD64_ABI + + AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask_STOP_FOR_GC_TRASH; + AllRegsMask_PROFILER_TAILCALL_TRASH = AllRegsMask_PROFILER_LEAVE_TRASH; + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
+ AllRegsMask_INIT_PINVOKE_FRAME_TRASH = AllRegsMask_CALLEE_TRASH; + AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_VALIDATE_INDIRECT_CALL_TRASH); + +#elif defined(TARGET_ARM) + + AllRegsMask_CALLEE_TRASH_NOGC = GprRegsMask(RBM_CALLEE_TRASH_NOGC); + AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask_NONE; + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_TRASH_WRITEBARRIER = GprRegsMask(RBM_R0 | RBM_R3 | RBM_LR | RBM_DEFAULT_HELPER_CALL_TARGET); + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_WRITEBARRIER; + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF = + GprRegsMask(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC); + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that r0 and r1 are still valid byref pointers after this helper call, despite their value being changed. + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = AllRegsMask_CALLEE_TRASH_NOGC; + AllRegsMask_PROFILER_RET_SCRATCH = GprRegsMask(RBM_R2); + // While REG_PROFILER_RET_SCRATCH is not trashed by the method, the register allocator must + // consider it killed by the return. + AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask_PROFILER_RET_SCRATCH; + AllRegsMask_PROFILER_TAILCALL_TRASH = AllRegsMask_NONE; + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper (JIT_RareDisableHelper). + // See vm\arm\amshelpers.asm for more details. + AllRegsMask_STOP_FOR_GC_TRASH = + AllRegsMask((RBM_INT_CALLEE_TRASH & ~(RBM_LNGRET | RBM_R7 | RBM_R8 | RBM_R11)), + (RBM_FLT_CALLEE_TRASH & ~(RBM_DOUBLERET | RBM_F2 | RBM_F3 | RBM_F4 | RBM_F5 | RBM_F6 | RBM_F7))); + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. + AllRegsMask_INIT_PINVOKE_FRAME_TRASH = + (AllRegsMask_CALLEE_TRASH | GprRegsMask(RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH)); + + AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_INT_CALLEE_TRASH); + +#elif defined(TARGET_ARM64) + + AllRegsMask_CALLEE_TRASH_NOGC = GprRegsMask(RBM_CALLEE_TRASH_NOGC); + AllRegsMask_PROFILER_ENTER_TRASH = AllRegsMask((RBM_INT_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_ARG_RET_BUFF | RBM_FP)), + (RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS), RBM_MSK_CALLEE_TRASH); + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_TRASH_WRITEBARRIER = GprRegsMask(RBM_R14 | RBM_CALLEE_TRASH_NOGC); + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER = AllRegsMask_CALLEE_TRASH_NOGC; + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF = + GprRegsMask(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC); + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that x13 and x14 are still valid byref pointers after this helper call, despite their value being changed. 
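Several of the masks above are built by stripping the possible return registers out of the per-class callee-trash sets before combining them. A standalone sketch of that composition, using a three-field struct in place of AllRegsMask and invented constants:

#include <cstdint>

// gpr/flt/msk stand in for the three register files tracked by AllRegsMask.
struct RegsMask
{
    uint64_t gpr;
    uint64_t flt;
    uint64_t msk;
};

int main()
{
    const uint64_t RBM_INT_CALLEE_TRASH = 0x0FFF;
    const uint64_t RBM_FLT_CALLEE_TRASH = 0x003F;
    const uint64_t RBM_MSK_CALLEE_TRASH = 0x00FE;
    const uint64_t RBM_INTRET           = 0x0001; // e.g. rax
    const uint64_t RBM_INTRET_1         = 0x0002; // second struct-return register
    const uint64_t RBM_FLOATRET         = 0x0001; // e.g. xmm0
    const uint64_t RBM_FLOATRET_1       = 0x0002; // e.g. xmm1

    // The stop-for-GC helper must preserve every possible return register,
    // so those bits are removed from the trash set class by class.
    RegsMask stopForGcTrash{RBM_INT_CALLEE_TRASH & ~(RBM_INTRET | RBM_INTRET_1),
                            RBM_FLT_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_FLOATRET_1),
                            RBM_MSK_CALLEE_TRASH};

    return ((stopForGcTrash.gpr & (RBM_INTRET | RBM_INTRET_1)) == 0) ? 0 : 1;
}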
+ AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF = AllRegsMask_CALLEE_TRASH_NOGC; + + AllRegsMask_PROFILER_LEAVE_TRASH = AllRegsMask_PROFILER_ENTER_TRASH; + AllRegsMask_PROFILER_TAILCALL_TRASH = AllRegsMask_PROFILER_ENTER_TRASH; + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + AllRegsMask_STOP_FOR_GC_TRASH = AllRegsMask_CALLEE_TRASH; + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. + AllRegsMask_INIT_PINVOKE_FRAME_TRASH = AllRegsMask_CALLEE_TRASH; + AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH = GprRegsMask(RBM_VALIDATE_INDIRECT_CALL_TRASH); +#endif + +#if defined(TARGET_ARM) + // profiler scratch remains gc live + AllRegsMask_PROF_FNC_LEAVE = AllRegsMask_PROFILER_LEAVE_TRASH & ~AllRegsMask_PROFILER_RET_SCRATCH; +#else + AllRegsMask_PROF_FNC_LEAVE = AllRegsMask_PROFILER_LEAVE_TRASH; +#endif // TARGET_ARM + +#ifdef TARGET_XARCH + + // Make sure we copy the register info and initialize the + // trash regs after the underlying fields are initialized + + const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{ +#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, +#include "typelist.h" +#undef DEF_TP + }; + memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); + + if (codeGen != nullptr) + { + codeGen->CopyRegisterInfo(); + } +#endif // TARGET_XARCH +} /***************************************************************************** * * Destructor @@ -3456,44 +3651,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters(); } #endif // defined(DEBUG) && defined(TARGET_ARM64) - -#if defined(TARGET_AMD64) - rbmAllFloat = RBM_ALLFLOAT_INIT; - rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT; - cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT; - - if (canUseEvexEncoding()) - { - rbmAllFloat |= RBM_HIGHFLOAT; - rbmFltCalleeTrash |= RBM_HIGHFLOAT; - cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT; - } -#endif // TARGET_AMD64 - -#if defined(TARGET_XARCH) - rbmAllMask = RBM_ALLMASK_INIT; - rbmMskCalleeTrash = RBM_MSK_CALLEE_TRASH_INIT; - cntCalleeTrashMask = CNT_CALLEE_TRASH_MASK_INIT; - - if (canUseEvexEncoding()) - { - rbmAllMask |= RBM_ALLMASK_EVEX; - rbmMskCalleeTrash |= RBM_MSK_CALLEE_TRASH_EVEX; - cntCalleeTrashMask += CNT_CALLEE_TRASH_MASK_EVEX; - } - - // Make sure we copy the register info and initialize the - // trash regs after the underlying fields are initialized - - const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{ -#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, -#include "typelist.h" -#undef DEF_TP - }; - memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); - - codeGen->CopyRegisterInfo(); -#endif // TARGET_XARCH } #ifdef DEBUG @@ -3735,6 +3892,24 @@ bool Compiler::compPromoteFewerStructs(unsigned lclNum) return rejectThisPromo; } +void Compiler::dumpRegMask(regMaskOnlyOne mask, var_types type) const +{ + if (varTypeUsesIntReg(type)) + { + dumpRegMask(AllRegsMask(mask, RBM_NONE, RBM_NONE)); + } + else if (varTypeUsesFloatReg(type)) + { + dumpRegMask(AllRegsMask(RBM_NONE, mask, RBM_NONE)); + } +#ifdef FEATURE_MASKED_HW_INTRINSICS + else + { + dumpRegMask(AllRegsMask(RBM_NONE, RBM_NONE, mask)); + } +#endif +} + //------------------------------------------------------------------------ // dumpRegMask: display a register mask. For well-known sets of registers, display a well-known token instead of // a potentially large number of registers. 
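The new dumpRegMask(regMaskOnlyOne, var_types) overload above routes a single-class mask into the matching component of the combined mask based on which register file the type uses. A hedged sketch of that dispatch; RegFile and toCombined are invented names used only for illustration.

#include <cstdint>
#include <cstdio>

enum class RegFile { Int, Float, Predicate };

struct RegsMask
{
    uint64_t gpr = 0, flt = 0, msk = 0;
};

// Place the single-class mask into the field that matches its register file.
RegsMask toCombined(uint64_t mask, RegFile file)
{
    RegsMask m;
    switch (file)
    {
        case RegFile::Int:       m.gpr = mask; break;
        case RegFile::Float:     m.flt = mask; break;
        case RegFile::Predicate: m.msk = mask; break;
    }
    return m;
}

int main()
{
    RegsMask m = toCombined(0x18, RegFile::Float);
    printf("gpr=%llx flt=%llx msk=%llx\n", (unsigned long long)m.gpr,
           (unsigned long long)m.flt, (unsigned long long)m.msk);
    return 0;
}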
@@ -3742,33 +3917,33 @@ bool Compiler::compPromoteFewerStructs(unsigned lclNum) // Arguments: // regs - The set of registers to display // -void Compiler::dumpRegMask(regMaskTP regs) const +void Compiler::dumpRegMask(CONSTREF_AllRegsMask mask) const { - if (regs == RBM_ALLINT) + if (mask.gprRegs() == RBM_ALLINT) { printf("[allInt]"); } - else if (regs == (RBM_ALLINT & ~RBM_FPBASE)) + else if (mask.gprRegs() == (RBM_ALLINT & ~RBM_FPBASE)) { printf("[allIntButFP]"); } - else if (regs == RBM_ALLFLOAT) + else if (mask.floatRegs(this) == RBM_ALLFLOAT) { printf("[allFloat]"); } - else if (regs == RBM_ALLDOUBLE) + else if (mask.floatRegs(this) == RBM_ALLDOUBLE) { printf("[allDouble]"); } -#ifdef TARGET_XARCH - else if (regs == RBM_ALLMASK) +#ifdef FEATURE_MASKED_HW_INTRINSICS + else if ((RBM_ALLMASK != RBM_NONE) && (mask.predicateRegs(this) == RBM_ALLMASK)) { printf("[allMask]"); } -#endif // TARGET_XARCH +#endif // FEATURE_MASKED_HW_INTRINSICS else { - dspRegMask(regs); + dspRegMask(mask); } } @@ -5926,11 +6101,11 @@ void Compiler::generatePatchpointInfo() // Record callee save registers. // Currently only needed for x64. // - regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask(); - rsPushRegs |= RBM_FPBASE; - patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs); + regMaskGpr rsPushGprRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask().gprRegs(); + rsPushGprRegs |= RBM_FPBASE; + patchpointInfo->SetCalleeSaveGprRegisters(rsPushGprRegs); JITDUMP("--OSR-- Tier0 callee saves: "); - JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters())); + JITDUMPEXEC(dspRegMask(patchpointInfo->CalleeSaveGprRegisters(), RBM_NONE)); JITDUMP("\n"); #endif @@ -6961,6 +7136,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, // compInitOptions will set the correct verbose flag. compInitOptions(compileFlags); + compInitAllRegsMask(); if (!compIsForInlining() && !opts.altJit && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT)) { @@ -9500,7 +9676,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * cVN, dVN : Display a ValueNum (call vnPrint()). * * The following don't require a Compiler* to work: - * dRegMask : Display a regMaskTP (call dspRegMask(mask)). + * dRegMask : Display a regMaskOnlyOne (call dspRegMask(mask)). * dBlockList : Display a BasicBlockList*. 
* * The following find an object in the IR and return it, as well as setting a global variable with the value that can @@ -10458,7 +10634,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn) cVN(JitTls::GetCompiler(), vn); } -JITDBGAPI void __cdecl dRegMask(regMaskTP mask) +JITDBGAPI void __cdecl dRegMask(regMaskOnlyOne mask) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called printf("===================================================================== dRegMask %u\n", sequenceNumber++); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 05b3e07ebf63d1..188cd2339c558e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1013,9 +1013,9 @@ class LclVarDsc return lvIsRegCandidate() && (GetRegNum() != REG_STK); } - regMaskTP lvRegMask() const + regMaskOnlyOne lvRegMask() const { - regMaskTP regMask = RBM_NONE; + regMaskOnlyOne regMask = RBM_NONE; if (GetRegNum() != REG_STK) { if (varTypeUsesFloatReg(this)) @@ -3796,9 +3796,9 @@ class Compiler VARSET_TP lvaLongVars; // set of long (64-bit) variables #endif VARSET_TP lvaFloatVars; // set of floating-point (32-bit and 64-bit) or SIMD variables -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS VARSET_TP lvaMaskVars; // set of mask variables -#endif // TARGET_XARCH +#endif // FEATURE_MASKED_HW_INTRINSICS unsigned lvaCurEpoch; // VarSets are relative to a specific set of tracked var indices. // It that changes, this changes. VarSets from different epochs @@ -3925,7 +3925,7 @@ class Compiler unsigned lvaGetMaxSpillTempSize(); #ifdef TARGET_ARM - bool lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask); + bool lvaIsPreSpilled(unsigned lclNum, regMaskGpr preSpillMask); #endif // TARGET_ARM void lvaAssignFrameOffsets(FrameLayoutState curState); void lvaFixVirtualFrameOffsets(); @@ -6774,11 +6774,11 @@ class Compiler int m_loopVarFPCount; int m_hoistedFPExprCount; -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS int m_loopVarInOutMskCount; int m_loopVarMskCount; int m_hoistedMskExprCount; -#endif // TARGET_XARCH +#endif // FEATURE_MASKED_HW_INTRINSICS // Get the VN cache for current loop VNSet* GetHoistedInCurLoop(Compiler* comp) @@ -8503,7 +8503,7 @@ class Compiler // Gets a register mask that represent the kill set for a helper call since // not all JIT Helper calls follow the standard ABI on the target architecture. - regMaskTP compHelperCallKillSet(CorInfoHelpFunc helper); + CONSTREF_AllRegsMask compHelperCallKillSet(CorInfoHelpFunc helper); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -8524,6 +8524,28 @@ class Compiler // Infrastructure functions: start/stop/reserve/emit. 
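compHelperCallKillSet above now returns CONSTREF_AllRegsMask, which suggests handing back a precomputed per-helper kill set by const reference rather than rebuilding and copying a mask on every query; treat that reading as an assumption. A minimal sketch of the pattern with invented types:

#include <cstdint>

struct RegsMask
{
    uint64_t gpr = 0, flt = 0, msk = 0;
};

enum HelperKind { HK_WriteBarrier, HK_StopForGC, HK_Count };

class KillSets
{
    RegsMask m_sets[HK_Count];

public:
    KillSets()
    {
        m_sets[HK_WriteBarrier] = {0x0005, 0, 0}; // made-up GPR kills
        m_sets[HK_StopForGC]    = {0x0FF0, 0x3F, 0};
    }

    // Const reference: callers can AND/test the mask but never copy or mutate it.
    const RegsMask& killSet(HelperKind h) const
    {
        return m_sets[h];
    }
};

int main()
{
    KillSets sets;
    const RegsMask& kill = sets.killSet(HK_StopForGC);
    return (kill.gpr != 0) ? 0 : 1;
}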
// + bool IsGprRegMask(regMaskTP regMask) + { + return (regMask & RBM_ALLFLOAT) == RBM_NONE; + } + + bool IsFloatRegMask(regMaskTP regMask) + { + return (regMask & ~RBM_ALLFLOAT) == RBM_NONE; + } + +#ifdef FEATURE_MASKED_HW_INTRINSICS + bool IsPredicateRegMask(regMaskTP regMask) + { + return (regMask & ~RBM_ALLMASK) == RBM_NONE; + } +#endif // FEATURE_MASKED_HW_INTRINSICS + + bool IsOnlyOneRegMask(regMaskTP regMask) + { + return (regMask == RBM_NONE) || (IsGprRegMask(regMask) != IsFloatRegMask(regMask)); + } + void unwindBegProlog(); void unwindEndProlog(); void unwindBegEpilog(); @@ -8542,10 +8564,10 @@ class Compiler void unwindSaveReg(regNumber reg, unsigned offset); #if defined(TARGET_ARM) - void unwindPushMaskInt(regMaskTP mask); - void unwindPushMaskFloat(regMaskTP mask); - void unwindPopMaskInt(regMaskTP mask); - void unwindPopMaskFloat(regMaskTP mask); + void unwindPushMaskInt(regMaskGpr mask); + void unwindPushMaskFloat(regMaskFloat mask); + void unwindPopMaskInt(regMaskGpr mask); + void unwindPopMaskFloat(regMaskFloat mask); void unwindBranch16(); // The epilog terminates with a 16-bit branch (e.g., "bx lr") void unwindNop(unsigned codeSizeInBytes); // Generate unwind NOP code. 'codeSizeInBytes' is 2 or 4 bytes. Only // called via unwindPadding(). @@ -8618,8 +8640,8 @@ class Compiler #endif // UNIX_AMD64_ABI #elif defined(TARGET_ARM) - void unwindPushPopMaskInt(regMaskTP mask, bool useOpsize16); - void unwindPushPopMaskFloat(regMaskTP mask); + void unwindPushPopMaskInt(regMaskGpr mask, bool useOpsize16); + void unwindPushPopMaskFloat(regMaskFloat mask); #endif // TARGET_ARM @@ -8628,7 +8650,7 @@ class Compiler void createCfiCode(FuncInfoDsc* func, UNATIVE_OFFSET codeOffset, UCHAR opcode, short dwarfReg, INT offset = 0); void unwindPushPopCFI(regNumber reg); void unwindBegPrologCFI(); - void unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat); + void unwindPushPopMaskCFI(regMaskOnlyOne regMask, bool isFloat); void unwindAllocStackCFI(unsigned size); void unwindSetFrameRegCFI(regNumber reg, unsigned offset); void unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdCode); @@ -10502,7 +10524,7 @@ class Compiler #if defined(TARGET_XARCH) // Mask of callee saved float regs on stack. - regMaskTP compCalleeFPRegsSavedMask; + regMaskFloat compCalleeFPRegsSavedMask; #endif #ifdef TARGET_AMD64 // Quirk for VS debug-launch scenario to work: @@ -10549,6 +10571,7 @@ class Compiler COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo, InlineInfo* inlineInfo); + void compInitAllRegsMask(); void compDone(); static void compDisplayStaticSizes(); @@ -10871,7 +10894,8 @@ class Compiler bool compJitHaltMethod(); - void dumpRegMask(regMaskTP regs) const; + void dumpRegMask(regMaskOnlyOne mask, var_types type) const; + void dumpRegMask(CONSTREF_AllRegsMask mask) const; #endif @@ -11145,8 +11169,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // regMaskTP get_RBM_ALLFLOAT(); - // regMaskTP get_RBM_FLT_CALLEE_TRASH(); + // regMaskFloat get_RBM_ALLFLOAT(); + // regMaskFloat get_RBM_FLT_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_FLOAT(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11155,16 +11179,16 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_AMD64 requires one. 
// - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; - unsigned cntCalleeTrashFloat; + regMaskFloat rbmAllFloat; + regMaskFloat rbmFltCalleeTrash; + unsigned cntCalleeTrashFloat; public: - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } @@ -11183,8 +11207,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // regMaskTP get_RBM_ALLMASK(); - // regMaskTP get_RBM_MSK_CALLEE_TRASH(); + // regMaskPredicate get_RBM_ALLMASK(); + // regMaskPredicate get_RBM_MSK_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_MASK(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11193,17 +11217,17 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_XARCH requires one. // - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; - unsigned cntCalleeTrashMask; - regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; + regMaskPredicate rbmAllMask; + regMaskPredicate rbmMskCalleeTrash; + unsigned cntCalleeTrashMask; + regMaskOnlyOne varTypeCalleeTrashRegs[TYP_COUNT]; public: - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE regMaskPredicate get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -11213,6 +11237,24 @@ class Compiler } #endif // TARGET_XARCH + AllRegsMask AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER; + AllRegsMask AllRegsMask_CALLEE_GCTRASH_WRITEBARRIER_BYREF; + AllRegsMask AllRegsMask_CALLEE_SAVED; + AllRegsMask AllRegsMask_CALLEE_TRASH; + AllRegsMask AllRegsMask_CALLEE_TRASH_NOGC; + AllRegsMask AllRegsMask_CALLEE_TRASH_WRITEBARRIER; + AllRegsMask AllRegsMask_CALLEE_TRASH_WRITEBARRIER_BYREF; + AllRegsMask AllRegsMask_EDX; + AllRegsMask AllRegsMask_INIT_PINVOKE_FRAME_TRASH; + AllRegsMask AllRegsMask_NONE; + AllRegsMask AllRegsMask_PROF_FNC_LEAVE; + AllRegsMask AllRegsMask_PROFILER_ENTER_TRASH; + AllRegsMask AllRegsMask_PROFILER_LEAVE_TRASH; + AllRegsMask AllRegsMask_PROFILER_RET_SCRATCH; + AllRegsMask AllRegsMask_PROFILER_TAILCALL_TRASH; + AllRegsMask AllRegsMask_STOP_FOR_GC_TRASH; + AllRegsMask AllRegsMask_VALIDATE_INDIRECT_CALL_TRASH; + }; // end of class Compiler //--------------------------------------------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 3c9b9ac9e5e284..5a6e7f75a41a03 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -895,7 +895,7 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block) // Assumptions: // The mask contains one and only one register. 
-inline regNumber genRegNumFromMask(regMaskTP mask) +inline regNumber genRegNumFromMask(regMaskOnlyOne mask MORE_THAN_64_REG_ARG(var_types type)) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero @@ -907,6 +907,10 @@ inline regNumber genRegNumFromMask(regMaskTP mask) assert(genRegMask(regNum) == mask); +#ifdef HAS_MORE_THAN_64_REGISTERS + // If this is mask type, add `64` to the regNumber + return (regNumber)((varTypeUsesMaskReg(type) << 6) + regNum); +#endif return regNum; } @@ -915,13 +919,14 @@ inline regNumber genRegNumFromMask(regMaskTP mask) // register number and also toggle the bit in the `mask`. // Arguments: // mask - the register mask +// type - the register type, `mask` represents. // // Return Value: // The number of the first register contained in the mask and updates the `mask` to toggle // the bit. // -inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) +inline regNumber genFirstRegNumFromMaskAndToggle(regMaskOnlyOne& mask MORE_THAN_64_REG_ARG(var_types type)) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero @@ -930,6 +935,10 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); mask ^= genRegMask(regNum); +#ifdef HAS_MORE_THAN_64_REGISTERS + // If this is mask type, add `64` to the regNumber + return (regNumber)((varTypeUsesMaskReg(type) << 6) + regNum); +#endif return regNum; } @@ -943,7 +952,45 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) // The number of the first register contained in the mask. // -inline regNumber genFirstRegNumFromMask(regMaskTP mask) +inline regNumber genFirstRegNumFromMask(AllRegsMask& mask) +{ + assert(!mask.IsEmpty()); // Must have one bit set, so can't have a mask of zero + + /* Convert the mask to a register number */ + regNumber regNum; + + RegBitSet64 gprOrFloatMask = mask.GetGprFloatCombinedMask(); + +#ifdef HAS_MORE_THAN_64_REGISTERS + // Only check this condition if there are predicate register support + // is present. + // If not, then gprOrFloatMask should be non-empty, and we will hit the + // above assert of IsEmpty() anyway. + if (gprOrFloatMask != RBM_NONE) + { + regNum = (regNumber)BitOperations::BitScanForward(gprOrFloatMask); + } + else + { + regNum = (regNumber)(64 + BitOperations::BitScanForward(mask.predicateRegs(nullptr))); + } +#else + regNum = (regNumber)BitOperations::BitScanForward(gprOrFloatMask); +#endif + return regNum; +} + +//------------------------------------------------------------------------------ +// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number. +// +// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the first register contained in the mask. +// + +inline regNumber genFirstRegNumFromMask(regMaskOnlyOne mask MORE_THAN_64_REG_ARG(var_types type)) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero @@ -951,6 +998,29 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#ifdef HAS_MORE_THAN_64_REGISTERS + // If this is mask type, add `64` to the regNumber + return (regNumber)((varTypeUsesMaskReg(type) << 6) + regNum); +#endif + return regNum; +} + +//------------------------------------------------------------------------------ +// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a +// register number and also toggle the bit in the `mask`. 
+// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the first register contained in the mask and updates the `mask` to toggle +// the bit. +// +// TODO: We can make certain methods on compiler object and check if predicate is needed +// and if yes, use optimize path +inline regNumber genFirstRegNumFromMaskAndToggle(AllRegsMask& mask) +{ + regNumber regNum = genFirstRegNumFromMask(mask); + mask ^= regNum; return regNum; } @@ -1105,6 +1175,7 @@ inline const char* varTypeGCstring(var_types type) /*****************************************************************************/ const char* varTypeName(var_types); +const int regIndexForRegister(regNumber reg); /*****************************************************************************/ // Helpers to pull little-endian values out of a byte stream. @@ -3296,14 +3367,14 @@ __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type, * (for a double on ARM) is returned. */ -inline regMaskTP genMapIntRegArgNumToRegMask(unsigned argNum) +inline regMaskGpr genMapIntRegArgNumToRegMask(unsigned argNum) { assert(argNum < ArrLen(intArgMasks)); return intArgMasks[argNum]; } -inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum) +inline regMaskFloat genMapFloatRegArgNumToRegMask(unsigned argNum) { #ifndef TARGET_X86 assert(argNum < ArrLen(fltArgMasks)); @@ -3315,9 +3386,9 @@ inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum) #endif } -__forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type) +__forceinline regMaskOnlyOne genMapArgNumToRegMask(unsigned argNum, var_types type) { - regMaskTP result; + regMaskOnlyOne result; if (varTypeUsesFloatArgReg(type)) { result = genMapFloatRegArgNumToRegMask(argNum); @@ -3451,39 +3522,6 @@ inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type, CorInf } } -/*****************************************************************************/ -/* Return a register mask with the first 'numRegs' argument registers set. 
- */ - -inline regMaskTP genIntAllRegArgMask(unsigned numRegs) -{ - assert(numRegs <= MAX_REG_ARG); - - regMaskTP result = RBM_NONE; - for (unsigned i = 0; i < numRegs; i++) - { - result |= intArgMasks[i]; - } - return result; -} - -inline regMaskTP genFltAllRegArgMask(unsigned numRegs) -{ -#ifndef TARGET_X86 - assert(numRegs <= MAX_FLOAT_REG_ARG); - - regMaskTP result = RBM_NONE; - for (unsigned i = 0; i < numRegs; i++) - { - result |= fltArgMasks[i]; - } - return result; -#else - assert(!"no x86 float arg regs\n"); - return RBM_NONE; -#endif -} - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -4441,27 +4479,24 @@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk) #ifdef DEBUG -inline void printRegMask(regMaskTP mask) -{ - printf(REG_MASK_ALL_FMT, mask); -} - -inline char* regMaskToString(regMaskTP mask, Compiler* context) +inline void printRegMask(AllRegsMask mask) { - const size_t cchRegMask = 24; - char* regmask = new (context, CMK_Unknown) char[cchRegMask]; + printf(REG_MASK_ALL_FMT, mask.gprRegs()); + printf(" "); + printf(REG_MASK_ALL_FMT, mask.floatRegs(nullptr)); - sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask); - - return regmask; +#ifdef FEATURE_MASKED_HW_INTRINSICS + printf(" "); + printf(REG_MASK_ALL_FMT, mask.predicateRegs(nullptr)); +#endif // FEATURE_MASKED_HW_INTRINSICS } -inline void printRegMaskInt(regMaskTP mask) +inline void printRegMaskInt(regMaskGpr mask) { printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT)); } -inline char* regMaskIntToString(regMaskTP mask, Compiler* context) +inline char* regMaskIntToString(regMaskGpr mask, Compiler* context) { const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; @@ -4989,6 +5024,351 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) return BasicBlockVisit::Continue; } +template +FORCEINLINE int regIndexForType(T vt) +{ + int type = varTypeRegister[TypeGet(vt)]; +#ifdef HAS_MORE_THAN_64_REGISTERS + assert(type <= 3); +#endif + +#ifndef FEATURE_MASKED_HW_INTRINSICS + assert(type != VTR_MASK); +#endif + return (type - 1); +} + +void AllRegsMask::operator|=(CONSTREF_AllRegsMask other) +{ + _combinedRegisters |= other._combinedRegisters; +#ifdef HAS_MORE_THAN_64_REGISTERS + _predicateRegs |= other._predicateRegs; +#endif +} + +void AllRegsMask::operator&=(CONSTREF_AllRegsMask other) +{ + _combinedRegisters &= other._combinedRegisters; +#ifdef HAS_MORE_THAN_64_REGISTERS + _predicateRegs &= other._predicateRegs; +#endif +} + +void AllRegsMask::operator|=(const regNumber reg) +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForRegister(reg); + RegBitSet64 value = genRegMask(reg); + _registers[index] |= encodeForIndex(index, value); +#else + _combinedRegisters |= genRegMask(reg); +#endif +} + +void AllRegsMask::operator^=(const regNumber reg) +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForRegister(reg); + RegBitSet64 value = genRegMask(reg); + _registers[index] ^= encodeForIndex(index, value); +#else + _combinedRegisters ^= genRegMask(reg); +#endif +} + +AllRegsMask AllRegsMask::operator~() const +{ + AllRegsMask result; + result._combinedRegisters = ~_combinedRegisters; +#ifdef HAS_MORE_THAN_64_REGISTERS + result._predicateRegs = ~_predicateRegs; +#endif + return result; +} + +bool AllRegsMask::operator==(CONSTREF_AllRegsMask other) const +{ + return (_combinedRegisters == other._combinedRegisters) +#ifdef 
HAS_MORE_THAN_64_REGISTERS + && (_predicateRegs == other._predicateRegs); +#endif + ; +} + +bool AllRegsMask::operator!=(CONSTREF_AllRegsMask other) const +{ + return !(*this == other); +} + +AllRegsMask AllRegsMask::operator&(CONSTREF_AllRegsMask other) const +{ + AllRegsMask result; + result._combinedRegisters = _combinedRegisters & other._combinedRegisters; +#ifdef HAS_MORE_THAN_64_REGISTERS + result._predicateRegs = _predicateRegs & other._predicateRegs; +#endif + return result; +} + +AllRegsMask AllRegsMask::operator|(CONSTREF_AllRegsMask other) const +{ + AllRegsMask result; + result._combinedRegisters = _combinedRegisters | other._combinedRegisters; +#ifdef HAS_MORE_THAN_64_REGISTERS + result._predicateRegs = _predicateRegs | other._predicateRegs; +#endif + return result; +} + +void AllRegsMask::Clear() +{ + _combinedRegisters = RBM_NONE; +#ifdef HAS_MORE_THAN_64_REGISTERS + _predicateRegs = RBM_NONE; +#endif +} + +bool AllRegsMask::IsEmpty() const +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + return ((_combinedRegisters | _predicateRegs) == RBM_NONE); +#else + return _combinedRegisters == RBM_NONE; +#endif +} + +unsigned AllRegsMask::Count() const +{ + return genCountBits(_combinedRegisters) +#ifdef HAS_MORE_THAN_64_REGISTERS + + genCountBits(_predicateRegs) +#endif + ; +} + +regMaskOnlyOne AllRegsMask::operator[](int index) const +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + assert(index <= 2); + RegBitSet32 value = _registers[index]; + return decodeForIndex(index, value); +#else + return _combinedRegisters; +#endif +} + +void AllRegsMask::AddRegMaskForType(regMaskOnlyOne maskToAdd, var_types type) +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForType(type); + _registers[index] |= encodeForIndex(index, maskToAdd); +#else + _combinedRegisters |= maskToAdd; +#endif +} + +void AllRegsMask::AddGprRegMask(regMaskOnlyOne maskToAdd) +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + _registers[0] |= maskToAdd; +#else + _combinedRegisters |= maskToAdd; +#endif +} + +void AllRegsMask::AddFloatRegMask(regMaskOnlyOne maskToAdd) +{ + _combinedRegisters |= maskToAdd; +} + +// Adds reg only if it is gpr register +void AllRegsMask::AddGprRegInMask(regNumber reg) +{ + AddGprRegMask(genRegMask(reg)); +} + +// ---------------------------------------------------------- +// AddRegNumForType: Adds `reg` to the mask. +// +void AllRegsMask::AddRegNumInMask(regNumber reg) +{ + RegBitSet64 value = genRegMask(reg); +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForRegister(reg); + _registers[index] |= encodeForIndex(index, value); +#else + _combinedRegisters |= value; +#endif +} + +// This is similar to AddRegNumInMask(reg, regType) for all platforms +// except Arm. For Arm, it calls getRegMask() instead of genRegMask() +// to create a mask that needs to be added. +void AllRegsMask::AddRegNum(regNumber reg, var_types type) +{ +#ifdef TARGET_ARM + _combinedRegisters |= getRegMask(reg, type); +#else + AddRegNumInMask(reg); +#endif +} + +// ---------------------------------------------------------- +// RemoveRegNumFromMask: Removes `reg` to the mask. +// +void AllRegsMask::RemoveRegNumFromMask(regNumber reg) +{ + RegBitSet64 value = genRegMask(reg); +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForRegister(reg); + _registers[index] &= ~encodeForIndex(index, value); +#else + _combinedRegisters &= ~value; +#endif +} + +// This is similar to RemoveRegNumFromMask(reg, regType) for all platforms +// except Arm. 
For Arm, it calls getRegMask() instead of genRegMask()
+// to create a mask that needs to be removed.
+void AllRegsMask::RemoveRegNum(regNumber reg, var_types type)
+{
+#ifdef TARGET_ARM
+    _combinedRegisters &= ~getRegMask(reg, type);
+#else
+    RemoveRegNumFromMask(reg);
+#endif
+}
+
+// ----------------------------------------------------------
+// IsRegNumInMask: Checks if `reg` is present in the mask.
+//
+bool AllRegsMask::IsRegNumInMask(regNumber reg) const
+{
+    RegBitSet64 value = genRegMask(reg);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+    int index = regIndexForRegister(reg);
+    return (_registers[index] & encodeForIndex(index, value)) != RBM_NONE;
+#else
+    return (_combinedRegisters & value) != RBM_NONE;
+#endif
+}
+
+// This is similar to IsRegNumInMask(reg, regType) for all platforms
+// except Arm. For Arm, it calls getRegMask() instead of genRegMask()
+// to create a mask that needs to be checked.
+bool AllRegsMask::IsRegNumPresent(regNumber reg, var_types type) const
+{
+#ifdef TARGET_ARM
+    return (_combinedRegisters & getRegMask(reg, type)) != RBM_NONE;
+#else
+    return IsRegNumInMask(reg);
+#endif
+}
+
+#ifdef TARGET_ARM
+// ----------------------------------------------------------
+// AddRegNumInMask: Adds `reg` to the mask. It is the same as AddRegNumInMask(reg) except
+// that it takes `type` as an argument and adds `reg` to the mask for that type.
+//
+void AllRegsMask::AddRegNumInMask(regNumber reg, var_types type)
+{
+    _combinedRegisters |= genRegMask(reg, type);
+}
+
+// ----------------------------------------------------------
+// RemoveRegNumFromMask: Removes `reg` from the mask. It is the same as RemoveRegNumFromMask(reg) except
+// that it takes `type` as an argument and removes `reg` from the mask for that type.
+//
+void AllRegsMask::RemoveRegNumFromMask(regNumber reg, var_types type)
+{
+    _combinedRegisters &= ~genRegMask(reg, type);
+}
+
+// ----------------------------------------------------------
+// IsRegNumInMask: Checks if `reg` is present in the mask. It is the same as IsRegNumInMask(reg) except
+// that it takes `type` as an argument and checks `reg` against the mask for that type.
+// +bool AllRegsMask::IsRegNumInMask(regNumber reg, var_types type) const +{ + return (_combinedRegisters & genRegMask(reg, type)) != RBM_NONE; +} +#endif + +void AllRegsMask::RemoveRegTypeFromMask(regMaskOnlyOne regMaskToRemove, var_types type) +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForType(type); + _registers[index] &= ~encodeForIndex(index, regMaskToRemove); +#else + _combinedRegisters &= ~regMaskToRemove; +#endif +} + +bool AllRegsMask::IsGprMaskPresent(regMaskGpr maskToCheck) const +{ + return (gprRegs() & maskToCheck) != RBM_NONE; +} + +bool AllRegsMask::IsFloatMaskPresent(Compiler* compiler, regMaskFloat maskToCheck) const +{ + return (floatRegs(compiler) & maskToCheck) != RBM_NONE; +} + +regMaskOnlyOne AllRegsMask::GetRegMaskForType(var_types type) const +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + int index = regIndexForType(type); + RegBitSet32 value = _registers[index]; + return decodeForIndex(index, value); +#else + return _combinedRegisters; +#endif +} + +RegBitSet64 AllRegsMask::GetGprFloatCombinedMask() const +{ + return _combinedRegisters; +} + +bool AllRegsMask::IsGprOrFloatPresent() const +{ + return GetGprFloatCombinedMask() != RBM_NONE; +} + +#ifndef HAS_MORE_THAN_64_REGISTERS +RegBitSet64 AllRegsMask::GetAllRegistersMask() const +{ + return _combinedRegisters; +} +#endif + +regMaskGpr AllRegsMask::gprRegs() const +{ + return _combinedRegisters & RBM_ALLGPR; +} + +regMaskFloat AllRegsMask::floatRegs(const Compiler* compiler) const +{ +#ifdef TARGET_AMD64 + regMaskOnlyOne allFloat = compiler != nullptr ? compiler->get_RBM_ALLFLOAT() : (RBM_HIGHFLOAT | RBM_LOWFLOAT); + return _combinedRegisters & allFloat; +#else + return _combinedRegisters & RBM_ALLFLOAT; +#endif // TARGET_AMD64 +} + +#ifdef FEATURE_MASKED_HW_INTRINSICS +regMaskPredicate AllRegsMask::predicateRegs(const Compiler* compiler) const +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + return _predicateRegs; +#else + regMaskOnlyOne allMask = compiler != nullptr ? 
compiler->get_RBM_ALLMASK() : (RBM_ALLMASK_EVEX); + return _combinedRegisters & allMask; +#endif +} +#endif // FEATURE_MASKED_HW_INTRINSICS + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index f8b320c07d44cd..6e0e326fbde496 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -273,10 +273,8 @@ void emitterStaticStats() fprintf(fout, "Offset / size of igLoopBackEdge = %3zu / %2zu\n", offsetof(insGroup, igLoopBackEdge), sizeof(igDummy->igLoopBackEdge)); #endif // FEATURE_LOOP_ALIGN -#if !(REGMASK_BITS <= 32) fprintf(fout, "Offset / size of igGCregs = %3zu / %2zu\n", offsetof(insGroup, igGCregs), sizeof(igDummy->igGCregs)); -#endif // !(REGMASK_BITS <= 32) fprintf(fout, "Offset / size of igData = %3zu / %2zu\n", offsetof(insGroup, igData), sizeof(igDummy->igData)); fprintf(fout, "Offset / size of igPhData = %3zu / %2zu\n", offsetof(insGroup, igPhData), @@ -289,10 +287,8 @@ void emitterStaticStats() fprintf(fout, "Offset / size of igStkLvl = %3zu / %2zu\n", offsetof(insGroup, igStkLvl), sizeof(igDummy->igStkLvl)); #endif // EMIT_TRACK_STACK_DEPTH -#if REGMASK_BITS <= 32 fprintf(fout, "Offset / size of igGCregs = %3zu / %2zu\n", offsetof(insGroup, igGCregs), sizeof(igDummy->igGCregs)); -#endif // REGMASK_BITS <= 32 fprintf(fout, "Offset / size of igInsCnt = %3zu / %2zu\n", offsetof(insGroup, igInsCnt), sizeof(igDummy->igInsCnt)); fprintf(fout, "\n"); @@ -751,6 +747,7 @@ void emitter::emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle) #endif #if defined(TARGET_AMD64) + rbmAllFloat = emitComp->rbmAllFloat; rbmFltCalleeTrash = emitComp->rbmFltCalleeTrash; #endif // TARGET_AMD64 @@ -2059,8 +2056,8 @@ void emitter::emitEndProlog() void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType, BasicBlock* igBB, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, bool last) { assert(igBB != nullptr); @@ -2868,9 +2865,9 @@ bool emitter::emitNoGChelper(CORINFO_METHOD_HANDLE methHnd) * Mark the current spot as having a label. */ -void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs DEBUG_ARG(BasicBlock* block)) +void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs DEBUG_ARG(BasicBlock* block)) { /* Create a new IG if the current one is non-empty */ @@ -2901,10 +2898,10 @@ void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, dumpConvertedVarSet(emitComp, GCvars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); - emitDispRegSet(gcrefRegs); + emitDispGprRegSet(gcrefRegs); printf(", byrefRegs="); printRegMaskInt(byrefRegs); - emitDispRegSet(byrefRegs); + emitDispGprRegSet(byrefRegs); printf("\n"); } #endif @@ -3450,21 +3447,21 @@ const char* emitter::emitGetFrameReg() * Display a register set in a readable form. 
*/ -void emitter::emitDispRegSet(regMaskTP regs) +void emitter::emitDispRegSet(regNumber firstReg, regNumber lastReg, regMaskOnlyOne regs) { + printf(" {"); + regNumber reg; bool sp = false; - printf(" {"); - - for (reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + for (reg = firstReg; reg <= lastReg; reg = REG_NEXT(reg)) { if (regs == RBM_NONE) { break; } - regMaskTP curReg = genRegMask(reg); + singleRegMask curReg = genRegMask(reg); if ((regs & curReg) == 0) { continue; @@ -3483,10 +3480,35 @@ void emitter::emitDispRegSet(regMaskTP regs) printf("%s", emitRegName(reg)); } - printf("}"); } +void emitter::emitDispGprRegSet(regMaskGpr regs) +{ + emitDispRegSet(REG_INT_FIRST, REG_INT_LAST, regs); +} + +void emitter::emitDispFloatRegSet(regMaskFloat regs) +{ + emitDispRegSet(REG_FP_FIRST, REG_FP_LAST, regs); +} + +#ifdef FEATURE_MASKED_HW_INTRINSICS +void emitter::emitDispMaskRegSet(regMaskPredicate regs) +{ + emitDispRegSet(REG_MASK_FIRST, REG_MASK_LAST, regs); +} +#endif // FEATURE_MASKED_HW_INTRINSICS + +void emitter::emitDispRegSet(CONSTREF_AllRegsMask regs) +{ + emitDispGprRegSet(regs.gprRegs()); + emitDispFloatRegSet(regs.floatRegs(nullptr)); +#ifdef FEATURE_MASKED_HW_INTRINSICS + emitDispMaskRegSet(regs.predicateRegs(nullptr)); +#endif +} + /***************************************************************************** * * Display the current GC ref variable set in a readable form. @@ -3577,11 +3599,13 @@ void emitter::emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSiz emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, emitAttr retSizeIn MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) { + assert(emitComp->IsGprRegMask(gcrefRegs)); + assert(emitComp->IsGprRegMask(byrefRegs)); emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE; bool gcRefRegsInScratch = ((gcrefRegs & RBM_CALLEE_TRASH) != 0); @@ -3660,11 +3684,14 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, emitAttr retSizeIn MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) { + assert(emitComp->IsGprRegMask(gcrefRegs)); + assert(emitComp->IsGprRegMask(byrefRegs)); + emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE; // Allocate a larger descriptor if new GC values need to be saved @@ -3822,23 +3849,26 @@ void emitter::emitDispGCDeltaTitle(const char* title) // prevRegs - The live GC registers before the recent instruction. // curRegs - The live GC registers after the recent instruction. 
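emitDispRegSet above is now bounded by an explicit [firstReg, lastReg] range so each register class is displayed over just its own registers, with the per-class wrappers supplying the bounds. A standalone sketch of that loop; the register names and bit layout are invented for the demo.

#include <cstdint>
#include <cstdio>

static const char* regName(int reg)
{
    static const char* names[] = {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"};
    return names[reg];
}

// Walk [firstReg, lastReg], print every register whose bit is set, and stop
// early once the mask has been exhausted.
static void dispRegSet(int firstReg, int lastReg, uint64_t regs)
{
    printf(" {");
    bool needSep = false;
    for (int reg = firstReg; reg <= lastReg && regs != 0; ++reg)
    {
        uint64_t curReg = 1ull << reg;
        if ((regs & curReg) == 0)
        {
            continue;
        }
        regs -= curReg;
        printf("%s%s", needSep ? " " : "", regName(reg));
        needSep = true;
    }
    printf("}");
}

int main()
{
    dispRegSet(0, 7, (1ull << 1) | (1ull << 6)); // prints {rcx rsi}
    printf("\n");
    return 0;
}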
// -void emitter::emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskTP curRegs) +void emitter::emitDispGCRegDelta(const char* title, regMaskGpr prevRegs, regMaskGpr curRegs) { + assert(emitComp->IsGprRegMask(prevRegs)); + assert(emitComp->IsGprRegMask(curRegs)); + if (prevRegs != curRegs) { emitDispGCDeltaTitle(title); - regMaskTP sameRegs = prevRegs & curRegs; - regMaskTP removedRegs = prevRegs - sameRegs; - regMaskTP addedRegs = curRegs - sameRegs; + regMaskGpr sameRegs = prevRegs & curRegs; + regMaskGpr removedRegs = prevRegs - sameRegs; + regMaskGpr addedRegs = curRegs - sameRegs; if (removedRegs != RBM_NONE) { printf(" -"); - dspRegMask(removedRegs); + dspRegMask(removedRegs, RBM_NONE); } if (addedRegs != RBM_NONE) { printf(" +"); - dspRegMask(addedRegs); + dspRegMask(addedRegs, RBM_NONE); } printf("\n"); } @@ -4066,10 +4096,10 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction dumpConvertedVarSet(emitComp, igPh->igPhData->igPhPrevGCrefVars); printf(", PrevGCrefRegs="); printRegMaskInt(igPh->igPhData->igPhPrevGCrefRegs); - emitDispRegSet(igPh->igPhData->igPhPrevGCrefRegs); + emitDispGprRegSet(igPh->igPhData->igPhPrevGCrefRegs); printf(", PrevByrefRegs="); printRegMaskInt(igPh->igPhData->igPhPrevByrefRegs); - emitDispRegSet(igPh->igPhData->igPhPrevByrefRegs); + emitDispGprRegSet(igPh->igPhData->igPhPrevByrefRegs); printf("\n"); printf("%*s; InitGCVars=%s ", strlen(buff), "", @@ -4077,10 +4107,10 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction dumpConvertedVarSet(emitComp, igPh->igPhData->igPhInitGCrefVars); printf(", InitGCrefRegs="); printRegMaskInt(igPh->igPhData->igPhInitGCrefRegs); - emitDispRegSet(igPh->igPhData->igPhInitGCrefRegs); + emitDispGprRegSet(igPh->igPhData->igPhInitGCrefRegs); printf(", InitByrefRegs="); printRegMaskInt(igPh->igPhData->igPhInitByrefRegs); - emitDispRegSet(igPh->igPhData->igPhInitByrefRegs); + emitDispGprRegSet(igPh->igPhData->igPhInitByrefRegs); printf("\n"); assert(!(ig->igFlags & IGF_GC_VARS)); @@ -4116,7 +4146,7 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction { printf("%sgcrefRegs=", separator); printRegMaskInt(ig->igGCregs); - emitDispRegSet(ig->igGCregs); + emitDispGprRegSet(ig->igGCregs); separator = ", "; } @@ -4124,7 +4154,7 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction { printf("%sbyrefRegs=", separator); printRegMaskInt(ig->igByrefRegs()); - emitDispRegSet(ig->igByrefRegs()); + emitDispGprRegSet(ig->igByrefRegs()); separator = ", "; } @@ -4220,26 +4250,26 @@ void emitter::emitDispGCinfo() dumpConvertedVarSet(emitComp, emitPrevGCrefVars); printf("\n emitPrevGCrefRegs(0x%p)=", dspPtr(&emitPrevGCrefRegs)); printRegMaskInt(emitPrevGCrefRegs); - emitDispRegSet(emitPrevGCrefRegs); + emitDispGprRegSet(emitPrevGCrefRegs); printf("\n emitPrevByrefRegs(0x%p)=", dspPtr(&emitPrevByrefRegs)); printRegMaskInt(emitPrevByrefRegs); - emitDispRegSet(emitPrevByrefRegs); + emitDispGprRegSet(emitPrevByrefRegs); printf("\n emitInitGCrefVars "); dumpConvertedVarSet(emitComp, emitInitGCrefVars); printf("\n emitInitGCrefRegs(0x%p)=", dspPtr(&emitInitGCrefRegs)); printRegMaskInt(emitInitGCrefRegs); - emitDispRegSet(emitInitGCrefRegs); + emitDispGprRegSet(emitInitGCrefRegs); printf("\n emitInitByrefRegs(0x%p)=", dspPtr(&emitInitByrefRegs)); printRegMaskInt(emitInitByrefRegs); - emitDispRegSet(emitInitByrefRegs); + emitDispGprRegSet(emitInitByrefRegs); printf("\n emitThisGCrefVars "); dumpConvertedVarSet(emitComp, 
emitThisGCrefVars); printf("\n emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs)); printRegMaskInt(emitThisGCrefRegs); - emitDispRegSet(emitThisGCrefRegs); + emitDispGprRegSet(emitThisGCrefRegs); printf("\n emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs)); printRegMaskInt(emitThisByrefRegs); - emitDispRegSet(emitThisByrefRegs); + emitDispGprRegSet(emitThisByrefRegs); printf("\n\n"); } @@ -7261,7 +7291,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, /* Update the set of live GC ref registers */ { - regMaskTP GCregs = ig->igGCregs; + regMaskGpr GCregs = ig->igGCregs; if (GCregs != emitThisGCrefRegs) { @@ -8770,12 +8800,12 @@ void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize) emitDispVarSet(); printf(", gcrefRegs="); printRegMaskInt(emitThisGCrefRegs); - emitDispRegSet(emitThisGCrefRegs); + emitDispGprRegSet(emitThisGCrefRegs); // printRegMaskInt(emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved // emitDispRegSet (emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved printf(", byrefRegs="); printRegMaskInt(emitThisByrefRegs); - emitDispRegSet(emitThisByrefRegs); + emitDispGprRegSet(emitThisByrefRegs); // printRegMaskInt(emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved // emitDispRegSet (emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved printf("\n"); @@ -8870,8 +8900,9 @@ void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize) * Record a new set of live GC ref registers. */ -void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr) +void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskGpr regs, BYTE* addr) { + assert(emitComp->IsGprRegMask(regs)); assert(emitIssuing); // Don't track GC changes in epilogs @@ -8880,14 +8911,14 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr) return; } - regMaskTP life; - regMaskTP dead; - regMaskTP chg; + regMaskGpr life; + regMaskGpr dead; + regMaskGpr chg; assert(needsGC(gcType)); - regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs; - regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs; + regMaskGpr& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs; + regMaskGpr& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs; assert(emitThisXXrefRegs != regs); if (emitFullGCinfo) @@ -8908,8 +8939,8 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr) do { - regMaskTP bit = genFindLowestBit(chg); - regNumber reg = genRegNumFromMask(bit); + regMaskGpr bit = genFindLowestBit(chg); + regNumber reg = genRegNumFromMask(bit MORE_THAN_64_REG_ARG(TYP_INT)); if (life & bit) { @@ -8941,8 +8972,9 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr) * Record the fact that the given register now contains a live GC ref. */ -void emitter::emitGCregLiveSet(GCtype gcType, regMaskTP regMask, BYTE* addr, bool isThis) +void emitter::emitGCregLiveSet(GCtype gcType, regMaskGpr regMask, BYTE* addr, bool isThis) { + assert(emitComp->IsGprRegMask(regMask)); assert(emitIssuing); assert(needsGC(gcType)); @@ -8972,7 +9004,7 @@ void emitter::emitGCregLiveSet(GCtype gcType, regMaskTP regMask, BYTE* addr, boo * Record the fact that the given register no longer contains a live GC ref. 
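// The GC ref / byref sets dumped above are GPR-only, which is why these call sites switch
// from the untyped emitDispRegSet(regMaskTP) to emitDispGprRegSet; the float and predicate
// wrappers back the CONSTREF_AllRegsMask overload. A minimal usage sketch (hypothetical
// debug helper, not part of this change):

void emitter::emitDispGCRegState()
{
    printf("gcrefRegs=");
    emitDispGprRegSet(emitThisGCrefRegs); // GPR-class wrapper: GC tracking never holds float or mask regs
    printf(" byrefRegs=");
    emitDispGprRegSet(emitThisByrefRegs);
    printf("\n");
}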
*/ -void emitter::emitGCregDeadSet(GCtype gcType, regMaskTP regMask, BYTE* addr) +void emitter::emitGCregDeadSet(GCtype gcType, regMaskGpr regMask, BYTE* addr) { assert(emitIssuing); assert(needsGC(gcType)); @@ -9209,10 +9241,10 @@ void emitter::emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr) assert(needsGC(gcType)); - regMaskTP regMask = genRegMask(reg); + singleRegMask regMask = genRegMask(reg); - regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs; - regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs; + regMaskGpr& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs; + regMaskGpr& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs; if ((emitThisXXrefRegs & regMask) == 0) { @@ -9249,7 +9281,7 @@ void emitter::emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr) * Record the fact that the given set of registers no longer contain live GC refs. */ -void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr) +void emitter::emitGCregDeadUpdMask(regMaskGpr regs, BYTE* addr) { assert(emitIssuing); @@ -9261,7 +9293,7 @@ void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr) // First, handle the gcref regs going dead - regMaskTP gcrefRegs = emitThisGCrefRegs & regs; + regMaskGpr gcrefRegs = emitThisGCrefRegs & regs; // "this" can never go dead in synchronized methods, except in the epilog // after the call to CORINFO_HELP_MON_EXIT. @@ -9281,7 +9313,7 @@ void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr) // Second, handle the byref regs going dead - regMaskTP byrefRegs = emitThisByrefRegs & regs; + regMaskGpr byrefRegs = emitThisByrefRegs & regs; if (byrefRegs) { @@ -9311,7 +9343,7 @@ void emitter::emitGCregDeadUpd(regNumber reg, BYTE* addr) return; } - regMaskTP regMask = genRegMask(reg); + singleRegMask regMask = genRegMask(reg); if ((emitThisGCrefRegs & regMask) != 0) { @@ -10010,7 +10042,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn // of callee-saved registers only). for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALLEE_SAVED; calleeSavedRegIdx++) { - regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; + regMaskGpr calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; if (emitThisGCrefRegs & calleeSavedRbm) { gcrefRegs |= (1 << calleeSavedRegIdx); @@ -10327,7 +10359,7 @@ const char* emitter::emitOffsetToLabel(unsigned offs) // Return value: // the saved set of registers. // -regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) +regMaskGpr emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) { // Is it a helper with a special saved set? bool isNoGCHelper = emitNoGChelper(methHnd); @@ -10336,14 +10368,14 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) CorInfoHelpFunc helpFunc = Compiler::eeGetHelperNum(methHnd); // Get the set of registers that this call kills and remove it from the saved set. 
- regMaskTP savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helpFunc); + regMaskGpr savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helpFunc); #ifdef DEBUG if (emitComp->verbose) { printf("NoGC Call: savedSet="); printRegMaskInt(savedSet); - emitDispRegSet(savedSet); + emitDispGprRegSet(savedSet); printf("\n"); } #endif @@ -10352,7 +10384,7 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) else { // This is the saved set of registers after a normal call. - return RBM_CALLEE_SAVED; + return RBM_INT_CALLEE_SAVED; } } @@ -10373,7 +10405,7 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) // Return Value: // Mask of GC register kills // -regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) +RegBitSet64 emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) { assert(emitNoGChelper(helper)); regMaskTP result; @@ -10424,7 +10456,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) // compHelperCallKillSet returns a superset of the registers which values are not guaranteed to be the same // after the call, if a register loses its GC or byref it has to be in the compHelperCallKillSet set as well. - assert((result & emitComp->compHelperCallKillSet(helper)) == result); + assert((result & emitComp->compHelperCallKillSet(helper).GetGprFloatCombinedMask()) == result); return result; } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 40a729dd70fee2..d9a4b276118d50 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -57,7 +57,7 @@ void emitterStats(FILE* fout); void emitterStaticStats(FILE* fout); // Static stats about the emitter (data structure offsets, sizes, etc.) #endif -void printRegMaskInt(regMaskTP mask); +void printRegMaskInt(regMaskGpr mask); /*****************************************************************************/ /* Forward declarations */ @@ -250,11 +250,11 @@ struct insPlaceholderGroupData insGroup* igPhNext; BasicBlock* igPhBB; VARSET_TP igPhInitGCrefVars; - regMaskTP igPhInitGCrefRegs; - regMaskTP igPhInitByrefRegs; + regMaskGpr igPhInitGCrefRegs; + regMaskGpr igPhInitByrefRegs; VARSET_TP igPhPrevGCrefVars; - regMaskTP igPhPrevGCrefRegs; - regMaskTP igPhPrevByrefRegs; + regMaskGpr igPhPrevGCrefRegs; + regMaskGpr igPhPrevByrefRegs; insGroupPlaceholderType igPhType; }; // end of struct insPlaceholderGroupData @@ -323,9 +323,7 @@ struct insGroup // Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits). 
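// Sketch of the AllRegsMask shape assumed by GetGprFloatCombinedMask() in the assert above and
// by the gprRegs()/floatRegs()/predicateRegs() accessors used earlier. The real type is defined
// elsewhere in this change; the sketch assumes the per-class masks keep the traditional,
// non-overlapping regMaskTP bit positions so the gpr and float bits can be OR'd together.

struct AllRegsMaskSketch
{
    regMaskGpr   gpr;
    regMaskFloat flt;
#ifdef FEATURE_MASKED_HW_INTRINSICS
    regMaskPredicate pred;
#endif

    regMaskGpr   gprRegs() const { return gpr; }
    regMaskFloat floatRegs(const Compiler*) const { return flt; }
#ifdef FEATURE_MASKED_HW_INTRINSICS
    regMaskPredicate predicateRegs(const Compiler*) const { return pred; }
#endif

    void AddRegNumInMask(regNumber reg)
    {
        // Predicate registers are ignored here for brevity.
        if (genIsValidFloatReg(reg))
        {
            flt |= genRegMask(reg);
        }
        else
        {
            gpr |= genRegMask(reg);
        }
    }

    // Legacy callers that still reason about one 64-bit set get gpr and float folded together.
    RegBitSet64 GetGprFloatCombinedMask() const
    {
        return ((RegBitSet64)gpr) | ((RegBitSet64)flt);
    }
};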
-#if !(REGMASK_BITS <= 32) regMaskSmall igGCregs; // set of registers with live GC refs -#endif // !(REGMASK_BITS <= 32) union { @@ -343,10 +341,6 @@ struct insGroup unsigned igStkLvl; // stack level on entry #endif // EMIT_TRACK_STACK_DEPTH -#if REGMASK_BITS <= 32 - regMaskSmall igGCregs; // set of registers with live GC refs -#endif // REGMASK_BITS <= 32 - unsigned char igInsCnt; // # of instructions in this group VARSET_VALRET_TP igGCvars() const @@ -758,7 +752,7 @@ class emitter // x86: 38 bits // amd64: 38 bits // arm: 32 bits - // arm64: 44 bits + // arm64: 46 bits // loongarch64: 28 bits // risc-v: 28 bits @@ -828,7 +822,7 @@ class emitter // x86: 48 bits // amd64: 48 bits // arm: 48 bits - // arm64: 53 bits + // arm64: 55 bits // loongarch64: 46 bits // risc-v: 46 bits @@ -840,7 +834,7 @@ class emitter #if defined(TARGET_ARM) #define ID_EXTRA_BITFIELD_BITS (16) #elif defined(TARGET_ARM64) -#define ID_EXTRA_BITFIELD_BITS (21) +#define ID_EXTRA_BITFIELD_BITS (23) #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define ID_EXTRA_BITFIELD_BITS (14) #elif defined(TARGET_XARCH) @@ -881,7 +875,7 @@ class emitter // x86: 54/50 bits // amd64: 55/50 bits // arm: 54/50 bits - // arm64: 60/55 bits + // arm64: 62/57 bits // loongarch64: 53/48 bits // risc-v: 53/48 bits @@ -897,7 +891,7 @@ class emitter // x86: 10/14 bits // amd64: 9/14 bits // arm: 10/14 bits - // arm64: 4/9 bits + // arm64: 2/7 bits // loongarch64: 11/16 bits // risc-v: 11/16 bits @@ -2140,11 +2134,11 @@ class emitter { instrDescCGCA() = delete; - VARSET_TP idcGCvars; // ... updated GC vars or - ssize_t idcDisp; // ... big addrmode disp - regMaskTP idcGcrefRegs; // ... gcref registers - regMaskTP idcByrefRegs; // ... byref registers - unsigned idcArgCnt; // ... lots of args or (<0 ==> caller pops args) + VARSET_TP idcGCvars; // ... updated GC vars or + ssize_t idcDisp; // ... big addrmode disp + regMaskGpr idcGcrefRegs; // ... gcref registers + regMaskGpr idcByrefRegs; // ... byref registers + unsigned idcArgCnt; // ... lots of args or (<0 ==> caller pops args) #if MULTIREG_HAS_SECOND_GC_RET // This method handle the GC-ness of the second register in a 2 register returned struct on System V. @@ -2257,11 +2251,11 @@ class emitter VARSET_TP debugPrevGCrefVars; VARSET_TP debugThisGCrefVars; regPtrDsc* debugPrevRegPtrDsc; - regMaskTP debugPrevGCrefRegs; - regMaskTP debugPrevByrefRegs; + regMaskGpr debugPrevGCrefRegs; + regMaskGpr debugPrevByrefRegs; void emitDispInsIndent(); void emitDispGCDeltaTitle(const char* title); - void emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskTP curRegs); + void emitDispGCRegDelta(const char* title, regMaskGpr prevRegs, regMaskGpr curRegs); void emitDispGCVarDelta(); void emitDispRegPtrListDelta(); void emitDispGCInfoDelta(); @@ -2476,18 +2470,24 @@ class emitter private: #if defined(TARGET_AMD64) - regMaskTP rbmFltCalleeTrash; + regMaskFloat rbmAllFloat; + regMaskFloat rbmFltCalleeTrash; + + FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const + { + return this->rbmAllFloat; + } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmMskCalleeTrash; + regMaskPredicate rbmMskCalleeTrash; - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -2650,13 +2650,13 @@ class emitter // in that tracking. 
See emitSavIG(): the important use of ByrefRegs is commented // out, and GCrefRegs is always saved. - VARSET_TP emitPrevGCrefVars; - regMaskTP emitPrevGCrefRegs; - regMaskTP emitPrevByrefRegs; + VARSET_TP emitPrevGCrefVars; + regMaskGpr emitPrevGCrefRegs; + regMaskGpr emitPrevByrefRegs; - VARSET_TP emitInitGCrefVars; - regMaskTP emitInitGCrefRegs; - regMaskTP emitInitByrefRegs; + VARSET_TP emitInitGCrefVars; + regMaskGpr emitInitGCrefRegs; + regMaskGpr emitInitByrefRegs; // If this is set, we ignore comparing emitPrev* and emitInit* to determine // whether to save GC state (to save space in the IG), and always save it. @@ -2673,9 +2673,9 @@ class emitter // really the only one used; the others seem to be calculated, but not // used due to bugs. - VARSET_TP emitThisGCrefVars; - regMaskTP emitThisGCrefRegs; // Current set of registers holding GC references - regMaskTP emitThisByrefRegs; // Current set of registers holding BYREF references + VARSET_TP emitThisGCrefVars; + regMaskGpr emitThisGCrefRegs; // Current set of registers holding GC references + regMaskGpr emitThisByrefRegs; // Current set of registers holding BYREF references bool emitThisGCrefVset; // Is "emitThisGCrefVars" up to date? @@ -2685,7 +2685,7 @@ class emitter void emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET - static void emitEncodeCallGCregs(regMaskTP regs, instrDesc* id); + static void emitEncodeCallGCregs(regMaskGpr regs, instrDesc* id); static unsigned emitDecodeCallGCregs(instrDesc* id); unsigned emitNxtIGnum; @@ -2857,9 +2857,9 @@ class emitter // Mark this instruction group as having a label; return the new instruction group. // Sets the emitter's record of the currently live GC variables // and registers. - void* emitAddLabel(VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs DEBUG_ARG(BasicBlock* block = nullptr)); + void* emitAddLabel(VARSET_VALARG_TP GCvars, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs DEBUG_ARG(BasicBlock* block = nullptr)); // Same as above, except the label is added and is conceptually "inline" in // the current block. Thus it extends the previous block and the emitter @@ -3140,10 +3140,10 @@ class emitter bool emitFullGCinfo; // full GC pointer maps? bool emitFullyInt; // fully interruptible code? - regMaskTP emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd); + regMaskGpr emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd); // Gets a register mask that represent the kill set for a NoGC helper call. 
- regMaskTP emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper); + RegBitSet64 emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper); #if EMIT_TRACK_STACK_DEPTH unsigned emitCntStackDepth; // 0 in prolog/epilog, One DWORD elsewhere @@ -3199,19 +3199,25 @@ class emitter /* Liveness of stack variables, and registers */ void emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE* addr); - void emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr); + void emitUpdateLiveGCregs(GCtype gcType, regMaskGpr regs, BYTE* addr); #ifdef DEBUG const char* emitGetFrameReg(); - void emitDispRegSet(regMaskTP regs); - void emitDispVarSet(); + void emitDispRegSet(regNumber firstReg, regNumber lastReg, regMaskOnlyOne regs); + void emitDispGprRegSet(regMaskGpr regs); + void emitDispFloatRegSet(regMaskFloat regs); +#ifdef FEATURE_MASKED_HW_INTRINSICS + void emitDispMaskRegSet(regMaskPredicate regs); +#endif // FEATURE_MASKED_HW_INTRINSICS + void emitDispRegSet(CONSTREF_AllRegsMask regs); + void emitDispVarSet(); #endif void emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr); - void emitGCregLiveSet(GCtype gcType, regMaskTP mask, BYTE* addr, bool isThis); - void emitGCregDeadUpdMask(regMaskTP, BYTE* addr); + void emitGCregLiveSet(GCtype gcType, regMaskGpr mask, BYTE* addr, bool isThis); + void emitGCregDeadUpdMask(regMaskGpr, BYTE* addr); void emitGCregDeadUpd(regNumber reg, BYTE* addr); - void emitGCregDeadSet(GCtype gcType, regMaskTP mask, BYTE* addr); + void emitGCregDeadSet(GCtype gcType, regMaskGpr mask, BYTE* addr); void emitGCvarLiveUpd(int offs, int varNum, GCtype gcType, BYTE* addr DEBUG_ARG(unsigned actualVarNum)); void emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp = -1); diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 5a20f8a1f940ad..85b05ad801f3e3 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -4670,8 +4670,8 @@ void emitter::emitIns_Call(EmitCallType callType, int argSize, emitAttr retSize, VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, const DebugInfo& di /* = DebugInfo() */, regNumber ireg /* = REG_NA */, regNumber xreg /* = REG_NA */, @@ -4680,7 +4680,8 @@ void emitter::emitIns_Call(EmitCallType callType, bool isJump /* = false */) { /* Sanity check the arguments depending on callType */ - + assert(emitComp->IsGprRegMask(gcrefRegs)); + assert(emitComp->IsGprRegMask(byrefRegs)); assert(callType < EC_COUNT); assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA)); assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT)); @@ -4693,9 +4694,9 @@ void emitter::emitIns_Call(EmitCallType callType, assert((unsigned)abs(argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. 
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + AllRegsMask savedSet = emitGetGCRegsSavedOrModified(methHnd); + gcrefRegs &= savedSet.gprRegs(); + byrefRegs &= savedSet.gprRegs(); #ifdef DEBUG if (EMIT_GC_VERBOSE) @@ -4704,10 +4705,10 @@ void emitter::emitIns_Call(EmitCallType callType, dumpConvertedVarSet(emitComp, ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); - emitDispRegSet(gcrefRegs); + emitDispGprRegSet(gcrefRegs); printf(", byrefRegs="); printRegMaskInt(byrefRegs); - emitDispRegSet(byrefRegs); + emitDispGprRegSet(byrefRegs); printf("\n"); } #endif @@ -5762,10 +5763,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) switch (fmt) { - int imm; - BYTE* addr; - regMaskTP gcrefRegs; - regMaskTP byrefRegs; + int imm; + BYTE* addr; + regMaskGpr gcrefRegs; + regMaskGpr byrefRegs; case IF_T1_A: // T1_A ................ sz = SMALL_IDSC_SIZE; @@ -6667,11 +6668,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum); printf(" emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs)); printRegMaskInt(emitThisGCrefRegs); - emitDispRegSet(emitThisGCrefRegs); + emitDispGprRegSet(emitThisGCrefRegs); printf("\n"); printf(" emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs)); printRegMaskInt(emitThisByrefRegs); - emitDispRegSet(emitThisByrefRegs); + emitDispGprRegSet(emitThisByrefRegs); printf("\n"); } diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h index 6ae0c57dea6d26..83541a36427516 100644 --- a/src/coreclr/jit/emitarm.h +++ b/src/coreclr/jit/emitarm.h @@ -65,10 +65,10 @@ void emitDispInsHelp(instrDesc* id, private: instrDesc* emitNewInstrCallDir( - int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); + int argCnt, VARSET_VALARG_TP GCvars, regMaskGpr gcrefRegs, regMaskGpr byrefRegs, emitAttr retSize); instrDesc* emitNewInstrCallInd( - int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); + int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskGpr gcrefRegs, regMaskGpr byrefRegs, emitAttr retSize); /************************************************************************/ /* Private helpers for instruction output */ @@ -328,8 +328,8 @@ void emitIns_Call(EmitCallType callType, int argSize, emitAttr retSize, VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, const DebugInfo& di = DebugInfo(), regNumber ireg = REG_NA, regNumber xreg = REG_NA, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 181b9706e41611..b327bccab2972c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1309,13 +1309,13 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) // clang-format off static const char * const xRegNames[] = { - #define REGDEF(name, rnum, mask, xname, wname) xname, + #define REGDEF(name, rnum, mask, xname, wname, regTypeTag) xname, #include "register.h" }; static const char * const wRegNames[] = { - #define REGDEF(name, rnum, mask, xname, wname) wname, + #define REGDEF(name, rnum, mask, xname, wname, regTypeTag) wname, #include "register.h" }; @@ -8944,8 +8944,8 @@ void emitter::emitIns_Call(EmitCallType callType, emitAttr retSize, emitAttr secondRetSize, VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP 
byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, const DebugInfo& di /* = DebugInfo() */, regNumber ireg /* = REG_NA */, regNumber xreg /* = REG_NA */, @@ -8954,7 +8954,8 @@ void emitter::emitIns_Call(EmitCallType callType, bool isJump /* = false */) { /* Sanity check the arguments depending on callType */ - + assert(emitComp->IsGprRegMask(gcrefRegs)); + assert(emitComp->IsGprRegMask(byrefRegs)); assert(callType < EC_COUNT); assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA)); assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT)); @@ -8967,7 +8968,7 @@ void emitter::emitIns_Call(EmitCallType callType, assert((unsigned)std::abs(argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. - regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); + regMaskGpr savedSet = emitGetGCRegsSavedOrModified(methHnd); gcrefRegs &= savedSet; byrefRegs &= savedSet; @@ -8978,10 +8979,10 @@ void emitter::emitIns_Call(EmitCallType callType, dumpConvertedVarSet(emitComp, ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); - emitDispRegSet(gcrefRegs); + emitDispGprRegSet(gcrefRegs); printf(", byrefRegs="); printRegMaskInt(byrefRegs); - emitDispRegSet(byrefRegs); + emitDispGprRegSet(byrefRegs); printf("\n"); } #endif @@ -10651,8 +10652,8 @@ BYTE* emitter::emitOutputVectorConstant( unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) { const unsigned char callInstrSize = sizeof(code_t); // 4 bytes - regMaskTP gcrefRegs; - regMaskTP byrefRegs; + regMaskGpr gcrefRegs; + regMaskGpr byrefRegs; VARSET_TP GCvars(VarSetOps::UninitVal()); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index cc3254c06810ab..68ec1621485770 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -95,16 +95,16 @@ void emitDispSvePrfop(insSvePrfop prfop, bool addComma); private: instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, emitAttr retSize, emitAttr secondRetSize); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, emitAttr retSize, emitAttr secondRetSize); @@ -1175,6 +1175,13 @@ inline static bool isGeneralRegisterOrSP(regNumber reg) return isGeneralRegister(reg) || (reg == REG_SP); } // Includes REG_SP, Excludes REG_ZR +#ifdef FEATURE_MASKED_HW_INTRINSICS +inline static bool isMaskReg(regNumber reg) +{ + return (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST); +} +#endif // FEATURE_MASKED_HW_INTRINSICS + inline static bool isVectorRegister(regNumber reg) { return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); @@ -1738,8 +1745,8 @@ void emitIns_Call(EmitCallType callType, emitAttr retSize, emitAttr secondRetSize, VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, const DebugInfo& di, regNumber ireg, regNumber xreg, diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 66a33b813d58fa..43e68885fac8b9 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -211,7 +211,7 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const * * Convert between a register mask and a smaller version for storage. 
*/ -/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskTP regmask, instrDesc* id) +/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskGpr regmask, instrDesc* id) { unsigned encodeMask; diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index c69ea7c5a36e6f..0bd387ae5d4ad0 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -3887,7 +3887,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // clang-format off static const char* const RegNames[] = { - #define REGDEF(name, rnum, mask, sname) sname, + #define REGDEF(name, rnum, mask, sname, regTypeTag) sname, #include "register.h" }; // clang-format on diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index bf15ba33667cac..46fe21ed7b394d 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -53,8 +53,8 @@ void emitEndProlog(); void emitCreatePlaceholderIG(insGroupPlaceholderType igType, BasicBlock* igBB, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, bool last); void emitGeneratePrologEpilog(); diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 71fd3e323d518c..03eea8e8e29bcf 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -3413,7 +3413,7 @@ void emitter::emitDispIllegalInstruction(code_t instructionCode) // clang-format off static const char* const RegNames[] = { - #define REGDEF(name, rnum, mask, sname) sname, + #define REGDEF(name, rnum, mask, sname, regTypeTag) sname, #include "register.h" }; // clang-format on diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 6bf148cf2d8883..321b2ffb9dd1bb 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -9488,8 +9488,8 @@ void emitter::emitIns_Call(EmitCallType callType, emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, const DebugInfo& di, regNumber ireg, regNumber xreg, @@ -9499,6 +9499,8 @@ void emitter::emitIns_Call(EmitCallType callType, // clang-format on { /* Sanity check the arguments depending on callType */ + assert(emitComp->IsGprRegMask(gcrefRegs)); + assert(emitComp->IsGprRegMask(byrefRegs)); assert(callType < EC_COUNT); if (!emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)) @@ -9514,7 +9518,7 @@ void emitter::emitIns_Call(EmitCallType callType, assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set.
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); + regMaskGpr savedSet = emitGetGCRegsSavedOrModified(methHnd); gcrefRegs &= savedSet; byrefRegs &= savedSet; @@ -9525,10 +9529,10 @@ void emitter::emitIns_Call(EmitCallType callType, dumpConvertedVarSet(emitComp, ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); - emitDispRegSet(gcrefRegs); + emitDispGprRegSet(gcrefRegs); printf(", byrefRegs="); printRegMaskInt(byrefRegs); - emitDispRegSet(byrefRegs); + emitDispGprRegSet(byrefRegs); printf("\n"); } #endif @@ -10218,7 +10222,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) con const char* emitter::emitXMMregName(unsigned reg) const { static const char* const regNames[] = { -#define REGDEF(name, rnum, mask, sname) "x" sname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) "x" sname, #include "register.h" }; @@ -10236,7 +10240,7 @@ const char* emitter::emitXMMregName(unsigned reg) const const char* emitter::emitYMMregName(unsigned reg) const { static const char* const regNames[] = { -#define REGDEF(name, rnum, mask, sname) "y" sname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) "y" sname, #include "register.h" }; @@ -10254,7 +10258,7 @@ const char* emitter::emitYMMregName(unsigned reg) const const char* emitter::emitZMMregName(unsigned reg) const { static const char* const regNames[] = { -#define REGDEF(name, rnum, mask, sname) "z" sname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) "z" sname, #include "register.h" }; @@ -14677,7 +14681,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) case IF_RRW: { #ifdef DEBUG - regMaskTP regMask = genRegMask(reg); + singleRegMask regMask = genRegMask(reg); #endif if (id->idGCref()) { @@ -15020,8 +15024,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // instruction, if writing a GC ref even through reading a long, will go live here. // These situations typically occur due to unsafe casting, such as with Span. - regMaskTP regMask; + regMaskGpr regMask; regMask = genRegMask(reg1) | genRegMask(reg2); + assert(emitComp->IsGprRegMask(regMask)); // r1/r2 could have been a GCREF as GCREF + int=BYREF // or BYREF+/-int=BYREF @@ -15520,7 +15525,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) assert(id->idGCref() == GCT_BYREF); #ifdef DEBUG - regMaskTP regMask; + regMaskGpr regMask; regMask = genRegMask(reg); // FIXNOW review the other places and relax the assert there too @@ -16343,8 +16348,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) BYTE* addr; bool recCall; - regMaskTP gcrefRegs; - regMaskTP byrefRegs; + regMaskGpr gcrefRegs; + regMaskGpr byrefRegs; /********************************************************************/ /* No operands */ @@ -17910,11 +17915,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum); printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs)); printRegMaskInt(emitThisGCrefRegs); - emitDispRegSet(emitThisGCrefRegs); + emitDispGprRegSet(emitThisGCrefRegs); printf("\n"); printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs)); printRegMaskInt(emitThisByrefRegs); - emitDispRegSet(emitThisByrefRegs); + emitDispGprRegSet(emitThisByrefRegs); printf("\n"); } @@ -17943,7 +17948,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // The target of the 3-operand imul is implicitly encoded. 
Make sure // that we detected the implicit register and cleared its GC-status. - regMaskTP regMask = genRegMask(inst3opImulReg(ins)); + singleRegMask regMask = genRegMask(inst3opImulReg(ins)); assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0); } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index e32cab66254fe8..fa48eeb53f3a2c 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -23,10 +23,12 @@ inline static bool isDoubleReg(regNumber reg) return isFloatReg(reg); } +#ifdef FEATURE_MASKED_HW_INTRINSICS inline static bool isMaskReg(regNumber reg) { return (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST); } +#endif // FEATURE_MASKED_HW_INTRINSICS inline static bool isHighSimdReg(regNumber reg) { @@ -534,15 +536,15 @@ instrDesc* emitNewInstrAmdCns(emitAttr attr, ssize_t dsp, int cns); instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); void emitGetInsCns(const instrDesc* id, CnsVal* cv) const; @@ -912,8 +914,8 @@ void emitIns_Call(EmitCallType callType, emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, + regMaskGpr gcrefRegs, + regMaskGpr byrefRegs, const DebugInfo& di = DebugInfo(), regNumber ireg = REG_NA, regNumber xreg = REG_NA, diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index e21fe864984ef7..25ee06d61b8085 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4471,8 +4471,8 @@ void GCInfo::gcMakeRegPtrTable( assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0); // Other than that, we just have to deal with the regmasks. - regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALLEE_SAVED; - regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALLEE_SAVED; + regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_INT_CALLEE_SAVED; + regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_INT_CALLEE_SAVED; assert((gcrefRegMask & byrefRegMask) == 0); @@ -4620,7 +4620,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, while (regMask) { // Get hold of the next register bit. - regMaskTP tmpMask = genFindLowestBit(regMask); + regMaskGpr tmpMask = genFindLowestBit(regMask); assert(tmpMask); // Remember the new state of this register. @@ -4637,7 +4637,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, } // Figure out which register the next bit corresponds to. - regNumber regNum = genRegNumFromMask(tmpMask); + regNumber regNum = genRegNumFromMask(tmpMask MORE_THAN_64_REG_ARG(TYP_INT)); /* Reserve SP future use */ assert(regNum != REG_SPBASE); diff --git a/src/coreclr/jit/gcinfo.cpp b/src/coreclr/jit/gcinfo.cpp index ff534a0afcbf21..e77446952647d5 100644 --- a/src/coreclr/jit/gcinfo.cpp +++ b/src/coreclr/jit/gcinfo.cpp @@ -84,7 +84,7 @@ void GCInfo::gcResetForBB() * Print the changes in the gcRegGCrefSetCur sets. 
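// The genRegNumFromMask/genFirstRegNumFromMask calls above now carry a MORE_THAN_64_REG_ARG(type)
// suffix. Sketch of the assumed plumbing (the guard name below is an assumption; the real
// definition is outside this diff): on targets whose register file no longer fits in 64 bits,
// the helpers need the register class to select the right portion of the mask, and everywhere
// else the extra argument compiles away.

#ifdef HAS_MORE_THAN_64_REGISTERS
#define MORE_THAN_64_REG_ARG(x) , (x)
#else
#define MORE_THAN_64_REG_ARG(x)
#endif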
*/ -void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput)) +void GCInfo::gcDspGCrefSetChanges(regMaskGpr gcRegGCrefSetNew DEBUGARG(bool forceOutput)) { if (compiler->verbose) { @@ -98,11 +98,11 @@ void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool force else { printRegMaskInt(gcRegGCrefSetCur); - compiler->GetEmitter()->emitDispRegSet(gcRegGCrefSetCur); + compiler->GetEmitter()->emitDispGprRegSet(gcRegGCrefSetCur); printf(" => "); } printRegMaskInt(gcRegGCrefSetNew); - compiler->GetEmitter()->emitDispRegSet(gcRegGCrefSetNew); + compiler->GetEmitter()->emitDispGprRegSet(gcRegGCrefSetNew); printf("\n"); } } @@ -113,7 +113,7 @@ void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool force * Print the changes in the gcRegByrefSetCur sets. */ -void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput)) +void GCInfo::gcDspByrefSetChanges(regMaskGpr gcRegByrefSetNew DEBUGARG(bool forceOutput)) { if (compiler->verbose) { @@ -127,11 +127,11 @@ void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool force else { printRegMaskInt(gcRegByrefSetCur); - compiler->GetEmitter()->emitDispRegSet(gcRegByrefSetCur); + compiler->GetEmitter()->emitDispGprRegSet(gcRegByrefSetCur); printf(" => "); } printRegMaskInt(gcRegByrefSetNew); - compiler->GetEmitter()->emitDispRegSet(gcRegByrefSetNew); + compiler->GetEmitter()->emitDispGprRegSet(gcRegByrefSetNew); printf("\n"); } } @@ -145,14 +145,16 @@ void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool force * GCref pointer values. */ -void GCInfo::gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput)) +void GCInfo::gcMarkRegSetGCref(regMaskGpr regMask DEBUGARG(bool forceOutput)) { + assert(compiler->IsGprRegMask(regMask)); + // This set of registers are going to hold REFs. // Make sure they were not holding BYREFs. assert((gcRegByrefSetCur & regMask) == 0); - regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~regMask; // Clear it if set in Byref mask - regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur | regMask; // Set it in GCref mask + regMaskGpr gcRegByrefSetNew = gcRegByrefSetCur & ~regMask; // Clear it if set in Byref mask + regMaskGpr gcRegGCrefSetNew = gcRegGCrefSetCur | regMask; // Set it in GCref mask INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput)); INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew)); @@ -167,10 +169,12 @@ void GCInfo::gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput)) * Byref pointer values. */ -void GCInfo::gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput)) +void GCInfo::gcMarkRegSetByref(regMaskGpr regMask DEBUGARG(bool forceOutput)) { - regMaskTP gcRegByrefSetNew = gcRegByrefSetCur | regMask; // Set it in Byref mask - regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~regMask; // Clear it if set in GCref mask + assert(compiler->IsGprRegMask(regMask)); + + regMaskGpr gcRegByrefSetNew = gcRegByrefSetCur | regMask; // Set it in Byref mask + regMaskGpr gcRegGCrefSetNew = gcRegGCrefSetCur & ~regMask; // Clear it if set in GCref mask INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew)); INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput)); @@ -179,18 +183,50 @@ void GCInfo::gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput)) gcRegGCrefSetCur = gcRegGCrefSetNew; } +/***************************************************************************** + * + * Mark the gpr register as holding non-pointer values. 
+ * + */ + +void GCInfo::gcMarkGprRegNpt(regNumber reg DEBUGARG(bool forceOutput)) +{ + assert(emitter::isGeneralRegister(reg)); + gcMarkRegSetNpt(genRegMask(reg) DEBUGARG(forceOutput)); +} + +/***************************************************************************** + * + * Mark the register as holding non-pointer values. + * + */ + +void GCInfo::gcMarkRegNpt(regNumber reg DEBUGARG(bool forceOutput)) +{ + if (!emitter::isGeneralRegister(reg)) + { + return; + } + + gcMarkRegSetNpt(genRegMask(reg) DEBUGARG(forceOutput)); +} + /***************************************************************************** * * Mark the set of registers given by the specified mask as holding * non-pointer values. */ -void GCInfo::gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput)) +void GCInfo::gcMarkRegSetNpt(regMaskGpr regMask DEBUGARG(bool forceOutput)) { + // We only care about gpr registers because those are the ones that hold + // gc pointers. + assert(compiler->IsGprRegMask(regMask)); + /* NOTE: don't unmark any live register variables */ - regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~(regMask & ~regSet->GetMaskVars()); - regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~(regMask & ~regSet->GetMaskVars()); + regMaskGpr gcRegByrefSetNew = gcRegByrefSetCur & ~(regMask & ~regSet->GetGprMaskVars()); + regMaskGpr gcRegGCrefSetNew = gcRegGCrefSetCur & ~(regMask & ~regSet->GetGprMaskVars()); INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput)); INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput)); @@ -206,8 +242,12 @@ void GCInfo::gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput)) void GCInfo::gcMarkRegPtrVal(regNumber reg, var_types type) { - regMaskTP regMask = genRegMask(reg); + if (!emitter::isGeneralRegister(reg)) + { + return; + } + singleRegMask regMask = genRegMask(reg); switch (type) { case TYP_REF: @@ -700,6 +740,7 @@ void GCInfo::gcRegPtrSetInit() #endif // JIT32_GCENCODER +#if 0 //------------------------------------------------------------------------ // gcUpdateForRegVarMove: Update the masks when a variable is moved // @@ -716,7 +757,7 @@ void GCInfo::gcRegPtrSetInit() // It is also called by LinearScan::recordVarLocationAtStartOfBB() which is in turn called by // CodeGen::genCodeForBBList() at the block boundary. -void GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc) +void GCInfo::gcUpdateForRegVarMove(regMaskOnlyOne srcMask, regMaskOnlyOne dstMask, LclVarDsc* varDsc) { var_types type = varDsc->TypeGet(); bool isGCRef = (type == TYP_REF); @@ -766,6 +807,6 @@ void GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarD VarSetOps::AddElemD(compiler, gcVarPtrSetCur, varDsc->lvVarIndex); } } - +#endif /*****************************************************************************/ /*****************************************************************************/ diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7e90d26a3d6820..693c051d5f808b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1053,14 +1053,14 @@ bool GenTree::NeedsConsecutiveRegisters() const // Return Value: // Reg Mask of GenTree node. // -regMaskTP GenTree::gtGetContainedRegMask() +regMaskGpr GenTree::gtGetContainedRegMask() { if (!isContained()) { return isUsedFromReg() ? 
gtGetRegMask() : RBM_NONE; } - regMaskTP mask = 0; + regMaskGpr mask = RBM_NONE; for (GenTree* operand : Operands()) { mask |= operand->gtGetContainedRegMask(); @@ -1077,14 +1077,14 @@ regMaskTP GenTree::gtGetContainedRegMask() // Return Value: // Reg Mask of GenTree node. // -regMaskTP GenTree::gtGetRegMask() const +RegBitSet64 GenTree::gtGetRegMask() const { - regMaskTP resultMask; + RegBitSet64 resultMask = RBM_NONE; if (IsMultiRegCall()) { resultMask = genRegMask(GetRegNum()); - resultMask |= AsCall()->GetOtherRegMask(); + resultMask |= AsCall()->GetOtherRegMask().GetGprFloatCombinedMask(); } else if (IsCopyOrReloadOfMultiRegCall()) { @@ -1096,13 +1096,12 @@ regMaskTP GenTree::gtGetRegMask() const const GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall(); const unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); - resultMask = RBM_NONE; for (unsigned i = 0; i < regCount; ++i) { regNumber reg = copyOrReload->GetRegNumByIdx(i); if (reg != REG_NA) { - resultMask |= genRegMask(reg); + resultMask |= reg; } } } @@ -1112,12 +1111,11 @@ regMaskTP GenTree::gtGetRegMask() const const GenTreePutArgSplit* splitArg = AsPutArgSplit(); const unsigned regCount = splitArg->gtNumRegs; - resultMask = RBM_NONE; for (unsigned i = 0; i < regCount; ++i) { regNumber reg = splitArg->GetRegNumByIdx(i); assert(reg != REG_NA); - resultMask |= genRegMask(reg); + resultMask |= reg; } } #endif // FEATURE_ARG_SPLIT @@ -1129,6 +1127,81 @@ regMaskTP GenTree::gtGetRegMask() const return resultMask; } +//--------------------------------------------------------------- +// gtGetGprRegMask: Get the gpr reg mask of the node. +// +// Arguments: +// None +// +// Return Value: +// Reg Mask of GenTree node. +// +// Note: This method would populate the reg mask with only the GPR registers. +regMaskGpr GenTree::gtGetGprRegMask() const +{ + regMaskGpr resultMask = RBM_NONE; + + if (IsMultiRegCall()) + { + regNumber reg = GetRegNum(); + resultMask |= -static_cast(!regIndexForRegister(reg)) & genRegMask(reg); + +#if FEATURE_MULTIREG_RET + const GenTreeCall* call = AsCall(); + for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) + { + regNumber otherReg = (regNumber)call->gtOtherRegs[i]; + if (otherReg != REG_NA) + { + resultMask |= -static_cast(!regIndexForRegister(otherReg)) & genRegMask(otherReg); + continue; + } + break; + } +#endif + } + else if (IsCopyOrReloadOfMultiRegCall()) + { + // A multi-reg copy or reload, will have valid regs for only those + // positions that need to be copied or reloaded. Hence we need + // to consider only those registers for computing reg mask. 
+ + const GenTreeCopyOrReload* copyOrReload = AsCopyOrReload(); + const GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall(); + const unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = copyOrReload->GetRegNumByIdx(i); + if (reg != REG_NA) + { + resultMask |= -static_cast(!regIndexForRegister(reg)) & genRegMask(reg); + } + } + } +#if FEATURE_ARG_SPLIT + else if (compFeatureArgSplit() && OperIsPutArgSplit()) + { + const GenTreePutArgSplit* splitArg = AsPutArgSplit(); + const unsigned regCount = splitArg->gtNumRegs; + + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = splitArg->GetRegNumByIdx(i); + assert(reg != REG_NA); + resultMask |= -static_cast(!regIndexForRegister(reg)) & genRegMask(reg); + } + } +#endif // FEATURE_ARG_SPLIT + else + { + regNumber reg = GetRegNum(); + resultMask |= -static_cast(!regIndexForRegister(reg)) & genRegMask(reg); + } + + return resultMask; +} + void GenTreeFieldList::AddField(Compiler* compiler, GenTree* node, unsigned offset, var_types type) { m_uses.AddUse(new (compiler, CMK_ASTNode) Use(node, offset, type)); @@ -2155,16 +2228,17 @@ bool GenTreeCall::NeedsVzeroupper(Compiler* comp) // Return Value: // Reg mask of gtOtherRegs of call node. // -regMaskTP GenTreeCall::GetOtherRegMask() const +AllRegsMask GenTreeCall::GetOtherRegMask() const { - regMaskTP resultMask = RBM_NONE; + AllRegsMask resultMask; #if FEATURE_MULTIREG_RET for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) { - if (gtOtherRegs[i] != REG_NA) + regNumber otherReg = (regNumber)gtOtherRegs[i]; + if (otherReg != REG_NA) { - resultMask |= genRegMask((regNumber)gtOtherRegs[i]); + resultMask |= otherReg; continue; } break; @@ -27568,14 +27642,15 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx, CorInfoCallConvExtension // of return registers and wants to know the set of return registers. // // static -regMaskTP ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) const +AllRegsMask ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) const { - regMaskTP resultMask = RBM_NONE; + AllRegsMask resultMask; unsigned count = GetReturnRegCount(); for (unsigned i = 0; i < count; ++i) { - resultMask |= genRegMask(GetABIReturnReg(i, callConv)); + regNumber reg = GetABIReturnReg(i, callConv); + resultMask.AddRegNumInMask(reg); } return resultMask; @@ -27595,7 +27670,7 @@ regMaskTP ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) co // Return Value: // Count of available temporary registers in given set. // -unsigned GenTree::AvailableTempRegCount(regMaskTP mask /* = (regMaskTP)-1 */) const +unsigned GenTree::AvailableTempRegCount(regMaskOnlyOne mask /* = (regMaskOnlyOne)-1 */) const { return genCountBits(gtRsvdRegs & mask); } @@ -27612,11 +27687,11 @@ unsigned GenTree::AvailableTempRegCount(regMaskTP mask /* = (regMaskTP)-1 */) co // Return Value: // Available temporary register in given mask. // -regNumber GenTree::GetSingleTempReg(regMaskTP mask /* = (regMaskTP)-1 */) +regNumber GenTree::GetSingleTempReg(regMaskOnlyOne mask /* = (regMaskOnlyOne)-1 */) { - regMaskTP availableSet = gtRsvdRegs & mask; + regMaskOnlyOne availableSet = gtRsvdRegs & mask; assert(genCountBits(availableSet) == 1); - regNumber tempReg = genRegNumFromMask(availableSet); + regNumber tempReg = genRegNumFromMask(availableSet MORE_THAN_64_REG_ARG(TypeGet())); INDEBUG(gtRsvdRegs &= ~availableSet;) // Remove the register from the set, so it can't be used again. 
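// The gtGetGprRegMask code above filters each register into the result without a branch.
// Written out as a standalone helper (a sketch: the cast's target type is not visible in the
// diff text and is assumed to be regMaskGpr, and regIndexForRegister is assumed to return 0
// for general-purpose registers and a non-zero class index otherwise):

static regMaskGpr includeIfGpr(regNumber reg)
{
    // !regIndexForRegister(reg) is 1 exactly for GPRs; negating it as an unsigned mask yields
    // all-ones for GPRs and zero otherwise, so the AND keeps genRegMask(reg) only when reg is
    // a general-purpose register.
    return -static_cast<regMaskGpr>(!regIndexForRegister(reg)) & genRegMask(reg);
}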
return tempReg; } @@ -27633,11 +27708,11 @@ regNumber GenTree::GetSingleTempReg(regMaskTP mask /* = (regMaskTP)-1 */) // Return Value: // Available temporary register in given mask. // -regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */) +regNumber GenTree::ExtractTempReg(regMaskOnlyOne mask /* = (regMaskOnlyOne)-1 */) { - regMaskTP availableSet = gtRsvdRegs & mask; + regMaskOnlyOne availableSet = gtRsvdRegs & mask; assert(genCountBits(availableSet) >= 1); - regNumber tempReg = genFirstRegNumFromMask(availableSet); + regNumber tempReg = genFirstRegNumFromMask(availableSet MORE_THAN_64_REG_ARG(TypeGet())); gtRsvdRegs ^= genRegMask(tempReg); return tempReg; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index d5dbad500c16d2..65114ca89c2488 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -945,8 +945,9 @@ struct GenTree int GetRegisterDstCount(Compiler* compiler) const; - regMaskTP gtGetRegMask() const; - regMaskTP gtGetContainedRegMask(); + RegBitSet64 gtGetRegMask() const; + regMaskGpr gtGetGprRegMask() const; + regMaskGpr gtGetContainedRegMask(); GenTreeFlags gtFlags; @@ -956,11 +957,11 @@ struct GenTree ValueNumPair gtVNPair; - regMaskSmall gtRsvdRegs; // set of fixed trashed registers + regMaskOnlyOne gtRsvdRegs; // set of fixed trashed registers - unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const; - regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1); - regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1); + unsigned AvailableTempRegCount(regMaskOnlyOne mask = (regMaskOnlyOne)-1) const; + regNumber GetSingleTempReg(regMaskOnlyOne mask = (regMaskOnlyOne)-1); + regNumber ExtractTempReg(regMaskOnlyOne mask = (regMaskOnlyOne)-1); void SetVNsFromNode(GenTree* tree) { @@ -4360,7 +4361,7 @@ struct ReturnTypeDesc regNumber GetABIReturnReg(unsigned idx, CorInfoCallConvExtension callConv) const; // Get reg mask of ABI return registers - regMaskTP GetABIReturnRegs(CorInfoCallConvExtension callConv) const; + AllRegsMask GetABIReturnRegs(CorInfoCallConvExtension callConv) const; }; class TailCallSiteInfo @@ -5169,7 +5170,7 @@ struct GenTreeCall final : public GenTree #endif // TARGET_XARCH // Get reg mask of all the valid registers of gtOtherRegs array - regMaskTP GetOtherRegMask() const; + AllRegsMask GetOtherRegMask() const; GenTreeFlags GetRegSpillFlagByIdx(unsigned idx) const { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 79aae2c3345491..8d8a345ec90638 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2012,13 +2012,20 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return ins_Copy(dstType); } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (genIsValidMaskReg(srcReg)) { +#if defined(TARGET_XARCH) // mask to int return INS_kmovq_gpr; +#elif defined(TARGET_ARM64) + unreached(); + return INS_mov; // TODO-SVE: needs testing +#else + unreached(); +#endif } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS // float to int assert(genIsValidFloatReg(srcReg)); @@ -2255,13 +2262,13 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy return ins_Store(dstType, aligned); } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#ifdef FEATURE_MASKED_HW_INTRINSICS if (genIsValidMaskReg(srcReg)) { // mask to int, treat as mask so it works on 32-bit return ins_Store(TYP_MASK, aligned); } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif 
// FEATURE_MASKED_HW_INTRINSICS // float to int, treat as float to float assert(genIsValidFloatReg(srcReg)); @@ -2617,7 +2624,7 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla #error "Unknown TARGET" #endif - regSet.verifyRegUsed(reg); + regSet.verifyGprRegUsed(reg); } /*****************************************************************************/ diff --git a/src/coreclr/jit/jitgcinfo.h b/src/coreclr/jit/jitgcinfo.h index 02fd49cead9cb3..a04223f0c43c74 100644 --- a/src/coreclr/jit/jitgcinfo.h +++ b/src/coreclr/jit/jitgcinfo.h @@ -93,14 +93,16 @@ class GCInfo void gcResetForBB(); - void gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput = false)); - void gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput = false)); - void gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput = false)); + void gcMarkRegSetGCref(regMaskGpr regMask DEBUGARG(bool forceOutput = false)); + void gcMarkRegSetByref(regMaskGpr regMask DEBUGARG(bool forceOutput = false)); + void gcMarkGprRegNpt(regNumber reg DEBUGARG(bool forceOutput = false)); + void gcMarkRegNpt(regNumber reg DEBUGARG(bool forceOutput = false)); + void gcMarkRegSetNpt(regMaskGpr regMask DEBUGARG(bool forceOutput = false)); void gcMarkRegPtrVal(regNumber reg, var_types type); #ifdef DEBUG - void gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput = false)); - void gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput = false)); + void gcDspGCrefSetChanges(regMaskGpr gcRegGCrefSetNew DEBUGARG(bool forceOutput = false)); + void gcDspByrefSetChanges(regMaskGpr gcRegByrefSetNew DEBUGARG(bool forceOutput = false)); #endif // DEBUG /*****************************************************************************/ @@ -111,8 +113,8 @@ class GCInfo // values. // - regMaskTP gcRegGCrefSetCur; // current regs holding GCrefs - regMaskTP gcRegByrefSetCur; // current regs holding Byrefs + regMaskGpr gcRegGCrefSetCur; // current regs holding GCrefs + regMaskGpr gcRegByrefSetCur; // current regs holding Byrefs VARSET_TP gcTrkStkPtrLcls; // set of tracked stack ptr lcls (GCref and Byref) - no args VARSET_TP gcVarPtrSetCur; // currently live part of "gcTrkStkPtrLcls" @@ -390,9 +392,11 @@ class GCInfo #endif // JIT32_GCENCODER #endif // DUMP_GC_TABLES +#if 0 public: // This method updates the appropriate reg masks when a variable is moved. - void gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc); + void gcUpdateForRegVarMove(regMaskOnlyOne srcMask, regMaskOnlyOne dstMask, LclVarDsc* varDsc); +#endif private: ReturnKind getReturnKind(); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 774eee3fe3cb20..705c57158d242e 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -630,7 +630,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #ifdef TARGET_ARM - regMaskTP doubleAlignMask = RBM_NONE; + regMaskGpr doubleAlignMask = RBM_NONE; #endif // TARGET_ARM // Skip skipArgs arguments from the signature. 
@@ -830,7 +830,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { break; } - regMaskTP regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT); + regMaskGpr regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT); if (cAlign == 2) { doubleAlignMask |= regMask; @@ -1750,7 +1750,9 @@ void Compiler::lvaClassifyParameterABI() SwiftABIClassifier classifier(cInfo); lvaClassifyParameterABI(classifier); - regMaskTP argRegs = RBM_NONE; + // There is an assumption that arguments are never passed in predicate + // registers on OSX/arm64. + RegBitSet64 argRegs = RBM_NONE; // The calling convention details computed by the old ABI classifier // are wrong since it does not handle the Swift ABI for structs @@ -5801,8 +5803,9 @@ void Compiler::lvaFixVirtualFrameOffsets() } #ifdef TARGET_ARM -bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask) +bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskGpr preSpillMask) { + assert(IsGprRegMask(preSpillMask)); const LclVarDsc& desc = lvaTable[lclNum]; return desc.lvIsRegArg && (preSpillMask & genRegMask(desc.GetArgReg())); } @@ -6020,8 +6023,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() unsigned argLcls = 0; // Take care of pre spill registers first. - regMaskTP preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false); - regMaskTP tempMask = RBM_NONE; + regMaskGpr preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false); + regMaskGpr tempMask = RBM_NONE; for (unsigned i = 0, preSpillLclNum = lclNum; i < argSigLen; ++i, ++preSpillLclNum) { if (lvaIsPreSpilled(preSpillLclNum, preSpillMask)) @@ -6254,7 +6257,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg // in the prolog, so we have to do SetStackOffset() here // - regMaskTP regMask = genRegMask(varDsc->GetArgReg()); + singleRegMask regMask = genRegMask(varDsc->GetArgReg()); if (codeGen->regSet.rsMaskPreSpillRegArg & regMask) { // Signature: void foo(struct_8, int, struct_4) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index f96a2e6a746a7a..055db0d1e443ba 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -234,13 +234,13 @@ weight_t LinearScan::getWeight(RefPosition* refPos) // allRegs represents a set of registers that can // be used to allocate the specified type in any point // in time (more of a 'bank' of registers). 
-regMaskTP LinearScan::allRegs(RegisterType rt) +regMaskOnlyOne LinearScan::allRegs(RegisterType rt) { assert((rt != TYP_UNDEF) && (rt != TYP_STRUCT)); return *availableRegs[rt]; } -regMaskTP LinearScan::allByteRegs() +regMaskGpr LinearScan::allByteRegs() { #ifdef TARGET_X86 return availableIntRegs & RBM_BYTE_REGS; @@ -249,7 +249,7 @@ regMaskTP LinearScan::allByteRegs() #endif } -regMaskTP LinearScan::allSIMDRegs() +regMaskFloat LinearScan::allSIMDRegs() { return availableFloatRegs; } @@ -262,7 +262,7 @@ regMaskTP LinearScan::allSIMDRegs() // Return Value: // Register mask of the SSE/VEX-only SIMD registers // -regMaskTP LinearScan::lowSIMDRegs() +regMaskFloat LinearScan::lowSIMDRegs() { #if defined(TARGET_AMD64) return (availableFloatRegs & RBM_LOWFLOAT); @@ -278,25 +278,27 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo if (nextRefPosition == nullptr) { nextLocation = MaxLocation; - fixedRegs &= ~genRegMask(regRecord->regNum); + fixedRegs.RemoveRegNumFromMask(regRecord->regNum); } else { nextLocation = nextRefPosition->nodeLocation; - fixedRegs |= genRegMask(regRecord->regNum); + fixedRegs.AddRegNumInMask(regRecord->regNum); } nextFixedRef[regRecord->regNum] = nextLocation; } -regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition) +regMaskOnlyOne LinearScan::getMatchingConstants(regMaskOnlyOne mask, + Interval* currentInterval, + RefPosition* refPosition) { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); - regMaskTP candidates = (mask & m_RegistersWithConstants); - regMaskTP result = RBM_NONE; + regMaskOnlyOne candidates = (mask & m_RegistersWithConstants.GetRegMaskForType(currentInterval->registerType)); + regMaskOnlyOne result = RBM_NONE; while (candidates != RBM_NONE) { - regNumber regNum = genFirstRegNumFromMask(candidates); - regMaskTP candidateBit = genRegMask(regNum); + regNumber regNum = genFirstRegNumFromMask(candidates MORE_THAN_64_REG_ARG(currentInterval->registerType)); + singleRegMask candidateBit = genRegMask(regNum); candidates ^= candidateBit; RegRecord* physRegRecord = getRegisterRecord(regNum); @@ -384,30 +386,33 @@ void LinearScan::updateSpillCost(regNumber reg, Interval* interval) // interval - Interval of Refposition. // assignedReg - Assigned register for this refposition. 
// -void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition, - regMaskTP regsBusy, - regMaskTP* regsToFree, - regMaskTP* delayRegsToFree DEBUG_ARG(Interval* interval) +void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition, + regMaskOnlyOne regsBusy, + AllRegsMask* regsToFree, + AllRegsMask* delayRegsToFree, + RegisterType regType DEBUG_ARG(Interval* interval) DEBUG_ARG(regNumber assignedReg)) { - regsInUseThisLocation |= regsBusy; + assert(compiler->IsOnlyOneRegMask(regsBusy)); + + regsInUseThisLocation.AddRegMaskForType(regsBusy, regType); if (refPosition.lastUse) { if (refPosition.delayRegFree) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, interval, assignedReg)); - *delayRegsToFree |= regsBusy; - regsInUseNextLocation |= regsBusy; + delayRegsToFree->AddRegMaskForType(regsBusy, regType); + regsInUseNextLocation.AddRegMaskForType(regsBusy, regType); } else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, interval, assignedReg)); - *regsToFree |= regsBusy; + regsToFree->AddRegMaskForType(regsBusy, regType); } } else if (refPosition.delayRegFree) { - regsInUseNextLocation |= regsBusy; + regsInUseNextLocation.AddRegMaskForType(regsBusy, regType); } } @@ -416,7 +421,7 @@ void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition, // for use as internal float registers. // // Return Value: -// The set of registers (as a regMaskTP). +// The set of registers (as a regMaskFloat). // // Notes: // compFloatingPointUsed is only required to be set if it is possible that we @@ -425,7 +430,7 @@ void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition, // that it will select a callee-save register. But to be safe, we restrict // the set of candidates if compFloatingPointUsed is not already set. // -regMaskTP LinearScan::internalFloatRegCandidates() +regMaskFloat LinearScan::internalFloatRegCandidates() { needNonIntegerRegisters = true; @@ -470,12 +475,16 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // New regMask that has minRegCount registers after intersection. // Otherwise returns regMaskActual. // -regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, - regMaskTP regMaskActual, - regMaskTP regMaskConstraint, - unsigned minRegCount) +regMaskOnlyOne LinearScan::getConstrainedRegMask(RefPosition* refPosition, + RegisterType regType, + regMaskOnlyOne regMaskActual, + regMaskOnlyOne regMaskConstraint, + unsigned minRegCount) { - regMaskTP newMask = regMaskActual & regMaskConstraint; + assert(compiler->IsOnlyOneRegMask(regMaskActual)); + assert(compiler->IsOnlyOneRegMask(regMaskConstraint)); + + regMaskOnlyOne newMask = regMaskActual & regMaskConstraint; if (genCountBits(newMask) < minRegCount) { // Constrained mask does not have minimum required registers needed. @@ -484,7 +493,8 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { - regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; + regMaskOnlyOne busyRegs = RBM_NONE; + busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegMaskForType(regType); if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. @@ -501,7 +511,9 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, // them based on the current stress options. 
// // Arguments: -// mask - The current mask of register candidates for a node +// refPosition - The refPosition for which we want to stress the register +// mask - The current mask of register candidates for a node +// regtype - The registerType // // Return Value: // A possibly-modified mask, based on the value of DOTNET_JitStressRegs. @@ -510,8 +522,10 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, // This is the method used to implement the stress options that limit // the set of registers considered for allocation. // -regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) +regMaskOnlyOne LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskOnlyOne mask, RegisterType regtype) { + assert(compiler->IsOnlyOneRegMask(mask)); + #ifdef TARGET_ARM64 if ((refPosition != nullptr) && refPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) { @@ -527,31 +541,60 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) { // The refPosition could be null, for example when called // by getTempRegForResolution(). - int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1; + int minRegCount = 1; + if (refPosition != nullptr) + { + minRegCount = refPosition->minRegCandidateCount; + RegisterType currRegType = refPosition->getRegisterType(); + assert(regtype == currRegType); + } + + regMaskOnlyOne calleeSaved = RBM_NONE; + regMaskOnlyOne calleeTrash = RBM_NONE; + if (varTypeUsesIntReg(regtype)) + { + calleeSaved = RBM_INT_CALLEE_SAVED; + calleeTrash = RBM_INT_CALLEE_TRASH; + } + else if (varTypeUsesFloatReg(regtype)) + { + calleeSaved = RBM_FLT_CALLEE_SAVED; + calleeTrash = RBM_FLT_CALLEE_TRASH; + } + else + { +#ifdef FEATURE_MASKED_HW_INTRINSICS + assert(varTypeUsesMaskReg(regtype)); + calleeSaved = RBM_MSK_CALLEE_SAVED; + calleeTrash = RBM_MSK_CALLEE_TRASH; +#else + unreached(); +#endif + } switch (getStressLimitRegs()) { case LSRA_LIMIT_CALLEE: if (!compiler->opts.compDbgEnC) { - mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_SAVED, minRegCount); + mask = getConstrainedRegMask(refPosition, regtype, mask, calleeSaved, minRegCount); } break; case LSRA_LIMIT_CALLER: { - mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_TRASH, minRegCount); + mask = getConstrainedRegMask(refPosition, regtype, mask, calleeTrash, minRegCount); } break; case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallIntSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regtype, mask, LsraLimitSmallIntSet, minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallFPSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regtype, mask, LsraLimitSmallFPSet, minRegCount); } break; @@ -559,7 +602,7 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) case LSRA_LIMIT_UPPER_SIMD_SET: if ((mask & LsraLimitUpperSimdSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitUpperSimdSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regtype, mask, LsraLimitUpperSimdSet, minRegCount); } break; #endif @@ -720,12 +763,14 @@ LinearScan::LinearScan(Compiler* theCompiler) #if defined(TARGET_AMD64) rbmAllFloat = compiler->rbmAllFloat; rbmFltCalleeTrash = compiler->rbmFltCalleeTrash; + assert(compiler->IsFloatRegMask(rbmAllFloat)); + 
assert(compiler->IsFloatRegMask(rbmFltCalleeTrash)); #endif // TARGET_AMD64 #if defined(TARGET_XARCH) rbmAllMask = compiler->rbmAllMask; rbmMskCalleeTrash = compiler->rbmMskCalleeTrash; - memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskOnlyOne) * TYP_COUNT); if (!compiler->canUseEvexEncoding()) { @@ -788,7 +833,7 @@ LinearScan::LinearScan(Compiler* theCompiler) availableFloatRegs = RBM_ALLFLOAT; availableDoubleRegs = RBM_ALLDOUBLE; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#ifdef FEATURE_MASKED_HW_INTRINSICS availableMaskRegs = RBM_ALLMASK; #endif @@ -800,7 +845,7 @@ LinearScan::LinearScan(Compiler* theCompiler) availableIntRegs &= ~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED; availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED; availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED; -#if defined(TARGET_XARCH) +#ifdef FEATURE_MASKED_HW_INTRINSICS availableMaskRegs &= ~RBM_MSK_CALLEE_SAVED; #endif // TARGET_XARCH } @@ -2728,7 +2773,7 @@ void LinearScan::setFrameType() // If we are using FPBASE as the frame register, we cannot also use it for // a local var. - regMaskTP removeMask = RBM_NONE; + regMaskGpr removeMask = RBM_NONE; if (frameType == FT_EBP_FRAME) { removeMask |= RBM_FPBASE; @@ -2810,8 +2855,8 @@ bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc) RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition) { assert(refPosition->getInterval() == currentInterval); - RegisterType regType = currentInterval->registerType; - regMaskTP candidates = refPosition->registerAssignment; + RegisterType regType = currentInterval->registerType; + regMaskOnlyOne candidates = refPosition->registerAssignment; #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The LoongArch64's ABI which the float args maybe passed by integer register // when no float register left but free integer register. 
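Editorial aside: the stressLimitRegs/getConstrainedRegMask hunks above pick the stress constraint from the callee-saved or callee-trash set that matches the register type, and back off when the constrained set falls below the minimum register count. A rough sketch of that shape under assumed names and placeholder mask values (RegClass and calleeTrashFor are invented; the real masks are the RBM_*_CALLEE_TRASH constants):

#include <bit>
#include <cstdint>

using Mask = uint64_t;

enum class RegClass { Int, Float, Predicate };

// Hypothetical per-class callee-trash masks; values are placeholders.
static Mask calleeTrashFor(RegClass cls)
{
    switch (cls)
    {
        case RegClass::Int:       return 0x000000FF;
        case RegClass::Float:     return 0x00FF0000;
        case RegClass::Predicate: return 0x0F000000;
    }
    return 0;
}

// Intersect candidates with the constraint, but keep the original set when the
// intersection would leave fewer than 'minRegCount' registers.
static Mask constrain(Mask actual, Mask constraint, unsigned minRegCount)
{
    Mask constrained = actual & constraint;
    return (std::popcount(constrained) < static_cast<int>(minRegCount)) ? actual : constrained;
}

int main()
{
    Mask candidates = 0x0000FFFF;
    Mask limited = constrain(candidates, calleeTrashFor(RegClass::Int), 2); // 0x000000FF
    return (limited == 0x000000FF) ? 0 : 1;
}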
@@ -2927,16 +2972,16 @@ regNumber LinearScan::allocateRegMinimal(Interval* currentInterva RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!enregisterLocalVars); - regNumber foundReg; - regMaskTP foundRegBit; - RegRecord* availablePhysRegRecord; + regNumber foundReg; + singleRegMask foundRegBit; + RegRecord* availablePhysRegRecord; foundRegBit = regSelector->selectMinimal(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { return REG_NA; } - foundReg = genRegNumFromMask(foundRegBit); + foundReg = genRegNumFromMask(foundRegBit MORE_THAN_64_REG_ARG(currentInterval->registerType)); availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && @@ -2989,14 +3034,14 @@ template regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regMaskTP foundRegBit = + singleRegMask foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { return REG_NA; } - regNumber foundReg = genRegNumFromMask(foundRegBit); + regNumber foundReg = genRegNumFromMask(foundRegBit MORE_THAN_64_REG_ARG(currentInterval->registerType)); RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && @@ -3225,8 +3270,8 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL // bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord) { - regMaskTP candidateBit = genRegMask(physRegRecord->regNum); - LsraLocation refLocation = refPosition->nodeLocation; + singleRegMask candidateBit = genRegMask(physRegRecord->regNum); + LsraLocation refLocation = refPosition->nodeLocation; // We shouldn't be calling this if we haven't already determined that the register is not // busy until the next kill. assert(!isRegBusy(physRegRecord->regNum, current->registerType)); @@ -3443,8 +3488,9 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) // Assign the given physical register interval to the given interval void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) { - regMaskTP assignedRegMask = genRegMask(regRec->regNum); - compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true)); + regNumber reg = regRec->regNum; + + compiler->codeGen->regSet.rsSetRegModified(reg DEBUGARG(true)); interval->assignedReg = regRec; checkAndAssignInterval(regRec, interval); @@ -3454,7 +3500,7 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) if (interval->isLocalVar) { // Prefer this register for future references - interval->updateRegisterPreferences(assignedRegMask); + interval->updateRegisterPreferences(genRegMask(reg)); } } @@ -3965,11 +4011,13 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) { // For each physical register that can hold a GC type, // if it is occupied by an interval of a GC type, spill that interval. 
- regMaskTP candidateRegs = killRefPosition->registerAssignment; + regMaskGpr candidateRegs = killRefPosition->registerAssignment; + assert(compiler->IsGprRegMask(candidateRegs)); + INDEBUG(bool killedRegs = false); while (candidateRegs != RBM_NONE) { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs MORE_THAN_64_REG_ARG(TYP_INT)); RegRecord* regRecord = getRegisterRecord(nextReg); Interval* assignedInterval = regRecord->assignedInterval; @@ -4052,18 +4100,25 @@ void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock) // The new register to use. // #ifdef DEBUG -regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs) +regNumber LinearScan::rotateBlockStartLocation(Interval* interval, + regNumber targetReg, + CONSTREF_AllRegsMask availableRegs) { if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE) { // If we're rotating the register locations at block boundaries, try to use // the next higher register number of the appropriate register type. - regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs; - regNumber firstReg = REG_NA; - regNumber newReg = REG_NA; + + regMaskOnlyOne allRegsMask = allRegs(interval->registerType); + RegisterType regType = interval->registerType; + regMaskOnlyOne candidateRegs = allRegsMask & availableRegs.GetRegMaskForType(regType); + + regNumber firstReg = REG_NA; + regNumber newReg = REG_NA; while (candidateRegs != RBM_NONE) { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); + regNumber nextReg = + genFirstRegNumFromMaskAndToggle(candidateRegs MORE_THAN_64_REG_ARG(interval->registerType)); if (nextReg > targetReg) { newReg = nextReg; @@ -4303,6 +4358,50 @@ void LinearScan::resetAllRegistersState() } } +#ifdef HAS_MORE_THAN_64_REGISTERS +void LinearScan::updateDeadCandidatesAtBlockStart(REF_AllRegsMask deadRegMask, VarToRegMap inVarToRegMap) +#else +void LinearScan::updateDeadCandidatesAtBlockStart(RegBitSet64 deadRegMask, VarToRegMap inVarToRegMap) +#endif // HAS_MORE_THAN_64_REGISTERS +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + while (!deadRegMask.IsEmpty()) +#else + while (deadRegMask != RBM_NONE) +#endif + { + regNumber reg = genFirstRegNumFromMaskAndToggle(deadRegMask); + RegRecord* physRegRecord = getRegisterRecord(reg); + + makeRegAvailable(reg, physRegRecord->registerType); + Interval* assignedInterval = physRegRecord->assignedInterval; + + if (assignedInterval != nullptr) + { + assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector()); + + if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord) + { + assignedInterval->isActive = false; + if (assignedInterval->getNextRefPosition() == nullptr) + { + unassignPhysReg(physRegRecord, nullptr); + } + if (!assignedInterval->IsUpperVector()) + { + inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK; + } + } + else + { + // This interval may still be active, but was in another register in an + // intervening block. + clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType)); + } + } + } +} + //------------------------------------------------------------------------ // processBlockStartLocations: Update var locations on entry to 'currentBlock' and clear constant // registers. 
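Editorial aside: updateDeadCandidatesAtBlockStart, like the later inActivateRegisters and freeRegisterMask helpers, is declared twice so it can take either a composite mask or a plain 64-bit mask depending on HAS_MORE_THAN_64_REGISTERS. A minimal sketch of that compile-time split, with CompositeMask as an invented stand-in for AllRegsMask:

#include <cstdint>

// #define HAS_MORE_THAN_64_REGISTERS   // e.g. targets whose registers no longer fit in 64 bits

#ifdef HAS_MORE_THAN_64_REGISTERS
struct CompositeMask
{
    uint64_t gprAndFloat;
    uint32_t predicate;
    bool IsEmpty() const { return (gprAndFloat | predicate) == 0; }
};
using WorkMask = CompositeMask;
static bool isEmpty(const WorkMask& m) { return m.IsEmpty(); }
#else
// All registers still fit in one 64-bit word, so a plain bit set is enough.
using WorkMask = uint64_t;
static bool isEmpty(WorkMask m) { return m == 0; }
#endif

// Pops registers one at a time until the mask is drained, the way the helpers do.
static void drain(WorkMask mask)
{
    while (!isEmpty(mask))
    {
#ifdef HAS_MORE_THAN_64_REGISTERS
        if (mask.predicate != 0) { mask.predicate &= (mask.predicate - 1); }
        else                     { mask.gprAndFloat &= (mask.gprAndFloat - 1); }
#else
        mask &= (mask - 1);
#endif
    }
}

int main()
{
    WorkMask none{};  // empty mask: drain returns immediately
    drain(none);
    return 0;
}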
@@ -4362,9 +4461,9 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } // If we are rotating register assignments at block boundaries, we want to make the // inactive registers available for the rotation. - regMaskTP inactiveRegs = RBM_NONE; + AllRegsMask inactiveRegs; #endif // DEBUG - regMaskTP liveRegs = RBM_NONE; + AllRegsMask liveRegs; VarSetOps::Iter iter(compiler, currentLiveVars); unsigned varIndex = 0; while (iter.NextElem(&varIndex)) @@ -4440,7 +4539,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) // Case #1 above. assert(getVarReg(predVarToRegMap, varIndex) == targetReg || getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE); - } + } // Keep the register assignment - if another var has it, it will get unassigned. else if (!nextRefPosition->copyReg) { // case #2 above. @@ -4459,7 +4558,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) assert(targetReg != REG_STK); assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg && interval->assignedReg->assignedInterval == interval); - liveRegs |= getRegMask(targetReg, interval->registerType); + liveRegs.AddRegNum(targetReg, interval->registerType); continue; } } @@ -4489,8 +4588,8 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) // likely to match other assignments this way. targetReg = interval->physReg; interval->isActive = true; - liveRegs |= getRegMask(targetReg, interval->registerType); - INDEBUG(inactiveRegs |= genRegMask(targetReg)); + liveRegs.AddRegNum(targetReg, interval->registerType); + INDEBUG(inactiveRegs |= targetReg); setVarReg(inVarToRegMap, varIndex, targetReg); } else @@ -4501,7 +4600,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) if (targetReg != REG_STK) { RegRecord* targetRegRecord = getRegisterRecord(targetReg); - liveRegs |= getRegMask(targetReg, interval->registerType); + liveRegs.AddRegNum(targetReg, interval->registerType); if (!allocationPassComplete) { updateNextIntervalRef(targetReg, interval); @@ -4538,7 +4637,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(targetRegRecord); // Use TYP_FLOAT to get the regmask of just the half reg. 
- liveRegs &= ~getRegMask(anotherHalfRegRec->regNum, TYP_FLOAT); + liveRegs.RemoveRegNum(anotherHalfRegRec->regNum, TYP_FLOAT); } #endif // TARGET_ARM @@ -4565,7 +4664,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); - if ((liveRegs & genRegMask(reg)) == 0) + if (!liveRegs.IsRegNumInMask(reg)) { makeRegAvailable(reg, physRegRecord->registerType); Interval* assignedInterval = physRegRecord->assignedInterval; @@ -4622,43 +4721,17 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } } #else - regMaskTP deadCandidates = ~liveRegs; + AllRegsMask deadCandidates = ~liveRegs; // Only focus on actual registers present deadCandidates &= actualRegistersMask; - while (deadCandidates != RBM_NONE) - { - regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates); - RegRecord* physRegRecord = getRegisterRecord(reg); - - makeRegAvailable(reg, physRegRecord->registerType); - Interval* assignedInterval = physRegRecord->assignedInterval; - - if (assignedInterval != nullptr) - { - assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector()); +#ifdef HAS_MORE_THAN_64_REGISTERS + updateDeadCandidatesAtBlockStart(deadCandidates, inVarToRegMap); +#else + updateDeadCandidatesAtBlockStart(deadCandidates.GetAllRegistersMask(), inVarToRegMap); +#endif // HAS_MORE_THAN_64_REGISTERS - if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord) - { - assignedInterval->isActive = false; - if (assignedInterval->getNextRefPosition() == nullptr) - { - unassignPhysReg(physRegRecord, nullptr); - } - if (!assignedInterval->IsUpperVector()) - { - inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK; - } - } - else - { - // This interval may still be active, but was in another register in an - // intervening block. 
- clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType)); - } - } - } #endif // TARGET_ARM } @@ -4754,6 +4827,25 @@ void LinearScan::makeRegisterInactive(RegRecord* physRegRecord) } } +#ifdef HAS_MORE_THAN_64_REGISTERS +void LinearScan::inActivateRegisters(REF_AllRegsMask inactiveMask) +#else +void LinearScan::inActivateRegisters(RegBitSet64 inactiveMask) +#endif // HAS_MORE_THAN_64_REGISTERS +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + while (!inactiveMask.IsEmpty()) +#else + while (inactiveMask != RBM_NONE) +#endif // HAS_MORE_THAN_64_REGISTERS + { + regNumber nextReg = genFirstRegNumFromMaskAndToggle(inactiveMask); + RegRecord* regRecord = getRegisterRecord(nextReg); + clearSpillCost(regRecord->regNum, regRecord->registerType); + makeRegisterInactive(regRecord); + } +} + //------------------------------------------------------------------------ // LinearScan::freeRegister: Make a register available for use // @@ -4814,25 +4906,42 @@ void LinearScan::freeRegister(RegRecord* physRegRecord) // Arguments: // regsToFree - the mask of registers to free // -void LinearScan::freeRegisters(regMaskTP regsToFree) +void LinearScan::freeRegisters(REF_AllRegsMask regsToFree) { - if (regsToFree == RBM_NONE) + if (regsToFree.IsEmpty()) { return; } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); makeRegsAvailable(regsToFree); - while (regsToFree != RBM_NONE) +#ifdef HAS_MORE_THAN_64_REGISTERS + freeRegisterMask(regsToFree); +#else + freeRegisterMask(regsToFree.GetAllRegistersMask()); +#endif // HAS_MORE_THAN_64_REGISTERS +} + +#ifdef HAS_MORE_THAN_64_REGISTERS +void LinearScan::freeRegisterMask(REF_AllRegsMask freeMask) +#else +void LinearScan::freeRegisterMask(RegBitSet64 freeMask) +#endif // HAS_MORE_THAN_64_REGISTERS +{ +#ifdef HAS_MORE_THAN_64_REGISTERS + while (!freeMask.IsEmpty()) +#else + while (freeMask != RBM_NONE) +#endif // HAS_MORE_THAN_64_REGISTERS { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(freeMask); RegRecord* regRecord = getRegisterRecord(nextReg); #ifdef TARGET_ARM if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) { assert(genIsValidDoubleReg(nextReg)); - regsToFree ^= genRegMask(regNumber(nextReg + 1)); + freeMask ^= genRegMask(regNumber(nextReg + 1)); } #endif freeRegister(regRecord); @@ -4879,7 +4988,7 @@ void LinearScan::allocateRegistersMinimal() "--------------------\n"); // Start with a small set of commonly used registers, so that we don't keep having to print a new title. // Include all the arg regs, as they may already have values assigned to them. - registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet | RBM_ARG_REGS; + registersToDump = AllRegsMask(LsraLimitSmallIntSet | RBM_ARG_REGS, LsraLimitSmallFPSet, RBM_NONE); dumpRegRecordHeader(); // Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop. 
printf(indentFormat, ""); @@ -4888,14 +4997,14 @@ void LinearScan::allocateRegistersMinimal() BasicBlock* currentBlock = nullptr; - LsraLocation prevLocation = MinLocation; - regMaskTP regsToFree = RBM_NONE; - regMaskTP delayRegsToFree = RBM_NONE; - regMaskTP regsToMakeInactive = RBM_NONE; - regMaskTP delayRegsToMakeInactive = RBM_NONE; - regMaskTP copyRegsToFree = RBM_NONE; - regsInUseThisLocation = RBM_NONE; - regsInUseNextLocation = RBM_NONE; + LsraLocation prevLocation = MinLocation; + AllRegsMask regsToFree; + AllRegsMask delayRegsToFree; + AllRegsMask regsToMakeInactive; + AllRegsMask delayRegsToMakeInactive; + AllRegsMask copyRegsToFree; + regsInUseThisLocation.Clear(); + regsInUseNextLocation.Clear(); // This is the most recent RefPosition for which a register was allocated // - currently only used for DEBUG but maintained in non-debug, for clarity of code @@ -4911,22 +5020,20 @@ void LinearScan::allocateRegistersMinimal() // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. - regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); - while (tempRegsToMakeInactive != RBM_NONE) - { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive); - RegRecord* regRecord = getRegisterRecord(nextReg); - clearSpillCost(regRecord->regNum, regRecord->registerType); - makeRegisterInactive(regRecord); - } + AllRegsMask tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); +#ifdef HAS_MORE_THAN_64_REGISTERS + inActivateRegisters(tempRegsToMakeInactive); +#else + inActivateRegisters(tempRegsToMakeInactive.GetAllRegistersMask()); +#endif // HAS_MORE_THAN_64_REGISTERS + if (currentRefPosition.nodeLocation > prevLocation) { makeRegsAvailable(regsToMakeInactive); // TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want // to mark them as free yet. - regsToMakeInactive |= delayRegsToMakeInactive; - regsToMakeInactive = delayRegsToMakeInactive; - delayRegsToMakeInactive = RBM_NONE; + regsToMakeInactive = delayRegsToMakeInactive; + delayRegsToMakeInactive.Clear(); } #ifdef DEBUG @@ -4983,24 +5090,24 @@ void LinearScan::allocateRegistersMinimal() { // CopyRegs are simply made available - we don't want to make the associated interval inactive. makeRegsAvailable(copyRegsToFree); - copyRegsToFree = RBM_NONE; + copyRegsToFree.Clear(); regsInUseThisLocation = regsInUseNextLocation; - regsInUseNextLocation = RBM_NONE; - if ((regsToFree | delayRegsToFree) != RBM_NONE) + regsInUseNextLocation.Clear(); + if (!((regsToFree | delayRegsToFree).IsEmpty())) { freeRegisters(regsToFree); - if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE)) + if ((currentLocation > (prevLocation + 1)) && (!delayRegsToFree.IsEmpty())) { // We should never see a delayReg that is delayed until a Location that has no RefPosition // (that would be the RefPosition that it was supposed to interfere with). assert(!"Found a delayRegFree associated with Location with no reference"); // However, to be cautious for the Release build case, we will free them. 
freeRegisters(delayRegsToFree); - delayRegsToFree = RBM_NONE; - regsInUseThisLocation = RBM_NONE; + delayRegsToFree.Clear(); + regsInUseThisLocation.Clear(); } - regsToFree = delayRegsToFree; - delayRegsToFree = RBM_NONE; + regsToFree = delayRegsToFree; + delayRegsToFree.Clear(); #ifdef DEBUG verifyFreeRegisters(regsToFree); @@ -5042,11 +5149,11 @@ void LinearScan::allocateRegistersMinimal() { // Free any delayed regs (now in regsToFree) before processing the block boundary freeRegisters(regsToFree); - regsToFree = RBM_NONE; - regsInUseThisLocation = RBM_NONE; - regsInUseNextLocation = RBM_NONE; - handledBlockEnd = true; - curBBStartLocation = currentRefPosition.nodeLocation; + regsToFree.Clear(); + regsInUseThisLocation.Clear(); + regsInUseNextLocation.Clear(); + handledBlockEnd = true; + curBBStartLocation = currentRefPosition.nodeLocation; if (currentBlock == nullptr) { currentBlock = startBlockSequence(); @@ -5100,13 +5207,14 @@ void LinearScan::allocateRegistersMinimal() } #endif // TARGET_ARM } - regsInUseThisLocation |= currentRefPosition.registerAssignment; + regsInUseThisLocation.AddRegMaskForType(currentRefPosition.registerAssignment, regRecord->registerType); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); #ifdef SWIFT_SUPPORT if (currentRefPosition.delayRegFree) { - regsInUseNextLocation |= currentRefPosition.registerAssignment; + regsInUseNextLocation.AddRegMaskForType(currentRefPosition.registerAssignment, + regRecord->registerType); } #endif // SWIFT_SUPPORT } @@ -5168,8 +5276,8 @@ void LinearScan::allocateRegistersMinimal() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); } - regMaskTP assignedRegBit = RBM_NONE; - bool isInRegister = false; + singleRegMask assignedRegBit = RBM_NONE; + bool isInRegister = false; if (assignedRegister != REG_NA) { isInRegister = true; @@ -5233,9 +5341,9 @@ void LinearScan::allocateRegistersMinimal() { regNumber copyReg = assignCopyRegMinimal(¤tRefPosition); - lastAllocatedRefPosition = ¤tRefPosition; - regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); - regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + lastAllocatedRefPosition = ¤tRefPosition; + regMaskOnlyOne copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskOnlyOne assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); // For consecutive register, although it shouldn't matter what the assigned register was, // because we have just assigned it `copyReg` and that's the one in-use, and not the @@ -5243,10 +5351,12 @@ void LinearScan::allocateRegistersMinimal() // happened to be restored in assignedReg, we would need assignedReg to stay alive because // we will copy the entire vector value from it to the `copyReg`. updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree, - &delayRegsToFree DEBUG_ARG(currentInterval) DEBUG_ARG(assignedRegister)); + &delayRegsToFree, + currentInterval->registerType DEBUG_ARG(currentInterval) + DEBUG_ARG(assignedRegister)); if (!currentRefPosition.lastUse) { - copyRegsToFree |= copyRegMask; + copyRegsToFree.AddRegNum(copyReg, currentInterval->registerType); } // For tree temp (non-localVar) interval, we will need an explicit move. 
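Editorial aside: the allocateRegistersMinimal hunks above replace several regMaskTP locals (regsToFree, delayRegsToFree, copyRegsToFree, and friends) with the composite mask type. A simplified model of the operations those hunks rely on (IsEmpty, Clear, AddRegNum, and the set algebra); the field layout and names here are assumptions, not the actual AllRegsMask declaration:

#include <cstdint>

enum class RegClass { Int, Float, Predicate };

struct RegsMaskSketch
{
    uint64_t gpr = 0, flt = 0, pred = 0;

    bool IsEmpty() const { return (gpr | flt | pred) == 0; }
    void Clear() { gpr = flt = pred = 0; }

    void AddRegNum(unsigned regIndex, RegClass cls)
    {
        uint64_t bit = 1ull << regIndex;
        if (cls == RegClass::Int)        gpr  |= bit;
        else if (cls == RegClass::Float) flt  |= bit;
        else                             pred |= bit;
    }

    RegsMaskSketch operator|(const RegsMaskSketch& o) const { return {gpr | o.gpr, flt | o.flt, pred | o.pred}; }
    RegsMaskSketch operator&(const RegsMaskSketch& o) const { return {gpr & o.gpr, flt & o.flt, pred & o.pred}; }
    RegsMaskSketch operator~() const { return {~gpr, ~flt, ~pred}; }
    RegsMaskSketch& operator|=(const RegsMaskSketch& o) { gpr |= o.gpr; flt |= o.flt; pred |= o.pred; return *this; }
    RegsMaskSketch& operator&=(const RegsMaskSketch& o) { gpr &= o.gpr; flt &= o.flt; pred &= o.pred; return *this; }
};

int main()
{
    RegsMaskSketch regsToFree, delayRegsToFree;
    regsToFree.AddRegNum(5, RegClass::Int);
    delayRegsToFree.AddRegNum(2, RegClass::Float);
    if (!(regsToFree | delayRegsToFree).IsEmpty())
    {
        regsToFree = delayRegsToFree; // mirror of the regsToFree/delayRegsToFree hand-off above
        delayRegsToFree.Clear();
    }
    return 0;
}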
@@ -5261,7 +5371,7 @@ void LinearScan::allocateRegistersMinimal() else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); - regsToFree |= getRegMask(assignedRegister, currentInterval->registerType); + regsToFree.AddRegNum(assignedRegister, currentInterval->registerType); // We want a new register, but we don't want this to be considered a spill. assignedRegister = REG_NA; if (physRegRecord->assignedInterval == currentInterval) @@ -5358,17 +5468,19 @@ void LinearScan::allocateRegistersMinimal() // If we allocated a register, record it if (assignedRegister != REG_NA) { - assignedRegBit = genRegMask(assignedRegister); - regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); - regsInUseThisLocation |= regMask; + assignedRegBit = genRegMask(assignedRegister); + AllRegsMask assignedRegMask; + assignedRegMask.AddRegNum(assignedRegister, currentInterval->registerType); + + regsInUseThisLocation |= assignedRegMask; if (currentRefPosition.delayRegFree) { - regsInUseNextLocation |= regMask; + regsInUseNextLocation |= assignedRegMask; } currentRefPosition.registerAssignment = assignedRegBit; currentInterval->physReg = assignedRegister; - regsToFree &= ~regMask; // we'll set it again later if it's dead + regsToFree &= ~assignedRegMask; // we'll set it again later if it's dead // If this interval is dead, free the register. // The interval could be dead if this is a user variable, or if the @@ -5389,11 +5501,11 @@ void LinearScan::allocateRegistersMinimal() { if (currentRefPosition.delayRegFree) { - delayRegsToMakeInactive |= regMask; + delayRegsToMakeInactive |= assignedRegMask; } else { - regsToMakeInactive |= regMask; + regsToMakeInactive |= assignedRegMask; } // TODO-Cleanup: this makes things consistent with previous, and will enable preferences // to be propagated, but it seems less than ideal. @@ -5412,13 +5524,13 @@ void LinearScan::allocateRegistersMinimal() { if (currentRefPosition.delayRegFree) { - delayRegsToFree |= regMask; + delayRegsToFree |= assignedRegMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); } else { - regsToFree |= regMask; + regsToFree |= assignedRegMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } @@ -5546,7 +5658,7 @@ void LinearScan::allocateRegisters() updateNextIntervalRef(reg, interval); updateSpillCost(reg, interval); setRegInUse(reg, interval->registerType); - INDEBUG(registersToDump |= getRegMask(reg, interval->registerType)); + INDEBUG(registersToDump.AddRegNum(reg, interval->registerType)); } } else @@ -5566,7 +5678,7 @@ void LinearScan::allocateRegisters() "--------------------\n"); // Start with a small set of commonly used registers, so that we don't keep having to print a new title. // Include all the arg regs, as they may already have values assigned to them. - registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet | RBM_ARG_REGS; + registersToDump = AllRegsMask(LsraLimitSmallIntSet | RBM_ARG_REGS, LsraLimitSmallFPSet, RBM_NONE); dumpRegRecordHeader(); // Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop. 
printf(indentFormat, ""); @@ -5575,14 +5687,14 @@ void LinearScan::allocateRegisters() BasicBlock* currentBlock = nullptr; - LsraLocation prevLocation = MinLocation; - regMaskTP regsToFree = RBM_NONE; - regMaskTP delayRegsToFree = RBM_NONE; - regMaskTP regsToMakeInactive = RBM_NONE; - regMaskTP delayRegsToMakeInactive = RBM_NONE; - regMaskTP copyRegsToFree = RBM_NONE; - regsInUseThisLocation = RBM_NONE; - regsInUseNextLocation = RBM_NONE; + LsraLocation prevLocation = MinLocation; + AllRegsMask regsToFree; + AllRegsMask delayRegsToFree; + AllRegsMask regsToMakeInactive; + AllRegsMask delayRegsToMakeInactive; + AllRegsMask copyRegsToFree; + regsInUseThisLocation.Clear(); + regsInUseNextLocation.Clear(); // This is the most recent RefPosition for which a register was allocated // - currently only used for DEBUG but maintained in non-debug, for clarity of code @@ -5598,22 +5710,20 @@ void LinearScan::allocateRegisters() // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. - regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); - while (tempRegsToMakeInactive != RBM_NONE) - { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive); - RegRecord* regRecord = getRegisterRecord(nextReg); - clearSpillCost(regRecord->regNum, regRecord->registerType); - makeRegisterInactive(regRecord); - } + AllRegsMask tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); +#ifdef HAS_MORE_THAN_64_REGISTERS + inActivateRegisters(tempRegsToMakeInactive); +#else + inActivateRegisters(tempRegsToMakeInactive.GetAllRegistersMask()); +#endif // HAS_MORE_THAN_64_REGISTERS + if (currentRefPosition.nodeLocation > prevLocation) { makeRegsAvailable(regsToMakeInactive); // TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want // to mark them as free yet. - regsToMakeInactive |= delayRegsToMakeInactive; - regsToMakeInactive = delayRegsToMakeInactive; - delayRegsToMakeInactive = RBM_NONE; + regsToMakeInactive = delayRegsToMakeInactive; + delayRegsToMakeInactive.Clear(); } #ifdef DEBUG @@ -5668,30 +5778,30 @@ void LinearScan::allocateRegisters() { // CopyRegs are simply made available - we don't want to make the associated interval inactive. makeRegsAvailable(copyRegsToFree); - copyRegsToFree = RBM_NONE; + copyRegsToFree.Clear(); regsInUseThisLocation = regsInUseNextLocation; - regsInUseNextLocation = RBM_NONE; + regsInUseNextLocation.Clear(); #ifdef TARGET_ARM64 if (hasConsecutiveRegister) { consecutiveRegsInUseThisLocation = RBM_NONE; } #endif - if ((regsToFree | delayRegsToFree) != RBM_NONE) + if (!((regsToFree | delayRegsToFree).IsEmpty())) { freeRegisters(regsToFree); - if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE)) + if ((currentLocation > (prevLocation + 1)) && !delayRegsToFree.IsEmpty()) { // We should never see a delayReg that is delayed until a Location that has no RefPosition // (that would be the RefPosition that it was supposed to interfere with). assert(!"Found a delayRegFree associated with Location with no reference"); // However, to be cautious for the Release build case, we will free them. 
freeRegisters(delayRegsToFree); - delayRegsToFree = RBM_NONE; - regsInUseThisLocation = RBM_NONE; + delayRegsToFree.Clear(); + regsInUseThisLocation.Clear(); } - regsToFree = delayRegsToFree; - delayRegsToFree = RBM_NONE; + regsToFree = delayRegsToFree; + delayRegsToFree.Clear(); #ifdef DEBUG verifyFreeRegisters(regsToFree); #endif @@ -5750,11 +5860,11 @@ void LinearScan::allocateRegisters() { // Free any delayed regs (now in regsToFree) before processing the block boundary freeRegisters(regsToFree); - regsToFree = RBM_NONE; - regsInUseThisLocation = RBM_NONE; - regsInUseNextLocation = RBM_NONE; - handledBlockEnd = true; - curBBStartLocation = currentRefPosition.nodeLocation; + regsToFree.Clear(); + regsInUseThisLocation.Clear(); + regsInUseNextLocation.Clear(); + handledBlockEnd = true; + curBBStartLocation = currentRefPosition.nodeLocation; if (currentBlock == nullptr) { currentBlock = startBlockSequence(); @@ -5815,13 +5925,14 @@ void LinearScan::allocateRegisters() } #endif // TARGET_ARM } - regsInUseThisLocation |= currentRefPosition.registerAssignment; + regsInUseThisLocation.AddRegMaskForType(currentRefPosition.registerAssignment, regRecord->registerType); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); #ifdef SWIFT_SUPPORT if (currentRefPosition.delayRegFree) { - regsInUseNextLocation |= currentRefPosition.registerAssignment; + regsInUseNextLocation.AddRegMaskForType(currentRefPosition.registerAssignment, + regRecord->registerType); } #endif // SWIFT_SUPPORT } @@ -5969,7 +6080,7 @@ void LinearScan::allocateRegisters() updateSpillCost(assignedRegister, currentInterval); } - regsToFree |= getRegMask(assignedRegister, currentInterval->registerType); + regsToFree.AddRegNum(assignedRegister, currentInterval->registerType); } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, nullptr, assignedRegister)); currentRefPosition.registerAssignment = RBM_NONE; @@ -6105,8 +6216,8 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); } - regMaskTP assignedRegBit = RBM_NONE; - bool isInRegister = false; + singleRegMask assignedRegBit = RBM_NONE; + bool isInRegister = false; if (assignedRegister != REG_NA) { isInRegister = true; @@ -6152,9 +6263,9 @@ void LinearScan::allocateRegisters() // it might be beneficial to keep it in this reg for PART of the lifetime if (currentInterval->isLocalVar) { - regMaskTP preferences = currentInterval->registerPreferences; - bool keepAssignment = true; - bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE; + regMaskOnlyOne preferences = currentInterval->registerPreferences; + bool keepAssignment = true; + bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE; // Will the assigned register cover the lifetime? If not, does it at least // meet the preferences for the next RefPosition? 
@@ -6260,9 +6371,10 @@ void LinearScan::allocateRegisters() if (copyReg != assignedRegister) { - lastAllocatedRefPosition = ¤tRefPosition; - regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); - regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + lastAllocatedRefPosition = ¤tRefPosition; + regMaskOnlyOne copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskOnlyOne assignedRegMask = + getRegMask(assignedRegister, currentInterval->registerType); if ((consecutiveRegsInUseThisLocation & assignedRegMask) != RBM_NONE) { @@ -6281,11 +6393,12 @@ void LinearScan::allocateRegisters() // we will copy the entire vector value from it to the `copyReg`. updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree, - &delayRegsToFree DEBUG_ARG(currentInterval) + &delayRegsToFree, + currentInterval->registerType DEBUG_ARG(currentInterval) DEBUG_ARG(assignedRegister)); if (!currentRefPosition.lastUse) { - copyRegsToFree |= copyRegMask; + copyRegsToFree.AddRegNum(copyReg, currentInterval->registerType); } // If this is a tree temp (non-localVar) interval, we will need an explicit move. @@ -6358,9 +6471,9 @@ void LinearScan::allocateRegisters() copyReg = assignCopyReg(¤tRefPosition); } - lastAllocatedRefPosition = ¤tRefPosition; - regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); - regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + lastAllocatedRefPosition = ¤tRefPosition; + regMaskOnlyOne copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskOnlyOne assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); #ifdef TARGET_ARM64 if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) @@ -6390,10 +6503,12 @@ void LinearScan::allocateRegisters() // happened to be restored in assignedReg, we would need assignedReg to stay alive because // we will copy the entire vector value from it to the `copyReg`. updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree, - &delayRegsToFree DEBUG_ARG(currentInterval) DEBUG_ARG(assignedRegister)); + &delayRegsToFree, + currentInterval->registerType DEBUG_ARG(currentInterval) + DEBUG_ARG(assignedRegister)); if (!currentRefPosition.lastUse) { - copyRegsToFree |= copyRegMask; + copyRegsToFree.AddRegNum(copyReg, currentInterval->registerType); } // If this is a tree temp (non-localVar) interval, we will need an explicit move. @@ -6414,7 +6529,7 @@ void LinearScan::allocateRegisters() else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); - regsToFree |= getRegMask(assignedRegister, currentInterval->registerType); + regsToFree.AddRegNum(assignedRegister, currentInterval->registerType); // We want a new register, but we don't want this to be considered a spill. 
assignedRegister = REG_NA; if (physRegRecord->assignedInterval == currentInterval) @@ -6581,17 +6696,19 @@ void LinearScan::allocateRegisters() // If we allocated a register, record it if (assignedRegister != REG_NA) { - assignedRegBit = genRegMask(assignedRegister); - regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); - regsInUseThisLocation |= regMask; + assignedRegBit = genRegMask(assignedRegister); + AllRegsMask assignedRegMask; + assignedRegMask.AddRegNum(assignedRegister, currentInterval->registerType); + + regsInUseThisLocation |= assignedRegMask; if (currentRefPosition.delayRegFree) { - regsInUseNextLocation |= regMask; + regsInUseNextLocation |= assignedRegMask; } currentRefPosition.registerAssignment = assignedRegBit; currentInterval->physReg = assignedRegister; - regsToFree &= ~regMask; // we'll set it again later if it's dead + regsToFree &= ~assignedRegMask; // we'll set it again later if it's dead // If this interval is dead, free the register. // The interval could be dead if this is a user variable, or if the @@ -6628,11 +6745,11 @@ void LinearScan::allocateRegisters() { if (currentRefPosition.delayRegFree) { - delayRegsToMakeInactive |= regMask; + delayRegsToMakeInactive |= assignedRegMask; } else { - regsToMakeInactive |= regMask; + regsToMakeInactive |= assignedRegMask; } // TODO-Cleanup: this makes things consistent with previous, and will enable preferences // to be propagated, but it seems less than ideal. @@ -6651,13 +6768,13 @@ void LinearScan::allocateRegisters() { if (currentRefPosition.delayRegFree) { - delayRegsToFree |= regMask; + delayRegsToFree |= assignedRegMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); } else { - regsToFree |= regMask; + regsToFree |= assignedRegMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } @@ -8020,7 +8137,8 @@ void LinearScan::resolveRegisters() { // If the localVar is in a register, it must be in a register that is not trashed by // the current node (otherwise it would have already been spilled). - assert((genRegMask(localVarInterval->physReg) & getKillSetForNode(treeNode)) == RBM_NONE); + assert((genRegMask(localVarInterval->physReg) & + getKillSetForNode(treeNode).GetRegMaskForType(interval->registerType)) == RBM_NONE); // If we have allocated a register to spill it to, we will use that; otherwise, we will // spill it to the stack. We can use as a temp register any non-arg caller-save register. currentRefPosition->referent->recentRefPosition = currentRefPosition; @@ -8276,10 +8394,11 @@ void LinearScan::resolveRegisters() if (varDsc->lvIsParam) { - regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment; - regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter) - ? REG_STK - : genRegNumFromMask(initialRegMask); + regMaskOnlyOne initialRegMask = interval->firstRefPosition->registerAssignment; + regNumber initialReg = + (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter) + ? 
REG_STK + : genRegNumFromMask(initialRegMask MORE_THAN_64_REG_ARG(interval->registerType)); #ifdef TARGET_ARM if (varTypeIsMultiReg(varDsc)) @@ -8365,7 +8484,7 @@ void LinearScan::resolveRegisters() varDsc->lvOnFrame = false; } #ifdef DEBUG - regMaskTP registerAssignment = genRegMask(varDsc->GetRegNum()); + singleRegMask registerAssignment = genRegMask(varDsc->GetRegNum()); assert(!interval->isSpilled && !interval->isSplit); RefPosition* refPosition = interval->firstRefPosition; assert(refPosition != nullptr); @@ -8604,7 +8723,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - regMaskTP terminatorConsumedRegs) + regMaskOnlyOne terminatorConsumedRegs) { // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps // and they would be more space-efficient as well. @@ -8612,7 +8731,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, VarToRegMap toVarToRegMap = toBlock == nullptr ? nullptr : getInVarToRegMap(toBlock->bbNum); #ifdef TARGET_ARM - regMaskTP freeRegs; + regMaskOnlyOne freeRegs; if (type == TYP_DOUBLE) { // We have to consider all float registers for TYP_DOUBLE @@ -8623,7 +8742,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, freeRegs = allRegs(type); } #else // !TARGET_ARM - regMaskTP freeRegs = allRegs(type); + regMaskOnlyOne freeRegs = allRegs(type); #endif // !TARGET_ARM #ifdef DEBUG @@ -8632,7 +8751,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, return REG_NA; } #endif // DEBUG - INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs)); + INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs, type)); freeRegs &= ~terminatorConsumedRegs; @@ -8693,13 +8812,33 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, } else { + regMaskOnlyOne calleeTrashMask = RBM_NONE; + + if (varTypeUsesIntReg(type)) + { + calleeTrashMask = RBM_INT_CALLEE_TRASH; + assert(compiler->IsGprRegMask(terminatorConsumedRegs)); + } +#ifdef FEATURE_MASKED_HW_INTRINSICS + else if (varTypeUsesMaskReg(type)) + { + calleeTrashMask = RBM_MSK_CALLEE_TRASH; + assert(compiler->IsPredicateRegMask(terminatorConsumedRegs)); + } +#endif + else + { + assert(varTypeUsesFloatReg(type)); + calleeTrashMask = RBM_FLT_CALLEE_TRASH; + assert(compiler->IsFloatRegMask(terminatorConsumedRegs)); + } // Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores. - if ((freeRegs & RBM_CALLEE_TRASH) != 0) + if ((freeRegs & calleeTrashMask) != 0) { - freeRegs &= RBM_CALLEE_TRASH; + freeRegs &= calleeTrashMask; } - regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs)); + regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs) MORE_THAN_64_REG_ARG(type)); return tempReg; } } @@ -8890,7 +9029,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // available to copy into. // Note that for this purpose we use the full live-out set, because we must ensure that // even the registers that remain the same across the edge are preserved correctly. 
- regMaskTP liveOutRegs = RBM_NONE; + AllRegsMask liveOutRegs; VarSetOps::Iter liveOutIter(compiler, block->bbLiveOut); unsigned liveOutVarIndex = 0; while (liveOutIter.NextElem(&liveOutVarIndex)) @@ -8898,8 +9037,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) regNumber fromReg = getVarReg(outVarToRegMap, liveOutVarIndex); if (fromReg != REG_STK) { - regMaskTP fromRegMask = genRegMask(fromReg, getIntervalForLocalVar(liveOutVarIndex)->registerType); - liveOutRegs |= fromRegMask; + var_types varType = getIntervalForLocalVar(liveOutVarIndex)->registerType; + liveOutRegs.AddRegNumInMask(fromReg ARM_ARG(varType)); } } @@ -8910,7 +9049,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // // Note: Only switches and JCMP/JTEST (for Arm4) have input regs (and so can be fed by copies), so those // are the only block-ending branches that need special handling. - regMaskTP consumedRegs = RBM_NONE; + regMaskGpr consumedRegs = RBM_NONE; if (block->KindIs(BBJ_SWITCH)) { // At this point, Lowering has transformed any non-switch-table blocks into @@ -9004,9 +9143,11 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } + assert(compiler->IsGprRegMask(consumedRegs)); // If this fails, then we will have to use AllRegsMask for + // consumedRegs VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; - regMaskTP sameWriteRegs = RBM_NONE; - regMaskTP diffReadRegs = RBM_NONE; + AllRegsMask sameWriteRegs; + AllRegsMask diffReadRegs; // For each var that may require resolution, classify them as: // - in the same register at the end of this block and at each target (no resolution needed) @@ -9058,17 +9199,20 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // We only need to check for these cases if sameToReg is an actual register (not REG_STK). if (sameToReg != REG_NA && sameToReg != REG_STK) { + var_types outVarRegType = getIntervalForLocalVar(outResolutionSetVarIndex)->registerType; + // If there's a path on which this var isn't live, it may use the original value in sameToReg. // In this case, sameToReg will be in the liveOutRegs of this block. // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's // live only at another target), and we can't copy another lclVar into that reg in this block. 
- regMaskTP sameToRegMask = - genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType); - if (maybeSameLivePaths && - (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE))) + regMaskOnlyOne sameToRegMask = genRegMask(sameToReg, outVarRegType); + + if (maybeSameLivePaths && (liveOutRegs.IsRegNumInMask(sameToReg ARM_ARG(outVarRegType)) || + sameWriteRegs.IsRegNumInMask(sameToReg ARM_ARG(outVarRegType)) != RBM_NONE)) { sameToReg = REG_NA; } + // If this register is busy because it is used by a switch table at the end of the block // (or for Arm64, it is consumed by JCMP), we can't do the copy in this block since we can't // insert it after the switch (or for Arm64, can't insert and overwrite the operand/source @@ -9107,7 +9251,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) VarSetOps::AddElemD(compiler, diffResolutionSet, outResolutionSetVarIndex); if (fromReg != REG_STK) { - diffReadRegs |= genRegMask(fromReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType); + diffReadRegs.AddRegNumInMask( + fromReg ARM_ARG(getIntervalForLocalVar(outResolutionSetVarIndex)->registerType)); } } else if (sameToReg != fromReg) @@ -9116,14 +9261,15 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) setVarReg(sameVarToRegMap, outResolutionSetVarIndex, sameToReg); if (sameToReg != REG_STK) { - sameWriteRegs |= genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType); + sameWriteRegs.AddRegNumInMask( + sameToReg ARM_ARG(getIntervalForLocalVar(outResolutionSetVarIndex)->registerType)); } } } if (!VarSetOps::IsEmpty(compiler, sameResolutionSet)) { - if ((sameWriteRegs & diffReadRegs) != RBM_NONE) + if (!((sameWriteRegs & diffReadRegs).IsEmpty())) { // We cannot split the "same" and "diff" regs if the "same" set writes registers // that must be read by the "diff" set. (Note that when these are done as a "batch" @@ -9438,8 +9584,10 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - regMaskTP terminatorConsumedRegs) + regMaskGpr terminatorConsumedRegs) { + assert(compiler->IsGprRegMask(terminatorConsumedRegs)); + VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum); VarToRegMap toVarToRegMap; if (resolveType == ResolveSharedCritical) @@ -9494,7 +9642,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, { #ifdef TARGET_ARM // Try to reserve a double register for TYP_DOUBLE and use it for TYP_FLOAT too if available. - tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE, liveSet, terminatorConsumedRegs); + tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE, liveSet, RBM_NONE); if (tempRegDbl != REG_NA) { tempRegFlt = tempRegDbl; @@ -9502,13 +9650,13 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, else #endif // TARGET_ARM { - tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT, liveSet, terminatorConsumedRegs); + tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT, liveSet, RBM_NONE); } } - regMaskTP targetRegsToDo = RBM_NONE; - regMaskTP targetRegsReady = RBM_NONE; - regMaskTP targetRegsFromStack = RBM_NONE; + AllRegsMask targetRegsToDo; + AllRegsMask targetRegsReady; + AllRegsMask targetRegsFromStack; // The following arrays capture the location of the registers as they are moved: // - location[reg] gives the current location of the var that was originally in 'reg'. 
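Editorial aside: the critical-edge hunks above move liveOutRegs, sameWriteRegs, and diffReadRegs to the composite mask and test membership with IsRegNumInMask/AddRegNumInMask keyed by the variable's register type. A hedged sketch of that per-class dispatch, with invented numbering and class names:

#include <cstdint>

enum class RegClass { Int, Float, Predicate };

struct CompositeMaskSketch
{
    uint64_t gpr = 0, flt = 0, pred = 0;

    void AddRegNumInMask(unsigned regIndex, RegClass cls)
    {
        select(cls) |= (1ull << regIndex);
    }

    bool IsRegNumInMask(unsigned regIndex, RegClass cls) const
    {
        const uint64_t field = (cls == RegClass::Float) ? flt : (cls == RegClass::Predicate) ? pred : gpr;
        return (field & (1ull << regIndex)) != 0;
    }

private:
    uint64_t& select(RegClass cls)
    {
        if (cls == RegClass::Float)     return flt;
        if (cls == RegClass::Predicate) return pred;
        return gpr;
    }
};

int main()
{
    CompositeMaskSketch liveOutRegs;
    liveOutRegs.AddRegNumInMask(4, RegClass::Float);
    // A candidate "sameToReg" is rejected if it is still live out in its own class:
    bool busy = liveOutRegs.IsRegNumInMask(4, RegClass::Float); // true
    return busy ? 0 : 1;
}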
@@ -9615,7 +9763,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, if (fromReg == REG_STK) { stackToRegIntervals[toReg] = interval; - targetRegsFromStack |= genRegMask(toReg); + targetRegsFromStack |= toReg; } else if (toReg == REG_STK) { @@ -9629,19 +9777,17 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, location[fromReg] = (regNumberSmall)fromReg; source[toReg] = (regNumberSmall)fromReg; sourceIntervals[fromReg] = interval; - targetRegsToDo |= genRegMask(toReg); + targetRegsToDo |= toReg; } } // REGISTER to REGISTER MOVES // First, find all the ones that are ready to move now - regMaskTP targetCandidates = targetRegsToDo; - while (targetCandidates != RBM_NONE) + AllRegsMask targetCandidates = targetRegsToDo; + while (!targetCandidates.IsEmpty()) { - regNumber targetReg = genFirstRegNumFromMask(targetCandidates); - regMaskTP targetRegMask = genRegMask(targetReg); - targetCandidates ^= targetRegMask; + regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetCandidates); if (location[targetReg] == REG_NA) { #ifdef TARGET_ARM @@ -9654,26 +9800,24 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, regNumber anotherHalfRegNum = REG_NEXT(targetReg); if (location[anotherHalfRegNum] == REG_NA) { - targetRegsReady |= targetRegMask; + targetRegsReady |= targetReg; } } else #endif // TARGET_ARM { - targetRegsReady |= targetRegMask; + targetRegsReady |= targetReg; } } } // Perform reg to reg moves - while (targetRegsToDo != RBM_NONE) + while (!targetRegsToDo.IsEmpty()) { - while (targetRegsReady != RBM_NONE) + while (!targetRegsReady.IsEmpty()) { - regNumber targetReg = genFirstRegNumFromMask(targetRegsReady); - regMaskTP targetRegMask = genRegMask(targetReg); - targetRegsToDo ^= targetRegMask; - targetRegsReady ^= targetRegMask; + regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetRegsReady); + targetRegsToDo ^= targetReg; assert(location[targetReg] != targetReg); assert(targetReg < REG_COUNT); regNumber sourceReg = (regNumber)source[targetReg]; @@ -9687,14 +9831,14 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, fromReg DEBUG_ARG(fromBlock) DEBUG_ARG(toBlock) DEBUG_ARG(resolveTypeName[resolveType])); sourceIntervals[sourceReg] = nullptr; location[sourceReg] = REG_NA; - regMaskTP fromRegMask = genRegMask(fromReg); + singleRegMask fromRegMask = genRegMask(fromReg); // Do we have a free targetReg? if (fromReg == sourceReg) { - if (source[fromReg] != REG_NA && ((targetRegsFromStack & fromRegMask) != fromRegMask)) + if (source[fromReg] != REG_NA && !targetRegsFromStack.IsRegNumInMask(fromReg)) { - targetRegsReady |= fromRegMask; + targetRegsReady |= fromReg; #ifdef TARGET_ARM if (genIsValidDoubleReg(fromReg)) { @@ -9705,7 +9849,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, regNumber upperHalfReg = REG_NEXT(fromReg); if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA)) { - targetRegsReady &= ~fromRegMask; + targetRegsReady.RemoveRegNumFromMask(fromReg); } } } @@ -9713,10 +9857,10 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, { // We may have freed up the other half of a double where the lower half // was already free. 
- regNumber lowerHalfReg = REG_PREV(fromReg); - regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg]; - regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg]; - regMaskTP lowerHalfRegMask = genRegMask(lowerHalfReg); + regNumber lowerHalfReg = REG_PREV(fromReg); + regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg]; + regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg]; + singleRegMask lowerHalfRegMask = genRegMask(lowerHalfReg); // Necessary conditions: // - There is a source register for this reg (lowerHalfSrcReg != REG_NA) // - It is currently free (lowerHalfSrcLoc == REG_NA) @@ -9727,22 +9871,21 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // lowerHalfRegMask) if ((lowerHalfSrcReg != REG_NA) && (lowerHalfSrcLoc == REG_NA) && (sourceIntervals[lowerHalfSrcReg] != nullptr) && - ((targetRegsReady & lowerHalfRegMask) == RBM_NONE) && - ((targetRegsFromStack & lowerHalfRegMask) != lowerHalfRegMask)) + !targetRegsReady.IsRegNumInMask(lowerHalfReg) && + !targetRegsFromStack.IsRegNumInMask(lowerHalfReg)) { // This must be a double interval, otherwise it would be in targetRegsReady, or already // completed. assert(sourceIntervals[lowerHalfSrcReg]->registerType == TYP_DOUBLE); - targetRegsReady |= lowerHalfRegMask; + targetRegsReady |= lowerHalfReg; } #endif // TARGET_ARM } } } - if (targetRegsToDo != RBM_NONE) + if (!targetRegsToDo.IsEmpty()) { - regNumber targetReg = genFirstRegNumFromMask(targetRegsToDo); - regMaskTP targetRegMask = genRegMask(targetReg); + regNumber targetReg = genFirstRegNumFromMask(targetRegsToDo); // Is it already there due to other moves? // If not, move it to the temp reg, OR swap it with another register @@ -9750,7 +9893,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, regNumber fromReg = (regNumber)location[sourceReg]; if (targetReg == fromReg) { - targetRegsToDo &= ~targetRegMask; + targetRegsToDo.RemoveRegNumFromMask(targetReg); } else { @@ -9793,16 +9936,15 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // Otherwise, we'll spill it to the stack and reload it later. if (useSwap) { - regMaskTP fromRegMask = genRegMask(fromReg); - targetRegsToDo &= ~fromRegMask; + targetRegsToDo.RemoveRegNumFromMask(fromReg); } } else { // Look at the remaining registers from targetRegsToDo (which we expect to be relatively // small at this point) to find out what's currently in targetReg. - regMaskTP mask = targetRegsToDo; - while (mask != RBM_NONE && otherTargetReg == REG_NA) + AllRegsMask mask = targetRegsToDo; + while (!mask.IsEmpty() && otherTargetReg == REG_NA) { regNumber nextReg = genFirstRegNumFromMaskAndToggle(mask); if (location[source[nextReg]] == targetReg) @@ -9838,10 +9980,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, DEBUG_ARG(resolveTypeName[resolveType])); location[source[otherTargetReg]] = REG_STK; - regMaskTP otherTargetRegMask = genRegMask(otherTargetReg); - targetRegsFromStack |= otherTargetRegMask; + targetRegsFromStack |= otherTargetReg; stackToRegIntervals[otherTargetReg] = otherInterval; - targetRegsToDo &= ~otherTargetRegMask; + targetRegsToDo.RemoveRegNumFromMask(otherTargetReg); // Now, move the interval that is going to targetReg. addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, @@ -9855,8 +9996,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // value will be retrieved from STK. 
if (source[fromReg] != REG_NA && fromReg != otherTargetReg) { - regMaskTP fromRegMask = genRegMask(fromReg); - targetRegsReady |= fromRegMask; + targetRegsReady |= fromReg; #ifdef TARGET_ARM if (genIsValidDoubleReg(fromReg)) { @@ -9867,17 +10007,17 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, regNumber upperHalfReg = REG_NEXT(fromReg); if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA)) { - targetRegsReady &= ~fromRegMask; + targetRegsReady.RemoveRegNumFromMask(fromReg); } } #endif // TARGET_ARM } } - targetRegsToDo &= ~targetRegMask; + targetRegsToDo.RemoveRegNumFromMask(targetReg); } else { - compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(true)); + compiler->codeGen->regSet.rsSetRegModified(tempReg DEBUGARG(true)); #ifdef TARGET_ARM if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE) { @@ -9897,7 +10037,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, DEBUG_ARG(resolveTypeName[resolveType])); location[targetReg] = (regNumberSmall)tempReg; } - targetRegsReady |= targetRegMask; + targetRegsReady |= targetReg; } } } @@ -9905,7 +10045,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // Finally, perform stack to reg moves // All the target regs will be empty at this point - while (targetRegsFromStack != RBM_NONE) + while (!targetRegsFromStack.IsEmpty()) { regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetRegsFromStack); @@ -10280,7 +10420,17 @@ void RefPosition::dump(LinearScan* linearScan) printf(FMT_BB " ", this->bbNum); printf("regmask="); - linearScan->compiler->dumpRegMask(registerAssignment); + var_types type = TYP_UNKNOWN; + if ((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)) + { + // These refTypes do not have intervals + type = TYP_INT; + } + else + { + type = getRegisterType(); + } + linearScan->compiler->dumpRegMask(registerAssignment, type); printf(" minReg=%d", minRegCandidateCount); @@ -10416,10 +10566,10 @@ void Interval::dump(Compiler* compiler) printf(" physReg:%s", getRegName(physReg)); printf(" Preferences="); - compiler->dumpRegMask(this->registerPreferences); + compiler->dumpRegMask(this->registerPreferences, this->registerType); printf(" Aversions="); - compiler->dumpRegMask(this->registerAversion); + compiler->dumpRegMask(this->registerAversion, this->registerType); if (relatedInterval) { printf(" RelatedInterval "); @@ -10974,7 +11124,7 @@ void LinearScan::dumpLsraAllocationEvent( } if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK)) { - registersToDump |= getRegMask(reg, interval->registerType); + registersToDump.AddRegNum(reg, interval->registerType); dumpRegRecordTitleIfNeeded(); } @@ -11312,7 +11462,7 @@ void LinearScan::dumpRegRecordHeader() regColumnWidth + 1); // Print a "title row" including the legend and the reg names. - lastDumpedRegisters = RBM_NONE; + lastDumpedRegisters.Clear(); dumpRegRecordTitleIfNeeded(); } @@ -11321,10 +11471,14 @@ void LinearScan::dumpRegRecordTitleIfNeeded() if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES)) { lastUsedRegNumIndex = 0; +#ifdef FEATURE_MASKED_HW_INTRINSICS + int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_MASK_LAST : REG_INT_LAST; +#else int lastRegNumIndex = compiler->compFloatingPointUsed ? 
REG_FP_LAST : REG_INT_LAST; +#endif for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++) { - if ((registersToDump & genRegMask((regNumber)regNumIndex)) != 0) + if (registersToDump.IsRegNumInMask((regNumber)regNumIndex)) { lastUsedRegNumIndex = regNumIndex; } @@ -11404,7 +11558,7 @@ void LinearScan::dumpRegRecords() #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE printf("%c", activeChar); } - else if ((genRegMask(regNum) & regsBusyUntilKill) != RBM_NONE) + else if (regsBusyUntilKill.IsRegNumInMask(regNum)) { printf(columnFormatArray, "Busy"); } @@ -11634,14 +11788,28 @@ bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node) // Arguments: // regsToFree - Registers that were just freed. // -void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) +void LinearScan::verifyFreeRegisters(CONSTREF_AllRegsMask regsToFree) { + regMaskOnlyOne regsMaskToFree = regsToFree.gprRegs(); + regMaskOnlyOne availableRegsMask = availableIntRegs; for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { - regMaskTP regMask = genRegMask(reg); + if (reg >= REG_FP_FIRST && reg <= REG_FP_LAST) + { + regsMaskToFree = regsToFree.floatRegs(compiler); + availableRegsMask = availableFloatRegs; + } +#ifdef FEATURE_MASKED_HW_INTRINSICS + else if (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST) + { + regsMaskToFree = regsToFree.predicateRegs(compiler); + availableRegsMask = availableMaskRegs; + } +#endif + singleRegMask regMask = genRegMask(reg); // If this isn't available or if it's still waiting to be freed (i.e. it was in // delayRegsToFree and so now it's in regsToFree), then skip it. - if ((regMask & allAvailableRegs & ~regsToFree) == RBM_NONE) + if ((regMask & availableRegsMask & ~regsMaskToFree) == RBM_NONE) { continue; } @@ -12085,10 +12253,11 @@ void LinearScan::verifyFinalAllocation() // However, we will assert that, at resolution time, no registers contain GC refs. { DBEXEC(VERBOSE, printf(" ")); - regMaskTP candidateRegs = currentRefPosition.registerAssignment; + regMaskOnlyOne candidateRegs = currentRefPosition.registerAssignment; while (candidateRegs != RBM_NONE) { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); + regNumber nextReg = + genFirstRegNumFromMaskAndToggle(candidateRegs MORE_THAN_64_REG_ARG(TYP_INT)); RegRecord* regRecord = getRegisterRecord(nextReg); Interval* assignedInterval = regRecord->assignedInterval; @@ -12469,9 +12638,11 @@ void LinearScan::RegisterSelection::reset(Interval* interval, RefPosition* refPo // Return Values: // 'true' if there was a single register candidate available after the heuristic is applied. // -bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP selectionCandidates) +bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskOnlyOne selectionCandidates) { - regMaskTP newCandidates = candidates & selectionCandidates; + assert(linearScan->compiler->IsOnlyOneRegMask(selectionCandidates)); + + regMaskOnlyOne newCandidates = candidates & selectionCandidates; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12490,10 +12661,12 @@ bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP // Return Values: // 'true' if there was a single register candidate available after the heuristic is applied. 
// -bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate) +bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, regMaskOnlyOne selectionCandidate) { + assert(linearScan->compiler->IsOnlyOneRegMask(selectionCandidate)); assert(LinearScan::isSingleRegister(selectionCandidate)); - regMaskTP newCandidates = candidates & selectionCandidate; + + regMaskOnlyOne newCandidates = candidates & selectionCandidate; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12540,7 +12713,7 @@ void LinearScan::RegisterSelection::try_CONST_AVAILABLE() if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { - regMaskTP newCandidates = candidates & matchingConstants; + regMaskOnlyOne newCandidates = candidates & matchingConstants; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12701,7 +12874,7 @@ void LinearScan::RegisterSelection::try_COVERS_FULL() calculateCoversSets(); #endif - regMaskTP newCandidates = candidates & coversFullSet & freeCandidates; + regMaskOnlyOne newCandidates = candidates & coversFullSet & freeCandidates; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12725,15 +12898,15 @@ void LinearScan::RegisterSelection::try_BEST_FIT() } #endif - regMaskTP bestFitSet = RBM_NONE; + regMaskOnlyOne bestFitSet = RBM_NONE; // If the best score includes COVERS_FULL, pick the one that's killed soonest. // If none cover the full range, the BEST_FIT is the one that's killed later. bool earliestIsBest = coversFullApplied; LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; - for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) + for (regMaskOnlyOne bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) { - regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); - regMaskTP bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); + regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); bestFitCandidates ^= bestFitCandidateBit; // Find the next RefPosition of the register. @@ -12827,12 +13000,12 @@ void LinearScan::RegisterSelection::try_REG_ORDER() // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. - unsigned lowestRegOrder = UINT_MAX; - regMaskTP lowestRegOrderBit = RBM_NONE; - for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) + unsigned lowestRegOrder = UINT_MAX; + regMaskOnlyOne lowestRegOrderBit = RBM_NONE; + for (regMaskOnlyOne regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { - regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); - regMaskTP regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); + regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); regOrderCandidates ^= regOrderCandidateBit; unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; @@ -12854,7 +13027,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST() assert(!found); // The set of registers with the lowest spill weight. 
- regMaskTP lowestCostSpillSet = RBM_NONE; + regMaskOnlyOne lowestCostSpillSet = RBM_NONE; // Apply the SPILL_COST heuristic and eliminate regs that can't be spilled. // The spill weight for 'refPosition' (the one we're allocating now). @@ -12865,10 +13038,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST() bool foundLowerSpillWeight = false; LsraLocation thisLocation = refPosition->nodeLocation; - for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;) + for (regMaskOnlyOne spillCandidates = candidates; spillCandidates != RBM_NONE;) { - regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); - regMaskTP spillCandidateBit = genRegMask(spillCandidateRegNum); + regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask spillCandidateBit = genRegMask(spillCandidateRegNum); spillCandidates ^= spillCandidateBit; RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; @@ -12988,12 +13161,12 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF() { assert(!found); - LsraLocation farthestLocation = MinLocation; - regMaskTP farthestSet = RBM_NONE; - for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE;) + LsraLocation farthestLocation = MinLocation; + regMaskOnlyOne farthestSet = RBM_NONE; + for (regMaskOnlyOne farthestCandidates = candidates; farthestCandidates != RBM_NONE;) { - regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); - regMaskTP farthestCandidateBit = genRegMask(farthestCandidateRegNum); + regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask farthestCandidateBit = genRegMask(farthestCandidateRegNum); farthestCandidates ^= farthestCandidateBit; // Find the next RefPosition of the register. 
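The resolution loops earlier (genFirstRegNumFromMaskAndToggle on the targetRegs* worklists) and the heuristic loops above (genFirstRegNumFromMask plus an explicit xor of the register's bit) both walk a candidate mask one register at a time from the lowest set bit. A sketch of that pop-lowest-bit idiom over a flat 64-bit mask; the JIT helpers additionally deal with the per-file masks and, on some paths, a MORE_THAN_64_REG_ARG type argument, which is omitted here:

    #include <bit>      // std::countr_zero (C++20)
    #include <cassert>
    #include <cstdint>

    using regNumber = unsigned;

    // Return the lowest set register in 'mask' and clear it from 'mask' in one step,
    // the pattern that genFirstRegNumFromMaskAndToggle replaces the older
    // genFirstRegNumFromMask + genRegMask + '^=' sequence with.
    static regNumber popLowestReg(uint64_t& mask)
    {
        assert(mask != 0);
        regNumber reg = (regNumber)std::countr_zero(mask); // index of the lowest set bit
        mask &= (mask - 1);                                // clear that bit
        return reg;
    }

    int main()
    {
        uint64_t targetRegsReady = (1ULL << 2) | (1ULL << 5) | (1ULL << 9);
        while (targetRegsReady != 0)
        {
            regNumber targetReg = popLowestReg(targetRegsReady);
            (void)targetReg; // a real resolver would emit the move into targetReg here
        }
        assert(targetRegsReady == 0);
        return 0;
    }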
@@ -13022,11 +13195,12 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT() { assert(!found); - regMaskTP prevRegOptSet = RBM_NONE; - for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) + regMaskOnlyOne prevRegOptSet = RBM_NONE; + for (regMaskOnlyOne prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) { - regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); - regMaskTP prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); + regNumber prevRegOptCandidateRegNum = + genFirstRegNumFromMask(prevRegOptCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); prevRegOptCandidates ^= prevRegOptCandidateBit; Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; bool foundPrevRegOptReg = true; @@ -13125,11 +13299,11 @@ void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate return; } - regMaskTP coversCandidates = candidates; + regMaskOnlyOne coversCandidates = candidates; for (; coversCandidates != RBM_NONE;) { - regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // The register is considered unassigned if it has no assignedInterval, OR @@ -13152,12 +13326,12 @@ void LinearScan::RegisterSelection::calculateCoversSets() return; } - preferenceSet = (candidates & preferences); - regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet; + preferenceSet = (candidates & preferences); + regMaskOnlyOne coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet; for (; coversCandidates != RBM_NONE;) { - regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates MORE_THAN_64_REG_ARG(regType)); + singleRegMask coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // If we have a single candidate we don't need to compute the preference-related sets, but we @@ -13228,8 +13402,8 @@ void LinearScan::RegisterSelection::calculateCoversSets() // Register bit selected (a single register) and REG_NA if no register was selected. 
// template -regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +singleRegMask LinearScan::RegisterSelection::select(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { #ifdef DEBUG *registerScore = NONE; @@ -13287,7 +13461,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current } #ifdef DEBUG - candidates = linearScan->stressLimitRegs(refPosition, candidates); + candidates = linearScan->stressLimitRegs(refPosition, candidates, regType); #endif assert(candidates != RBM_NONE); @@ -13322,9 +13496,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current nextRelatedInterval = nullptr; // First, get the preferences for this interval - regMaskTP thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences(); + regMaskOnlyOne thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences(); // Now, determine if they are compatible and update the relatedPreferences that we'll consider. - regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences; + regMaskOnlyOne newRelatedPreferences = thisRelatedPreferences & relatedPreferences; if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS)) { // TODO-CQ: The following isFree() check doesn't account for the possibility that there's an @@ -13334,8 +13508,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // to achieve zero diffs. // bool thisIsSingleReg = isSingleRegister(newRelatedPreferences); - if (!thisIsSingleReg || - linearScan->isFree(linearScan->getRegisterRecord(genRegNumFromMask(newRelatedPreferences)))) + if (!thisIsSingleReg || linearScan->isFree(linearScan->getRegisterRecord( + genRegNumFromMask(newRelatedPreferences MORE_THAN_64_REG_ARG(regType))))) { relatedPreferences = newRelatedPreferences; // If this Interval has a downstream def without a single-register preference, continue to iterate. @@ -13392,12 +13566,12 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current if (preferCalleeSave) { - regMaskTP calleeSaveCandidates = linearScan->calleeSaveRegs(currentInterval->registerType); + regMaskOnlyOne calleeSaveCandidates = linearScan->calleeSaveRegs(regType); if (currentInterval->isWriteThru) { // We'll only prefer a callee-save register if it's already been used. - regMaskTP unusedCalleeSaves = - calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); + regMaskOnlyOne unusedCalleeSaves = + calleeSaveCandidates & ~linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask(regType); callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; } @@ -13421,7 +13595,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current found = false; // Is this a fixedReg? 
- regMaskTP fixedRegMask = RBM_NONE; + regMaskOnlyOne fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); @@ -13429,7 +13603,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current if (candidates == refPosition->registerAssignment) { found = true; - if (linearScan->nextIntervalRef[genRegNumFromMask(candidates)] > lastLocation) + if (linearScan->nextIntervalRef[genRegNumFromMask(candidates MORE_THAN_64_REG_ARG(regType))] > lastLocation) { unassignedSet = candidates; } @@ -13437,7 +13611,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current } #ifdef DEBUG - regMaskTP inUseOrBusyRegsMask = RBM_NONE; + AllRegsMask inUseOrBusyRegsMask; #endif // Eliminate candidates that are in-use or busy. @@ -13446,8 +13620,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. - regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; - candidates &= ~busyRegs; + AllRegsMask busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; + candidates &= ~busyRegs.GetRegMaskForType(regType); #ifdef TARGET_ARM // For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half @@ -13457,7 +13631,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // clause below creates a mask to do this. if (currentInterval->registerType == TYP_DOUBLE) { - candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + candidates &= ~((busyRegs.floatRegs(linearScan->compiler) & RBM_ALLDOUBLE_HIGH) >> 1); } #endif // TARGET_ARM @@ -13468,11 +13642,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. - regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + regMaskOnlyOne checkConflictMask = candidates & linearScan->fixedRegs.GetRegMaskForType(regType); while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - regMaskTP checkConflictBit = genRegMask(checkConflictReg); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask MORE_THAN_64_REG_ARG(regType)); + singleRegMask checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; @@ -13482,7 +13656,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current { candidates &= ~checkConflictBit; #ifdef DEBUG - inUseOrBusyRegsMask |= checkConflictBit; + inUseOrBusyRegsMask |= checkConflictReg; #endif } } @@ -13528,7 +13702,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current if (needsConsecutiveRegisters) { #ifdef TARGET_ARM64 - regMaskTP busyConsecutiveCandidates = RBM_NONE; + regMaskFloat busyConsecutiveCandidates = RBM_NONE; if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) { freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); @@ -13547,7 +13721,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // refpositions. 
assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); - freeCandidates = candidates & linearScan->m_AvailableRegs; + freeCandidates = candidates & linearScan->m_AvailableRegs.GetRegMaskForType(currentInterval->registerType); } if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE)) @@ -13562,16 +13736,17 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // Remove the `inUseOrBusyRegsMask` from the original candidates list and find one // such range that is consecutive. Next, append that range to the `candidates`. // - regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask; - regMaskTP overallLimitCandidates; - regMaskTP limitConsecutiveResult = + regMaskFloat limitCandidatesForConsecutive = + refPosition->registerAssignment & ~inUseOrBusyRegsMask.floatRegs(linearScan->compiler); + regMaskFloat overallLimitCandidates; + regMaskFloat limitConsecutiveResult = linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount, &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult); - regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; + regMaskFloat registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); } @@ -13592,7 +13767,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates, regType); } // If no free candidates, then double check if refPosition is an actual ref. @@ -13693,8 +13868,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // select the REG_ORDER heuristics (if there are any free candidates) or REG_NUM (if all registers // are busy). // -regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +singleRegMask LinearScan::RegisterSelection::selectMinimal( + Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!linearScan->enregisterLocalVars); #ifdef DEBUG @@ -13743,7 +13918,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* } #ifdef DEBUG - candidates = linearScan->stressLimitRegs(refPosition, candidates); + candidates = linearScan->stressLimitRegs(refPosition, candidates, regType); #endif assert(candidates != RBM_NONE); @@ -13756,7 +13931,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* #endif // Is this a fixedReg? - regMaskTP fixedRegMask = RBM_NONE; + regMaskOnlyOne fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); @@ -13773,8 +13948,8 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. 
- regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; - candidates &= ~busyRegs; + AllRegsMask busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; + candidates &= ~busyRegs.GetRegMaskForType(regType); #ifdef TARGET_ARM // For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half @@ -13784,18 +13959,18 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* // clause below creates a mask to do this. if (currentInterval->registerType == TYP_DOUBLE) { - candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + candidates &= ~((busyRegs.floatRegs(linearScan->compiler) & RBM_ALLDOUBLE_HIGH) >> 1); } #endif // TARGET_ARM // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. - regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + regMaskOnlyOne checkConflictMask = candidates & linearScan->fixedRegs.GetRegMaskForType(regType); while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - regMaskTP checkConflictBit = genRegMask(checkConflictReg); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask MORE_THAN_64_REG_ARG(regType)); + singleRegMask checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; @@ -13826,7 +14001,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates, regType); if (freeCandidates != RBM_NONE) { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 797c9d69c91d8f..f932c68c0f6fd4 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -51,12 +51,12 @@ RegisterType regType(T type) { return IntRegisterType; } -#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD) +#ifdef FEATURE_MASKED_HW_INTRINSICS else if (varTypeUsesMaskReg(type)) { return MaskRegisterType; } -#endif // (TARGET_XARCH || TARGET_ARM64) && FEATURE_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS else { assert(varTypeUsesFloatReg(type)); @@ -451,11 +451,12 @@ typedef jitstd::list::reverse_iterator RefPositionReverseIterator; class Referenceable { public: - Referenceable() + Referenceable(RegisterType _registerType) { firstRefPosition = nullptr; recentRefPosition = nullptr; lastRefPosition = nullptr; + registerType = _registerType; } // A linked list of RefPositions. These are only traversed in the forward @@ -466,6 +467,8 @@ class Referenceable RefPosition* recentRefPosition; RefPosition* lastRefPosition; + RegisterType registerType; + // Get the position of the next reference which is at or greater than // the current location (relies upon recentRefPosition being updated // during traversal). 
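The lsra.h hunk above widens the regType()/Referenceable plumbing so a third register file (mask/predicate registers, under FEATURE_MASKED_HW_INTRINSICS) is classified alongside the integer and float files. A sketch of that classification step, using simplified stand-ins for var_types and the varTypeUsesIntReg/varTypeUsesFloatReg/varTypeUsesMaskReg predicates:

    #include <cassert>

    enum class RegisterType { Int, Float, Mask };

    // Simplified stand-ins for the JIT's var_types values.
    enum class VarType { Int, Ref, Float, Simd, VectorMask };

    static RegisterType regTypeOf(VarType type)
    {
        if (type == VarType::Int || type == VarType::Ref)
        {
            return RegisterType::Int;   // GPR file
        }
        if (type == VarType::VectorMask)
        {
            return RegisterType::Mask;  // predicate/mask file; only compiled in under FEATURE_MASKED_HW_INTRINSICS
        }
        return RegisterType::Float;     // float/SIMD file
    }

    int main()
    {
        assert(regTypeOf(VarType::Ref) == RegisterType::Int);
        assert(regTypeOf(VarType::VectorMask) == RegisterType::Mask);
        assert(regTypeOf(VarType::Simd) == RegisterType::Float);
        return 0;
    }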
@@ -477,12 +480,12 @@ class RegRecord : public Referenceable { public: RegRecord() + : Referenceable(IntRegisterType) { assignedInterval = nullptr; previousInterval = nullptr; regNum = REG_NA; isCalleeSave = false; - registerType = IntRegisterType; } void init(regNumber reg) @@ -499,19 +502,19 @@ class RegRecord : public Referenceable #endif if (emitter::isGeneralRegister(reg)) { - assert(registerType == IntRegisterType); + registerType = IntRegisterType; } else if (emitter::isFloatReg(reg)) { registerType = FloatRegisterType; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#ifdef FEATURE_MASKED_HW_INTRINSICS else { assert(emitter::isMaskReg(reg)); registerType = MaskRegisterType; } -#endif +#endif // FEATURE_MASKED_HW_INTRINSICS regNum = reg; isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0); } @@ -622,7 +625,7 @@ class LinearScan : public LinearScanInterface // This is the main driver virtual PhaseStatus doLinearScan(); - static bool isSingleRegister(regMaskTP regMask) + static bool isSingleRegister(regMaskOnlyOne regMask) { return (genExactlyOneBit(regMask)); } @@ -723,7 +726,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - regMaskTP terminatorConsumedRegs); + regMaskGpr terminatorConsumedRegs); void resolveEdges(); @@ -777,34 +780,34 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. - static const regMaskTP LsraLimitSmallIntSet = + static const regMaskGpr LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); #else // !UNIX_AMD64_ABI // On Windows Amd64 use the RDI and RSI as callee saved registers. - static const regMaskTP LsraLimitSmallIntSet = + static const regMaskGpr LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); #endif // !UNIX_AMD64_ABI - static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); - static const regMaskTP LsraLimitUpperSimdSet = + static const regMaskFloat LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); + static const regMaskFloat LsraLimitUpperSimdSet = (RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 | RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31); #elif defined(TARGET_ARM) // On ARM, we may need two registers to set up the target register for a virtual call, so we need // to have at least the maximum number of arg registers, plus 2. 
- static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); - static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); + static const regMaskGpr LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); + static const regMaskFloat LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + static const regMaskGpr LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + static const regMaskFloat LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) - static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); - static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); + static const regMaskGpr LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); + static const regMaskFloat LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); #elif defined(TARGET_LOONGARCH64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); - static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); + static const regMaskGpr LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskFloat LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #elif defined(TARGET_RISCV64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); - static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); + static const regMaskGpr LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskFloat LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -814,11 +817,12 @@ class LinearScan : public LinearScanInterface return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK); } - regMaskTP getConstrainedRegMask(RefPosition* refPosition, - regMaskTP regMaskActual, - regMaskTP regMaskConstrain, - unsigned minRegCount); - regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask); + regMaskOnlyOne getConstrainedRegMask(RefPosition* refPosition, + RegisterType regType, + regMaskOnlyOne regMaskActual, + regMaskOnlyOne regMaskConstrain, + unsigned minRegCount); + regMaskOnlyOne stressLimitRegs(RefPosition* refPosition, regMaskOnlyOne mask, RegisterType regType); // This controls the heuristics used to select registers // These can be combined. 
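With the stress sets above now typed per register file (LsraLimitSmallIntSet as a regMaskGpr, LsraLimitSmallFPSet as a regMaskFloat), stressLimitRegs/getConstrainedRegMask take the register type so candidates can be constrained against the matching set. A hedged sketch of that constraint step; the fallback rule here is a simplification, since the real getConstrainedRegMask also honors a minimum register count and fixed-register references:

    #include <cassert>
    #include <cstdint>

    enum class RegFile { Gpr, Float };

    // Placeholder stress sets standing in for LsraLimitSmallIntSet / LsraLimitSmallFPSet.
    constexpr uint64_t kLimitSmallIntSet = 0x0000002E; // a few low GPRs
    constexpr uint64_t kLimitSmallFPSet  = 0x000000C7; // a few low float regs

    // Constrain 'candidates' to the stress subset for its register file, but only
    // when the intersection still leaves something to allocate from.
    static uint64_t stressLimitRegs(uint64_t candidates, RegFile file)
    {
        uint64_t limit       = (file == RegFile::Gpr) ? kLimitSmallIntSet : kLimitSmallFPSet;
        uint64_t constrained = candidates & limit;
        return (constrained != 0) ? constrained : candidates;
    }

    int main()
    {
        assert(stressLimitRegs(0xFF, RegFile::Gpr) == (0xFF & kLimitSmallIntSet));
        // No overlap with the stress set: keep the original candidates rather than an empty mask.
        assert(stressLimitRegs(0x100, RegFile::Float) == 0x100);
        return 0;
    }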
@@ -904,7 +908,7 @@ class LinearScan : public LinearScanInterface { return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK); } - regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs); + regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, CONSTREF_AllRegsMask availableRegs); // This controls whether we always insert a GT_RELOAD instruction after a spill // Note that this can be combined with LSRA_SPILL_ALWAYS (or not) @@ -986,7 +990,7 @@ class LinearScan : public LinearScanInterface static bool IsResolutionMove(GenTree* node); static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node); - void verifyFreeRegisters(regMaskTP regsToFree); + void verifyFreeRegisters(CONSTREF_AllRegsMask regsToFree); void verifyFinalAllocation(); void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation); #else // !DEBUG @@ -1062,6 +1066,11 @@ class LinearScan : public LinearScanInterface void processBlockStartLocations(BasicBlock* current); void processBlockEndLocations(BasicBlock* current); void resetAllRegistersState(); +#ifdef HAS_MORE_THAN_64_REGISTERS + FORCEINLINE void updateDeadCandidatesAtBlockStart(REF_AllRegsMask deadRegMask, VarToRegMap inVarToRegMap); +#else + FORCEINLINE void updateDeadCandidatesAtBlockStart(RegBitSet64 deadRegMask, VarToRegMap inVarToRegMap); +#endif #ifdef TARGET_ARM bool isSecondHalfReg(RegRecord* regRec, Interval* interval); @@ -1084,14 +1093,15 @@ class LinearScan : public LinearScanInterface void insertZeroInitRefPositions(); // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions - void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse); + void addRefsForPhysRegMask(CONSTREF_AllRegsMask mask, LsraLocation currentLoc, RefType refType, bool isLastUse); void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); void buildRefPositionsForNode(GenTree* tree, LsraLocation loc); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet); + void buildUpperVectorSaveRefPositions(GenTree* tree, + LsraLocation currentLoc DEBUG_ARG(regMaskFloat fpCalleeKillSet)); void buildUpperVectorRestoreRefPosition( Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node, bool isUse, unsigned multiRegIdx); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -1121,37 +1131,47 @@ class LinearScan : public LinearScanInterface } // Helpers for getKillSetForNode(). 
- regMaskTP getKillSetForStoreInd(GenTreeStoreInd* tree); - regMaskTP getKillSetForShiftRotate(GenTreeOp* tree); - regMaskTP getKillSetForMul(GenTreeOp* tree); - regMaskTP getKillSetForCall(GenTreeCall* call); - regMaskTP getKillSetForModDiv(GenTreeOp* tree); - regMaskTP getKillSetForBlockStore(GenTreeBlk* blkNode); - regMaskTP getKillSetForReturn(); - regMaskTP getKillSetForProfilerHook(); + CONSTREF_AllRegsMask getKillSetForStoreInd(GenTreeStoreInd* tree); + regMaskGpr getKillSetForShiftRotate(GenTreeOp* tree); + regMaskGpr getKillSetForMul(GenTreeOp* tree); + AllRegsMask getKillSetForCall(GenTreeCall* call); + regMaskGpr getKillSetForModDiv(GenTreeOp* tree); + AllRegsMask getKillSetForBlockStore(GenTreeBlk* blkNode); + CONSTREF_AllRegsMask getKillSetForReturn(); + CONSTREF_AllRegsMask getKillSetForProfilerHook(); #ifdef FEATURE_HW_INTRINSICS - regMaskTP getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node); + regMaskGpr getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node); #endif // FEATURE_HW_INTRINSICS // Return the registers killed by the given tree node. // This is used only for an assert, and for stress, so it is only defined under DEBUG. // Otherwise, the Build methods should obtain the killMask from the appropriate method above. #ifdef DEBUG - regMaskTP getKillSetForNode(GenTree* tree); + AllRegsMask getKillSetForNode(GenTree* tree); #endif // Given some tree node add refpositions for all the registers this node kills - bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask); + bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, CONSTREF_AllRegsMask killMask); - regMaskTP allRegs(RegisterType rt); - regMaskTP allByteRegs(); - regMaskTP allSIMDRegs(); - regMaskTP lowSIMDRegs(); - regMaskTP internalFloatRegCandidates(); + regMaskOnlyOne allRegs(RegisterType rt); + regMaskGpr allByteRegs(); + regMaskFloat allSIMDRegs(); + regMaskFloat lowSIMDRegs(); + regMaskFloat internalFloatRegCandidates(); void makeRegisterInactive(RegRecord* physRegRecord); +#ifdef HAS_MORE_THAN_64_REGISTERS + FORCEINLINE void inActivateRegisters(REF_AllRegsMask inactiveMask); +#else + FORCEINLINE void inActivateRegisters(RegBitSet64 inactiveMask); +#endif void freeRegister(RegRecord* physRegRecord); - void freeRegisters(regMaskTP regsToFree); + void freeRegisters(REF_AllRegsMask regsToFree); +#ifdef HAS_MORE_THAN_64_REGISTERS + FORCEINLINE void freeRegisterMask(REF_AllRegsMask freeMask); +#else + FORCEINLINE void freeRegisterMask(RegBitSet64 freeMask); +#endif // Get the type that this tree defines. var_types getDefType(GenTree* tree) @@ -1169,11 +1189,11 @@ class LinearScan : public LinearScanInterface } // Managing internal registers during the BuildNode process. 
- RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates); - RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); - RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); -#if defined(FEATURE_SIMD) - RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); + RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskOnlyOne candidates); + RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskGpr internalCands = RBM_NONE); + RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskFloat internalCands = RBM_NONE); +#ifdef FEATURE_MASKED_HW_INTRINSICS + RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskPredicate internalCands = RBM_NONE); #endif void buildInternalRegisterUses(); @@ -1206,15 +1226,15 @@ class LinearScan : public LinearScanInterface RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType); - RefPosition* newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx = 0); + RefPosition* newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskOnlyOne mask, + unsigned multiRegIdx = 0); RefPosition* newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskOnlyOne mask); void applyCalleeSaveHeuristics(RefPosition* rp); @@ -1269,24 +1289,26 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); - regMaskTP filterConsecutiveCandidates(regMaskTP candidates, - unsigned int registersNeeded, - regMaskTP* allConsecutiveCandidates); - regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded); + bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + regMaskFloat getConsecutiveCandidates(regMaskFloat candidates, + RefPosition* refPosition, + regMaskFloat* busyCandidates); + regMaskFloat filterConsecutiveCandidates(regMaskFloat candidates, + unsigned int registersNeeded, + regMaskFloat* allConsecutiveCandidates); + regMaskFloat filterConsecutiveCandidatesForSpill(regMaskFloat consecutiveCandidates, unsigned int registersNeeded); #endif // TARGET_ARM64 - regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) + regMaskOnlyOne getFreeCandidates(regMaskOnlyOne candidates, var_types regType) { - regMaskTP result = candidates & m_AvailableRegs; + regMaskOnlyOne result = candidates & m_AvailableRegs.GetRegMaskForType(regType); #ifdef TARGET_ARM // For TYP_DOUBLE on ARM, we can only use register for which the odd half is // also available. 
if (regType == TYP_DOUBLE) { - result &= (m_AvailableRegs >> 1); + result &= (m_AvailableRegs.floatRegs(compiler) >> 1); } #endif // TARGET_ARM return result; @@ -1308,11 +1330,11 @@ class LinearScan : public LinearScanInterface // Perform register selection and update currentInterval or refPosition template - FORCEINLINE regMaskTP select(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + FORCEINLINE singleRegMask select(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - FORCEINLINE regMaskTP selectMinimal(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + FORCEINLINE singleRegMask selectMinimal(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); // If the register is from unassigned set such that it was not already // assigned to the current interval @@ -1353,39 +1375,39 @@ class LinearScan : public LinearScanInterface RegisterType regType = RegisterType::TYP_UNKNOWN; - regMaskTP candidates; - regMaskTP preferences = RBM_NONE; - Interval* relatedInterval = nullptr; + regMaskOnlyOne candidates; + regMaskOnlyOne preferences = RBM_NONE; + Interval* relatedInterval = nullptr; - regMaskTP relatedPreferences = RBM_NONE; - LsraLocation rangeEndLocation; - LsraLocation relatedLastLocation; - bool preferCalleeSave = false; - RefPosition* rangeEndRefPosition; - RefPosition* lastRefPosition; - regMaskTP callerCalleePrefs = RBM_NONE; - LsraLocation lastLocation; + regMaskOnlyOne relatedPreferences = RBM_NONE; + LsraLocation rangeEndLocation; + LsraLocation relatedLastLocation; + bool preferCalleeSave = false; + RefPosition* rangeEndRefPosition; + RefPosition* lastRefPosition; + regMaskOnlyOne callerCalleePrefs = RBM_NONE; + LsraLocation lastLocation; - regMaskTP foundRegBit; + singleRegMask foundRegBit; - regMaskTP prevRegBit = RBM_NONE; + singleRegMask prevRegBit = RBM_NONE; // These are used in the post-selection updates, and must be set for any selection. - regMaskTP freeCandidates; - regMaskTP matchingConstants; - regMaskTP unassignedSet; + regMaskOnlyOne freeCandidates; + regMaskOnlyOne matchingConstants; + regMaskOnlyOne unassignedSet; // Compute the sets for COVERS, OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED together, // as they all require similar computation. 
- regMaskTP coversSet; - regMaskTP preferenceSet; - regMaskTP coversRelatedSet; - regMaskTP coversFullSet; - bool coversSetsCalculated = false; - bool found = false; - bool skipAllocation = false; - bool coversFullApplied = false; - bool constAvailableApplied = false; + regMaskOnlyOne coversSet; + regMaskOnlyOne preferenceSet; + regMaskOnlyOne coversRelatedSet; + regMaskOnlyOne coversFullSet; + bool coversSetsCalculated = false; + bool found = false; + bool skipAllocation = false; + bool coversFullApplied = false; + bool constAvailableApplied = false; // If the selected register is already assigned to the current internal FORCEINLINE bool isAlreadyAssigned() @@ -1394,8 +1416,8 @@ class LinearScan : public LinearScanInterface return (prevRegBit & preferences) == foundRegBit; } - bool applySelection(int selectionScore, regMaskTP selectionCandidates); - bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate); + bool applySelection(int selectionScore, regMaskOnlyOne selectionCandidates); + bool applySingleRegSelection(int selectionScore, regMaskOnlyOne selectionCandidate); FORCEINLINE void calculateCoversSets(); FORCEINLINE void calculateUnassignedSets(); FORCEINLINE void reset(Interval* interval, RefPosition* refPosition); @@ -1456,7 +1478,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - regMaskTP terminatorConsumedRegs); + regMaskOnlyOne terminatorConsumedRegs); #ifdef TARGET_ARM64 typedef JitHashTable, RefPosition*> NextConsecutiveRefPositionsMap; @@ -1529,12 +1551,12 @@ class LinearScan : public LinearScanInterface static const int MAX_ROWS_BETWEEN_TITLES = 50; int rowCountSinceLastTitle; // Current mask of registers being printed in the dump. - regMaskTP lastDumpedRegisters; - regMaskTP registersToDump; - int lastUsedRegNumIndex; - bool shouldDumpReg(regNumber regNum) + AllRegsMask lastDumpedRegisters; + AllRegsMask registersToDump; + int lastUsedRegNumIndex; + bool shouldDumpReg(regNumber regNum) { - return (registersToDump & genRegMask(regNum)) != 0; + return registersToDump.IsRegNumInMask(regNum); } void dumpRegRecordHeader(); @@ -1739,17 +1761,17 @@ class LinearScan : public LinearScanInterface VarToRegMap* outVarToRegMaps; // A temporary VarToRegMap used during the resolution of critical edges. - VarToRegMap sharedCriticalVarToRegMap; - PhasedVar actualRegistersMask; - PhasedVar availableIntRegs; - PhasedVar availableFloatRegs; - PhasedVar availableDoubleRegs; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) - PhasedVar availableMaskRegs; + VarToRegMap sharedCriticalVarToRegMap; + PhasedVar actualRegistersMask; + PhasedVar availableIntRegs; + PhasedVar availableFloatRegs; + PhasedVar availableDoubleRegs; +#ifdef FEATURE_MASKED_HW_INTRINSICS + PhasedVar availableMaskRegs; #endif - PhasedVar* availableRegs[TYP_COUNT]; + PhasedVar* availableRegs[TYP_COUNT]; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#ifdef FEATURE_MASKED_HW_INTRINSICS #define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs) #else #define allAvailableRegs (availableIntRegs | availableFloatRegs) @@ -1759,7 +1781,7 @@ class LinearScan : public LinearScanInterface // PUTARG_REG node. Tracked between the PUTARG_REG and its corresponding // CALL node and is used to avoid preferring these registers for locals // which would otherwise force a spill. 
- regMaskTP placedArgRegs; + AllRegsMask placedArgRegs; struct PlacedLocal { @@ -1808,60 +1830,41 @@ class LinearScan : public LinearScanInterface // Register status //----------------------------------------------------------------------- - regMaskTP m_AvailableRegs; - regNumber getRegForType(regNumber reg, var_types regType) - { -#ifdef TARGET_ARM - if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg)) - { - reg = REG_PREV(reg); - } -#endif // TARGET_ARM - return reg; - } - - regMaskTP getRegMask(regNumber reg, var_types regType) - { - reg = getRegForType(reg, regType); - regMaskTP regMask = genRegMask(reg); -#ifdef TARGET_ARM - if (regType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(reg)); - regMask |= (regMask << 1); - } -#endif // TARGET_ARM - return regMask; - } + AllRegsMask m_AvailableRegs; void resetAvailableRegs() { - m_AvailableRegs = allAvailableRegs; - m_RegistersWithConstants = RBM_NONE; +#ifdef HAS_MORE_THAN_64_REGISTERS + m_AvailableRegs = AllRegsMask(availableIntRegs, availableFloatRegs, availableMaskRegs); +#else + m_AvailableRegs = AllRegsMask(allAvailableRegs); +#endif // HAS_MORE_THAN_64_REGISTERS + m_RegistersWithConstants.Clear(); } - bool isRegAvailable(regNumber reg, var_types regType) + bool isRegAvailable(regNumber reg, var_types regType) // only used in asserts { - regMaskTP regMask = getRegMask(reg, regType); - return (m_AvailableRegs & regMask) == regMask; + return m_AvailableRegs.IsRegNumPresent(reg, regType); } - void setRegsInUse(regMaskTP regMask) + + void setRegsInUse(CONSTREF_AllRegsMask regMask) { m_AvailableRegs &= ~regMask; } + void setRegInUse(regNumber reg, var_types regType) { - regMaskTP regMask = getRegMask(reg, regType); - setRegsInUse(regMask); + m_AvailableRegs.RemoveRegNum(reg, regType); } - void makeRegsAvailable(regMaskTP regMask) + + void makeRegsAvailable(CONSTREF_AllRegsMask regMask) { m_AvailableRegs |= regMask; } + void makeRegAvailable(regNumber reg, var_types regType) { - regMaskTP regMask = getRegMask(reg, regType); - makeRegsAvailable(regMask); + m_AvailableRegs.AddRegNum(reg, regType); } void clearAllNextIntervalRef(); @@ -1872,30 +1875,31 @@ class LinearScan : public LinearScanInterface void clearSpillCost(regNumber reg, var_types regType); void updateSpillCost(regNumber reg, Interval* interval); - FORCEINLINE void updateRegsFreeBusyState(RefPosition& refPosition, - regMaskTP regsBusy, - regMaskTP* regsToFree, - regMaskTP* delayRegsToFree DEBUG_ARG(Interval* interval) + FORCEINLINE void updateRegsFreeBusyState(RefPosition& refPosition, + regMaskOnlyOne regsBusy, + AllRegsMask* regsToFree, + AllRegsMask* delayRegsToFree, + RegisterType regType DEBUG_ARG(Interval* interval) DEBUG_ARG(regNumber assignedReg)); - regMaskTP m_RegistersWithConstants; - void clearConstantReg(regNumber reg, var_types regType) + AllRegsMask m_RegistersWithConstants; + AllRegsMask fixedRegs; + + void clearConstantReg(regNumber reg, var_types regType) { - m_RegistersWithConstants &= ~getRegMask(reg, regType); + m_RegistersWithConstants.RemoveRegNum(reg, regType); } void setConstantReg(regNumber reg, var_types regType) { - m_RegistersWithConstants |= getRegMask(reg, regType); + m_RegistersWithConstants.AddRegNum(reg, regType); } bool isRegConstant(regNumber reg, var_types regType) { - reg = getRegForType(reg, regType); - regMaskTP regMask = getRegMask(reg, regType); - return (m_RegistersWithConstants & regMask) == regMask; + reg = getRegForType(reg, regType); + return m_RegistersWithConstants.IsRegNumPresent(reg, regType); } - regMaskTP 
getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); + regMaskOnlyOne getMatchingConstants(regMaskOnlyOne mask, Interval* currentInterval, RefPosition* refPosition); - regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); LsraLocation getNextFixedRef(regNumber regNum, var_types regType) @@ -1924,36 +1928,35 @@ class LinearScan : public LinearScanInterface } weight_t spillCost[REG_COUNT]; - regMaskTP regsBusyUntilKill; - regMaskTP regsInUseThisLocation; - regMaskTP regsInUseNextLocation; + AllRegsMask regsBusyUntilKill; + AllRegsMask regsInUseThisLocation; + AllRegsMask regsInUseNextLocation; #ifdef TARGET_ARM64 - regMaskTP consecutiveRegsInUseThisLocation; + regMaskFloat consecutiveRegsInUseThisLocation; #endif bool isRegBusy(regNumber reg, var_types regType) { - regMaskTP regMask = getRegMask(reg, regType); - return (regsBusyUntilKill & regMask) != RBM_NONE; + return regsBusyUntilKill.IsRegNumPresent(reg, regType); } void setRegBusyUntilKill(regNumber reg, var_types regType) { - regsBusyUntilKill |= getRegMask(reg, regType); + regsBusyUntilKill.AddRegNum(reg, regType); } void clearRegBusyUntilKill(regNumber reg) { - regsBusyUntilKill &= ~genRegMask(reg); + regsBusyUntilKill.RemoveRegNumFromMask(reg); } bool isRegInUse(regNumber reg, var_types regType) { - regMaskTP regMask = getRegMask(reg, regType); - return (regsInUseThisLocation & regMask) != RBM_NONE; + regMaskOnlyOne regMask = getRegMask(reg, regType); + return regsInUseThisLocation.IsRegNumInMask(reg ARM_ARG(regType)); } void resetRegState() { resetAvailableRegs(); - regsBusyUntilKill = RBM_NONE; + regsBusyUntilKill.Clear(); } bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition); @@ -2013,13 +2016,13 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); - RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); + RefPosition* BuildUse(GenTree* operand, regMaskOnlyOne candidates = RBM_NONE, int multiRegIdx = 0); void setDelayFree(RefPosition* use); - int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); - int BuildCastUses(GenTreeCast* cast, regMaskTP candidates); + int BuildBinaryUses(GenTreeOp* node, regMaskOnlyOne candidates = RBM_NONE); + int BuildCastUses(GenTreeCast* cast, regMaskOnlyOne candidates); #ifdef TARGET_XARCH - int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE); - inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree); + int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskOnlyOne candidates = RBM_NONE); + inline regMaskFloat BuildEvexIncompatibleMask(GenTree* tree); #endif // !TARGET_XARCH int BuildSelect(GenTreeOp* select); // This is the main entry point for building the RefPositions for a node. 
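The hunks above and below replace the single flat regMaskTP with class-specific masks (regMaskGpr, regMaskFloat, regMaskPredicate, and the single-class regMaskOnlyOne) plus a combined AllRegsMask for sets that can span classes. As a rough standalone model of that split (illustrative names only; SimpleAllRegsMask and RegClass are not the JIT's types, and the real AllRegsMask and register numbering differ), the idea is one 64-bit field per register class, with per-register helpers dispatching on the class:

#include <bit>      // std::popcount (C++20)
#include <cstdint>

enum class RegClass { Gpr, Float, Predicate };
using RegMask64 = uint64_t; // analogue of a single-class mask ("regMaskOnlyOne")

struct SimpleAllRegsMask // analogue of the combined, cross-class mask
{
    RegMask64 gpr = 0, flt = 0, pred = 0;

    RegMask64& forClass(RegClass c)
    {
        return (c == RegClass::Gpr) ? gpr : (c == RegClass::Float) ? flt : pred;
    }

    void AddReg(unsigned regIndex, RegClass c)    { forClass(c) |= (RegMask64(1) << regIndex); }
    void RemoveReg(unsigned regIndex, RegClass c) { forClass(c) &= ~(RegMask64(1) << regIndex); }
    bool HasReg(unsigned regIndex, RegClass c)    { return ((forClass(c) >> regIndex) & 1) != 0; }

    bool     IsEmpty() const { return (gpr | flt | pred) == 0; }
    unsigned Count() const   { return unsigned(std::popcount(gpr) + std::popcount(flt) + std::popcount(pred)); }
    void     Clear()         { gpr = flt = pred = 0; }
};

int main()
{
    SimpleAllRegsMask killed;
    killed.AddReg(0, RegClass::Gpr);   // e.g. an integer return register
    killed.AddReg(1, RegClass::Float); // e.g. a vector return register
    killed.RemoveReg(0, RegClass::Gpr);
    return (killed.Count() == 1 && killed.HasReg(1, RegClass::Float)) ? 0 : 1;
}

Keeping single-class candidate sets as plain integers while only cross-class sets (kill masks, available/busy registers) pay for the wider representation appears to be the point of the regMaskOnlyOne vs. AllRegsMask distinction in these declarations.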
@@ -2031,18 +2034,25 @@ class LinearScan : public LinearScanInterface bool supportsSpecialPutArg(); int BuildSimple(GenTree* tree); - int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); + int BuildOperandUses(GenTree* node, regMaskOnlyOne candidates = RBM_NONE); void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode); - int BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode = nullptr, - regMaskTP candidates = RBM_NONE, - RefPosition** useRefPosition = nullptr); - int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); - int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); + int BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode = nullptr, + regMaskOnlyOne candidates = RBM_NONE, + RefPosition** useRefPosition = nullptr); + int BuildIndirUses(GenTreeIndir* indirTree, regMaskOnlyOne candidates = RBM_NONE); + int BuildAddrUses(GenTree* addr, regMaskOnlyOne candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); - RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0); - void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE); - void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); + RefPosition* BuildDef(GenTree* tree, regMaskOnlyOne dstCandidates = RBM_NONE, int multiRegIdx = 0); + void BuildDefs(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates = RBM_NONE); + void BuildCallDefs(GenTree* tree, int dstCount, REF_AllRegsMask dstCandidates); + void BuildKills(GenTree* tree, CONSTREF_AllRegsMask killMask); +#ifdef TARGET_ARMARCH + void BuildDefWithKills(GenTree* tree, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask); +#else + void BuildDefWithKills(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask); +#endif + void BuildCallDefsWithKills(GenTree* tree, int dstCount, REF_AllRegsMask dstCandidates, REF_AllRegsMask killMask); int BuildReturn(GenTree* tree); #ifdef TARGET_XARCH @@ -2114,29 +2124,29 @@ class LinearScan : public LinearScanInterface #endif // FEATURE_ARG_SPLIT int BuildLclHeap(GenTree* tree); -#if defined(TARGET_AMD64) - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + // #if defined(TARGET_AMD64) + regMaskFloat rbmAllFloat; + regMaskFloat rbmFltCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE regMaskFloat get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE regMaskFloat get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } -#endif // TARGET_AMD64 + // #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + regMaskPredicate rbmAllMask; + regMaskPredicate rbmMskCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE regMaskPredicate get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE regMaskPredicate get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -2155,9 +2165,9 @@ class LinearScan : public LinearScanInterface // NOTE: we currently don't need a LinearScan `this` pointer for this definition, and some callers // don't have one available, so make is static. 
// - static FORCEINLINE regMaskTP calleeSaveRegs(RegisterType rt) + static FORCEINLINE regMaskOnlyOne calleeSaveRegs(RegisterType rt) { - static const regMaskTP varTypeCalleeSaveRegs[] = { + static const regMaskOnlyOne varTypeCalleeSaveRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) csr, #include "typelist.h" #undef DEF_TP @@ -2171,16 +2181,16 @@ class LinearScan : public LinearScanInterface // Not all of the callee trash values are constant, so don't declare this as a method local static // doing so results in significantly more complex codegen and we'd rather just initialize this once // as part of initializing LSRA instead - regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; + regMaskOnlyOne varTypeCalleeTrashRegs[TYP_COUNT]; #endif // TARGET_XARCH //------------------------------------------------------------------------ // callerSaveRegs: Get the set of caller-save registers of the given RegisterType // - FORCEINLINE regMaskTP callerSaveRegs(RegisterType rt) const + FORCEINLINE regMaskOnlyOne callerSaveRegs(RegisterType rt) const { #if !defined(TARGET_XARCH) - static const regMaskTP varTypeCalleeTrashRegs[] = { + static const regMaskOnlyOne varTypeCalleeTrashRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP @@ -2207,14 +2217,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX class Interval : public Referenceable { public: - Interval(RegisterType registerType, regMaskTP registerPreferences) - : registerPreferences(registerPreferences) + Interval(RegisterType registerType, regMaskOnlyOne registerPreferences) + : Referenceable(registerType) + , registerPreferences(registerPreferences) , registerAversion(RBM_NONE) , relatedInterval(nullptr) , assignedReg(nullptr) , varNum(0) , physReg(REG_COUNT) - , registerType(registerType) , isActive(false) , isLocalVar(false) , isSplit(false) @@ -2236,6 +2246,7 @@ class Interval : public Referenceable #ifdef DEBUG , intervalIndex(0) #endif + { } @@ -2251,10 +2262,10 @@ class Interval : public Referenceable void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l); // Fixed registers for which this Interval has a preference - regMaskTP registerPreferences; + regMaskOnlyOne registerPreferences; // Registers that should be avoided for this interval - regMaskTP registerAversion; + regMaskOnlyOne registerAversion; // The relatedInterval is: // - for any other interval, it is the interval to which this interval @@ -2271,8 +2282,6 @@ class Interval : public Referenceable // The register to which it is currently assigned. regNumber physReg; - RegisterType registerType; - // Is this Interval currently in a register and live? bool isActive; @@ -2403,12 +2412,12 @@ class Interval : public Referenceable // definitions. This method will return the current assigned register if any, or // the 'registerPreferences' otherwise. // - regMaskTP getCurrentPreferences() + regMaskOnlyOne getCurrentPreferences() { return (assignedReg == nullptr) ? registerPreferences : genRegMask(assignedReg->regNum); } - void mergeRegisterPreferences(regMaskTP preferences) + void mergeRegisterPreferences(regMaskOnlyOne preferences) { // We require registerPreferences to have been initialized. 
assert(registerPreferences != RBM_NONE); @@ -2423,7 +2432,7 @@ class Interval : public Referenceable return; } - regMaskTP commonPreferences = (registerPreferences & preferences); + regMaskOnlyOne commonPreferences = (registerPreferences & preferences); if (commonPreferences != RBM_NONE) { registerPreferences = commonPreferences; @@ -2458,11 +2467,11 @@ class Interval : public Referenceable // Keep only the callee-save preferences, if not empty. // Otherwise, take the union of the preferences. - regMaskTP newPreferences = registerPreferences | preferences; + regMaskOnlyOne newPreferences = registerPreferences | preferences; if (preferCalleeSave) { - regMaskTP calleeSaveMask = (LinearScan::calleeSaveRegs(this->registerType) & newPreferences); + regMaskOnlyOne calleeSaveMask = (LinearScan::calleeSaveRegs(this->registerType) & newPreferences); if (calleeSaveMask != RBM_NONE) { newPreferences = calleeSaveMask; @@ -2477,7 +2486,7 @@ class Interval : public Referenceable // An exception is made in the case where one of the existing or new // preferences are all callee-save, in which case we "prefer" the callee-save - void updateRegisterPreferences(regMaskTP preferences) + void updateRegisterPreferences(regMaskOnlyOne preferences) { // If this interval is preferenced, that interval may have already been assigned a // register, and we want to include that in the preferences. @@ -2515,7 +2524,9 @@ class RefPosition // Prior to the allocation pass, registerAssignment captures the valid registers // for this RefPosition. // After the allocation pass, this contains the actual assignment - regMaskTP registerAssignment; + // TODO-future: This should really be a union, where before allocation-pass it has `mask` and + // after allocation-pass, it has regNumber directly, to avoid calling assignedReg(); + regMaskOnlyOne registerAssignment; RefType refType; @@ -2686,7 +2697,12 @@ class RefPosition return REG_NA; } - return genRegNumFromMask(registerAssignment); + return genRegNumFromMask(registerAssignment MORE_THAN_64_REG_ARG(getRegisterType())); + } + + RegisterType getRegisterType() + { + return referent->registerType; } // Returns true if it is a reference on a GenTree node. @@ -2777,7 +2793,7 @@ class RefPosition // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register // specified by the given mask - bool isFixedRefOfRegMask(regMaskTP regMask) + bool isFixedRefOfRegMask(regMaskOnlyOne regMask) { assert(genMaxOneBit(regMask)); return (registerAssignment == regMask); diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 2192265984d68e..7fbc07adcab4be 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -192,11 +192,9 @@ int LinearScan::BuildShiftLongCarry(GenTree* tree) int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); - int srcCount; - int dstCount = 0; - regMaskTP dstCandidates = RBM_NONE; - regMaskTP killMask = RBM_NONE; - bool isLocalDefUse = false; + int srcCount; + int dstCount = 0; + bool isLocalDefUse = false; // Reset the build-related members of LinearScan. 
clearBuildState(); @@ -367,14 +365,15 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_RETURNTRAP: + { // this just turns into a compare of its child with an int // + a conditional call srcCount = 1; assert(dstCount == 0); BuildUse(tree->gtGetOp1()); - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC)); break; + } case GT_MUL: if (tree->gtOverflow()) @@ -422,7 +421,7 @@ int LinearScan::BuildNode(GenTree* tree) // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); - BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + BuildKills(tree, compiler->AllRegsMask_NONE); break; case GT_LONG: @@ -467,10 +466,11 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_RETURN: + { srcCount = BuildReturn(tree); - killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, getKillSetForReturn()); break; + } case GT_RETFILT: assert(dstCount == 0); @@ -666,8 +666,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_BITCAST: { assert(dstCount == 1); - regNumber argReg = tree->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = tree->GetRegNum(); + regMaskOnlyOne argMask = RBM_NONE; if (argReg != REG_COUNT) { argMask = genRegMask(argReg); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 1096d7f11701c5..00f6429d6ecd74 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -176,9 +176,9 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition // From `candidates`, the mask of series of consecutive registers of `registersNeeded` size with just the first-bit // set. // -regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, - unsigned int registersNeeded, - regMaskTP* allConsecutiveCandidates) +regMaskFloat LinearScan::filterConsecutiveCandidates(regMaskFloat candidates, + unsigned int registersNeeded, + regMaskFloat* allConsecutiveCandidates) { if (BitOperations::PopCount(candidates) < registersNeeded) { @@ -187,16 +187,16 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, return RBM_NONE; } - regMaskTP currAvailableRegs = candidates; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; + regMaskFloat currAvailableRegs = candidates; + regMaskFloat overallResult = RBM_NONE; + regMaskFloat consecutiveResult = RBM_NONE; // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. 
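The comment above describes trimming each run of k consecutive free registers Rm..Rm+k-1 down to the starting positions Rm..Rm+(k-n) from which n consecutive registers are still available. Ignoring the wrap-around from V31 back to V0 that this function handles separately, that trimming can be modelled on a plain 64-bit mask as below; this is a sketch of the idea, not the JIT's implementation (startsOfConsecutiveRuns is an illustrative name):

#include <cassert>
#include <cstdint>

// Keep only the bit positions from which 'n' consecutive 1-bits are available in
// 'candidates'; i.e. for each run of length k, the first (k - n + 1) positions survive.
static uint64_t startsOfConsecutiveRuns(uint64_t candidates, unsigned n)
{
    assert((n >= 1) && (n <= 64));
    uint64_t result = candidates;
    // After AND-ing with i right-shifted copies, a bit survives only if it begins a run longer than i.
    for (unsigned i = 1; i < n; i++)
    {
        result &= (candidates >> i);
    }
    return result;
}

int main()
{
    // Registers 2,3,4 and 7,8 are available; a run of 3 can only start at register 2.
    uint64_t mask = (1ull << 2) | (1ull << 3) | (1ull << 4) | (1ull << 7) | (1ull << 8);
    return (startsOfConsecutiveRuns(mask, 3) == (1ull << 2)) ? 0 : 1;
}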
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + regMaskFloat selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + regMaskFloat selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; @@ -206,10 +206,10 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { // From LSB, find the first available register (bit `1`) regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; + regMaskFloat startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + regMaskFloat maskProcessed = ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). if (maskProcessed == RBM_NONE) @@ -225,7 +225,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed)); } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + regMaskFloat endMask = (1ULL << regAvailableEndIndex) - 1; // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. // If they are equal to or greater than our register requirements, then add all of them to the result. @@ -236,7 +236,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - regMaskTP v0_v31_mask = RBM_V0 | RBM_V31; + regMaskFloat v0_v31_mask = RBM_V0 | RBM_V31; if ((candidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register @@ -260,14 +260,14 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } case 3: { - regMaskTP v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; + regMaskFloat v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; if ((candidates & v0_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v30_v31_mask; } - regMaskTP v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; + regMaskFloat v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; if ((candidates & v0_v1_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; @@ -277,21 +277,21 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } case 4: { - regMaskTP v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + regMaskFloat v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; if ((candidates & v0_v29_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V29; overallResult |= v0_v29_v30_v31_mask; } - regMaskTP v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + regMaskFloat v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; if ((candidates & v0_v1_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v1_v30_v31_mask; } - regMaskTP v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + regMaskFloat v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; if ((candidates & v0_v1_v2_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; @@ -323,23 +323,24 @@ 
regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // Returns: // Filtered candidates that needs fewer spilling. // -regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded) +regMaskFloat LinearScan::filterConsecutiveCandidatesForSpill(regMaskFloat consecutiveCandidates, + unsigned int registersNeeded) { assert(consecutiveCandidates != RBM_NONE); assert((registersNeeded >= 2) && (registersNeeded <= 4)); - regMaskTP consecutiveResultForBusy = RBM_NONE; - regMaskTP unprocessedRegs = consecutiveCandidates; - unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; - int maxSpillRegs = registersNeeded; - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + regMaskFloat consecutiveResultForBusy = RBM_NONE; + regMaskFloat unprocessedRegs = consecutiveCandidates; + unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; + int maxSpillRegs = registersNeeded; + regMaskFloat registersNeededMask = (1ULL << registersNeeded) - 1; do { // From LSB, find the first available register (bit `1`) regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs)); // For the current range, find how many registers are free vs. busy - regMaskTP maskForCurRange = RBM_NONE; - bool shouldCheckForRounding = false; + regMaskFloat maskForCurRange = RBM_NONE; + bool shouldCheckForRounding = false; switch (registersNeeded) { case 2: @@ -363,7 +364,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC } maskForCurRange |= (registersNeededMask << regAvailableStartIndex); - maskForCurRange &= m_AvailableRegs; + maskForCurRange &= m_AvailableRegs.floatRegs(compiler); if (maskForCurRange != RBM_NONE) { @@ -413,13 +414,13 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC // allCandidates = 0x1C080D0F00000000, the consecutive register mask returned // will be 0x400000300000000. // -regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, - RefPosition* refPosition, - regMaskTP* busyCandidates) +regMaskFloat LinearScan::getConsecutiveCandidates(regMaskFloat allCandidates, + RefPosition* refPosition, + regMaskFloat* busyCandidates) { assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); - regMaskTP freeCandidates = allCandidates & m_AvailableRegs; + regMaskFloat freeCandidates = allCandidates & m_AvailableRegs.floatRegs(compiler); #ifdef DEBUG if (getStressLimitRegs() != LSRA_LIMIT_NONE) @@ -431,12 +432,12 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, #endif *busyCandidates = RBM_NONE; - regMaskTP overallResult; + regMaskFloat overallResult; unsigned int registersNeeded = refPosition->regCount; if (freeCandidates != RBM_NONE) { - regMaskTP consecutiveResultForFree = + regMaskFloat consecutiveResultForFree = filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult); if (consecutiveResultForFree != RBM_NONE) @@ -446,10 +447,10 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // register out of the `consecutiveResult` is available for the first RefPosition, then just use // that. This will avoid unnecessary copies. 
- regNumber firstRegNum = REG_NA; - regNumber prevRegNum = REG_NA; - int foundCount = 0; - regMaskTP foundRegMask = RBM_NONE; + regNumber firstRegNum = REG_NA; + regNumber prevRegNum = REG_NA; + int foundCount = 0; + regMaskFloat foundRegMask = RBM_NONE; RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition); assert(consecutiveRefPosition != nullptr); @@ -488,7 +489,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, if (foundCount != 0) { assert(firstRegNum != REG_NA); - regMaskTP remainingRegsMask = ((1ULL << (registersNeeded - foundCount)) - 1) << (firstRegNum - 1); + regMaskFloat remainingRegsMask = ((1ULL << (registersNeeded - foundCount)) - 1) << (firstRegNum - 1); if ((overallResult & remainingRegsMask) != RBM_NONE) { @@ -528,33 +529,33 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // try_FAR_NEXT_REF(), etc. here which would complicate things. Instead, we just go with option# 1 and select // registers based on fewer number of registers that has to be spilled. // - regMaskTP overallResultForBusy; - regMaskTP consecutiveResultForBusy = + regMaskFloat overallResultForBusy; + regMaskFloat consecutiveResultForBusy = filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy); *busyCandidates = consecutiveResultForBusy; // Check if we can further check better registers amoung consecutiveResultForBusy. - if ((m_AvailableRegs & overallResultForBusy) != RBM_NONE) + if ((m_AvailableRegs.floatRegs(compiler) & overallResultForBusy) != RBM_NONE) { // `overallResultForBusy` contains the mask of entire series that can be the consecutive candidates. // If there is an overlap of that with free registers, then try to find a series that will need least // registers spilling as mentioned in #1 above. - regMaskTP optimalConsecutiveResultForBusy = + regMaskFloat optimalConsecutiveResultForBusy = filterConsecutiveCandidatesForSpill(consecutiveResultForBusy, registersNeeded); if (optimalConsecutiveResultForBusy != RBM_NONE) { *busyCandidates = optimalConsecutiveResultForBusy; } - else if ((m_AvailableRegs & consecutiveResultForBusy) != RBM_NONE) + else if ((m_AvailableRegs.floatRegs(compiler) & consecutiveResultForBusy) != RBM_NONE) { // We did not find free consecutive candidates, however we found some registers among the // `allCandidates` that are mix of free and busy. Since `busyCandidates` just has bit set for first // register of such series, return the mask that starts with free register, if possible. The busy // registers will be spilled during assignment of subsequent RefPosition. - *busyCandidates = (m_AvailableRegs & consecutiveResultForBusy); + *busyCandidates = (m_AvailableRegs.floatRegs(compiler) & consecutiveResultForBusy); } } @@ -581,10 +582,9 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); - int srcCount; - int dstCount = 0; - regMaskTP killMask = RBM_NONE; - bool isLocalDefUse = false; + int srcCount; + int dstCount = 0; + bool isLocalDefUse = false; // Reset the build-related members of LinearScan. 
clearBuildState(); @@ -663,17 +663,18 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_PROF_HOOK: + { srcCount = 0; assert(dstCount == 0); - killMask = getKillSetForProfilerHook(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, getKillSetForProfilerHook()); break; + } case GT_START_PREEMPTGC: // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); - BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + BuildKills(tree, compiler->AllRegsMask_NONE); break; case GT_CNS_DBL: @@ -736,10 +737,11 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_RETURN: + { srcCount = BuildReturn(tree); - killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, getKillSetForReturn()); break; + } case GT_RETFILT: assert(dstCount == 0); @@ -823,14 +825,15 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_RETURNTRAP: + { // this just turns into a compare of its child with an int // + a conditional call BuildUse(tree->gtGetOp1()); srcCount = 1; assert(dstCount == 0); - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC)); break; + } case GT_MOD: case GT_UMOD: @@ -1329,9 +1332,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou const HWIntrinsic intrin(intrinsicTree); - int srcCount = 0; - int dstCount = 0; - regMaskTP dstCandidates = RBM_NONE; + int srcCount = 0; + int dstCount = 0; if (HWIntrinsicInfo::IsMultiReg(intrin.id)) { @@ -1547,7 +1549,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - regMaskTP predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK; + regMaskPredicate predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK; srcCount += BuildOperandUses(intrin.op1, predMask); } else if (intrinsicTree->OperIsMemoryLoadOrStore()) @@ -1804,11 +1806,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if ((dstCount == 1) || (dstCount == 2)) { - BuildDef(intrinsicTree, dstCandidates); + BuildDef(intrinsicTree); if (dstCount == 2) { - BuildDef(intrinsicTree, dstCandidates, 1); + BuildDef(intrinsicTree, RBM_NONE, 1); } } else diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index c2b8b74406584e..249d24d424722e 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -129,7 +129,6 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP dstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -148,8 +147,8 @@ int LinearScan::BuildCall(GenTreeCall* call) } } - GenTree* ctrlExpr = call->gtControlExpr; - regMaskTP ctrlExprCandidates = RBM_NONE; + GenTree* ctrlExpr = call->gtControlExpr; + regMaskGpr ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. @@ -183,7 +182,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. 
- regMaskTP candidates = RBM_NONE; + regMaskGpr candidates = RBM_NONE; if (call->IsFastTailCall()) { candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; } @@ -205,14 +204,15 @@ int LinearScan::BuildCall(GenTreeCall* call) // the target. We do not handle these constraints on the same // refposition too well so we help ourselves a bit here by forcing the // null check with LR. - regMaskTP candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; + regMaskGpr candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; buildInternalIntRegisterDefForNode(call, candidates); } #endif // TARGET_ARM - RegisterType registerType = call->TypeGet(); - // Set destination candidates for return value of the call. + AllRegsMask dstReturnCandidates; + regMaskOnlyOne dstCandidates = RBM_NONE; + RegisterType registerType = call->TypeGet(); #ifdef TARGET_ARM if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) { @@ -226,7 +226,8 @@ int LinearScan::BuildCall(GenTreeCall* call) if (hasMultiRegRetVal) { assert(retTypeDesc != nullptr); - dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); + dstReturnCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); + assert((int)dstReturnCandidates.Count() == dstCount); } else if (varTypeUsesFloatArgReg(registerType)) { @@ -390,14 +391,30 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. - regMaskTP killMask = getKillSetForCall(call); - BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + AllRegsMask killMask = getKillSetForCall(call); + if (dstCount > 0) + { + if (hasMultiRegRetVal) + { + assert(dstReturnCandidates.Count() > 0); + BuildCallDefsWithKills(call, dstCount, dstReturnCandidates, killMask); + } + else + { + assert(dstCount == 1); + BuildDefWithKills(call, dstCandidates, killMask); + } + } + else + { + BuildKills(call, killMask); + } #ifdef SWIFT_SUPPORT if (call->HasSwiftErrorHandling()) { // Tree is a Swift call with error handling; error register should have been killed - assert((killMask & RBM_SWIFT_ERROR) != 0); + assert((killMask.gprRegs() & RBM_SWIFT_ERROR) != 0); // After a Swift call that might throw returns, we expect the error register to be consumed // by a GT_SWIFT_ERROR node. However, we want to ensure the error register won't be trashed @@ -417,11 +434,36 @@ int LinearScan::BuildCall(GenTreeCall* call) #endif // SWIFT_SUPPORT // No args are placed in registers anymore. - placedArgRegs = RBM_NONE; + placedArgRegs.Clear(); numPlacedArgLocals = 0; return srcCount; } +//------------------------------------------------------------------------ +// BuildDefWithKills: Build one RefTypeDef RefPosition for the given node, +// as well as kills as specified by the given mask. +// +// Arguments: +// tree - The call node that defines a register +// dstCandidates - The candidate registers for the definition +// killMask - The mask of registers killed by this node +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// The def and kill functionality is folded into a single method so that the +// saves and restores of upper vector registers can be bracketed around the def.
+// +void LinearScan::BuildDefWithKills(GenTree* tree, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask) +{ + assert(!tree->AsCall()->HasMultiRegRetVal()); + assert((int)genCountBits(dstCandidates) == 1); + assert(compiler->IsOnlyOneRegMask(dstCandidates)); + + // Build the kill RefPositions + BuildKills(tree, killMask); + BuildDef(tree, dstCandidates); +} + //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node // @@ -528,14 +570,15 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = argNode->GetRegNum(); + regMaskGpr argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); argMask |= genRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } + assert(compiler->IsGprRegMask(argMask)); if (src->OperGet() == GT_FIELD_LIST) { @@ -569,7 +612,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // go into registers. for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { - regMaskTP sourceMask = RBM_NONE; + regMaskOnlyOne sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); @@ -627,9 +670,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + regMaskGpr dstAddrRegMask = RBM_NONE; + regMaskGpr srcRegMask = RBM_NONE; + regMaskGpr sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { @@ -686,7 +729,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. 
// We can't use the special Write Barrier registers, so exclude them from the mask - regMaskTP internalIntCandidates = + regMaskGpr internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); @@ -821,6 +864,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (!dstAddr->isContained()) { + assert(compiler->IsGprRegMask(dstAddrRegMask)); + useCount++; BuildUse(dstAddr, dstAddrRegMask); } @@ -833,6 +878,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { if (!srcAddrOrFill->isContained()) { + assert(compiler->IsGprRegMask(srcRegMask)); + useCount++; BuildUse(srcAddrOrFill, srcRegMask); } @@ -842,9 +889,10 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } } + assert(compiler->IsGprRegMask(sizeRegMask)); + buildInternalRegisterUses(); - regMaskTP killMask = getKillSetForBlockStore(blkNode); - BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + BuildKills(blkNode, getKillSetForBlockStore(blkNode)); return useCount; } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 15726e201d8a5e..585badc93eea8e 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -250,15 +250,15 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de { assert(!interval->isLocalVar); - RefPosition* useRefPosition = defRefPosition->nextRefPosition; - regMaskTP defRegAssignment = defRefPosition->registerAssignment; - regMaskTP useRegAssignment = useRefPosition->registerAssignment; - RegRecord* defRegRecord = nullptr; - RegRecord* useRegRecord = nullptr; - regNumber defReg = REG_NA; - regNumber useReg = REG_NA; - bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); - bool useRegConflict = defRegConflict; + RefPosition* useRefPosition = defRefPosition->nextRefPosition; + regMaskOnlyOne defRegAssignment = defRefPosition->registerAssignment; + regMaskOnlyOne useRegAssignment = useRefPosition->registerAssignment; + RegRecord* defRegRecord = nullptr; + RegRecord* useRegRecord = nullptr; + regNumber defReg = REG_NA; + regNumber useReg = REG_NA; + bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); + bool useRegConflict = defRegConflict; // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment // on it, or we will fail to ensure that the fixedReg is busy at the time the target @@ -352,7 +352,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de RegisterType regType = interval->registerType; assert((getRegisterType(interval, defRefPosition) == regType) && (getRegisterType(interval, useRefPosition) == regType)); - regMaskTP candidates = allRegs(regType); + regMaskOnlyOne candidates = allRegs(regType); defRefPosition->registerAssignment = candidates; defRefPosition->isFixedRegRef = false; return; @@ -423,8 +423,8 @@ void LinearScan::checkConflictingDefUse(RefPosition* useRP) // All defs must have a valid treeNode, but we check it below to be conservative. 
assert(defRP->treeNode != nullptr); - regMaskTP prevAssignment = defRP->registerAssignment; - regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); + regMaskOnlyOne prevAssignment = defRP->registerAssignment; + regMaskOnlyOne newAssignment = (prevAssignment & useRP->registerAssignment); if (newAssignment != RBM_NONE) { if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) @@ -519,8 +519,10 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) // a new RefPosition // RefPosition* LinearScan::newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskOnlyOne mask) { + assert(compiler->IsOnlyOneRegMask(mask)); + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); RegRecord* regRecord = getRegisterRecord(reg); @@ -554,13 +556,15 @@ RefPosition* LinearScan::newRefPosition( // Return Value: // a new RefPosition // -RefPosition* LinearScan::newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx /* = 0 */) +RefPosition* LinearScan::newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskOnlyOne mask, + unsigned multiRegIdx /* = 0 */) { + assert(compiler->IsOnlyOneRegMask(mask)); + if (theInterval != nullptr) { if (mask == RBM_NONE) @@ -600,7 +604,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, if (insertFixedRef) { - regNumber physicalReg = genRegNumFromMask(mask); + regNumber physicalReg = genRegNumFromMask(mask MORE_THAN_64_REG_ARG(theInterval->registerType)); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) @@ -697,7 +701,10 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) // refType - the type of refposition // isLastUse - true IFF this is a last use of the register // -void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) +void LinearScan::addRefsForPhysRegMask(CONSTREF_AllRegsMask mask, + LsraLocation currentLoc, + RefType refType, + bool isLastUse) { assert(refType == RefTypeKill); @@ -712,9 +719,10 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, // modified until codegen, which is too late. 
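The kill loop just below walks the mask one register at a time via genFirstRegNumFromMaskAndToggle. In standalone form, using a plain 64-bit mask rather than the JIT's combined mask type, the extract-and-clear idiom looks roughly like this (popLowestRegIndex is an illustrative name, not a JIT API):

#include <bit>      // std::countr_zero (C++20)
#include <cstdint>
#include <cstdio>

// Return the index of the lowest set bit and clear it from 'mask'.
static unsigned popLowestRegIndex(uint64_t& mask)
{
    unsigned index = static_cast<unsigned>(std::countr_zero(mask));
    mask &= (mask - 1); // clear the lowest set bit
    return index;
}

int main()
{
    uint64_t killMask = 0b10110; // registers 1, 2 and 4
    while (killMask != 0)
    {
        printf("kill register %u\n", popLowestRegIndex(killMask));
    }
    return 0;
}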
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - for (regMaskTP candidates = mask; candidates != RBM_NONE;) + AllRegsMask maskForRefPositions = mask; + while (!maskForRefPositions.IsEmpty()) { - regNumber reg = genFirstRegNumFromMaskAndToggle(candidates); + regNumber reg = genFirstRegNumFromMaskAndToggle(maskForRefPositions); // This assumes that these are all "special" RefTypes that // don't need to be recorded on the tree (hence treeNode is nullptr) RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, @@ -738,12 +746,10 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) +CONSTREF_AllRegsMask LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) { assert(tree->OperIs(GT_STOREIND)); - regMaskTP killMask = RBM_NONE; - GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree); if (writeBarrierForm != GCInfo::WBF_NoBarrier) { @@ -753,16 +759,16 @@ regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) // the allocated register for the `data` operand. However, all the (x86) optimized // helpers have the same kill set: EDX. And note that currently, only x86 can return // `true` for genUseOptimizedWriteBarriers(). - killMask = RBM_CALLEE_TRASH_NOGC; + return compiler->AllRegsMask_CALLEE_TRASH_NOGC; } else { // Figure out which helper we're going to use, and then get the kill set for that helper. CorInfoHelpFunc helper = compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(writeBarrierForm); - killMask = compiler->compHelperCallKillSet(helper); + return compiler->compHelperCallKillSet(helper); } } - return killMask; + return compiler->AllRegsMask_NONE; } //------------------------------------------------------------------------ @@ -773,9 +779,9 @@ regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode) +regMaskGpr LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode) { - regMaskTP killMask = RBM_NONE; + regMaskGpr killMask = RBM_NONE; #ifdef TARGET_XARCH assert(shiftNode->OperIsShiftOrRotate()); GenTree* shiftBy = shiftNode->gtGetOp2(); @@ -795,9 +801,9 @@ regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode) // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode) +regMaskGpr LinearScan::getKillSetForMul(GenTreeOp* mulNode) { - regMaskTP killMask = RBM_NONE; + regMaskGpr killMask = RBM_NONE; #ifdef TARGET_XARCH assert(mulNode->OperIsMul()); if (!mulNode->OperIs(GT_MUL) || (((mulNode->gtFlags & GTF_UNSIGNED) != 0) && mulNode->gtOverflowEx())) @@ -816,9 +822,9 @@ regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode) // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node) +regMaskGpr LinearScan::getKillSetForModDiv(GenTreeOp* node) { - regMaskTP killMask = RBM_NONE; + regMaskGpr killMask = RBM_NONE; #ifdef TARGET_XARCH assert(node->OperIs(GT_MOD, GT_DIV, GT_UMOD, GT_UDIV)); if (varTypeUsesIntReg(node->TypeGet())) @@ -838,9 +844,10 @@ regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node) // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) +AllRegsMask 
LinearScan::getKillSetForCall(GenTreeCall* call) { - regMaskTP killMask = RBM_CALLEE_TRASH; + AllRegsMask killMask = compiler->AllRegsMask_CALLEE_TRASH; + #ifdef TARGET_X86 if (compiler->compFloatingPointUsed) { @@ -863,24 +870,23 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) // if there is no FP used, we can ignore the FP kills if (!compiler->compFloatingPointUsed) { -#if defined(TARGET_XARCH) - killMask &= ~(RBM_FLT_CALLEE_TRASH | RBM_MSK_CALLEE_TRASH); -#else - killMask &= ~RBM_FLT_CALLEE_TRASH; -#endif // TARGET_XARCH + killMask.RemoveRegTypeFromMask(RBM_FLT_CALLEE_TRASH, TYP_FLOAT); +#ifdef FEATURE_MASKED_HW_INTRINSICS + killMask.RemoveRegTypeFromMask(RBM_MSK_CALLEE_TRASH, TYP_MASK); +#endif // FEATURE_MASKED_HW_INTRINSICS } #ifdef TARGET_ARM if (call->IsVirtualStub()) { - killMask |= compiler->virtualStubParamInfo->GetRegMask(); + killMask.AddGprRegMask(compiler->virtualStubParamInfo->GetRegMask()); } #else // !TARGET_ARM // Verify that the special virtual stub call registers are in the kill mask. // We don't just add them unconditionally to the killMask because for most architectures // they are already in the RBM_CALLEE_TRASH set, // and we don't want to introduce extra checks and calls in this hot function. - assert(!call->IsVirtualStub() || - ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == compiler->virtualStubParamInfo->GetRegMask())); + assert(!call->IsVirtualStub() || ((killMask.gprRegs() & compiler->virtualStubParamInfo->GetRegMask()) == + compiler->virtualStubParamInfo->GetRegMask())); #endif // !TARGET_ARM #ifdef SWIFT_SUPPORT @@ -888,7 +894,7 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) // so don't use the register post-call until it is consumed by SwiftError. if (call->HasSwiftErrorHandling()) { - killMask |= RBM_SWIFT_ERROR; + killMask.AddGprRegMask(RBM_SWIFT_ERROR); } #endif // SWIFT_SUPPORT @@ -903,10 +909,10 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) +AllRegsMask LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) { assert(blkNode->OperIsStoreBlk()); - regMaskTP killMask = RBM_NONE; + AllRegsMask killMask; bool isCopyBlk = varTypeIsStruct(blkNode->Data()); switch (blkNode->gtBlkOpKind) @@ -924,7 +930,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) if (isCopyBlk) { // rep movs kills RCX, RDI and RSI - killMask = RBM_RCX | RBM_RDI | RBM_RSI; + killMask.AddGprRegMask(RBM_RCX | RBM_RDI | RBM_RSI); } else { @@ -932,7 +938,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) // (Note that the Data() node, if not constant, will be assigned to // RCX, but it's find that this kills it, as the value is not available // after this node in any case.) - killMask = RBM_RDI | RBM_RCX; + killMask.AddGprRegMask(RBM_RDI | RBM_RCX); } break; #endif @@ -949,19 +955,16 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) #ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ -// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_STOREIND node. -// If the GT_STOREIND will generate a write barrier, determine the specific kill -// set required by the case-specific, platform-specific write barrier. If no -// write barrier is required, the kill set will be RBM_NONE. +// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_HWINTRINSIC node. 
// // Arguments: -// tree - the GT_STOREIND node +// tree - the GT_HWINTRINSIC node // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) +regMaskGpr LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) { - regMaskTP killMask = RBM_NONE; + regMaskGpr killMask = RBM_NONE; #ifdef TARGET_XARCH switch (node->GetHWIntrinsicId()) { @@ -993,10 +996,10 @@ regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForReturn() +CONSTREF_AllRegsMask LinearScan::getKillSetForReturn() { return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE) - : RBM_NONE; + : compiler->AllRegsMask_NONE; } //------------------------------------------------------------------------ @@ -1007,10 +1010,10 @@ regMaskTP LinearScan::getKillSetForReturn() // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForProfilerHook() +CONSTREF_AllRegsMask LinearScan::getKillSetForProfilerHook() { return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL) - : RBM_NONE; + : compiler->AllRegsMask_NONE; } #ifdef DEBUG @@ -1022,9 +1025,9 @@ regMaskTP LinearScan::getKillSetForProfilerHook() // // Return Value: a register mask of the registers killed // -regMaskTP LinearScan::getKillSetForNode(GenTree* tree) +AllRegsMask LinearScan::getKillSetForNode(GenTree* tree) { - regMaskTP killMask = RBM_NONE; + AllRegsMask killMask; switch (tree->OperGet()) { case GT_LSH: @@ -1036,7 +1039,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) case GT_LSH_HI: case GT_RSH_LO: #endif - killMask = getKillSetForShiftRotate(tree->AsOp()); + killMask = GprRegsMask(getKillSetForShiftRotate(tree->AsOp())); break; case GT_MUL: @@ -1044,14 +1047,14 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) #if !defined(TARGET_64BIT) || defined(TARGET_ARM64) case GT_MUL_LONG: #endif - killMask = getKillSetForMul(tree->AsOp()); + killMask = GprRegsMask(getKillSetForMul(tree->AsOp())); break; case GT_MOD: case GT_DIV: case GT_UMOD: case GT_UDIV: - killMask = getKillSetForModDiv(tree->AsOp()); + killMask = GprRegsMask(getKillSetForModDiv(tree->AsOp())); break; case GT_STORE_BLK: @@ -1086,7 +1089,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - killMask = getKillSetForHWIntrinsic(tree->AsHWIntrinsic()); + killMask = GprRegsMask(getKillSetForHWIntrinsic(tree->AsHWIntrinsic())); break; #endif // FEATURE_HW_INTRINSICS @@ -1121,11 +1124,11 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) // This method can add kills even if killMask is RBM_NONE, if this tree is one of the // special cases that signals that we can't permit callee save registers to hold GC refs. 
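Before the definition that follows, a loose standalone model of its preference update may help: when a node kills registers, each live local is steered toward the surviving registers of its own class, and the killed registers are remembered as ones to avoid. The names below are illustrative, and the write-thru (EH var) and call-kill special cases handled by the real code are ignored:

#include <cstdint>

struct LocalRegHints
{
    uint64_t preferences; // registers this local would like to get
    uint64_t aversion;    // registers this local would like to avoid
};

// Narrow a local's hints at a kill point for its register class. If every register of
// the class is killed, leave the hints alone (spill handling is a separate concern).
static void applyKillToHints(LocalRegHints& hints, uint64_t allRegsForClass, uint64_t killedForClass)
{
    uint64_t surviving = allRegsForClass & ~killedForClass;
    if (surviving == 0)
    {
        return;
    }
    hints.aversion |= killedForClass;
    uint64_t narrowed = hints.preferences & surviving;
    hints.preferences = (narrowed != 0) ? narrowed : surviving;
}

int main()
{
    // 8-register class; a call kills registers 0-3; the local currently prefers registers 2 and 5.
    LocalRegHints hints = {(1u << 2) | (1u << 5), 0};
    applyKillToHints(hints, 0xFF, 0x0F);
    return ((hints.preferences == (1u << 5)) && (hints.aversion == 0x0F)) ? 0 : 1;
}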
-bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask) +bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, CONSTREF_AllRegsMask killMask) { bool insertedKills = false; - if (killMask != RBM_NONE) + if (!killMask.IsEmpty()) { addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); @@ -1159,8 +1162,10 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { continue; } - Interval* interval = getIntervalForLocalVar(varIndex); - const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + Interval* interval = getIntervalForLocalVar(varIndex); + regMaskOnlyOne regsKillMask = killMask.GetRegMaskForType(interval->registerType); + const bool isCallKill = + (killMask.gprRegs() == RBM_INT_CALLEE_TRASH) || (killMask == compiler->AllRegsMask_CALLEE_TRASH); if (isCallKill) { @@ -1173,7 +1178,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo // See the "heuristics for writeThru intervals" in 'buildIntervals()'. if (!interval->isWriteThru || !isCallKill) { - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); + regMaskOnlyOne newPreferences = allRegs(interval->registerType) & (~regsKillMask); if (newPreferences != RBM_NONE) { @@ -1181,7 +1186,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { // Update the register aversion as long as this is not write-thru vars for // reason mentioned above. - interval->registerAversion |= killMask; + interval->registerAversion |= regsKillMask; } interval->updateRegisterPreferences(newPreferences); } @@ -1324,8 +1329,10 @@ bool LinearScan::checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode) // currentLoc - Location of the temp Def position // regMask - register mask of candidates for temp // -RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) +RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskOnlyOne regMask) { + assert(compiler->IsOnlyOneRegMask(regMask)); + Interval* current = newInterval(regType); current->isInternal = true; RefPosition* newDef = newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0); @@ -1344,10 +1351,10 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regTy // Returns: // The def RefPosition created for this internal temp. // -RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskGpr internalCands) { // The candidate set should contain only integer registers. - assert((internalCands & ~availableIntRegs) == RBM_NONE); + assert(compiler->IsGprRegMask(internalCands)); RefPosition* defRefPosition = defineNewInternalTemp(tree, IntRegisterType, internalCands); return defRefPosition; @@ -1363,24 +1370,24 @@ RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMa // Returns: // The def RefPosition created for this internal temp. // -RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskFloat internalCands) { // The candidate set should contain only float registers. 
- assert((internalCands & ~availableFloatRegs) == RBM_NONE); + assert(compiler->IsFloatRegMask(internalCands)); RefPosition* defRefPosition = defineNewInternalTemp(tree, FloatRegisterType, internalCands); return defRefPosition; } -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) -RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +#ifdef FEATURE_MASKED_HW_INTRINSICS +RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskPredicate internalCands) { // The candidate set should contain only float registers. - assert((internalCands & ~availableMaskRegs) == RBM_NONE); + assert(compiler->IsPredicateRegMask(internalCands)); return defineNewInternalTemp(tree, MaskRegisterType, internalCands); } -#endif +#endif // FEATURE_MASKED_HW_INTRINSICS //------------------------------------------------------------------------ // buildInternalRegisterUses - adds use positions for internal @@ -1403,9 +1410,9 @@ void LinearScan::buildInternalRegisterUses() assert(internalCount <= MaxInternalCount); for (int i = 0; i < internalCount; i++) { - RefPosition* def = internalDefs[i]; - regMaskTP mask = def->registerAssignment; - RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); + RefPosition* def = internalDefs[i]; + regMaskOnlyOne mask = def->registerAssignment; + RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); if (setInternalRegsDelayFree) { use->delayRegFree = true; @@ -1466,14 +1473,17 @@ Interval* LinearScan::getUpperVectorInterval(unsigned varIndex) // currentLoc - The location of the current node // fpCalleeKillSet - The set of registers killed by this node. // -// Notes: This is called by BuildDefsWithKills for any node that kills registers in the +// Notes: This is called by BuildCallDefsWithKills for any node that kills registers in the // RBM_FLT_CALLEE_TRASH set. We actually need to find any calls that kill the upper-half // of the callee-save vector registers. // But we will use as a proxy any node that kills floating point registers. // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) // -void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet) +void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, + LsraLocation currentLoc DEBUG_ARG(regMaskFloat fpCalleeKillSet)) { + assert(compiler->IsFloatRegMask(fpCalleeKillSet)); + if ((tree != nullptr) && tree->IsCall()) { if (tree->AsCall()->IsNoReturn() || compiler->fgIsThrow(tree)) @@ -1860,10 +1870,10 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for // the use position of v02 also needs to take into account the kill set of its consuming node. 
- regMaskTP killMask = getKillSetForNode(tree); - if (killMask != RBM_NONE) + AllRegsMask killMask = getKillSetForNode(tree); + if (!killMask.IsEmpty()) { - minRegCountForRef += genCountBits(killMask); + minRegCountForRef += killMask.Count(); } } else if ((newRefPosition->refType) == RefTypeDef && (newRefPosition->getInterval()->isSpecialPutArg)) @@ -1874,9 +1884,9 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc newRefPosition->minRegCandidateCount = minRegCountForRef; if (newRefPosition->IsActualRef() && doReverseCallerCallee()) { - Interval* interval = newRefPosition->getInterval(); - regMaskTP oldAssignment = newRefPosition->registerAssignment; - regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); + Interval* interval = newRefPosition->getInterval(); + regMaskOnlyOne oldAssignment = newRefPosition->registerAssignment; + regMaskOnlyOne calleeSaveMask = calleeSaveRegs(interval->registerType); #ifdef TARGET_ARM64 if (newRefPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) { @@ -1890,7 +1900,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc #endif // TARGET_ARM64 { newRefPosition->registerAssignment = - getConstrainedRegMask(newRefPosition, oldAssignment, calleeSaveMask, minRegCountForRef); + getConstrainedRegMask(newRefPosition, interval->registerType, oldAssignment, calleeSaveMask, + minRegCountForRef); } if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && @@ -2288,8 +2299,8 @@ void LinearScan::buildIntervals() RegState* floatRegState = &compiler->codeGen->floatRegState; intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; - regsInUseThisLocation = RBM_NONE; - regsInUseNextLocation = RBM_NONE; + regsInUseThisLocation.Clear(); + regsInUseNextLocation.Clear(); #ifdef SWIFT_SUPPORT if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift) @@ -2346,7 +2357,7 @@ void LinearScan::buildIntervals() { Interval* interval = getIntervalForLocalVar(varIndex); const var_types regType = argDsc->GetRegisterType(); - regMaskTP mask = allRegs(regType); + regMaskOnlyOne mask = allRegs(regType); if (argDsc->lvIsRegArg && !stressInitialParamReg()) { // Set this interval as currently assigned to that register @@ -2354,7 +2365,7 @@ void LinearScan::buildIntervals() assert(inArgReg < REG_COUNT); mask = genRegMask(inArgReg); assignPhysReg(inArgReg, interval); - INDEBUG(registersToDump |= getRegMask(inArgReg, interval->registerType)); + INDEBUG(registersToDump.AddRegNum(inArgReg, interval->registerType)); } RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); pos->setRegOptional(true); @@ -2428,7 +2439,7 @@ void LinearScan::buildIntervals() #endif numPlacedArgLocals = 0; - placedArgRegs = RBM_NONE; + placedArgRegs.Clear(); BasicBlock* predBlock = nullptr; BasicBlock* prevBlock = nullptr; @@ -2549,8 +2560,9 @@ void LinearScan::buildIntervals() // handling clobbers REG_SCRATCH, so kill it here. 
if ((block == compiler->fgFirstBB) && compiler->lvaHasAnySwiftStackParamToReassemble()) { + AllRegsMask scratchMask = AllRegsMask(genRegMask(REG_SCRATCH)); assert(compiler->fgFirstBBisScratch()); - addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true); + addRefsForPhysRegMask(scratchMask, currentLoc + 1, RefTypeKill, true); currentLoc += 2; } @@ -2560,13 +2572,14 @@ void LinearScan::buildIntervals() if (compiler->compShouldPoisonFrame() && (block == compiler->fgFirstBB)) { assert(compiler->fgFirstBBisScratch()); - regMaskTP killed; + AllRegsMask killed; #if defined(TARGET_XARCH) // Poisoning uses EAX for small vars and rep stosd that kills edi, ecx and eax for large vars. - killed = RBM_EDI | RBM_ECX | RBM_EAX; + killed.AddGprRegMask(RBM_EDI | RBM_ECX | RBM_EAX); #else // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars. - killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET); + killed = compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET); + killed.AddRegNumInMask(REG_SCRATCH); #endif addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true); currentLoc += 2; @@ -2788,12 +2801,12 @@ void LinearScan::buildIntervals() { calleeSaveCount = CNT_CALLEE_ENREG; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#ifdef FEATURE_MASKED_HW_INTRINSICS else if (varTypeUsesMaskReg(interval->registerType)) { calleeSaveCount = CNT_CALLEE_SAVED_MASK; } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS else { assert(varTypeUsesFloatReg(interval->registerType)); @@ -2846,17 +2859,26 @@ void LinearScan::buildIntervals() if (!needNonIntegerRegisters) { availableRegCount = REG_INT_COUNT; + availableFloatRegs.OverrideAssign(RBM_NONE); + availableDoubleRegs.OverrideAssign(RBM_NONE); +#ifdef FEATURE_MASKED_HW_INTRINSICS + availableMaskRegs.OverrideAssign(RBM_NONE); +#endif } - if (availableRegCount < (sizeof(regMaskTP) * 8)) +#ifdef HAS_MORE_THAN_64_REGISTERS + actualRegistersMask = AllRegsMask(availableIntRegs, availableFloatRegs, availableMaskRegs); +#else + if (availableRegCount < (sizeof(RegBitSet64) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount - actualRegistersMask = (1ULL << availableRegCount) - 1; + actualRegistersMask = AllRegsMask((1ULL << availableRegCount) - 1); } else { - actualRegistersMask = ~RBM_NONE; + actualRegistersMask = AllRegsMask(~RBM_NONE); } +#endif // HAS_MORE_THAN_64_REGISTERS #ifdef DEBUG // Make sure we don't have any blocks that were not visited @@ -2889,8 +2911,8 @@ void LinearScan::stressSetRandomParameterPreferences() { CLRRandom rng; rng.Init(compiler->info.compMethodHash()); - regMaskTP intRegs = compiler->codeGen->intRegState.rsCalleeRegArgMaskLiveIn; - regMaskTP floatRegs = compiler->codeGen->floatRegState.rsCalleeRegArgMaskLiveIn; + regMaskGpr intRegs = compiler->codeGen->intRegState.rsCalleeRegArgMaskLiveIn; + regMaskFloat floatRegs = compiler->codeGen->floatRegState.rsCalleeRegArgMaskLiveIn; for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) { @@ -2903,7 +2925,7 @@ void LinearScan::stressSetRandomParameterPreferences() Interval* interval = getIntervalForLocalVar(varIndex); - regMaskTP* regs; + regMaskOnlyOne* regs; if (interval->registerType == FloatRegisterType) { regs = &floatRegs; @@ -2921,12 +2943,12 @@ void LinearScan::stressSetRandomParameterPreferences() continue; } - int bitIndex = rng.Next((int)numBits); - regNumber prefReg = REG_NA; - regMaskTP regsLeft = *regs; + int 
bitIndex = rng.Next((int)numBits); + regNumber prefReg = REG_NA; + regMaskOnlyOne regsLeft = *regs; for (int i = 0; i <= bitIndex; i++) { - prefReg = genFirstRegNumFromMaskAndToggle(regsLeft); + prefReg = genFirstRegNumFromMaskAndToggle(regsLeft MORE_THAN_64_REG_ARG(interval->registerType)); } *regs &= ~genRegMask(prefReg); @@ -3052,9 +3074,10 @@ void setTgtPref(Interval* interval, RefPosition* tgtPrefUse) // Notes: // Adds the RefInfo for the definition to the defList. // -RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx) +RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskOnlyOne dstCandidates, int multiRegIdx) { assert(!tree->isContained()); + assert(compiler->IsOnlyOneRegMask(dstCandidates)); if (dstCandidates != RBM_NONE) { @@ -3131,7 +3154,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu } //------------------------------------------------------------------------ -// BuildDef: Build one or more RefTypeDef RefPositions for the given node +// BuildDef: Build one or more RefTypeDef RefPositions for the given call node // // Arguments: // tree - The node that defines a register @@ -3141,68 +3164,80 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu // Notes: // Adds the RefInfo for the definitions to the defList. // -void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) +void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, REF_AllRegsMask dstCandidates) { - bool fixedReg = false; - if ((dstCount > 1) && (dstCandidates != RBM_NONE) && ((int)genCountBits(dstCandidates) == dstCount)) + assert(dstCount > 0); + assert((int)dstCandidates.Count() == dstCount); + assert(tree->IsMultiRegCall()); + + const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + assert(retTypeDesc != nullptr); + + for (int i = 0; i < dstCount; i++) { - fixedReg = true; + // In case of multi-reg call node, we have to query the i'th position return register. + // For all other cases of multi-reg definitions, the registers must be in sequential order. + regNumber thisReg = + tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); + + assert(dstCandidates.IsRegNumInMask(thisReg)); + dstCandidates.RemoveRegNumFromMask(thisReg); + + BuildDef(tree, genRegMask(thisReg), i); } - const ReturnTypeDesc* retTypeDesc = nullptr; - if (tree->IsMultiRegCall()) +} + +//------------------------------------------------------------------------ +// BuildDef: Build one or more RefTypeDef RefPositions for the given node +// +// Arguments: +// tree - The node that defines a register +// dstCount - The number of registers defined by the node +// dstCandidates - the candidate registers for the definition +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// Also, the `dstCandidates` is assumed to be of "onlyOne" type. 
If there are +// both gpr and float registers, use `BuildDefs` that takes `AllRegsMask` +// +void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates) +{ + assert(dstCount > 0); + assert(compiler->IsOnlyOneRegMask(dstCandidates)); + + if ((dstCandidates == RBM_NONE) || ((int)genCountBits(dstCandidates) != dstCount)) { - retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + // This is not fixedReg case, so just create definitions based on dstCandidates + for (int i = 0; i < dstCount; i++) + { + BuildDef(tree, dstCandidates, i); + } + return; } + for (int i = 0; i < dstCount; i++) { - regMaskTP thisDstCandidates; - if (fixedReg) - { - // In case of multi-reg call node, we have to query the i'th position return register. - // For all other cases of multi-reg definitions, the registers must be in sequential order. - if (retTypeDesc != nullptr) - { - thisDstCandidates = genRegMask( - tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv())); - assert((dstCandidates & thisDstCandidates) != RBM_NONE); - } - else - { - thisDstCandidates = genFindLowestBit(dstCandidates); - } - dstCandidates &= ~thisDstCandidates; - } - else - { - thisDstCandidates = dstCandidates; - } + regMaskOnlyOne thisDstCandidates = genFindLowestBit(dstCandidates); BuildDef(tree, thisDstCandidates, i); + dstCandidates &= ~thisDstCandidates; } } //------------------------------------------------------------------------ -// BuildDef: Build one or more RefTypeDef RefPositions for the given node, -// as well as kills as specified by the given mask. +// BuildDef: Build Kills RefPositions as specified by the given mask. // // Arguments: // tree - The node that defines a register -// dstCount - The number of registers defined by the node -// dstCandidates - The candidate registers for the definition // killMask - The mask of registers killed by this node // -// Notes: -// Adds the RefInfo for the definitions to the defList. -// The def and kill functionality is folded into a single method so that the -// save and restores of upper vector registers can be bracketed around the def. -// -void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildKills(GenTree* tree, CONSTREF_AllRegsMask killMask) { assert(killMask == getKillSetForNode(tree)); // Call this even when killMask is RBM_NONE, as we have to check for some special cases buildKillPositionsForNode(tree, currentLoc + 1, killMask); - if (killMask != RBM_NONE) + if (!killMask.IsEmpty()) { #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE // Build RefPositions to account for the fact that, even in a callee-save register, the upper half of any large @@ -3216,15 +3251,83 @@ void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCa // RefPositions in that case. // This must be done after the kills, so that we know which large vectors are still live. 
// - if ((killMask & RBM_FLT_CALLEE_TRASH) != RBM_NONE) + if ((killMask.IsFloatMaskPresent(compiler, RBM_FLT_CALLEE_TRASH))) { - buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask); + buildUpperVectorSaveRefPositions(tree, currentLoc + 1 DEBUG_ARG((killMask.floatRegs(compiler) & + RBM_FLT_CALLEE_TRASH))); } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } +} - // Now, create the Def(s) - BuildDefs(tree, dstCount, dstCandidates); +#ifndef TARGET_ARMARCH +//------------------------------------------------------------------------ +// BuildDefWithKills: Build one or two (for 32-bit) RefTypeDef RefPositions for the given node, +// as well as kills as specified by the given mask. +// +// Arguments: +// tree - The call node that defines a register +// dstCandidates - The candidate registers for the definition +// killMask - The mask of registers killed by this node +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// The def and kill functionality is folded into a single method so that the +// save and restores of upper vector registers can be bracketed around the def. +// +void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskOnlyOne dstCandidates, REF_AllRegsMask killMask) +{ + assert(compiler->IsOnlyOneRegMask(dstCandidates)); + + // Build the kill RefPositions + BuildKills(tree, killMask); + +#ifdef TARGET_64BIT + // For 64 bits, + assert(dstCount == 1); + BuildDef(tree, dstCandidates); +#else + if (dstCount == 1) + { + BuildDef(tree, dstCandidates); + } + else + { + assert(dstCount == 2); + BuildDefs(tree, 2, dstCandidates); + } +#endif // TARGET_64BIT +} +#endif + +//------------------------------------------------------------------------ +// BuildCallDefsWithKills: Build one or more RefTypeDef RefPositions for the given node, +// as well as kills as specified by the given mask. +// +// Arguments: +// tree - The node that defines a register +// dstCount - The number of registers defined by the node +// dstCandidates - The candidate registers for the definition +// killMask - The mask of registers killed by this node +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// The def and kill functionality is folded into a single method so that the +// save and restores of upper vector registers can be bracketed around the def. +// +void LinearScan::BuildCallDefsWithKills(GenTree* tree, + int dstCount, + REF_AllRegsMask dstCandidates, + REF_AllRegsMask killMask) +{ + assert(dstCount > 0); + assert(!dstCandidates.IsEmpty()); + + // Build the kill RefPositions + BuildKills(tree, killMask); + + // And then the Def(s) + BuildCallDefs(tree, dstCount, dstCandidates); } //------------------------------------------------------------------------ @@ -3248,7 +3351,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // _after_ the call, then we are going to prefer callee-saved registers for // such local anyway, so there is no need to look at such local uses. // - if (placedArgRegs == RBM_NONE) + if (placedArgRegs.IsEmpty()) { return; } @@ -3263,19 +3366,19 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Find the registers that we should remove from the preference set because // they are occupied with argument values. 
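// Illustrative sketch only (hypothetical values, plain 64-bit masks): a worked example
// of the pruning performed just below. Suppose RCX and RDX currently hold placed call
// arguments, and the dying local's own value is the argument placed in RDX; then only
// RCX is removed from the local's register preferences.
#include <cstdint>

constexpr uint64_t RBM_RCX = 0x02, RBM_RDX = 0x04;

constexpr uint64_t placedArgRegsExample = RBM_RCX | RBM_RDX; // args already placed in registers
constexpr uint64_t regHoldingThisLocal  = RBM_RDX;           // this local's own argument register
constexpr uint64_t unpreferredExample   = placedArgRegsExample & ~regHoldingThisLocal;

static_assert(unpreferredExample == RBM_RCX, "only the other argument register becomes unpreferred");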
- regMaskTP unpref = placedArgRegs; - unsigned varIndex = interval->getVarIndex(compiler); + AllRegsMask unpref = placedArgRegs; + unsigned varIndex = interval->getVarIndex(compiler); for (size_t i = 0; i < numPlacedArgLocals; i++) { if (placedArgLocals[i].VarIndex == varIndex) { // This local's value is going to be available in this register so // keep it in the preferences. - unpref &= ~genRegMask(placedArgLocals[i].Reg); + unpref.RemoveRegNumFromMask(placedArgLocals[i].Reg); } } - if (unpref != RBM_NONE) + if (!unpref.IsEmpty()) { #ifdef DEBUG if (VERBOSE) @@ -3287,8 +3390,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) } #endif - interval->registerAversion |= unpref; - regMaskTP newPreferences = allRegs(interval->registerType) & ~unpref; + regMaskOnlyOne unprefRegMask = unpref.GetRegMaskForType(interval->registerType); + interval->registerAversion |= unprefRegMask; + regMaskOnlyOne newPreferences = allRegs(interval->registerType) & ~unprefRegMask; interval->updateRegisterPreferences(newPreferences); } } @@ -3308,8 +3412,10 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). // -RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) +RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskOnlyOne candidates, int multiRegIdx) { + assert(compiler->IsOnlyOneRegMask(candidates)); + assert(!operand->isContained()); Interval* interval; bool regOptional = operand->IsRegOptional(); @@ -3378,12 +3484,12 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu // Notes: // This method may only be used if the candidates are the same for all sources. // -int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates) +int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskOnlyOne candidates) { return BuildAddrUses(indirTree->Addr(), candidates); } -int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates) +int LinearScan::BuildAddrUses(GenTree* addr, regMaskOnlyOne candidates) { if (!addr->isContained()) { @@ -3440,7 +3546,7 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates) // Return Value: // The number of source registers used by the *parent* of this node. // -int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) +int LinearScan::BuildOperandUses(GenTree* node, regMaskOnlyOne candidates) { if (!node->isContained()) { @@ -3584,10 +3690,10 @@ void LinearScan::AddDelayFreeUses(RefPosition* useRefPosition, GenTree* rmwNode) // Return Value: // The number of source registers used by the *parent* of this node. // -int LinearScan::BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode, - regMaskTP candidates, - RefPosition** useRefPositionRef) +int LinearScan::BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode, + regMaskOnlyOne candidates, + RefPosition** useRefPositionRef) { RefPosition* use = nullptr; GenTree* addr = nullptr; @@ -3683,7 +3789,7 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, // The operands must already have been processed by buildRefPositionsForNode, and their // RefInfoListNodes placed in the defList. 
// -int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) +int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskOnlyOne candidates) { GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2IfPresent(); @@ -3717,7 +3823,7 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) // Return Value: // The number of actual register operands. // -int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskTP candidates) +int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskOnlyOne candidates) { GenTree* src = cast->CastOp(); @@ -3782,8 +3888,8 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, } } - regMaskTP defCandidates = RBM_NONE; - var_types type = varDsc->GetRegisterType(); + regMaskOnlyOne defCandidates = RBM_NONE; + var_types type = varDsc->GetRegisterType(); #ifdef TARGET_X86 if (varTypeIsByte(type)) @@ -3868,7 +3974,7 @@ int LinearScan::BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc) if (isMultiRegSrc) { - regMaskTP srcCandidates = RBM_NONE; + regMaskGpr srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = fieldVarDsc->TypeGet(); if (varTypeIsByte(type)) @@ -3978,8 +4084,8 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) } else { - srcCount = 1; - regMaskTP srcCandidates = RBM_NONE; + srcCount = 1; + regMaskGpr srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = varDsc->GetRegisterType(storeLoc); if (varTypeIsByte(type)) @@ -4070,7 +4176,7 @@ int LinearScan::BuildReturn(GenTree* tree) #endif // !defined(TARGET_64BIT) if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) { - regMaskTP useCandidates = RBM_NONE; + regMaskOnlyOne useCandidates = RBM_NONE; #if FEATURE_MULTIREG_RET #ifdef TARGET_ARM64 @@ -4086,7 +4192,7 @@ int LinearScan::BuildReturn(GenTree* tree) // op1 has to be either a lclvar or a multi-reg returning call if ((op1->OperGet() == GT_LCL_VAR) && !op1->IsMultiRegLclVar()) { - BuildUse(op1, useCandidates); + BuildUse(op1, RBM_NONE); } else { @@ -4108,19 +4214,19 @@ int LinearScan::BuildReturn(GenTree* tree) if (srcType != dstType) { hasMismatchedRegTypes = true; - regMaskTP dstRegMask = + regMaskOnlyOne dstRegMask = genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)); if (varTypeUsesIntReg(dstType)) { buildInternalIntRegisterDefForNode(tree, dstRegMask); } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#ifdef FEATURE_MASKED_HW_INTRINSICS else if (varTypeUsesMaskReg(dstType)) { buildInternalMaskRegisterDefForNode(tree, dstRegMask); } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS else { assert(varTypeUsesFloatReg(dstType)); @@ -4263,11 +4369,11 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. - regMaskTP argMask = genRegMask(argReg); - RefPosition* use = BuildUse(op1, argMask); + singleRegMask argMask = genRegMask(argReg); + RefPosition* use = BuildUse(op1, argMask); // Record that this register is occupied by a register now. 
- placedArgRegs |= argMask; + placedArgRegs |= argReg; if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0)) { @@ -4295,7 +4401,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) if (node->TypeGet() == TYP_LONG) { srcCount++; - regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg)); + singleRegMask argMaskHi = genRegMask(REG_NEXT(argReg)); assert(genRegArgNext(argReg) == REG_NEXT(argReg)); use = BuildUse(op1, argMaskHi, 1); BuildDef(node, argMask, 0); @@ -4360,8 +4466,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // is an indir through an lea, we need to actually instantiate the // lea in a register assert(!addr->isContained() && !src->isContained()); - regMaskTP addrCandidates = RBM_WRITE_BARRIER_DST; - regMaskTP srcCandidates = RBM_WRITE_BARRIER_SRC; + regMaskGpr addrCandidates = RBM_WRITE_BARRIER_DST; + regMaskGpr srcCandidates = RBM_WRITE_BARRIER_SRC; #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS @@ -4380,7 +4486,7 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) BuildUse(addr, addrCandidates); BuildUse(src, srcCandidates); - regMaskTP killMask = getKillSetForStoreInd(tree->AsStoreInd()); + AllRegsMask killMask = getKillSetForStoreInd(tree->AsStoreInd()); buildKillPositionsForNode(tree, currentLoc + 1, killMask); return 2; } @@ -4408,7 +4514,7 @@ int LinearScan::BuildCmp(GenTree* tree) if (!tree->TypeIs(TYP_VOID)) { - regMaskTP dstCandidates = RBM_NONE; + regMaskGpr dstCandidates = RBM_NONE; #ifdef TARGET_X86 // If the compare is used by a jump, we just need to set the condition codes. If not, then we need @@ -4432,10 +4538,10 @@ int LinearScan::BuildCmp(GenTree* tree) // int LinearScan::BuildCmpOperands(GenTree* tree) { - regMaskTP op1Candidates = RBM_NONE; - regMaskTP op2Candidates = RBM_NONE; - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); + regMaskGpr op1Candidates = RBM_NONE; + regMaskGpr op2Candidates = RBM_NONE; + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); #ifdef TARGET_X86 bool needByteRegs = false; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 7fe119ccfd165c..d1aee6faa45183 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -45,10 +45,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); - int srcCount; - int dstCount = 0; - regMaskTP killMask = RBM_NONE; - bool isLocalDefUse = false; + int srcCount; + int dstCount = 0; + bool isLocalDefUse = false; // Reset the build-related members of LinearScan. 
clearBuildState(); @@ -138,15 +137,16 @@ int LinearScan::BuildNode(GenTree* tree) // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); - BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + BuildKills(tree, compiler->AllRegsMask_NONE); break; case GT_PROF_HOOK: + { srcCount = 0; assert(dstCount == 0); - killMask = getKillSetForProfilerHook(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, getKillSetForProfilerHook()); break; + } case GT_CNS_INT: case GT_CNS_LNG: @@ -188,10 +188,11 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_RETURN: + { srcCount = BuildReturn(tree); - killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, getKillSetForReturn()); break; + } case GT_RETFILT: assert(dstCount == 0); @@ -295,8 +296,7 @@ int LinearScan::BuildNode(GenTree* tree) RefPosition* internalDef = buildInternalIntRegisterDefForNode(tree); srcCount = BuildOperandUses(tree->gtGetOp1()); buildInternalRegisterUses(); - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC)); } break; @@ -767,16 +767,16 @@ bool LinearScan::isRMWRegOper(GenTree* tree) } // Support for building RefPositions for RMW nodes. -int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates) +int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskOnlyOne candidates) { - int srcCount = 0; - regMaskTP op1Candidates = candidates; - regMaskTP op2Candidates = candidates; + int srcCount = 0; + regMaskGpr op1Candidates = candidates; + regMaskGpr op2Candidates = candidates; #ifdef TARGET_X86 if (varTypeIsByte(node)) { - regMaskTP byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs()); + regMaskGpr byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs()); if (!op1->isContained()) { assert(byteCandidates != RBM_NONE); @@ -1028,11 +1028,11 @@ int LinearScan::BuildShiftRotate(GenTree* tree) // For shift operations, we need that the number // of bits moved gets stored in CL in case // the number of bits to shift is not a constant. - int srcCount = 0; - GenTree* shiftBy = tree->gtGetOp2(); - GenTree* source = tree->gtGetOp1(); - regMaskTP srcCandidates = RBM_NONE; - regMaskTP dstCandidates = RBM_NONE; + int srcCount = 0; + GenTree* shiftBy = tree->gtGetOp2(); + GenTree* source = tree->gtGetOp1(); + regMaskGpr srcCandidates = RBM_NONE; + regMaskGpr dstCandidates = RBM_NONE; // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off) // We will allow whatever can be encoded - hope you know what you are doing. @@ -1046,8 +1046,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { // shlx (as opposed to mov+shl) instructions handles all register forms, but it does not handle contained form // for memory operand. Likewise for sarx and shrx. 
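// Illustrative sketch only (x86-64, GCC/Clang extended asm; the helper name is
// hypothetical): the legacy shift encodings accept a variable count only in CL, which
// is why the non-BMI2 paths below pin the shift-count use to RBM_RCX and record an RCX
// kill, while the shlx/sarx/shrx path described just above takes the count in any
// register and needs no fixed candidate.
#include <cstdint>

inline uint64_t shiftLeftVariable(uint64_t value, uint8_t count)
{
    // The "c" constraint forces `count` into CL, mirroring the RBM_RCX candidate.
    __asm__("shlq %%cl, %0" : "+r"(value) : "c"(count) : "cc");
    return value;
}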
- srcCount += BuildOperandUses(source, srcCandidates); - srcCount += BuildOperandUses(shiftBy, srcCandidates); + srcCount += BuildOperandUses(source); + srcCount += BuildOperandUses(shiftBy); BuildDef(tree, dstCandidates); return srcCount; } @@ -1110,7 +1110,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) if (!shiftBy->isContained()) { srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX); - buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); + buildKillPositionsForNode(tree, currentLoc + 1, GprRegsMask(RBM_RCX)); } BuildDef(tree, dstCandidates); } @@ -1119,7 +1119,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) if (!shiftBy->isContained()) { srcCount += BuildOperandUses(shiftBy, RBM_RCX); - buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); + buildKillPositionsForNode(tree, currentLoc + 1, GprRegsMask(RBM_RCX)); } } return srcCount; @@ -1140,7 +1140,6 @@ int LinearScan::BuildCall(GenTreeCall* call) const ReturnTypeDesc* retTypeDesc = nullptr; int srcCount = 0; int dstCount = 0; - regMaskTP dstCandidates = RBM_NONE; assert(!call->isContained()); if (call->TypeGet() != TYP_VOID) @@ -1164,7 +1163,9 @@ int LinearScan::BuildCall(GenTreeCall* call) ctrlExpr = call->gtCallAddr; } - RegisterType registerType = regType(call); + AllRegsMask dstReturnCandidates; + regMaskOnlyOne dstCandidates = RBM_NONE; + RegisterType registerType = regType(call); // Set destination candidates for return value of the call. @@ -1181,8 +1182,8 @@ int LinearScan::BuildCall(GenTreeCall* call) if (hasMultiRegRetVal) { assert(retTypeDesc != nullptr); - dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); - assert((int)genCountBits(dstCandidates) == dstCount); + dstReturnCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); + assert((int)dstReturnCandidates.Count() == dstCount); } else if (varTypeUsesFloatReg(registerType)) { @@ -1313,7 +1314,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // set reg requirements on call target represented as control sequence. if (ctrlExpr != nullptr) { - regMaskTP ctrlExprCandidates = RBM_NONE; + regMaskGpr ctrlExprCandidates = RBM_NONE; // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into appropriate registers. @@ -1363,15 +1364,34 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. - regMaskTP killMask = getKillSetForCall(call); - BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + AllRegsMask killMask = getKillSetForCall(call); #ifdef SWIFT_SUPPORT - if (call->HasSwiftErrorHandling()) + // Tree is a Swift call with error handling; error register should have been killed + assert(!call->HasSwiftErrorHandling() || ((killMask.gprRegs() & RBM_SWIFT_ERROR) != 0)); +#endif // SWIFT_SUPPORT + + if (dstCount > 0) { - // Tree is a Swift call with error handling; error register should have been killed - assert((killMask & RBM_SWIFT_ERROR) != 0); + if (hasMultiRegRetVal) + { + assert(dstReturnCandidates.Count() > 0); + BuildCallDefsWithKills(call, dstCount, dstReturnCandidates, killMask); + } + else + { + assert(dstCount == 1); + BuildDefWithKills(call, dstCount, dstCandidates, killMask); + } + } + else + { + BuildKills(call, killMask); + } +#ifdef SWIFT_SUPPORT + if (call->HasSwiftErrorHandling()) + { // After a Swift call that might throw returns, we expect the error register to be consumed // by a GT_SWIFT_ERROR node. 
However, we want to ensure the error register won't be trashed // before GT_SWIFT_ERROR can consume it. @@ -1390,7 +1410,7 @@ int LinearScan::BuildCall(GenTreeCall* call) #endif // SWIFT_SUPPORT // No args are placed in registers anymore. - placedArgRegs = RBM_NONE; + placedArgRegs.Clear(); numPlacedArgLocals = 0; return srcCount; } @@ -1412,9 +1432,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + regMaskGpr dstAddrRegMask = RBM_NONE; + regMaskGpr srcRegMask = RBM_NONE; + regMaskGpr sizeRegMask = RBM_NONE; RefPosition* internalIntDef = nullptr; #ifdef TARGET_X86 @@ -1512,7 +1532,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // or if are but the remainder is a power of 2 and less than the // size of a register - regMaskTP regMask = availableIntRegs; + regMaskGpr regMask = availableIntRegs; #ifdef TARGET_X86 if ((size & 1) != 0) { @@ -1608,6 +1628,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (!dstAddr->isContained()) { + assert(compiler->IsGprRegMask(dstAddrRegMask)); + useCount++; BuildUse(dstAddr, dstAddrRegMask); } @@ -1620,6 +1642,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { if (!srcAddrOrFill->isContained()) { + assert(compiler->IsGprRegMask(srcRegMask)); + useCount++; BuildUse(srcAddrOrFill, srcRegMask); } @@ -1629,6 +1653,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } } + assert(compiler->IsGprRegMask(sizeRegMask)); + #ifdef TARGET_X86 // If we require a byte register on x86, we may run into an over-constrained situation // if we have BYTE_REG_COUNT or more uses (currently, it can be at most 4, if both the @@ -1650,8 +1676,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) #endif buildInternalRegisterUses(); - regMaskTP killMask = getKillSetForBlockStore(blkNode); - BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + BuildKills(blkNode, getKillSetForBlockStore(blkNode)); return useCount; } @@ -1776,7 +1801,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. if ((loadSize % XMM_REGSIZE_BYTES) != 0) { - regMaskTP regMask = availableIntRegs; + regMaskGpr regMask = availableIntRegs; #ifdef TARGET_X86 // Storing at byte granularity requires a byteable register. 
if ((loadSize & 1) != 0) @@ -1882,10 +1907,10 @@ int LinearScan::BuildLclHeap(GenTree* tree) // int LinearScan::BuildModDiv(GenTree* tree) { - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - regMaskTP dstCandidates = RBM_NONE; - int srcCount = 0; + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + regMaskGpr dstCandidates = RBM_NONE; + int srcCount = 0; if (varTypeIsFloating(tree->TypeGet())) { @@ -1938,12 +1963,13 @@ int LinearScan::BuildModDiv(GenTree* tree) srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(RBM_RAX | RBM_RDX)); + assert(compiler->IsGprRegMask(dstCandidates)); + srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(RBM_RAX | RBM_RDX)); buildInternalRegisterUses(); - regMaskTP killMask = getKillSetForModDiv(tree->AsOp()); - BuildDefsWithKills(tree, 1, dstCandidates, killMask); + AllRegsMask killMask(getKillSetForModDiv(tree->AsOp())); + BuildDefWithKills(tree, 1, dstCandidates, killMask); return srcCount; } @@ -2100,7 +2126,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou dstCount = 0; } - regMaskTP dstCandidates = RBM_NONE; + regMaskOnlyOne dstCandidates = RBM_NONE; if (intrinsicTree->GetOperandCount() == 0) { @@ -2690,7 +2716,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (buildUses) { - regMaskTP op1RegCandidates = RBM_NONE; + regMaskFloat op1RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) if (!isEvexCompatible) @@ -2715,7 +2741,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (op2 != nullptr) { - regMaskTP op2RegCandidates = RBM_NONE; + regMaskFloat op2RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) if (!isEvexCompatible) @@ -2761,7 +2787,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (op3 != nullptr) { - regMaskTP op3RegCandidates = RBM_NONE; + regMaskFloat op3RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) if (!isEvexCompatible) @@ -2775,7 +2801,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (op4 != nullptr) { - regMaskTP op4RegCandidates = RBM_NONE; + regMaskFloat op4RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) assert(isEvexCompatible); @@ -2831,7 +2857,7 @@ int LinearScan::BuildCast(GenTreeCast* cast) const var_types srcType = genActualType(src->TypeGet()); const var_types castType = cast->gtCastType; - regMaskTP candidates = RBM_NONE; + regMaskGpr candidates = RBM_NONE; #ifdef TARGET_X86 if (varTypeIsByte(castType)) { @@ -2881,8 +2907,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #endif // FEATURE_SIMD - regMaskTP indirCandidates = RBM_NONE; - int srcCount = BuildIndirUses(indirTree, indirCandidates); + int srcCount = BuildIndirUses(indirTree); if (indirTree->gtOper == GT_STOREIND) { GenTree* source = indirTree->gtGetOp2(); @@ -2898,7 +2923,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } else { - regMaskTP srcCandidates = RBM_NONE; + regMaskGpr srcCandidates = RBM_NONE; #ifdef TARGET_X86 // Determine if we need byte regs for the non-mem source, if any. 
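// Illustrative sketch only: why byte-sized operands on x86 restrict register
// candidates. Only EAX/ECX/EDX/EBX have byte-addressable low halves (AL/CL/DL/BL), so
// a "byteable" candidate set intersects with those four, falling back to all of them
// when the incoming set is unconstrained. The mask values mirror the x86 table in
// register.h; allByteRegsSketch/byteableCandidates are hypothetical names.
#include <cstdint>

constexpr uint64_t RBM_EAX = 0x01, RBM_ECX = 0x02, RBM_EDX = 0x04, RBM_EBX = 0x08;
constexpr uint64_t RBM_ESI = 0x40, RBM_EDI = 0x80;
constexpr uint64_t RBM_NONE = 0;

constexpr uint64_t allByteRegsSketch()
{
    return RBM_EAX | RBM_ECX | RBM_EDX | RBM_EBX;
}

constexpr uint64_t byteableCandidates(uint64_t candidates)
{
    return (candidates == RBM_NONE) ? allByteRegsSketch() : (candidates & allByteRegsSketch());
}

static_assert(byteableCandidates(RBM_NONE) == allByteRegsSketch(), "unconstrained -> all byteable regs");
static_assert(byteableCandidates(RBM_ESI | RBM_EAX) == RBM_EAX, "ESI has no byte-addressable form");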
@@ -2938,6 +2963,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #endif // TARGET_X86 + assert(compiler->IsGprRegMask(srcCandidates)); srcCount += BuildBinaryUses(source->AsOp(), srcCandidates); } } @@ -3003,9 +3029,9 @@ int LinearScan::BuildMul(GenTree* tree) return BuildSimple(tree); } - int srcCount = BuildBinaryUses(tree->AsOp()); - int dstCount = 1; - regMaskTP dstCandidates = RBM_NONE; + int srcCount = BuildBinaryUses(tree->AsOp()); + int dstCount = 1; + regMaskGpr dstCandidates = RBM_NONE; bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0); bool requiresOverflowCheck = tree->gtOverflowEx(); @@ -3059,8 +3085,11 @@ int LinearScan::BuildMul(GenTree* tree) { containedMemOp = op2; } - regMaskTP killMask = getKillSetForMul(tree->AsOp()); - BuildDefsWithKills(tree, dstCount, dstCandidates, killMask); + + assert(compiler->IsGprRegMask(dstCandidates)); + + AllRegsMask killMask(getKillSetForMul(tree->AsOp())); + BuildDefWithKills(tree, dstCount, dstCandidates, killMask); return srcCount; } @@ -3103,7 +3132,7 @@ void LinearScan::SetContainsAVXFlags(unsigned sizeOfSIMDVector /* = 0*/) // RBM_NONE if compatible with EVEX (or not a floating/SIMD register), // lowSIMDRegs() (XMM0-XMM16) otherwise. // -inline regMaskTP LinearScan::BuildEvexIncompatibleMask(GenTree* tree) +inline regMaskFloat LinearScan::BuildEvexIncompatibleMask(GenTree* tree) { #if defined(TARGET_AMD64) if (!(varTypeIsFloating(tree->gtType) || varTypeIsSIMD(tree->gtType))) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8e9b07e8da6ccf..b8be505076d655 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1962,8 +1962,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call bool callIsVararg = IsVarArgs(); #ifdef TARGET_ARM - regMaskTP argSkippedRegMask = RBM_NONE; - regMaskTP fltArgSkippedRegMask = RBM_NONE; + regMaskGpr argSkippedRegMask = RBM_NONE; + regMaskFloat fltArgSkippedRegMask = RBM_NONE; #endif // TARGET_ARM #if defined(TARGET_X86) @@ -2539,8 +2539,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call (size == 1)) // The size to back-fill is one float register { // Back-fill the register. 
- isBackFilled = true; - regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); + isBackFilled = true; + regMaskFloat backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 2767686ecfd7cb..9ac37de2d8eb3b 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4160,7 +4160,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho hoistCtxt->m_hoistedFPExprCount = 0; } -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS if (!VarSetOps::IsEmpty(this, lvaMaskVars)) { VARSET_TP loopMskVars(VarSetOps::Intersection(this, loopVars, lvaMaskVars)); @@ -4189,7 +4189,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho { hoistCtxt->m_loopVarMskCount = 0; hoistCtxt->m_loopVarInOutMskCount = 0; +#ifdef FEATURE_MASKED_HW_INTRINSICS hoistCtxt->m_hoistedMskExprCount = 0; +#endif } #endif // TARGET_XARCH @@ -4313,9 +4315,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho optHoistLoopBlocks(loop, &defExec, hoistCtxt); unsigned numHoisted = hoistCtxt->m_hoistedFPExprCount + hoistCtxt->m_hoistedExprCount; -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS numHoisted += hoistCtxt->m_hoistedMskExprCount; -#endif // TARGET_XARCH +#endif // FEATURE_MASKED_HW_INTRINSICS return numHoisted > 0; } @@ -4347,7 +4349,7 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l } #endif } -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS else if (varTypeUsesMaskReg(tree)) { hoistedExprCount = hoistCtxt->m_hoistedMskExprCount; @@ -4360,7 +4362,7 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l availRegCount += CNT_CALLEE_TRASH_MASK - 1; } } -#endif // TARGET_XARCH +#endif // FEATURE_MASKED_HW_INTRINSICS else { assert(varTypeUsesFloatReg(tree)); @@ -5522,7 +5524,7 @@ void Compiler::optComputeInterestingVarSets() #ifndef TARGET_64BIT VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this)); #endif -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS VarSetOps::AssignNoCopy(this, lvaMaskVars, VarSetOps::MakeEmpty(this)); #endif @@ -5541,7 +5543,7 @@ void Compiler::optComputeInterestingVarSets() VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex); } #endif -#ifdef TARGET_XARCH +#ifdef FEATURE_MASKED_HW_INTRINSICS else if (varTypeUsesMaskReg(varDsc->lvType)) { VarSetOps::AddElemD(this, lvaMaskVars, varDsc->lvVarIndex); diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index ba07086a63c212..516f4ea95d5355 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -99,8 +99,8 @@ bool Compiler::shouldDoubleAlign( // by linear scan. (It is not shared for System V AMD64 platform.) 
regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc) { - regNumber inArgReg = argDsc->GetArgReg(); - regMaskTP inArgMask = genRegMask(inArgReg); + regNumber inArgReg = argDsc->GetArgReg(); + singleRegMask inArgMask = genRegMask(inArgReg); if (regState->rsIsFloat) { diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index 43454e96ec64e4..e784f18a167fba 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -16,15 +16,15 @@ #if defined(TARGET_X86) /* -REGDEF(name, rnum, mask, sname) */ -REGDEF(EAX, 0, 0x01, "eax" ) -REGDEF(ECX, 1, 0x02, "ecx" ) -REGDEF(EDX, 2, 0x04, "edx" ) -REGDEF(EBX, 3, 0x08, "ebx" ) -REGDEF(ESP, 4, 0x10, "esp" ) -REGDEF(EBP, 5, 0x20, "ebp" ) -REGDEF(ESI, 6, 0x40, "esi" ) -REGDEF(EDI, 7, 0x80, "edi" ) +REGDEF(name, rnum, mask, sname, regTypeTag) */ +REGDEF(EAX, 0, 0x01, "eax", 0) +REGDEF(ECX, 1, 0x02, "ecx", 0) +REGDEF(EDX, 2, 0x04, "edx", 0) +REGDEF(EBX, 3, 0x08, "ebx", 0) +REGDEF(ESP, 4, 0x10, "esp", 0) +REGDEF(EBP, 5, 0x20, "ebp", 0) +REGDEF(ESI, 6, 0x40, "esi", 0) +REGDEF(EDI, 7, 0x80, "edi", 0) REGALIAS(RAX, EAX) REGALIAS(RCX, ECX) REGALIAS(RDX, EDX) @@ -37,23 +37,23 @@ REGALIAS(RDI, EDI) #else // !defined(TARGET_X86) /* -REGDEF(name, rnum, mask, sname) */ -REGDEF(RAX, 0, 0x0001, "rax" ) -REGDEF(RCX, 1, 0x0002, "rcx" ) -REGDEF(RDX, 2, 0x0004, "rdx" ) -REGDEF(RBX, 3, 0x0008, "rbx" ) -REGDEF(RSP, 4, 0x0010, "rsp" ) -REGDEF(RBP, 5, 0x0020, "rbp" ) -REGDEF(RSI, 6, 0x0040, "rsi" ) -REGDEF(RDI, 7, 0x0080, "rdi" ) -REGDEF(R8, 8, 0x0100, "r8" ) -REGDEF(R9, 9, 0x0200, "r9" ) -REGDEF(R10, 10, 0x0400, "r10" ) -REGDEF(R11, 11, 0x0800, "r11" ) -REGDEF(R12, 12, 0x1000, "r12" ) -REGDEF(R13, 13, 0x2000, "r13" ) -REGDEF(R14, 14, 0x4000, "r14" ) -REGDEF(R15, 15, 0x8000, "r15" ) +REGDEF(name, rnum, mask, sname, regTypeTag) */ +REGDEF(RAX, 0, 0x0001, "rax", 0) +REGDEF(RCX, 1, 0x0002, "rcx", 0) +REGDEF(RDX, 2, 0x0004, "rdx", 0) +REGDEF(RBX, 3, 0x0008, "rbx", 0) +REGDEF(RSP, 4, 0x0010, "rsp", 0) +REGDEF(RBP, 5, 0x0020, "rbp", 0) +REGDEF(RSI, 6, 0x0040, "rsi", 0) +REGDEF(RDI, 7, 0x0080, "rdi", 0) +REGDEF(R8, 8, 0x0100, "r8" , 0) +REGDEF(R9, 9, 0x0200, "r9" , 0) +REGDEF(R10, 10, 0x0400, "r10", 0) +REGDEF(R11, 11, 0x0800, "r11", 0) +REGDEF(R12, 12, 0x1000, "r12", 0) +REGDEF(R13, 13, 0x2000, "r13", 0) +REGDEF(R14, 14, 0x4000, "r14", 0) +REGDEF(R15, 15, 0x8000, "r15", 0) REGALIAS(EAX, RAX) REGALIAS(ECX, RCX) @@ -83,55 +83,55 @@ REGALIAS(EDI, RDI) #endif // !TARGET_AMD64 -REGDEF(XMM0, 0+XMMBASE, XMMMASK(0), "mm0" ) -REGDEF(XMM1, 1+XMMBASE, XMMMASK(1), "mm1" ) -REGDEF(XMM2, 2+XMMBASE, XMMMASK(2), "mm2" ) -REGDEF(XMM3, 3+XMMBASE, XMMMASK(3), "mm3" ) -REGDEF(XMM4, 4+XMMBASE, XMMMASK(4), "mm4" ) -REGDEF(XMM5, 5+XMMBASE, XMMMASK(5), "mm5" ) -REGDEF(XMM6, 6+XMMBASE, XMMMASK(6), "mm6" ) -REGDEF(XMM7, 7+XMMBASE, XMMMASK(7), "mm7" ) +REGDEF(XMM0, 0+XMMBASE, XMMMASK(0), "mm0", 1) +REGDEF(XMM1, 1+XMMBASE, XMMMASK(1), "mm1", 1) +REGDEF(XMM2, 2+XMMBASE, XMMMASK(2), "mm2", 1) +REGDEF(XMM3, 3+XMMBASE, XMMMASK(3), "mm3", 1) +REGDEF(XMM4, 4+XMMBASE, XMMMASK(4), "mm4", 1) +REGDEF(XMM5, 5+XMMBASE, XMMMASK(5), "mm5", 1) +REGDEF(XMM6, 6+XMMBASE, XMMMASK(6), "mm6", 1) +REGDEF(XMM7, 7+XMMBASE, XMMMASK(7), "mm7", 1) #ifdef TARGET_AMD64 -REGDEF(XMM8, 8+XMMBASE, XMMMASK(8), "mm8" ) -REGDEF(XMM9, 9+XMMBASE, XMMMASK(9), "mm9" ) -REGDEF(XMM10, 10+XMMBASE, XMMMASK(10), "mm10" ) -REGDEF(XMM11, 11+XMMBASE, XMMMASK(11), "mm11" ) -REGDEF(XMM12, 12+XMMBASE, XMMMASK(12), "mm12" ) -REGDEF(XMM13, 13+XMMBASE, XMMMASK(13), "mm13" ) -REGDEF(XMM14, 14+XMMBASE, 
XMMMASK(14), "mm14" ) -REGDEF(XMM15, 15+XMMBASE, XMMMASK(15), "mm15" ) - -REGDEF(XMM16, 16+XMMBASE, XMMMASK(16), "mm16" ) -REGDEF(XMM17, 17+XMMBASE, XMMMASK(17), "mm17" ) -REGDEF(XMM18, 18+XMMBASE, XMMMASK(18), "mm18" ) -REGDEF(XMM19, 19+XMMBASE, XMMMASK(19), "mm19" ) -REGDEF(XMM20, 20+XMMBASE, XMMMASK(20), "mm20" ) -REGDEF(XMM21, 21+XMMBASE, XMMMASK(21), "mm21" ) -REGDEF(XMM22, 22+XMMBASE, XMMMASK(22), "mm22" ) -REGDEF(XMM23, 23+XMMBASE, XMMMASK(23), "mm23" ) - -REGDEF(XMM24, 24+XMMBASE, XMMMASK(24), "mm24" ) -REGDEF(XMM25, 25+XMMBASE, XMMMASK(25), "mm25" ) -REGDEF(XMM26, 26+XMMBASE, XMMMASK(26), "mm26" ) -REGDEF(XMM27, 27+XMMBASE, XMMMASK(27), "mm27" ) -REGDEF(XMM28, 28+XMMBASE, XMMMASK(28), "mm28" ) -REGDEF(XMM29, 29+XMMBASE, XMMMASK(29), "mm29" ) -REGDEF(XMM30, 30+XMMBASE, XMMMASK(30), "mm30" ) -REGDEF(XMM31, 31+XMMBASE, XMMMASK(31), "mm31" ) +REGDEF(XMM8, 8+XMMBASE, XMMMASK(8), "mm8", 1) +REGDEF(XMM9, 9+XMMBASE, XMMMASK(9), "mm9", 1) +REGDEF(XMM10, 10+XMMBASE, XMMMASK(10), "mm10", 1) +REGDEF(XMM11, 11+XMMBASE, XMMMASK(11), "mm11", 1) +REGDEF(XMM12, 12+XMMBASE, XMMMASK(12), "mm12", 1) +REGDEF(XMM13, 13+XMMBASE, XMMMASK(13), "mm13", 1) +REGDEF(XMM14, 14+XMMBASE, XMMMASK(14), "mm14", 1) +REGDEF(XMM15, 15+XMMBASE, XMMMASK(15), "mm15", 1) + +REGDEF(XMM16, 16+XMMBASE, XMMMASK(16), "mm16", 1) +REGDEF(XMM17, 17+XMMBASE, XMMMASK(17), "mm17", 1) +REGDEF(XMM18, 18+XMMBASE, XMMMASK(18), "mm18", 1) +REGDEF(XMM19, 19+XMMBASE, XMMMASK(19), "mm19", 1) +REGDEF(XMM20, 20+XMMBASE, XMMMASK(20), "mm20", 1) +REGDEF(XMM21, 21+XMMBASE, XMMMASK(21), "mm21", 1) +REGDEF(XMM22, 22+XMMBASE, XMMMASK(22), "mm22", 1) +REGDEF(XMM23, 23+XMMBASE, XMMMASK(23), "mm23", 1) + +REGDEF(XMM24, 24+XMMBASE, XMMMASK(24), "mm24", 1) +REGDEF(XMM25, 25+XMMBASE, XMMMASK(25), "mm25", 1) +REGDEF(XMM26, 26+XMMBASE, XMMMASK(26), "mm26", 1) +REGDEF(XMM27, 27+XMMBASE, XMMMASK(27), "mm27", 1) +REGDEF(XMM28, 28+XMMBASE, XMMMASK(28), "mm28", 1) +REGDEF(XMM29, 29+XMMBASE, XMMMASK(29), "mm29", 1) +REGDEF(XMM30, 30+XMMBASE, XMMMASK(30), "mm30", 1) +REGDEF(XMM31, 31+XMMBASE, XMMMASK(31), "mm31", 1) #endif // !TARGET_AMD64 -REGDEF(K0, 0+KBASE, KMASK(0), "k0" ) -REGDEF(K1, 1+KBASE, KMASK(1), "k1" ) -REGDEF(K2, 2+KBASE, KMASK(2), "k2" ) -REGDEF(K3, 3+KBASE, KMASK(3), "k3" ) -REGDEF(K4, 4+KBASE, KMASK(4), "k4" ) -REGDEF(K5, 5+KBASE, KMASK(5), "k5" ) -REGDEF(K6, 6+KBASE, KMASK(6), "k6" ) -REGDEF(K7, 7+KBASE, KMASK(7), "k7" ) +REGDEF(K0, 0+KBASE, KMASK(0), "k0", 2) +REGDEF(K1, 1+KBASE, KMASK(1), "k1", 2) +REGDEF(K2, 2+KBASE, KMASK(2), "k2", 2) +REGDEF(K3, 3+KBASE, KMASK(3), "k3", 2) +REGDEF(K4, 4+KBASE, KMASK(4), "k4", 2) +REGDEF(K5, 5+KBASE, KMASK(5), "k5", 2) +REGDEF(K6, 6+KBASE, KMASK(6), "k6", 2) +REGDEF(K7, 7+KBASE, KMASK(7), "k7", 2) -REGDEF(STK, 8+KBASE, 0x0000, "STK" ) +REGDEF(STK, 8+KBASE, 0x0000, "STK", 3) #elif defined(TARGET_ARM) #include "registerarm.h" diff --git a/src/coreclr/jit/registerargconvention.cpp b/src/coreclr/jit/registerargconvention.cpp index f58388a39672c1..5321483f2739a1 100644 --- a/src/coreclr/jit/registerargconvention.cpp +++ b/src/coreclr/jit/registerargconvention.cpp @@ -28,7 +28,7 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) // can't create a > 1 register alignment hole to back-fill. 
// Back-fill the register - regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); + regMaskFloat backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask resultArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); assert(resultArgNum < MAX_FLOAT_REG_ARG); diff --git a/src/coreclr/jit/registerargconvention.h b/src/coreclr/jit/registerargconvention.h index 840f7adc4fcebd..897e019244dffa 100644 --- a/src/coreclr/jit/registerargconvention.h +++ b/src/coreclr/jit/registerargconvention.h @@ -21,8 +21,8 @@ struct InitVarDscInfo #ifdef TARGET_ARM // Support back-filling of FP parameters. This is similar to code in gtMorphArgs() that // handles arguments. - regMaskTP fltArgSkippedRegMask; - bool anyFloatStackArgs; + regMaskFloat fltArgSkippedRegMask; + bool anyFloatStackArgs; #endif // TARGET_ARM #if defined(TARGET_ARM) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/registerarm.h b/src/coreclr/jit/registerarm.h index ad70eaa211ccf6..c8e4e06947d4fc 100644 --- a/src/coreclr/jit/registerarm.h +++ b/src/coreclr/jit/registerarm.h @@ -13,59 +13,59 @@ #endif /* -REGDEF(name, rnum, mask, sname) */ -REGDEF(R0, 0, 0x0001, "r0" ) -REGDEF(R1, 1, 0x0002, "r1" ) -REGDEF(R2, 2, 0x0004, "r2" ) -REGDEF(R3, 3, 0x0008, "r3" ) -REGDEF(R4, 4, 0x0010, "r4" ) -REGDEF(R5, 5, 0x0020, "r5" ) -REGDEF(R6, 6, 0x0040, "r6" ) -REGDEF(R7, 7, 0x0080, "r7" ) -REGDEF(R8, 8, 0x0100, "r8" ) -REGDEF(R9, 9, 0x0200, "r9" ) -REGDEF(R10, 10, 0x0400, "r10" ) -REGDEF(R11, 11, 0x0800, "r11" ) -REGDEF(R12, 12, 0x1000, "r12" ) -REGDEF(SP, 13, 0x2000, "sp" ) -REGDEF(LR, 14, 0x4000, "lr" ) -REGDEF(PC, 15, 0x8000, "pc" ) +REGDEF(name, rnum, mask, sname, regTypeTag) */ +REGDEF(R0, 0, 0x0001, "r0", 0) +REGDEF(R1, 1, 0x0002, "r1", 0) +REGDEF(R2, 2, 0x0004, "r2", 0) +REGDEF(R3, 3, 0x0008, "r3", 0) +REGDEF(R4, 4, 0x0010, "r4", 0) +REGDEF(R5, 5, 0x0020, "r5", 0) +REGDEF(R6, 6, 0x0040, "r6", 0) +REGDEF(R7, 7, 0x0080, "r7", 0) +REGDEF(R8, 8, 0x0100, "r8", 0) +REGDEF(R9, 9, 0x0200, "r9", 0) +REGDEF(R10, 10, 0x0400, "r10",0) +REGDEF(R11, 11, 0x0800, "r11",0) +REGDEF(R12, 12, 0x1000, "r12",0) +REGDEF(SP, 13, 0x2000, "sp", 0) +REGDEF(LR, 14, 0x4000, "lr", 0) +REGDEF(PC, 15, 0x8000, "pc", 0) #define FPBASE 16 #define VFPMASK(x) (((__int64)1) << (x+FPBASE)) -REGDEF(F0, 0+FPBASE, VFPMASK(0), "f0") -REGDEF(F1, 1+FPBASE, VFPMASK(1), "f1") -REGDEF(F2, 2+FPBASE, VFPMASK(2), "f2") -REGDEF(F3, 3+FPBASE, VFPMASK(3), "f3") -REGDEF(F4, 4+FPBASE, VFPMASK(4), "f4") -REGDEF(F5, 5+FPBASE, VFPMASK(5), "f5") -REGDEF(F6, 6+FPBASE, VFPMASK(6), "f6") -REGDEF(F7, 7+FPBASE, VFPMASK(7), "f7") -REGDEF(F8, 8+FPBASE, VFPMASK(8), "f8") -REGDEF(F9, 9+FPBASE, VFPMASK(9), "f9") -REGDEF(F10, 10+FPBASE, VFPMASK(10), "f10") -REGDEF(F11, 11+FPBASE, VFPMASK(11), "f11") -REGDEF(F12, 12+FPBASE, VFPMASK(12), "f12") -REGDEF(F13, 13+FPBASE, VFPMASK(13), "f13") -REGDEF(F14, 14+FPBASE, VFPMASK(14), "f14") -REGDEF(F15, 15+FPBASE, VFPMASK(15), "f15") -REGDEF(F16, 16+FPBASE, VFPMASK(16), "f16") -REGDEF(F17, 17+FPBASE, VFPMASK(17), "f17") -REGDEF(F18, 18+FPBASE, VFPMASK(18), "f18") -REGDEF(F19, 19+FPBASE, VFPMASK(19), "f19") -REGDEF(F20, 20+FPBASE, VFPMASK(20), "f20") -REGDEF(F21, 21+FPBASE, VFPMASK(21), "f21") -REGDEF(F22, 22+FPBASE, VFPMASK(22), "f22") -REGDEF(F23, 23+FPBASE, VFPMASK(23), "f23") -REGDEF(F24, 24+FPBASE, VFPMASK(24), "f24") -REGDEF(F25, 25+FPBASE, VFPMASK(25), "f25") -REGDEF(F26, 26+FPBASE, VFPMASK(26), "f26") -REGDEF(F27, 27+FPBASE, 
VFPMASK(27), "f27") -REGDEF(F28, 28+FPBASE, VFPMASK(28), "f28") -REGDEF(F29, 29+FPBASE, VFPMASK(29), "f29") -REGDEF(F30, 30+FPBASE, VFPMASK(30), "f30") -REGDEF(F31, 31+FPBASE, VFPMASK(31), "f31") +REGDEF(F0, 0+FPBASE, VFPMASK(0), "f0", 1) +REGDEF(F1, 1+FPBASE, VFPMASK(1), "f1", 1) +REGDEF(F2, 2+FPBASE, VFPMASK(2), "f2", 1) +REGDEF(F3, 3+FPBASE, VFPMASK(3), "f3", 1) +REGDEF(F4, 4+FPBASE, VFPMASK(4), "f4", 1) +REGDEF(F5, 5+FPBASE, VFPMASK(5), "f5", 1) +REGDEF(F6, 6+FPBASE, VFPMASK(6), "f6", 1) +REGDEF(F7, 7+FPBASE, VFPMASK(7), "f7", 1) +REGDEF(F8, 8+FPBASE, VFPMASK(8), "f8", 1) +REGDEF(F9, 9+FPBASE, VFPMASK(9), "f9", 1) +REGDEF(F10, 10+FPBASE, VFPMASK(10), "f10", 1) +REGDEF(F11, 11+FPBASE, VFPMASK(11), "f11", 1) +REGDEF(F12, 12+FPBASE, VFPMASK(12), "f12", 1) +REGDEF(F13, 13+FPBASE, VFPMASK(13), "f13", 1) +REGDEF(F14, 14+FPBASE, VFPMASK(14), "f14", 1) +REGDEF(F15, 15+FPBASE, VFPMASK(15), "f15", 1) +REGDEF(F16, 16+FPBASE, VFPMASK(16), "f16", 1) +REGDEF(F17, 17+FPBASE, VFPMASK(17), "f17", 1) +REGDEF(F18, 18+FPBASE, VFPMASK(18), "f18", 1) +REGDEF(F19, 19+FPBASE, VFPMASK(19), "f19", 1) +REGDEF(F20, 20+FPBASE, VFPMASK(20), "f20", 1) +REGDEF(F21, 21+FPBASE, VFPMASK(21), "f21", 1) +REGDEF(F22, 22+FPBASE, VFPMASK(22), "f22", 1) +REGDEF(F23, 23+FPBASE, VFPMASK(23), "f23", 1) +REGDEF(F24, 24+FPBASE, VFPMASK(24), "f24", 1) +REGDEF(F25, 25+FPBASE, VFPMASK(25), "f25", 1) +REGDEF(F26, 26+FPBASE, VFPMASK(26), "f26", 1) +REGDEF(F27, 27+FPBASE, VFPMASK(27), "f27", 1) +REGDEF(F28, 28+FPBASE, VFPMASK(28), "f28", 1) +REGDEF(F29, 29+FPBASE, VFPMASK(29), "f29", 1) +REGDEF(F30, 30+FPBASE, VFPMASK(30), "f30", 1) +REGDEF(F31, 31+FPBASE, VFPMASK(31), "f31", 1) // Allow us to call R11/FP, SP, LR and PC by their register number names @@ -75,7 +75,7 @@ REGALIAS(R14, LR) REGALIAS(R15, PC) // This must be last! 
-REGDEF(STK, 32+FPBASE, 0x0000, "STK") +REGDEF(STK, 32+FPBASE, 0x0000, "STK", 2) /*****************************************************************************/ #undef REGDEF diff --git a/src/coreclr/jit/registerarm64.h b/src/coreclr/jit/registerarm64.h index e8c126fac148cb..e754add488404b 100644 --- a/src/coreclr/jit/registerarm64.h +++ b/src/coreclr/jit/registerarm64.h @@ -15,39 +15,39 @@ #define RMASK(x) (1ULL << (x)) /* -REGDEF(name, rnum, mask, xname, wname) */ -REGDEF(R0, 0, 0x0001, "x0" , "w0" ) -REGDEF(R1, 1, 0x0002, "x1" , "w1" ) -REGDEF(R2, 2, 0x0004, "x2" , "w2" ) -REGDEF(R3, 3, 0x0008, "x3" , "w3" ) -REGDEF(R4, 4, 0x0010, "x4" , "w4" ) -REGDEF(R5, 5, 0x0020, "x5" , "w5" ) -REGDEF(R6, 6, 0x0040, "x6" , "w6" ) -REGDEF(R7, 7, 0x0080, "x7" , "w7" ) -REGDEF(R8, 8, 0x0100, "x8" , "w8" ) -REGDEF(R9, 9, 0x0200, "x9" , "w9" ) -REGDEF(R10, 10, 0x0400, "x10", "w10" ) -REGDEF(R11, 11, 0x0800, "x11", "w11" ) -REGDEF(R12, 12, 0x1000, "x12", "w12" ) -REGDEF(R13, 13, 0x2000, "x13", "w13" ) -REGDEF(R14, 14, 0x4000, "x14", "w14" ) -REGDEF(R15, 15, 0x8000, "x15", "w15" ) -REGDEF(IP0, 16, 0x10000, "xip0","wip0" ) -REGDEF(IP1, 17, 0x20000, "xip1","wip1" ) -REGDEF(PR, 18, 0x40000, "xpr", "wpr" ) -REGDEF(R19, 19, 0x80000, "x19", "w19" ) -REGDEF(R20, 20, 0x100000, "x20", "w20" ) -REGDEF(R21, 21, 0x200000, "x21", "w21" ) -REGDEF(R22, 22, 0x400000, "x22", "w22" ) -REGDEF(R23, 23, 0x800000, "x23", "w23" ) -REGDEF(R24, 24, 0x1000000, "x24", "w24" ) -REGDEF(R25, 25, 0x2000000, "x25", "w25" ) -REGDEF(R26, 26, 0x4000000, "x26", "w26" ) -REGDEF(R27, 27, 0x8000000, "x27", "w27" ) -REGDEF(R28, 28, 0x10000000, "x28", "w28" ) -REGDEF(FP, 29, 0x20000000, "fp" , "w29" ) -REGDEF(LR, 30, 0x40000000, "lr" , "w30" ) -REGDEF(ZR, 31, 0x80000000, "xzr", "wzr" ) +REGDEF(name, rnum, mask, xname, wname, regTypeTag) */ +REGDEF(R0, 0, 0x0001, "x0" , "w0", 0) +REGDEF(R1, 1, 0x0002, "x1" , "w1", 0) +REGDEF(R2, 2, 0x0004, "x2" , "w2", 0) +REGDEF(R3, 3, 0x0008, "x3" , "w3", 0) +REGDEF(R4, 4, 0x0010, "x4" , "w4", 0) +REGDEF(R5, 5, 0x0020, "x5" , "w5", 0) +REGDEF(R6, 6, 0x0040, "x6" , "w6", 0) +REGDEF(R7, 7, 0x0080, "x7" , "w7", 0) +REGDEF(R8, 8, 0x0100, "x8" , "w8", 0) +REGDEF(R9, 9, 0x0200, "x9" , "w9", 0) +REGDEF(R10, 10, 0x0400, "x10", "w10", 0) +REGDEF(R11, 11, 0x0800, "x11", "w11", 0) +REGDEF(R12, 12, 0x1000, "x12", "w12", 0) +REGDEF(R13, 13, 0x2000, "x13", "w13", 0) +REGDEF(R14, 14, 0x4000, "x14", "w14", 0) +REGDEF(R15, 15, 0x8000, "x15", "w15", 0) +REGDEF(IP0, 16, 0x10000, "xip0","wip0",0) +REGDEF(IP1, 17, 0x20000, "xip1","wip1",0) +REGDEF(PR, 18, 0x40000, "xpr", "wpr", 0) +REGDEF(R19, 19, 0x80000, "x19", "w19", 0) +REGDEF(R20, 20, 0x100000, "x20", "w20", 0) +REGDEF(R21, 21, 0x200000, "x21", "w21", 0) +REGDEF(R22, 22, 0x400000, "x22", "w22", 0) +REGDEF(R23, 23, 0x800000, "x23", "w23", 0) +REGDEF(R24, 24, 0x1000000, "x24", "w24", 0) +REGDEF(R25, 25, 0x2000000, "x25", "w25", 0) +REGDEF(R26, 26, 0x4000000, "x26", "w26", 0) +REGDEF(R27, 27, 0x8000000, "x27", "w27", 0) +REGDEF(R28, 28, 0x10000000, "x28", "w28", 0) +REGDEF(FP, 29, 0x20000000, "fp" , "w29", 0) +REGDEF(LR, 30, 0x40000000, "lr" , "w30", 0) +REGDEF(ZR, 31, 0x80000000, "xzr", "wzr", 0) // Allow us to call IP0,IP1,PR,FP,LR by their register number names REGALIAS(R16, IP0) @@ -60,66 +60,69 @@ REGALIAS(R30, LR) #define VMASK(x) (1ULL << (VBASE+(x))) /* -REGDEF(name, rnum, mask, xname, wname) */ -REGDEF(V0, 0+VBASE, VMASK(0), "d0", "s0") -REGDEF(V1, 1+VBASE, VMASK(1), "d1", "s1") -REGDEF(V2, 2+VBASE, VMASK(2), "d2", "s2") -REGDEF(V3, 3+VBASE, VMASK(3), "d3", "s3") 
-REGDEF(V4, 4+VBASE, VMASK(4), "d4", "s4") -REGDEF(V5, 5+VBASE, VMASK(5), "d5", "s5") -REGDEF(V6, 6+VBASE, VMASK(6), "d6", "s6") -REGDEF(V7, 7+VBASE, VMASK(7), "d7", "s7") -REGDEF(V8, 8+VBASE, VMASK(8), "d8", "s8") -REGDEF(V9, 9+VBASE, VMASK(9), "d9", "s9") -REGDEF(V10, 10+VBASE, VMASK(10), "d10", "s10") -REGDEF(V11, 11+VBASE, VMASK(11), "d11", "s11") -REGDEF(V12, 12+VBASE, VMASK(12), "d12", "s12") -REGDEF(V13, 13+VBASE, VMASK(13), "d13", "s13") -REGDEF(V14, 14+VBASE, VMASK(14), "d14", "s14") -REGDEF(V15, 15+VBASE, VMASK(15), "d15", "s15") -REGDEF(V16, 16+VBASE, VMASK(16), "d16", "s16") -REGDEF(V17, 17+VBASE, VMASK(17), "d17", "s17") -REGDEF(V18, 18+VBASE, VMASK(18), "d18", "s18") -REGDEF(V19, 19+VBASE, VMASK(19), "d19", "s19") -REGDEF(V20, 20+VBASE, VMASK(20), "d20", "s20") -REGDEF(V21, 21+VBASE, VMASK(21), "d21", "s21") -REGDEF(V22, 22+VBASE, VMASK(22), "d22", "s22") -REGDEF(V23, 23+VBASE, VMASK(23), "d23", "s23") -REGDEF(V24, 24+VBASE, VMASK(24), "d24", "s24") -REGDEF(V25, 25+VBASE, VMASK(25), "d25", "s25") -REGDEF(V26, 26+VBASE, VMASK(26), "d26", "s26") -REGDEF(V27, 27+VBASE, VMASK(27), "d27", "s27") -REGDEF(V28, 28+VBASE, VMASK(28), "d28", "s28") -REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29") -REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30") -REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31") +REGDEF(name, rnum, mask, xname, wname, regTypeTag) */ +REGDEF(V0, 0+VBASE, VMASK(0), "d0", "s0", 1) +REGDEF(V1, 1+VBASE, VMASK(1), "d1", "s1", 1) +REGDEF(V2, 2+VBASE, VMASK(2), "d2", "s2", 1) +REGDEF(V3, 3+VBASE, VMASK(3), "d3", "s3", 1) +REGDEF(V4, 4+VBASE, VMASK(4), "d4", "s4", 1) +REGDEF(V5, 5+VBASE, VMASK(5), "d5", "s5", 1) +REGDEF(V6, 6+VBASE, VMASK(6), "d6", "s6", 1) +REGDEF(V7, 7+VBASE, VMASK(7), "d7", "s7", 1) +REGDEF(V8, 8+VBASE, VMASK(8), "d8", "s8", 1) +REGDEF(V9, 9+VBASE, VMASK(9), "d9", "s9", 1) +REGDEF(V10, 10+VBASE, VMASK(10), "d10", "s10", 1) +REGDEF(V11, 11+VBASE, VMASK(11), "d11", "s11", 1) +REGDEF(V12, 12+VBASE, VMASK(12), "d12", "s12", 1) +REGDEF(V13, 13+VBASE, VMASK(13), "d13", "s13", 1) +REGDEF(V14, 14+VBASE, VMASK(14), "d14", "s14", 1) +REGDEF(V15, 15+VBASE, VMASK(15), "d15", "s15", 1) +REGDEF(V16, 16+VBASE, VMASK(16), "d16", "s16", 1) +REGDEF(V17, 17+VBASE, VMASK(17), "d17", "s17", 1) +REGDEF(V18, 18+VBASE, VMASK(18), "d18", "s18", 1) +REGDEF(V19, 19+VBASE, VMASK(19), "d19", "s19", 1) +REGDEF(V20, 20+VBASE, VMASK(20), "d20", "s20", 1) +REGDEF(V21, 21+VBASE, VMASK(21), "d21", "s21", 1) +REGDEF(V22, 22+VBASE, VMASK(22), "d22", "s22", 1) +REGDEF(V23, 23+VBASE, VMASK(23), "d23", "s23", 1) +REGDEF(V24, 24+VBASE, VMASK(24), "d24", "s24", 1) +REGDEF(V25, 25+VBASE, VMASK(25), "d25", "s25", 1) +REGDEF(V26, 26+VBASE, VMASK(26), "d26", "s26", 1) +REGDEF(V27, 27+VBASE, VMASK(27), "d27", "s27", 1) +REGDEF(V28, 28+VBASE, VMASK(28), "d28", "s28", 1) +REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29", 1) +REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30", 1) +REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31", 1) -// TODO-SVE: Fix once we add predicate registers -REGALIAS(P0, V0) -REGALIAS(P1, V1) -REGALIAS(P2, V2) -REGALIAS(P3, V3) -REGALIAS(P4, V4) -REGALIAS(P5, V5) -REGALIAS(P6, V6) -REGALIAS(P7, V7) -REGALIAS(P8, V8) -REGALIAS(P9, V9) -REGALIAS(P10, V10) -REGALIAS(P11, V11) -REGALIAS(P12, V12) -REGALIAS(P13, V13) -REGALIAS(P14, V14) -REGALIAS(P15, V15) +#define PBASE 64 +#define PMASK(x) (1ULL << x) +/* +REGDEF(name, rnum, mask, xname, wname, regTypeTag) */ +REGDEF(P0, 0+PBASE, PMASK(0), "p0" , "na", 2) +REGDEF(P1, 1+PBASE, PMASK(1), "p1" , "na", 2) +REGDEF(P2, 2+PBASE, PMASK(2), "p2" , "na", 2) 
+REGDEF(P3, 3+PBASE, PMASK(3), "p3" , "na", 2) +REGDEF(P4, 4+PBASE, PMASK(4), "p4" , "na", 2) +REGDEF(P5, 5+PBASE, PMASK(5), "p5" , "na", 2) +REGDEF(P6, 6+PBASE, PMASK(6), "p6" , "na", 2) +REGDEF(P7, 7+PBASE, PMASK(7), "p7" , "na", 2) +REGDEF(P8, 8+PBASE, PMASK(8), "p8" , "na", 2) +REGDEF(P9, 9+PBASE, PMASK(9), "p9" , "na", 2) +REGDEF(P10, 10+PBASE, PMASK(10), "p10", "na", 2) +REGDEF(P11, 11+PBASE, PMASK(11), "p11", "na", 2) +REGDEF(P12, 12+PBASE, PMASK(12), "p12", "na", 2) +REGDEF(P13, 13+PBASE, PMASK(13), "p13", "na", 2) +REGDEF(P14, 14+PBASE, PMASK(14), "p14", "na", 2) +REGDEF(P15, 15+PBASE, PMASK(15), "p15", "na", 2) -// The registers with values 64 (NBASE) and above are not real register numbers -#define NBASE 64 +// The registers with values 80 (NBASE) and above are not real register numbers +#define NBASE 80 -REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?") +REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?", 3) // This must be last! -REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK") +REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK", 3) /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 8f3cd157016bb2..288bb020a93ad0 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -15,39 +15,39 @@ #define RMASK(x) (1ULL << (x)) /* -REGDEF(name, rnum, mask, sname) */ -REGDEF(R0, 0, 0x0001, "zero") -REGDEF(RA, 1, 0x0002, "ra" ) -REGDEF(TP, 2, 0x0004, "tp" ) -REGDEF(SP, 3, 0x0008, "sp" ) -REGDEF(A0, 4, 0x0010, "a0" ) -REGDEF(A1, 5, 0x0020, "a1" ) -REGDEF(A2, 6, 0x0040, "a2" ) -REGDEF(A3, 7, 0x0080, "a3" ) -REGDEF(A4, 8, 0x0100, "a4" ) -REGDEF(A5, 9, 0x0200, "a5" ) -REGDEF(A6, 10, 0x0400, "a6" ) -REGDEF(A7, 11, 0x0800, "a7" ) -REGDEF(T0, 12, 0x1000, "t0" ) -REGDEF(T1, 13, 0x2000, "t1" ) -REGDEF(T2, 14, 0x4000, "t2" ) -REGDEF(T3, 15, 0x8000, "t3" ) -REGDEF(T4, 16, 0x10000, "t4" ) -REGDEF(T5, 17, 0x20000, "t5" ) -REGDEF(T6, 18, 0x40000, "t6" ) -REGDEF(T7, 19, 0x80000, "t7" ) -REGDEF(T8, 20, 0x100000, "t8" ) -REGDEF(X0, 21, 0x200000, "x0" ) -REGDEF(FP, 22, 0x400000, "fp" ) -REGDEF(S0, 23, 0x800000, "s0" ) -REGDEF(S1, 24, 0x1000000, "s1" ) -REGDEF(S2, 25, 0x2000000, "s2" ) -REGDEF(S3, 26, 0x4000000, "s3" ) -REGDEF(S4, 27, 0x8000000, "s4" ) -REGDEF(S5, 28, 0x10000000, "s5" ) -REGDEF(S6, 29, 0x20000000, "s6" ) -REGDEF(S7, 30, 0x40000000, "s7" ) -REGDEF(S8, 31, 0x80000000, "s8" ) +REGDEF(name, rnum, mask, sname, regTypeTag) */ +REGDEF(R0, 0, 0x0001, "zero", 0) +REGDEF(RA, 1, 0x0002, "ra" , 0) +REGDEF(TP, 2, 0x0004, "tp" , 0) +REGDEF(SP, 3, 0x0008, "sp" , 0) +REGDEF(A0, 4, 0x0010, "a0" , 0) +REGDEF(A1, 5, 0x0020, "a1" , 0) +REGDEF(A2, 6, 0x0040, "a2" , 0) +REGDEF(A3, 7, 0x0080, "a3" , 0) +REGDEF(A4, 8, 0x0100, "a4" , 0) +REGDEF(A5, 9, 0x0200, "a5" , 0) +REGDEF(A6, 10, 0x0400, "a6" , 0) +REGDEF(A7, 11, 0x0800, "a7" , 0) +REGDEF(T0, 12, 0x1000, "t0" , 0) +REGDEF(T1, 13, 0x2000, "t1" , 0) +REGDEF(T2, 14, 0x4000, "t2" , 0) +REGDEF(T3, 15, 0x8000, "t3" , 0) +REGDEF(T4, 16, 0x10000, "t4" , 0) +REGDEF(T5, 17, 0x20000, "t5" , 0) +REGDEF(T6, 18, 0x40000, "t6" , 0) +REGDEF(T7, 19, 0x80000, "t7" , 0) +REGDEF(T8, 20, 0x100000, "t8" , 0) +REGDEF(X0, 21, 0x200000, "x0" , 0) +REGDEF(FP, 22, 0x400000, "fp" , 0) +REGDEF(S0, 23, 0x800000, "s0" , 0) +REGDEF(S1, 24, 0x1000000, "s1" , 0) +REGDEF(S2, 25, 0x2000000, "s2" , 0) +REGDEF(S3, 26, 0x4000000, "s3" , 0) +REGDEF(S4, 27, 0x8000000, "s4" , 0) +REGDEF(S5, 28, 0x10000000, "s5" , 0) +REGDEF(S6, 29, 0x20000000, "s6" , 0) 
+REGDEF(S7, 30, 0x40000000, "s7" , 0) +REGDEF(S8, 31, 0x80000000, "s8" , 0) //NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved !!! @@ -64,44 +64,44 @@ REGALIAS(R21, X0) /* REGDEF(name, rnum, mask, sname) */ -REGDEF(F0, 0+FBASE, FMASK(0), "f0") -REGDEF(F1, 1+FBASE, FMASK(1), "f1") -REGDEF(F2, 2+FBASE, FMASK(2), "f2") -REGDEF(F3, 3+FBASE, FMASK(3), "f3") -REGDEF(F4, 4+FBASE, FMASK(4), "f4") -REGDEF(F5, 5+FBASE, FMASK(5), "f5") -REGDEF(F6, 6+FBASE, FMASK(6), "f6") -REGDEF(F7, 7+FBASE, FMASK(7), "f7") -REGDEF(F8, 8+FBASE, FMASK(8), "f8") -REGDEF(F9, 9+FBASE, FMASK(9), "f9") -REGDEF(F10, 10+FBASE, FMASK(10), "f10") -REGDEF(F11, 11+FBASE, FMASK(11), "f11") -REGDEF(F12, 12+FBASE, FMASK(12), "f12") -REGDEF(F13, 13+FBASE, FMASK(13), "f13") -REGDEF(F14, 14+FBASE, FMASK(14), "f14") -REGDEF(F15, 15+FBASE, FMASK(15), "f15") -REGDEF(F16, 16+FBASE, FMASK(16), "f16") -REGDEF(F17, 17+FBASE, FMASK(17), "f17") -REGDEF(F18, 18+FBASE, FMASK(18), "f18") -REGDEF(F19, 19+FBASE, FMASK(19), "f19") -REGDEF(F20, 20+FBASE, FMASK(20), "f20") -REGDEF(F21, 21+FBASE, FMASK(21), "f21") -REGDEF(F22, 22+FBASE, FMASK(22), "f22") -REGDEF(F23, 23+FBASE, FMASK(23), "f23") -REGDEF(F24, 24+FBASE, FMASK(24), "f24") -REGDEF(F25, 25+FBASE, FMASK(25), "f25") -REGDEF(F26, 26+FBASE, FMASK(26), "f26") -REGDEF(F27, 27+FBASE, FMASK(27), "f27") -REGDEF(F28, 28+FBASE, FMASK(28), "f28") -REGDEF(F29, 29+FBASE, FMASK(29), "f29") -REGDEF(F30, 30+FBASE, FMASK(30), "f30") -REGDEF(F31, 31+FBASE, FMASK(31), "f31") +REGDEF(F0, 0+FBASE, FMASK(0), "f0", 1) +REGDEF(F1, 1+FBASE, FMASK(1), "f1", 1) +REGDEF(F2, 2+FBASE, FMASK(2), "f2", 1) +REGDEF(F3, 3+FBASE, FMASK(3), "f3", 1) +REGDEF(F4, 4+FBASE, FMASK(4), "f4", 1) +REGDEF(F5, 5+FBASE, FMASK(5), "f5", 1) +REGDEF(F6, 6+FBASE, FMASK(6), "f6", 1) +REGDEF(F7, 7+FBASE, FMASK(7), "f7", 1) +REGDEF(F8, 8+FBASE, FMASK(8), "f8", 1) +REGDEF(F9, 9+FBASE, FMASK(9), "f9", 1) +REGDEF(F10, 10+FBASE, FMASK(10), "f10", 1) +REGDEF(F11, 11+FBASE, FMASK(11), "f11", 1) +REGDEF(F12, 12+FBASE, FMASK(12), "f12", 1) +REGDEF(F13, 13+FBASE, FMASK(13), "f13", 1) +REGDEF(F14, 14+FBASE, FMASK(14), "f14", 1) +REGDEF(F15, 15+FBASE, FMASK(15), "f15", 1) +REGDEF(F16, 16+FBASE, FMASK(16), "f16", 1) +REGDEF(F17, 17+FBASE, FMASK(17), "f17", 1) +REGDEF(F18, 18+FBASE, FMASK(18), "f18", 1) +REGDEF(F19, 19+FBASE, FMASK(19), "f19", 1) +REGDEF(F20, 20+FBASE, FMASK(20), "f20", 1) +REGDEF(F21, 21+FBASE, FMASK(21), "f21", 1) +REGDEF(F22, 22+FBASE, FMASK(22), "f22", 1) +REGDEF(F23, 23+FBASE, FMASK(23), "f23", 1) +REGDEF(F24, 24+FBASE, FMASK(24), "f24", 1) +REGDEF(F25, 25+FBASE, FMASK(25), "f25", 1) +REGDEF(F26, 26+FBASE, FMASK(26), "f26", 1) +REGDEF(F27, 27+FBASE, FMASK(27), "f27", 1) +REGDEF(F28, 28+FBASE, FMASK(28), "f28", 1) +REGDEF(F29, 29+FBASE, FMASK(29), "f29", 1) +REGDEF(F30, 30+FBASE, FMASK(30), "f30", 1) +REGDEF(F31, 31+FBASE, FMASK(31), "f31", 1) // The registers with values 64 (NBASE) and above are not real register numbers #define NBASE 64 // This must be last! 
-REGDEF(STK, 0+NBASE, 0x0000, "STK") +REGDEF(STK, 0+NBASE, 0x0000, "STK", 2) /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/jit/registerriscv64.h b/src/coreclr/jit/registerriscv64.h index fe6d3cf8ece424..2522fd3f5f9e91 100644 --- a/src/coreclr/jit/registerriscv64.h +++ b/src/coreclr/jit/registerriscv64.h @@ -15,39 +15,39 @@ #define RMASK(x) (1ULL << (x)) /* -REGDEF(name, rnum, mask, sname) */ -REGDEF(R0, 0, 0x0001, "zero") -REGDEF(RA, 1, 0x0002, "ra" ) -REGDEF(SP, 2, 0x0004, "sp" ) -REGDEF(GP, 3, 0x0008, "gp" ) -REGDEF(TP, 4, 0x0010, "tp" ) -REGDEF(T0, 5, 0x0020, "t0" ) -REGDEF(T1, 6, 0x0040, "t1" ) -REGDEF(T2, 7, 0x0080, "t2" ) -REGDEF(FP, 8, 0x0100, "fp" ) -REGDEF(S1, 9, 0x0200, "s1" ) -REGDEF(A0, 10, 0x0400, "a0" ) -REGDEF(A1, 11, 0x0800, "a1" ) -REGDEF(A2, 12, 0x1000, "a2" ) -REGDEF(A3, 13, 0x2000, "a3" ) -REGDEF(A4, 14, 0x4000, "a4" ) -REGDEF(A5, 15, 0x8000, "a5" ) -REGDEF(A6, 16, 0x10000, "a6" ) -REGDEF(A7, 17, 0x20000, "a7" ) -REGDEF(S2, 18, 0x40000, "s2" ) -REGDEF(S3, 19, 0x80000, "s3" ) -REGDEF(S4, 20, 0x100000, "s4" ) -REGDEF(S5, 21, 0x200000, "s5" ) -REGDEF(S6, 22, 0x400000, "s6" ) -REGDEF(S7, 23, 0x800000, "s7" ) -REGDEF(S8, 24, 0x1000000, "s8" ) -REGDEF(S9, 25, 0x2000000, "s9" ) -REGDEF(S10, 26, 0x4000000, "s10" ) -REGDEF(S11, 27, 0x8000000, "s11" ) -REGDEF(T3, 28, 0x10000000, "t3" ) -REGDEF(T4, 29, 0x20000000, "t4" ) -REGDEF(T5, 30, 0x40000000, "t5" ) -REGDEF(T6, 31, 0x80000000, "t6" ) +REGDEF(name, rnum, mask, sname, regTypeTag) */ +REGDEF(R0, 0, 0x0001, "zero", 0) +REGDEF(RA, 1, 0x0002, "ra" , 0) +REGDEF(SP, 2, 0x0004, "sp" , 0) +REGDEF(GP, 3, 0x0008, "gp" , 0) +REGDEF(TP, 4, 0x0010, "tp" , 0) +REGDEF(T0, 5, 0x0020, "t0" , 0) +REGDEF(T1, 6, 0x0040, "t1" , 0) +REGDEF(T2, 7, 0x0080, "t2", 0) +REGDEF(FP, 8, 0x0100, "fp", 0) +REGDEF(S1, 9, 0x0200, "s1", 0) +REGDEF(A0, 10, 0x0400, "a0", 0) +REGDEF(A1, 11, 0x0800, "a1", 0) +REGDEF(A2, 12, 0x1000, "a2", 0) +REGDEF(A3, 13, 0x2000, "a3", 0) +REGDEF(A4, 14, 0x4000, "a4", 0) +REGDEF(A5, 15, 0x8000, "a5", 0) +REGDEF(A6, 16, 0x10000, "a6", 0) +REGDEF(A7, 17, 0x20000, "a7", 0) +REGDEF(S2, 18, 0x40000, "s2", 0) +REGDEF(S3, 19, 0x80000, "s3", 0) +REGDEF(S4, 20, 0x100000, "s4", 0) +REGDEF(S5, 21, 0x200000, "s5", 0) +REGDEF(S6, 22, 0x400000, "s6", 0) +REGDEF(S7, 23, 0x800000, "s7", 0) +REGDEF(S8, 24, 0x1000000, "s8", 0) +REGDEF(S9, 25, 0x2000000, "s9", 0) +REGDEF(S10, 26, 0x4000000, "s10", 0) +REGDEF(S11, 27, 0x8000000, "s11", 0) +REGDEF(T3, 28, 0x10000000, "t3", 0) +REGDEF(T4, 29, 0x20000000, "t4", 0) +REGDEF(T5, 30, 0x40000000, "t5", 0) +REGDEF(T6, 31, 0x80000000, "t6", 0) REGALIAS(R8, FP) REGALIAS(ZERO, R0) @@ -57,43 +57,43 @@ REGALIAS(ZERO, R0) /* REGDEF(name, rnum, mask, sname) */ -REGDEF(F0, 0+FBASE, FMASK(0), "f0") -REGDEF(F1, 1+FBASE, FMASK(1), "f1") -REGDEF(F2, 2+FBASE, FMASK(2), "f2") -REGDEF(F3, 3+FBASE, FMASK(3), "f3") -REGDEF(F4, 4+FBASE, FMASK(4), "f4") -REGDEF(F5, 5+FBASE, FMASK(5), "f5") -REGDEF(F6, 6+FBASE, FMASK(6), "f6") -REGDEF(F7, 7+FBASE, FMASK(7), "f7") -REGDEF(F8, 8+FBASE, FMASK(8), "f8") -REGDEF(F9, 9+FBASE, FMASK(9), "f9") -REGDEF(F10, 10+FBASE, FMASK(10), "f10") -REGDEF(F11, 11+FBASE, FMASK(11), "f11") -REGDEF(F12, 12+FBASE, FMASK(12), "f12") -REGDEF(F13, 13+FBASE, FMASK(13), "f13") -REGDEF(F14, 14+FBASE, FMASK(14), "f14") -REGDEF(F15, 15+FBASE, FMASK(15), "f15") -REGDEF(F16, 16+FBASE, FMASK(16), "f16") -REGDEF(F17, 17+FBASE, FMASK(17), "f17") -REGDEF(F18, 18+FBASE, FMASK(18), "f18") -REGDEF(F19, 19+FBASE, FMASK(19), "f19") -REGDEF(F20, 
20+FBASE, FMASK(20), "f20") -REGDEF(F21, 21+FBASE, FMASK(21), "f21") -REGDEF(F22, 22+FBASE, FMASK(22), "f22") -REGDEF(F23, 23+FBASE, FMASK(23), "f23") -REGDEF(F24, 24+FBASE, FMASK(24), "f24") -REGDEF(F25, 25+FBASE, FMASK(25), "f25") -REGDEF(F26, 26+FBASE, FMASK(26), "f26") -REGDEF(F27, 27+FBASE, FMASK(27), "f27") -REGDEF(F28, 28+FBASE, FMASK(28), "f28") -REGDEF(F29, 29+FBASE, FMASK(29), "f29") -REGDEF(F30, 30+FBASE, FMASK(30), "f30") -REGDEF(F31, 31+FBASE, FMASK(31), "f31") +REGDEF(F0, 0+FBASE, FMASK(0), "f0", 1) +REGDEF(F1, 1+FBASE, FMASK(1), "f1", 1) +REGDEF(F2, 2+FBASE, FMASK(2), "f2", 1) +REGDEF(F3, 3+FBASE, FMASK(3), "f3", 1) +REGDEF(F4, 4+FBASE, FMASK(4), "f4", 1) +REGDEF(F5, 5+FBASE, FMASK(5), "f5", 1) +REGDEF(F6, 6+FBASE, FMASK(6), "f6", 1) +REGDEF(F7, 7+FBASE, FMASK(7), "f7", 1) +REGDEF(F8, 8+FBASE, FMASK(8), "f8", 1) +REGDEF(F9, 9+FBASE, FMASK(9), "f9", 1) +REGDEF(F10, 10+FBASE, FMASK(10), "f10", 1) +REGDEF(F11, 11+FBASE, FMASK(11), "f11", 1) +REGDEF(F12, 12+FBASE, FMASK(12), "f12", 1) +REGDEF(F13, 13+FBASE, FMASK(13), "f13", 1) +REGDEF(F14, 14+FBASE, FMASK(14), "f14", 1) +REGDEF(F15, 15+FBASE, FMASK(15), "f15", 1) +REGDEF(F16, 16+FBASE, FMASK(16), "f16", 1) +REGDEF(F17, 17+FBASE, FMASK(17), "f17", 1) +REGDEF(F18, 18+FBASE, FMASK(18), "f18", 1) +REGDEF(F19, 19+FBASE, FMASK(19), "f19", 1) +REGDEF(F20, 20+FBASE, FMASK(20), "f20", 1) +REGDEF(F21, 21+FBASE, FMASK(21), "f21", 1) +REGDEF(F22, 22+FBASE, FMASK(22), "f22", 1) +REGDEF(F23, 23+FBASE, FMASK(23), "f23", 1) +REGDEF(F24, 24+FBASE, FMASK(24), "f24", 1) +REGDEF(F25, 25+FBASE, FMASK(25), "f25", 1) +REGDEF(F26, 26+FBASE, FMASK(26), "f26", 1) +REGDEF(F27, 27+FBASE, FMASK(27), "f27", 1) +REGDEF(F28, 28+FBASE, FMASK(28), "f28", 1) +REGDEF(F29, 29+FBASE, FMASK(29), "f29", 1) +REGDEF(F30, 30+FBASE, FMASK(30), "f30", 1) +REGDEF(F31, 31+FBASE, FMASK(31), "f31", 1) // The registers with values 64 (NBASE) and above are not real register numbers #define NBASE 64 -REGDEF(STK, 0+NBASE, 0x0000, "STK") +REGDEF(STK, 0+NBASE, 0x0000, "STK", 2) /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 12975850a404ba..09b892377ae61f 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -25,12 +25,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(TARGET_ARM64) const regMaskSmall regMasks[] = { -#define REGDEF(name, rnum, mask, xname, wname) mask, +#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) mask, #include "register.h" }; #else // !TARGET_ARM64 const regMaskSmall regMasks[] = { -#define REGDEF(name, rnum, mask, sname) mask, +#define REGDEF(name, rnum, mask, sname, regTypeTag) mask, #include "register.h" }; #endif @@ -45,7 +45,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ //------------------------------------------------------------------------ -// verifyRegUsed: verify that the register is marked as used. +// verifyGprRegUsed: verify that the GPR register is marked as used. // // Arguments: // reg - The register to verify. @@ -61,12 +61,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // This method is intended to be called during code generation, and // should simply validate that the register (or registers) have // already been added to the modified set. 
+// +void RegSet::verifyGprRegUsed(regNumber reg) +{ + assert(emitter::isGeneralRegister(reg)); + rsSetGprRegsModified(genRegMask(reg)); +} +//------------------------------------------------------------------------ +// verifyRegUsed: verify that the mask register is marked as used. +// +// Arguments: +// reg - The register to verify. +// +// Return Value: +// None. +// +// Assumptions: +// The caller must have ensured that the register is already marked +// as used. +// +// Notes: +// This method is intended to be called during code generation, and +// should simply validate that the register (or registers) have +// already been added to the modified set. void RegSet::verifyRegUsed(regNumber reg) { - // TODO-Cleanup: we need to identify the places where the register - // is not marked as used when this is called. - rsSetRegsModified(genRegMask(reg)); + rsSetRegModified(reg); } //------------------------------------------------------------------------ @@ -87,21 +108,27 @@ void RegSet::verifyRegUsed(regNumber reg) // should simply validate that the register (or registers) have // already been added to the modified set. -void RegSet::verifyRegistersUsed(regMaskTP regMask) +void RegSet::verifyRegistersUsed(CONSTREF_AllRegsMask regs) { if (m_rsCompiler->opts.OptimizationDisabled()) { return; } - if (regMask == RBM_NONE) + if (regs.IsEmpty()) { return; } - // TODO-Cleanup: we need to identify the places where the registers - // are not marked as used when this is called. - rsSetRegsModified(regMask); + // TODO-Cleanup: + // We need to identify the places where the register + // is not marked as used when this is called. + // + // See https://github.com/dotnet/runtime/issues/10411 and + // https://github.com/dotnet/coreclr/pull/18230 on why we call + // rsSetGprRegsModified() instead of assert(rsRegsModified()) + + rsSetRegsModified(regs); } void RegSet::rsClearRegsModified() @@ -116,23 +143,24 @@ void RegSet::rsClearRegsModified() rsModifiedRegsMaskInitialized = true; #endif // DEBUG - rsModifiedRegsMask = RBM_NONE; + rsModifiedRegsMask.Clear(); #ifdef SWIFT_SUPPORT // If this method has a SwiftError* parameter, we will return SwiftError::Value in REG_SWIFT_ERROR, // so don't treat it as callee-save. if (m_rsCompiler->lvaSwiftErrorArg != BAD_VAR_NUM) { - rsAllCalleeSavedMask &= ~RBM_SWIFT_ERROR; rsIntCalleeSavedMask &= ~RBM_SWIFT_ERROR; } #endif // SWIFT_SUPPORT } -void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump)) +#ifdef DEBUG +void RegSet::printModifiedRegsMask(regMaskOnlyOne currentMask, + regMaskOnlyOne modifiedMask DEBUGARG(bool suppressDump) + DEBUGARG(regMaskOnlyOne calleeSaveMask)) const { - assert(mask != RBM_NONE); - assert(rsModifiedRegsMaskInitialized); + regMaskOnlyOne newMask = (currentMask | modifiedMask); // We can't update the modified registers set after final frame layout (that is, during code // generation and after). Ignore prolog and epilog generation: they call register tracking to @@ -141,90 +169,181 @@ void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump)) // Frame layout is only affected by callee-saved registers, so only ensure that callee-saved // registers aren't modified after final frame layout. 
assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog || - m_rsCompiler->compGeneratingEpilog || - (((rsModifiedRegsMask | mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED))); + m_rsCompiler->compGeneratingEpilog || ((newMask & calleeSaveMask) == (currentMask & calleeSaveMask))); -#ifdef DEBUG if (m_rsCompiler->verbose && !suppressDump) { - if (rsModifiedRegsMask != (rsModifiedRegsMask | mask)) + if (currentMask != newMask) { printf("Marking regs modified: "); - dspRegMask(mask); + dspRegMask(modifiedMask); printf(" ("); - dspRegMask(rsModifiedRegsMask); + dspRegMask(currentMask); printf(" => "); - dspRegMask(rsModifiedRegsMask | mask); + dspRegMask(newMask); printf(")\n"); } } +} +#endif + +AllRegsMask RegSet::rsGetModifiedCalleeSavedRegsMask() const +{ + assert(rsModifiedRegsMaskInitialized); + AllRegsMask allCalleeSavedMask = m_rsCompiler->AllRegsMask_CALLEE_SAVED; +#ifdef SWIFT_SUPPORT + // If this method has a SwiftError* parameter, we will return SwiftError::Value in REG_SWIFT_ERROR, + // so don't treat it as callee-save. + if (m_rsCompiler->lvaSwiftErrorArg != BAD_VAR_NUM) + { + allCalleeSavedMask.RemoveRegNum(REG_SWIFT_ERROR, TYP_INT); + } +#endif // SWIFT_SUPPORT + return (rsModifiedRegsMask & allCalleeSavedMask); +} + +void RegSet::rsSetGprRegsModified(regMaskGpr mask DEBUGARG(bool suppressDump)) +{ + assert(m_rsCompiler->IsGprRegMask(mask)); + assert(rsModifiedRegsMaskInitialized); +#ifdef DEBUG + printModifiedRegsMask(rsModifiedRegsMask.gprRegs(), mask DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_INT_CALLEE_SAVED)); +#endif + + rsModifiedRegsMask.AddGprRegMask(mask); +} + +void RegSet::rsSetFloatRegsModified(regMaskFloat mask DEBUGARG(bool suppressDump)) +{ + assert(m_rsCompiler->IsFloatRegMask(mask)); + assert(rsModifiedRegsMaskInitialized); +#ifdef DEBUG + printModifiedRegsMask(rsModifiedRegsMask.floatRegs(m_rsCompiler), + mask DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_FLT_CALLEE_SAVED)); +#endif + + rsModifiedRegsMask.AddFloatRegMask(mask); +} + +void RegSet::rsSetRegModified(regNumber reg DEBUGARG(bool suppressDump)) +{ + assert(rsModifiedRegsMaskInitialized); + +#ifdef DEBUG + if (genIsValidIntReg(reg)) + { + printModifiedRegsMask(rsModifiedRegsMask.gprRegs(), + genRegMask(reg) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_INT_CALLEE_SAVED)); + } + else if (genIsValidFloatReg(reg)) + { + printModifiedRegsMask(rsModifiedRegsMask.floatRegs(m_rsCompiler), + genRegMask(reg) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_FLT_CALLEE_SAVED)); + } +#ifdef FEATURE_MASKED_HW_INTRINSICS + else + { + assert(genIsValidMaskReg(reg)); + printModifiedRegsMask(rsModifiedRegsMask.predicateRegs(m_rsCompiler), + genRegMask(reg) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_MSK_CALLEE_SAVED)); + } +#endif // FEATURE_MASKED_HW_INTRINSICS #endif // DEBUG - rsModifiedRegsMask |= mask; + rsModifiedRegsMask.AddRegNumInMask(reg); } -void RegSet::rsRemoveRegsModified(regMaskTP mask) +void RegSet::rsSetRegsModified(CONSTREF_AllRegsMask modifiedMask DEBUGARG(bool suppressDump)) +{ + // TODO: Commented this, so that caller don't have to check if modifiedMask is not RBM_NONE + // It doesn't harm if this was RBM_NONE, as it will not modify the trackingMask + // assert(modifiedMask != RBM_NONE); + assert(rsModifiedRegsMaskInitialized); + +#ifdef DEBUG + printModifiedRegsMask(rsModifiedRegsMask.gprRegs(), + modifiedMask.gprRegs() DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_INT_CALLEE_SAVED)); + printModifiedRegsMask(rsModifiedRegsMask.floatRegs(m_rsCompiler), + 
modifiedMask.floatRegs(m_rsCompiler) DEBUG_ARG(suppressDump) DEBUG_ARG(RBM_FLT_CALLEE_SAVED)); +#ifdef FEATURE_MASKED_HW_INTRINSICS + printModifiedRegsMask(rsModifiedRegsMask.predicateRegs(m_rsCompiler), modifiedMask.predicateRegs(m_rsCompiler) + DEBUG_ARG(suppressDump) + DEBUG_ARG(RBM_MSK_CALLEE_SAVED)); +#endif // FEATURE_MASKED_HW_INTRINSICS +#endif // DEBUG + + rsModifiedRegsMask |= modifiedMask; +} + +void RegSet::rsRemoveRegsModified(regMaskGpr mask) { assert(mask != RBM_NONE); assert(rsModifiedRegsMaskInitialized); + assert(m_rsCompiler->IsGprRegMask(mask)); +#ifdef DEBUG + regMaskGpr rsModifiedGprRegsMask = rsGetModifiedGprRegsMask(); // See comment in rsSetRegsModified(). assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog || m_rsCompiler->compGeneratingEpilog || - (((rsModifiedRegsMask & ~mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED))); + (((rsModifiedGprRegsMask & ~mask) & RBM_CALLEE_SAVED) == (rsModifiedGprRegsMask & RBM_CALLEE_SAVED))); -#ifdef DEBUG if (m_rsCompiler->verbose) { printf("Removing modified regs: "); - dspRegMask(mask); - if (rsModifiedRegsMask == (rsModifiedRegsMask & ~mask)) + dspRegMask(mask, RBM_NONE); + if (rsModifiedGprRegsMask == (rsModifiedGprRegsMask & ~mask)) { printf(" (unchanged)"); } else { printf(" ("); - dspRegMask(rsModifiedRegsMask); + dspRegMask(rsModifiedGprRegsMask, RBM_NONE); printf(" => "); - dspRegMask(rsModifiedRegsMask & ~mask); + dspRegMask(rsModifiedGprRegsMask & ~mask, RBM_NONE); printf(")"); } printf("\n"); } #endif // DEBUG - rsModifiedRegsMask &= ~mask; + rsModifiedRegsMask.RemoveRegTypeFromMask(mask, TYP_INT); } -void RegSet::SetMaskVars(regMaskTP newMaskVars) +void RegSet::ClearMaskVars() // Like SetMaskVars(AllRegsMask), but without any debug output. 
+{ + _rsAllMaskVars.Clear(); +} + +void RegSet::SetMaskVars(CONSTREF_AllRegsMask newMaskVars) { #ifdef DEBUG if (m_rsCompiler->verbose) { printf("\t\t\t\t\t\t\tLive regs: "); - if (_rsMaskVars == newMaskVars) + if (_rsAllMaskVars == newMaskVars) { printf("(unchanged) "); } else { - printRegMask(_rsMaskVars); - m_rsCompiler->GetEmitter()->emitDispRegSet(_rsMaskVars); + printRegMask(_rsAllMaskVars); + m_rsCompiler->GetEmitter()->emitDispRegSet(_rsAllMaskVars); // deadSet = old - new - regMaskTP deadSet = _rsMaskVars & ~newMaskVars; + AllRegsMask deadSet = _rsAllMaskVars & ~newMaskVars; // bornSet = new - old - regMaskTP bornSet = newMaskVars & ~_rsMaskVars; + AllRegsMask bornSet = newMaskVars & ~_rsAllMaskVars; - if (deadSet != RBM_NONE) + if (!deadSet.IsEmpty()) { printf(" -"); m_rsCompiler->GetEmitter()->emitDispRegSet(deadSet); } - if (bornSet != RBM_NONE) + if (!bornSet.IsEmpty()) { printf(" +"); m_rsCompiler->GetEmitter()->emitDispRegSet(bornSet); @@ -238,7 +357,7 @@ void RegSet::SetMaskVars(regMaskTP newMaskVars) } #endif // DEBUG - _rsMaskVars = newMaskVars; + _rsAllMaskVars = newMaskVars; } /*****************************************************************************/ @@ -259,19 +378,23 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) rsMaskResvd = RBM_NONE; -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) + rsGprMaskCalleeSaved = RBM_NONE; + rsFloatMaskCalleeSaved = RBM_NONE; +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) rsMaskCalleeSaved = RBM_NONE; -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 + +#ifdef FEATURE_MASKED_HW_INTRINSICS + rsPredicateMaskCalleeSaved = RBM_NONE; +#endif // FEATURE_MASKED_HW_INTRINSICS #ifdef TARGET_ARM rsMaskPreSpillRegArg = RBM_NONE; rsMaskPreSpillAlign = RBM_NONE; #endif -#ifdef SWIFT_SUPPORT - rsAllCalleeSavedMask = RBM_CALLEE_SAVED; rsIntCalleeSavedMask = RBM_INT_CALLEE_SAVED; -#endif // SWIFT_SUPPORT #ifdef DEBUG rsModifiedRegsMaskInitialized = false; @@ -347,18 +470,12 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */) treeType = tree->TypeGet(); } - var_types tempType = RegSet::tmpNormalizeType(treeType); - regMaskTP mask; + var_types tempType = RegSet::tmpNormalizeType(treeType); bool floatSpill = false; if (isFloatRegType(treeType)) { floatSpill = true; - mask = genRegMaskFloat(reg ARM_ARG(treeType)); - } - else - { - mask = genRegMask(reg); } rsNeededSpillReg = true; @@ -952,15 +1069,15 @@ regNumber genRegArgNext(regNumber argReg) * register numbers and corresponding bitmaps. 
*/ -const regNumber raRegCalleeSaveOrder[] = {REG_CALLEE_SAVED_ORDER}; -const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALLEE_SAVED_ORDER}; +const regNumber raRegCalleeSaveOrder[] = {REG_CALLEE_SAVED_ORDER}; +const regMaskGpr raRbmCalleeSaveOrder[] = {RBM_CALLEE_SAVED_ORDER}; regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) { regMaskSmall res = 0; for (int i = 0; i < CNT_CALLEE_SAVED; i++) { - if ((calleeSaveMask & ((regMaskTP)1 << i)) != 0) + if ((calleeSaveMask & ((regMaskOnlyOne)1 << i)) != 0) { res |= raRbmCalleeSaveOrder[i]; } diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index dae93baebad306..2709e3898ecaaa 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -43,7 +43,7 @@ class RegSet RegSet(Compiler* compiler, GCInfo& gcInfo); #ifdef TARGET_ARM - regMaskTP rsMaskPreSpillRegs(bool includeAlignment) const + regMaskGpr rsMaskPreSpillRegs(bool includeAlignment) const { return includeAlignment ? (rsMaskPreSpillRegArg | rsMaskPreSpillAlign) : rsMaskPreSpillRegArg; } @@ -67,111 +67,141 @@ class RegSet // private: - bool rsNeededSpillReg; // true if this method needed to spill any registers - regMaskTP rsModifiedRegsMask; // mask of the registers modified by the current function. + bool rsNeededSpillReg; // true if this method needed to spill any registers + AllRegsMask rsModifiedRegsMask; // mask of the registers modified by the current function. #ifdef DEBUG bool rsModifiedRegsMaskInitialized; // Has rsModifiedRegsMask been initialized? Guards against illegal use. -#endif // DEBUG + void printModifiedRegsMask(regMaskOnlyOne currentMask, + regMaskOnlyOne modifiedMask DEBUGARG(bool suppressDump = false) + DEBUGARG(regMaskOnlyOne calleeSaveMask = RBM_NONE)) const; +#endif // DEBUG -#ifdef SWIFT_SUPPORT - regMaskTP rsAllCalleeSavedMask; - regMaskTP rsIntCalleeSavedMask; -#else // !SWIFT_SUPPORT - static constexpr regMaskTP rsAllCalleeSavedMask = RBM_CALLEE_SAVED; - static constexpr regMaskTP rsIntCalleeSavedMask = RBM_INT_CALLEE_SAVED; -#endif // !SWIFT_SUPPORT + regMaskGpr rsIntCalleeSavedMask; public: - regMaskTP rsGetModifiedRegsMask() const + void rsSetRegsModified(CONSTREF_AllRegsMask modifiedMask DEBUGARG(bool suppressDump = false)); + void rsSetRegModified(regNumber reg DEBUGARG(bool suppressDump = false)); + + CONSTREF_AllRegsMask rsGetModifiedRegsMask() const { assert(rsModifiedRegsMaskInitialized); return rsModifiedRegsMask; } - regMaskTP rsGetModifiedCalleeSavedRegsMask() const + AllRegsMask rsGetModifiedCalleeSavedRegsMask() const; + + regMaskGpr rsGetModifiedIntCalleeSavedRegsMask() const { assert(rsModifiedRegsMaskInitialized); - return (rsModifiedRegsMask & rsAllCalleeSavedMask); + return (rsGetModifiedGprRegsMask() & rsIntCalleeSavedMask); } - regMaskTP rsGetModifiedIntCalleeSavedRegsMask() const + regMaskGpr rsGetModifiedGprRegsMask() const { assert(rsModifiedRegsMaskInitialized); - return (rsModifiedRegsMask & rsIntCalleeSavedMask); + return rsModifiedRegsMask.gprRegs(); } -#ifdef TARGET_AMD64 - regMaskTP rsGetModifiedOsrIntCalleeSavedRegsMask() const + regMaskFloat rsGetModifiedFloatRegsMask() const { assert(rsModifiedRegsMaskInitialized); - return (rsModifiedRegsMask & (rsIntCalleeSavedMask | RBM_EBP)); + return rsModifiedRegsMask.floatRegs(m_rsCompiler); } -#endif // TARGET_AMD64 - regMaskTP rsGetModifiedFltCalleeSavedRegsMask() const +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate rsGetModifiedPredicateRegsMask() const { assert(rsModifiedRegsMaskInitialized); - return (rsModifiedRegsMask & 
RBM_FLT_CALLEE_SAVED); + return rsModifiedRegsMask.predicateRegs(m_rsCompiler); + } +#endif // FEATURE_MASKED_HW_INTRINSICS + + regMaskGpr rsGetModifiedRegsMask(var_types type) const + { + return rsModifiedRegsMask.GetRegMaskForType(type); } void rsClearRegsModified(); + void rsSetGprRegsModified(regMaskGpr mask DEBUGARG(bool suppressDump = false)); + void rsSetFloatRegsModified(regMaskFloat mask DEBUGARG(bool suppressDump = false)); - void rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump = false)); + void rsRemoveRegsModified(regMaskGpr mask); - void rsRemoveRegsModified(regMaskTP mask); + bool rsRegsModified(regMaskGpr mask) const + { + assert(rsModifiedRegsMaskInitialized); + return (rsModifiedRegsMask.gprRegs() & mask) != 0; + } - bool rsRegsModified(regMaskTP mask) const + bool rsRegsModified(CONSTREF_AllRegsMask mask) const { assert(rsModifiedRegsMaskInitialized); - return (rsModifiedRegsMask & mask) != 0; + return !((rsModifiedRegsMask & mask).IsEmpty()); } void verifyRegUsed(regNumber reg); + void verifyGprRegUsed(regNumber reg); - void verifyRegistersUsed(regMaskTP regMask); + void verifyRegistersUsed(CONSTREF_AllRegsMask mask); public: - regMaskTP GetMaskVars() const // 'get' property function for rsMaskVars property + regMaskOnlyOne GetMaskVars(var_types type) const // 'get' property function for rsMaskVars property { - return _rsMaskVars; + return _rsAllMaskVars.GetRegMaskForType(type); } - void SetMaskVars(regMaskTP newMaskVars); // 'put' property function for rsMaskVars property - - void AddMaskVars(regMaskTP addMaskVars) // union 'addMaskVars' with the rsMaskVars set + regMaskGpr GetGprMaskVars() const // 'get' property function for rsMaskVars property { - SetMaskVars(_rsMaskVars | addMaskVars); + return _rsAllMaskVars.gprRegs(); } - void RemoveMaskVars(regMaskTP removeMaskVars) // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD) + void SetMaskVars(CONSTREF_AllRegsMask newMaskVars); // 'put' property function for rsMaskVars property + + void AddMaskVars(var_types type, regMaskOnlyOne addMaskVars) // union 'addMaskVars' with the rsMaskVars set { - SetMaskVars(_rsMaskVars & ~removeMaskVars); + AllRegsMask newMask = _rsAllMaskVars; + newMask.AddRegMaskForType(addMaskVars, type); + SetMaskVars(newMask); } - void ClearMaskVars() // Like SetMaskVars(RBM_NONE), but without any debug output. + // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD) + void RemoveMaskVars(var_types type, regMaskOnlyOne removeMaskVars) { - _rsMaskVars = RBM_NONE; + // TODO: Skip assigning to newMask, just update _rsAllMaskVars directly. The only thing remaining + // would be to print the change if (newMask != _rsAllMaskVars). + AllRegsMask newMask = _rsAllMaskVars; + newMask.RemoveRegTypeFromMask(removeMaskVars, type); + SetMaskVars(newMask); } + void ClearMaskVars(); // Like SetMaskVars(RBM_NONE), but without any debug output. 
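A minimal sketch of how call sites move to the typed RegSet interface above; the registers chosen are hypothetical and only illustrate which overload applies:

    // Tracking live variable registers: the mask is now tagged with its register class.
    regSet.AddMaskVars(TYP_INT, RBM_R4 | RBM_R5);      // was: AddMaskVars(RBM_R4 | RBM_R5)
    regSet.RemoveMaskVars(TYP_DOUBLE, RBM_V8);         // was: RemoveMaskVars(RBM_V8)

    // Recording modified registers: per-class setters replace the single regMaskTP setter.
    regSet.rsSetGprRegsModified(RBM_R12);              // was: rsSetRegsModified(RBM_R12)
    regSet.rsSetFloatRegsModified(RBM_V16);            // was: rsSetRegsModified(RBM_V16)
    regSet.verifyRegUsed(REG_R12);                     // unchanged: rsSetRegModified dispatches on the register's class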
+ private: - regMaskTP _rsMaskVars; // backing store for rsMaskVars property + AllRegsMask _rsAllMaskVars; // backing store for rsMaskVars property + +#if defined(TARGET_ARMARCH) + regMaskGpr rsGprMaskCalleeSaved; + regMaskFloat rsFloatMaskCalleeSaved; +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + regMaskMixed rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_LOONGARCH64 -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#ifdef FEATURE_MASKED_HW_INTRINSICS + regMaskPredicate rsPredicateMaskCalleeSaved; +#endif -public: // TODO-Cleanup: Should be private, but Compiler uses it - regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) +public: // TODO-Cleanup: Should be private, but Compiler uses it + regMaskGpr rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) public: // The PreSpill masks are used in LclVars.cpp #ifdef TARGET_ARM - regMaskTP rsMaskPreSpillAlign; // Mask of alignment padding added to prespill to keep double aligned args - // at aligned stack addresses. - regMaskTP rsMaskPreSpillRegArg; // mask of incoming registers that are spilled at the start of the prolog - // This includes registers used to pass a struct (or part of a struct) - // and all enregistered user arguments in a varargs call -#endif // TARGET_ARM + regMaskGpr rsMaskPreSpillAlign; // Mask of alignment padding added to prespill to keep double aligned args + // at aligned stack addresses. + regMaskGpr rsMaskPreSpillRegArg; // mask of incoming registers that are spilled at the start of the prolog + // This includes registers used to pass a struct (or part of a struct) + // and all enregistered user arguments in a varargs call +#endif // TARGET_ARM private: //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 06777fa9d5f709..a8773eb818c80e 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -109,8 +109,8 @@ inline bool compUnixX86Abi() #if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) enum _regNumber_enum : unsigned { -#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, -#define REGALIAS(alias, realname) REG_##alias = REG_##realname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) REG_##name = rnum, +#define REGALIAS(alias, realname) REG_##alias = REG_##realname, #include "register.h" REG_COUNT, @@ -121,8 +121,8 @@ enum _regNumber_enum : unsigned enum _regMask_enum : unsigned __int64 { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) RBM_##name = mask, +#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, #include "register.h" }; @@ -130,8 +130,8 @@ enum _regMask_enum : unsigned __int64 enum _regNumber_enum : unsigned { -#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum, -#define REGALIAS(alias, realname) REG_##alias = REG_##realname, +#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) REG_##name = rnum, +#define REGALIAS(alias, realname) REG_##alias = REG_##realname, #include "register.h" REG_COUNT, @@ -142,8 +142,8 @@ 
enum _regNumber_enum : unsigned enum _regMask_enum : unsigned __int64 { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) RBM_##name = mask, +#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, #include "register.h" }; @@ -151,8 +151,8 @@ enum _regMask_enum : unsigned __int64 enum _regNumber_enum : unsigned { -#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, -#define REGALIAS(alias, realname) REG_##alias = REG_##realname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) REG_##name = rnum, +#define REGALIAS(alias, realname) REG_##alias = REG_##realname, #include "register.h" REG_COUNT, @@ -164,8 +164,8 @@ enum _regMask_enum : uint64_t { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) RBM_##name = mask, +#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, #include "register.h" }; @@ -173,8 +173,8 @@ enum _regMask_enum : uint64_t enum _regNumber_enum : unsigned { -#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, -#define REGALIAS(alias, realname) REG_##alias = REG_##realname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) REG_##name = rnum, +#define REGALIAS(alias, realname) REG_##alias = REG_##realname, #include "register.h" REG_COUNT, @@ -186,8 +186,8 @@ enum _regMask_enum : unsigned { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, sname, regTypeTag) RBM_##name = mask, +#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, #include "register.h" }; @@ -197,6 +197,13 @@ enum _regMask_enum : unsigned #define AVAILABLE_REG_COUNT get_AVAILABLE_REG_COUNT() +#ifdef TARGET_ARM64 +#define HAS_MORE_THAN_64_REGISTERS 1 +#define MORE_THAN_64_REG_ARG(x) , x +#else +#define MORE_THAN_64_REG_ARG(x) +#endif + /*****************************************************************************/ // TODO-Cleanup: The types defined below are mildly confusing: why are there both? @@ -208,21 +215,186 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. +typedef unsigned __int32 RegBitSet32; + #if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; +typedef unsigned __int64 regMaskGpr; +typedef unsigned __int64 regMaskFloat; +typedef unsigned __int64 regMaskPredicate; +typedef unsigned __int64 RegBitSet64; + +// Represents that the mask in this type is from one of the register type - gpr/float/predicate +// but not more than 1. 
+typedef unsigned __int64 regMaskOnlyOne; +typedef unsigned __int64 singleRegMask; + #else +// x86 and arm typedef unsigned regMaskTP; +typedef unsigned RegBitSet64; + +#define regMaskGpr regMaskTP +#define regMaskFloat regMaskTP +#define regMaskPredicate regMaskTP +#define regMaskOnlyOne regMaskTP +#define regMaskMixed regMaskTP +#define singleRegMask regMaskTP +#endif // defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + +unsigned genCountBits(uint64_t bits); + +typedef _regNumber_enum regNumber; +typedef unsigned char regNumberSmall; + +typedef struct _regMaskAll +{ +private: +#ifdef HAS_MORE_THAN_64_REGISTERS + union + { + RegBitSet32 _registers[3]; + struct + { + union + { + // Represents combined registers bitset including gpr/float + RegBitSet64 _combinedRegisters; + struct + { + RegBitSet32 _gprRegs; + RegBitSet32 _floatRegs; + }; + }; + RegBitSet32 _predicateRegs; + }; + }; +#else + // Represents combined registers bitset including gpr/float and on some platforms + // mask or predicate registers + RegBitSet64 _combinedRegisters; +#endif + // TODO: Come up with a name of variable such that: + // 1. If HAS_MORE_THAN_64_REGISTERS==1, it represents float_gpr combined + // 2. If HAS_MORE_THAN_64_REGISTERS==0, it represents all registers possible be - gpr/float/predicate in same place + // Once we have that, we can just use and remove some of the #ifdef HAS_MORE_THAN_64_REGISTERS + + // This method shifts the high-32 bits of float to low-32 bits and return. + // For gpr and predicate registers, it returns the same value. + FORCEINLINE static RegBitSet32 encodeForIndex(int index, RegBitSet64 value) + { + int shiftAmount = 32 * (index == 1); + return (RegBitSet32)(value >> shiftAmount); + } + + FORCEINLINE static RegBitSet64 decodeForIndex(int index, RegBitSet32 value) + { + int shiftAmount = 32 * (index == 1); + return ((RegBitSet64)value << shiftAmount); + } + +public: + FORCEINLINE regMaskGpr gprRegs() const; + FORCEINLINE regMaskFloat floatRegs(const Compiler* compiler) const; + + // #ifdef DEBUG + +#ifdef FEATURE_MASKED_HW_INTRINSICS + FORCEINLINE regMaskPredicate predicateRegs(const Compiler* compiler) const; +#endif // FEATURE_MASKED_HW_INTRINSICS + + _regMaskAll(RegBitSet64 gprRegMask, RegBitSet64 floatRegMask) +#ifdef HAS_MORE_THAN_64_REGISTERS + : _combinedRegisters(floatRegMask | gprRegMask) + , _predicateRegs(RBM_NONE) +#else + : _combinedRegisters(floatRegMask | gprRegMask) +#endif + { + } + + // TODO: See if we can avoid the '|' operation here. 
+ _regMaskAll(RegBitSet64 gprRegMask, RegBitSet64 floatRegMask, RegBitSet64 predicateRegs) +#ifdef HAS_MORE_THAN_64_REGISTERS + : _combinedRegisters(floatRegMask | gprRegMask) + , _predicateRegs((RegBitSet32)predicateRegs) +#else + : _combinedRegisters(predicateRegs | floatRegMask | gprRegMask) +#endif + { + } + + _regMaskAll() +#ifdef HAS_MORE_THAN_64_REGISTERS + : _combinedRegisters(RBM_NONE) + , _predicateRegs(RBM_NONE) +#else + : _combinedRegisters(RBM_NONE) +#endif + { + } + + _regMaskAll(RegBitSet64 allRegistersMask) +#ifdef HAS_MORE_THAN_64_REGISTERS + : _combinedRegisters(allRegistersMask) + , _predicateRegs(RBM_NONE) +#else + : _combinedRegisters(allRegistersMask) +#endif + { + } + + FORCEINLINE void Clear(); + FORCEINLINE bool IsEmpty() const; + FORCEINLINE unsigned Count() const; + FORCEINLINE void AddGprRegInMask(regNumber reg); + FORCEINLINE void AddRegMaskForType(regMaskOnlyOne maskToAdd, var_types type); + FORCEINLINE void AddGprRegMask(regMaskGpr maskToAdd); + FORCEINLINE void AddFloatRegMask(regMaskFloat maskToAdd); + + FORCEINLINE void AddRegNumInMask(regNumber reg); + FORCEINLINE void AddRegNum(regNumber reg, var_types type); + FORCEINLINE void RemoveRegNumFromMask(regNumber reg); + FORCEINLINE void RemoveRegNum(regNumber reg, var_types type); + FORCEINLINE bool IsRegNumInMask(regNumber reg) const; + FORCEINLINE bool IsRegNumPresent(regNumber reg, var_types type) const; + +#ifdef TARGET_ARM + FORCEINLINE void AddRegNumInMask(regNumber reg, var_types type); + FORCEINLINE void RemoveRegNumFromMask(regNumber reg, var_types type); + FORCEINLINE bool IsRegNumInMask(regNumber reg, var_types type) const; #endif -#if REGMASK_BITS == 8 -typedef unsigned char regMaskSmall; -#define REG_MASK_INT_FMT "%02X" -#define REG_MASK_ALL_FMT "%02X" -#elif REGMASK_BITS == 16 -typedef unsigned short regMaskSmall; -#define REG_MASK_INT_FMT "%04X" -#define REG_MASK_ALL_FMT "%04X" -#elif REGMASK_BITS == 32 + FORCEINLINE void RemoveRegTypeFromMask(regMaskOnlyOne regMaskToRemove, var_types type); + FORCEINLINE bool IsGprMaskPresent(regMaskGpr maskToCheck) const; + FORCEINLINE bool IsFloatMaskPresent(Compiler* compiler, regMaskFloat maskToCheck) const; + FORCEINLINE regMaskOnlyOne GetRegMaskForType(var_types type) const; + + FORCEINLINE bool IsGprOrFloatPresent() const; + FORCEINLINE RegBitSet64 GetGprFloatCombinedMask() const; +#ifndef HAS_MORE_THAN_64_REGISTERS + FORCEINLINE RegBitSet64 GetAllRegistersMask() const; +#endif // !HAS_MORE_THAN_64_REGISTERS + + FORCEINLINE regMaskOnlyOne operator[](int index) const; + FORCEINLINE void operator|=(const _regMaskAll& other); + FORCEINLINE void operator&=(const _regMaskAll& other); + FORCEINLINE void operator|=(const regNumber reg); + FORCEINLINE void operator^=(const regNumber reg); + FORCEINLINE _regMaskAll operator~() const; + FORCEINLINE bool operator==(const _regMaskAll& other) const; + FORCEINLINE bool operator!=(const _regMaskAll& other) const; + FORCEINLINE _regMaskAll operator&(const _regMaskAll& other) const; + FORCEINLINE _regMaskAll operator|(const _regMaskAll& other) const; + +} AllRegsMask; + +#define CONSTREF_AllRegsMask const AllRegsMask& +#define REF_AllRegsMask AllRegsMask + +#define GprRegsMask(gprRegs) AllRegsMask(gprRegs) + +#if REGMASK_BITS == 32 typedef unsigned regMaskSmall; #define REG_MASK_INT_FMT "%08X" #define REG_MASK_ALL_FMT "%08X" @@ -232,9 +404,6 @@ typedef unsigned __int64 regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif -typedef _regNumber_enum regNumber; -typedef unsigned char regNumberSmall; - 
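A short usage sketch for the AllRegsMask type defined above, assuming the arm64 layout (HAS_MORE_THAN_64_REGISTERS: gpr bits in the low half and float bits in the high half of the combined 64-bit field, predicate bits in the separate 32-bit field) and assuming the accessors behave as their names suggest; the registers are illustrative only:

    AllRegsMask mask(RBM_R0 | RBM_R1,   // gpr portion (low 32 bits of the combined field)
                     RBM_V0 | RBM_V1,   // float portion (bits 32 and up of the combined field)
                     RBM_P0);           // predicate portion (separate field on arm64)
    mask.AddRegNumInMask(REG_R2);       // adds x2 to the gpr portion
    assert(mask.IsRegNumInMask(REG_R2) && mask.IsRegNumInMask(REG_P0));
    assert(mask.gprRegs() == (RBM_R0 | RBM_R1 | RBM_R2));
    mask.Clear();
    assert(mask.IsEmpty());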
/*****************************************************************************/ #ifdef DEBUG @@ -337,7 +506,7 @@ const char* getRegName(regNumber reg); #ifdef DEBUG const char* getRegNameFloat(regNumber reg, var_types type); -extern void dspRegMask(regMaskTP regMask, size_t minSiz = 0); +extern void dspRegMask(AllRegsMask mask, size_t minSiz = 0); #endif #if CPU_HAS_BYTE_REGS @@ -352,8 +521,8 @@ inline bool isByteReg(regNumber reg) } #endif -inline regMaskTP genRegMask(regNumber reg); -inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); +inline singleRegMask genRegMask(regNumber reg); +inline regMaskFloat genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); /***************************************************************************** * Return true if the register number is valid @@ -451,7 +620,7 @@ inline regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv) // theFixedRetBuffMask: // Returns the regNumber to use for the fixed return buffer // -inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv) +inline regMaskGpr theFixedRetBuffMask(CorInfoCallConvExtension callConv) { assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method #if defined(TARGET_ARM64) @@ -486,9 +655,9 @@ inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv) // Returns the full mask of all possible integer registers // Note this includes the fixed return buffer register on Arm64 // -inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv) +inline regMaskGpr fullIntArgRegMask(CorInfoCallConvExtension callConv) { - regMaskTP result = RBM_ARG_REGS; + regMaskGpr result = RBM_ARG_REGS; if (hasFixedRetBuffReg(callConv)) { result |= theFixedRetBuffMask(callConv); @@ -583,7 +752,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) extern const regMaskSmall regMasks[REG_COUNT]; -inline regMaskTP genRegMask(regNumber reg) +inline singleRegMask genRegMask(regNumber reg) { assert((unsigned)reg < ArrLen(regMasks)); #ifdef TARGET_AMD64 @@ -591,7 +760,7 @@ inline regMaskTP genRegMask(regNumber reg) // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK // and the result needs to be zero. - regMaskTP result = 1ULL << reg; + singleRegMask result = 1ULL << reg; assert(result == regMasks[reg]); return result; #else @@ -604,7 +773,7 @@ inline regMaskTP genRegMask(regNumber reg) * Map a register number to a floating-point register mask. */ -inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) +inline regMaskFloat genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) { #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ defined(TARGET_RISCV64) @@ -628,6 +797,36 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D #endif } +inline regNumber getRegForType(regNumber reg, var_types regType) +{ +#ifdef TARGET_ARM + if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg)) + { + reg = REG_PREV(reg); + } +#endif // TARGET_ARM + return reg; +} + +// This is similar to genRegMask(reg, regType) for all platforms +// except Arm. 
For Arm, if regType is DOUBLE and reg is also a +// valid double register, it is again same as genRegMask(reg, regType) +// but if not, it will return the pair of even/odd registers corresponding +// to the `reg`. +inline regMaskOnlyOne getRegMask(regNumber reg, var_types regType) +{ + reg = getRegForType(reg, regType); + singleRegMask regMask = genRegMask(reg); +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regMask |= (regMask << 1); + } +#endif // TARGET_ARM + return regMask; +} + //------------------------------------------------------------------------ // genRegMask: Given a register, and its type, generate the appropriate regMask // @@ -647,22 +846,20 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D // For registers that are used in pairs, the caller will be handling // each member of the pair separately. // -inline regMaskTP genRegMask(regNumber regNum, var_types type) +inline regMaskOnlyOne genRegMask(regNumber regNum, var_types type) { #if defined(TARGET_ARM) - regMaskTP regMask = RBM_NONE; - if (varTypeUsesIntReg(type)) { - regMask = genRegMask(regNum); + return genRegMask(regNum); } else { assert(varTypeUsesFloatReg(type)); - regMask = genRegMaskFloat(regNum, type); + return genRegMaskFloat(regNum, type); } - return regMask; + return RBM_NONE; #else return genRegMask(regNum); #endif @@ -673,8 +870,8 @@ inline regMaskTP genRegMask(regNumber regNum, var_types type) * These arrays list the callee-saved register numbers (and bitmaps, respectively) for * the current architecture. */ -extern const regNumber raRegCalleeSaveOrder[CNT_CALLEE_SAVED]; -extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALLEE_SAVED]; +extern const regNumber raRegCalleeSaveOrder[CNT_CALLEE_SAVED]; +extern const regMaskGpr raRbmCalleeSaveOrder[CNT_CALLEE_SAVED]; // This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask. 
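For example (a worked sketch, using arm32's callee-saved order R4..R11 from RBM_CALLEE_SAVED_ORDER for concreteness), bit i of the compact mask stands for raRegCalleeSaveOrder[i], so a compact value with bits 0 and 2 set expands to the masks of R4 and R6:

    unsigned short compact = 0x5;                               // bits 0 and 2
    regMaskSmall  expanded = genRegMaskFromCalleeSavedMask(compact);
    assert(expanded == (RBM_R4 | RBM_R6));                      // raRbmCalleeSaveOrder[0] | raRbmCalleeSaveOrder[2]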
regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short); diff --git a/src/coreclr/jit/targetamd64.cpp b/src/coreclr/jit/targetamd64.cpp index 85b1ba6ef19a23..8e47e286263146 100644 --- a/src/coreclr/jit/targetamd64.cpp +++ b/src/coreclr/jit/targetamd64.cpp @@ -19,14 +19,14 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off #ifdef UNIX_AMD64_ABI const regNumber intArgRegs [] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 }; -const regMaskTP intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 }; +const regMaskGpr intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 }; const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 }; -const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 }; +const regMaskFloat fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 }; #else // !UNIX_AMD64_ABI const regNumber intArgRegs [] = { REG_ECX, REG_EDX, REG_R8, REG_R9 }; -const regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 }; +const regMaskGpr intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 }; const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 }; -const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 }; +const regMaskFloat fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 }; #endif // !UNIX_AMD64_ABI // clang-format on diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 7e72da9cf2ccdc..6fbc6a81d0eecd 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -86,10 +86,12 @@ #define RBM_ALLDOUBLE RBM_ALLFLOAT #define REG_FP_FIRST REG_XMM0 #define REG_FP_LAST REG_XMM31 + #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1) #define FIRST_FP_ARGREG REG_XMM0 #define REG_MASK_FIRST REG_K0 #define REG_MASK_LAST REG_K7 + #define REG_MASK_COUNT (REG_MASK_LAST - REG_MASK_FIRST + 1) #define RBM_ALLMASK_INIT (0) #define RBM_ALLMASK_EVEX (RBM_K1 | RBM_K2 | RBM_K3 | RBM_K4 | RBM_K5 | RBM_K6 | RBM_K7) @@ -172,6 +174,7 @@ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED | RBM_MSK_CALLEE_SAVED) #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) + #define RBM_ALLGPR (RBM_ALLINT) // AMD64 write barrier ABI (see vm\amd64\JitHelpers_Fast.asm, vm\amd64\JitHelpers_Fast.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): @@ -449,9 +452,9 @@ #define REG_ARG_5 REG_R9 extern const regNumber intArgRegs [MAX_REG_ARG]; - extern const regMaskTP intArgMasks[MAX_REG_ARG]; + extern const regMaskGpr intArgMasks[MAX_REG_ARG]; extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; - extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG]; #define RBM_ARG_0 RBM_RDI #define RBM_ARG_1 RBM_RSI @@ -472,9 +475,9 @@ #define REG_ARG_3 REG_R9 extern const regNumber intArgRegs [MAX_REG_ARG]; - extern const regMaskTP intArgMasks[MAX_REG_ARG]; + extern const regMaskGpr intArgMasks[MAX_REG_ARG]; extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; - extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG]; #define RBM_ARG_0 RBM_ECX #define RBM_ARG_1 RBM_EDX @@ -534,9 +537,6 @@ #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH #endif - // The registers trashed by the 
CORINFO_HELP_INIT_PINVOKE_FRAME helper. - #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH - #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX)) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_RAX diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp index 037578fa67b85c..4ce22dc05ebba8 100644 --- a/src/coreclr/jit/targetarm.cpp +++ b/src/coreclr/jit/targetarm.cpp @@ -18,10 +18,10 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3}; -const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3}; +const regMaskGpr intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3}; const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 }; -const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; +const regMaskFloat fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; // clang-format on static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1)); diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index 0f56ebe1ce989a..f9d486a5e7f6c5 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -45,6 +45,7 @@ #define REG_FP_FIRST REG_F0 #define REG_FP_LAST REG_F31 + #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1) #define FIRST_FP_ARGREG REG_F0 #define LAST_FP_ARGREG REG_F15 @@ -89,6 +90,7 @@ #define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7) #define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC) + #define RBM_ALLGPR (RBM_LOW_REGS | RBM_HIGH_REGS) #define REG_CALLEE_SAVED_ORDER REG_R4,REG_R5,REG_R6,REG_R7,REG_R8,REG_R9,REG_R10,REG_R11 #define RBM_CALLEE_SAVED_ORDER RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11 @@ -241,13 +243,6 @@ #define RBM_FLOATRET RBM_F0 #define RBM_DOUBLERET (RBM_F0|RBM_F1) - // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper (JIT_RareDisableHelper). - // See vm\arm\amshelpers.asm for more details. - #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_LNGRET|RBM_R7|RBM_R8|RBM_R11|RBM_DOUBLERET|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7)) - - // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
- #define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_CALLEE_TRASH | RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH) - #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R0 @@ -276,7 +271,7 @@ #define REG_ARG_3 REG_R3 extern const regNumber intArgRegs [MAX_REG_ARG]; - extern const regMaskTP intArgMasks[MAX_REG_ARG]; + extern const regMaskGpr intArgMasks[MAX_REG_ARG]; #define RBM_ARG_0 RBM_R0 #define RBM_ARG_1 RBM_R1 @@ -287,7 +282,7 @@ #define RBM_FLTARG_REGS (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15) extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; - extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG]; #define LBL_DIST_SMALL_MAX_NEG (0) #define LBL_DIST_SMALL_MAX_POS (+1020) diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp index f95663202456b8..9cbc615f955680 100644 --- a/src/coreclr/jit/targetarm64.cpp +++ b/src/coreclr/jit/targetarm64.cpp @@ -18,10 +18,10 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7}; -const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7}; +const regMaskGpr intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7}; const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 }; -const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 }; +const regMaskFloat fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 }; // clang-format on //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 6d33d378bcd96e..1cde57bc59ab8a 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -47,10 +47,12 @@ #define REG_FP_FIRST REG_V0 #define REG_FP_LAST REG_V31 + #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1) #define FIRST_FP_ARGREG REG_V0 #define LAST_FP_ARGREG REG_V15 #define REG_PREDICATE_FIRST REG_P0 #define REG_PREDICATE_LAST REG_P15 + #define REG_MASK_COUNT (REG_PREDICATE_LAST - REG_PREDICATE_FIRST + 1) #define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers. #define REG_PREDICATE_HIGH_FIRST REG_P8 // Similarly, some instructions can only use the second half of the predicate registers. 
diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp
index f95663202456b8..9cbc615f955680 100644
--- a/src/coreclr/jit/targetarm64.cpp
+++ b/src/coreclr/jit/targetarm64.cpp
@@ -18,10 +18,10 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L;
 // clang-format off
 const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7};
-const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7};
+const regMaskGpr intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7};
 const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 };
-const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
+const regMaskFloat fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
 // clang-format on
 //-----------------------------------------------------------------------------
diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h
index 6d33d378bcd96e..1cde57bc59ab8a 100644
--- a/src/coreclr/jit/targetarm64.h
+++ b/src/coreclr/jit/targetarm64.h
@@ -47,10 +47,12 @@
 #define REG_FP_FIRST REG_V0
 #define REG_FP_LAST REG_V31
+ #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1)
 #define FIRST_FP_ARGREG REG_V0
 #define LAST_FP_ARGREG REG_V15
 #define REG_PREDICATE_FIRST REG_P0
 #define REG_PREDICATE_LAST REG_P15
+ #define REG_MASK_COUNT (REG_PREDICATE_LAST - REG_PREDICATE_FIRST + 1)
 #define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers.
 #define REG_PREDICATE_HIGH_FIRST REG_P8 // Similarly, some instructions can only use the second half of the predicate registers.
 #define REG_PREDICATE_HIGH_LAST REG_P15
@@ -60,7 +62,7 @@ static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST);
- #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define REGNUM_BITS 7 // number of bits in a REG_*
 #define REGSIZE_BYTES 8 // number of bytes in one general purpose register
 #define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
 #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers
@@ -84,6 +86,7 @@
 #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
 #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
 #define RBM_ALLDOUBLE RBM_ALLFLOAT
+ #define RBM_ALLGPR (RBM_ALLINT | RBM_FP | RBM_LR | RBM_ZR)
 // REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED
 #define REG_VAR_ORDER REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, \
@@ -110,6 +113,8 @@
 #define CNT_CALLEE_SAVED_FLOAT (8)
 #define CNT_CALLEE_TRASH_FLOAT (24)
+ #define CNT_CALLEE_SAVED_MASK (4)
+ #define CNT_CALLEE_TRASH_MASK (8)
 #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES)
 #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES)
@@ -147,8 +152,8 @@
 #define RBM_ALLMASK (RBM_LOWMASK | RBM_HIGHMASK)
 // TODO-SVE: Fix when adding predicate register allocation
- #define RBM_MSK_CALLEE_SAVED (0)
- #define RBM_MSK_CALLEE_TRASH (0)
+ #define RBM_MSK_CALLEE_SAVED (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3)
+ #define RBM_MSK_CALLEE_TRASH (RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7 | RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15)
 // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S):
 // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
@@ -186,7 +191,7 @@
 #define REG_WRITE_BARRIER_SRC_BYREF REG_R13
 #define RBM_WRITE_BARRIER_SRC_BYREF RBM_R13
- #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R15|RBM_IP0|RBM_IP1|RBM_DEFAULT_HELPER_CALL_TARGET)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R15|RBM_IP0|RBM_IP1|RBM_DEFAULT_HELPER_CALL_TARGET)
 // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
 #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_R14|RBM_CALLEE_TRASH_NOGC)
@@ -263,12 +268,6 @@
 #define RBM_FLOATRET RBM_V0
 #define RBM_DOUBLERET RBM_V0
- // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
- #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH
-
- // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
- #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
-
 #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
 #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15
 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9
@@ -310,7 +309,7 @@
 #define REG_ARG_7 REG_R7
 extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
 #define RBM_ARG_0 RBM_R0
 #define RBM_ARG_1 RBM_R1
@@ -343,7 +342,7 @@
 #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7)
 extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG];
- extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG];
+ extern const regMaskFloat fltArgMasks[MAX_FLOAT_REG_ARG];
 #define LBL_DIST_SMALL_MAX_NEG (-1048576)
 #define LBL_DIST_SMALL_MAX_POS (+1048575)
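The targetarm64.h hunk above gives the SVE predicate registers a callee-saved/callee-trash split (P0-P3 saved, P4-P15 trashed) alongside the new CNT_CALLEE_SAVED_MASK/CNT_CALLEE_TRASH_MASK counts. A standalone consistency sketch, using illustrative bit positions rather than the JIT's RBM_P* encoding:

#include <cstdint>

// Bit for predicate register Pn in this sketch's own layout (not the JIT's).
constexpr uint64_t P(int n) { return 1ull << n; }

constexpr uint64_t MSK_CALLEE_SAVED = P(0) | P(1) | P(2) | P(3);
constexpr uint64_t MSK_CALLEE_TRASH = P(4) | P(5) | P(6) | P(7) | P(8) | P(9) |
                                      P(10) | P(11) | P(12) | P(13) | P(14) | P(15);

constexpr int popcount(uint64_t v) { return v == 0 ? 0 : int(v & 1) + popcount(v >> 1); }

static_assert(popcount(MSK_CALLEE_SAVED) == 4, "P0-P3 are callee-saved");
static_assert(popcount(MSK_CALLEE_TRASH) == 12, "P4-P15 are callee-trash");
static_assert((MSK_CALLEE_SAVED & MSK_CALLEE_TRASH) == 0, "the two sets are disjoint");
static_assert((MSK_CALLEE_SAVED | MSK_CALLEE_TRASH) == 0xFFFF, "together they cover P0-P15");

int main() { return 0; }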
diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp
index 2a7c906962b0cb..36347c15331d5a 100644
--- a/src/coreclr/jit/targetx86.cpp
+++ b/src/coreclr/jit/targetx86.cpp
@@ -18,7 +18,7 @@ const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L;
 // clang-format off
 const regNumber intArgRegs [] = {REG_ECX, REG_EDX};
-const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX};
+const regMaskGpr intArgMasks[] = {RBM_ECX, RBM_EDX};
 // clang-format on
 //-----------------------------------------------------------------------------
diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h
index dfeb96ae9e977c..8a19781ed67e4a 100644
--- a/src/coreclr/jit/targetx86.h
+++ b/src/coreclr/jit/targetx86.h
@@ -70,9 +70,11 @@
 #define REG_FP_FIRST REG_XMM0
 #define REG_FP_LAST REG_XMM7
+ #define REG_FP_COUNT (REG_FP_LAST - REG_FP_FIRST + 1)
 #define REG_MASK_FIRST REG_K0
 #define REG_MASK_LAST REG_K7
+ #define REG_MASK_COUNT (REG_MASK_LAST - REG_MASK_FIRST + 1)
 #define CNT_MASK_REGS 8
 #define FIRST_FP_ARGREG REG_XMM0
@@ -140,6 +142,7 @@
 #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH | RBM_MSK_CALLEE_TRASH)
 #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLGPR (RBM_ALLINT)
 #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX
 #define MAX_VAR_ORDER_SIZE 6
@@ -319,7 +322,7 @@
 #define REG_ARG_1 REG_EDX
 extern const regNumber intArgRegs [MAX_REG_ARG];
- extern const regMaskTP intArgMasks[MAX_REG_ARG];
+ extern const regMaskGpr intArgMasks[MAX_REG_ARG];
 #define RBM_ARG_0 RBM_ECX
 #define RBM_ARG_1 RBM_EDX
diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h
index bf5acb5ee014a5..2f121ae84f8cc2 100644
--- a/src/coreclr/jit/typelist.h
+++ b/src/coreclr/jit/typelist.h
@@ -64,9 +64,9 @@ DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, available
 DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 #endif // TARGET_XARCH
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+#ifdef FEATURE_MASKED_HW_INTRINSICS
 DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S)
-#endif // TARGET_XARCH || TARGET_ARM64
+#endif // FEATURE_MASKED_HW_INTRINSICS
 #endif // FEATURE_SIMD
 DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, 0, 0, 0, 0, 0, VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_ANY)
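Both typelist.h above and register.h (consumed in utils.cpp further down) are X-macro tables: each row is a DEF_TP or REGDEF invocation, and adding a column such as regTypeTag means updating every expansion site. A generic sketch of the pattern, with far fewer columns than the real tables:

#include <cstdio>

// The table itself: one row per type, normally kept in its own header.
#define TYPE_TABLE(DEF) \
    DEF(INT,   "int",   4) \
    DEF(FLOAT, "float", 4) \
    DEF(MASK,  "mask",  8)

// Expansion 1: an enum of type ids.
#define DEF_ENUM(tag, name, size) TYP_##tag,
enum MyType { TYPE_TABLE(DEF_ENUM) TYP_COUNT };
#undef DEF_ENUM

// Expansion 2: a parallel name table, indexed by the same enum.
#define DEF_NAME(tag, name, size) name,
static const char* const typeNames[] = { TYPE_TABLE(DEF_NAME) };
#undef DEF_NAME

int main()
{
    std::printf("%s\n", typeNames[TYP_MASK]); // prints "mask"
    return 0;
}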
diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp
index a51a52ab21d640..f084bc3ab73713 100644
--- a/src/coreclr/jit/unwind.cpp
+++ b/src/coreclr/jit/unwind.cpp
@@ -150,18 +150,7 @@ void Compiler::unwindPushPopCFI(regNumber reg)
 FuncInfoDsc* func = funCurrentFunc();
 UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func);
- regMaskTP relOffsetMask = RBM_CALLEE_SAVED
-#if defined(UNIX_AMD64_ABI) && ETW_EBP_FRAMED
- // In case of ETW_EBP_FRAMED defined the REG_FPBASE (RBP)
- // is excluded from the callee-save register list.
- // Make sure the register gets PUSH unwind info in this case,
- // since it is pushed as a frame register.
- | RBM_FPBASE
-#endif
-#if defined(TARGET_ARM)
- | RBM_R11 | RBM_LR | RBM_PC
-#endif
- ;
+ regMaskOnlyOne mask = genRegMask(reg);
 #if defined(TARGET_ARM)
 createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL,
@@ -170,7 +159,35 @@ void Compiler::unwindPushPopCFI(regNumber reg)
 assert(reg < REG_FP_FIRST);
 createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, REGSIZE_BYTES);
 #endif
- if (relOffsetMask & genRegMask(reg))
+
+ bool shouldCreateCfiCode = false;
+ if (emitter::isGeneralRegister(reg))
+ {
+
+#if defined(UNIX_AMD64_ABI) && ETW_EBP_FRAMED
+ // In case of ETW_EBP_FRAMED defined the REG_FPBASE (RBP)
+ // is excluded from the callee-save register list.
+ // Make sure the register gets PUSH unwind info in this case,
+ // since it is pushed as a frame register.
+ mask |= RBM_FPBASE;
+#endif
+#if defined(TARGET_ARM)
+ mask |= RBM_R11 | RBM_LR | RBM_PC;
+#endif
+ shouldCreateCfiCode = (RBM_INT_CALLEE_SAVED & mask);
+ }
+ else if (emitter::isFloatReg(reg))
+ {
+ shouldCreateCfiCode = (RBM_FLT_CALLEE_SAVED & mask);
+ }
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ else if (emitter::isMaskReg(reg) && (RBM_MSK_CALLEE_SAVED & mask))
+ {
+ shouldCreateCfiCode = (RBM_MSK_CALLEE_SAVED & mask);
+ }
+#endif // FEATURE_MASKED_HW_INTRINSICS
+
+ if (shouldCreateCfiCode)
 {
 createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg));
 }
@@ -200,14 +217,16 @@ void Compiler::unwindBegPrologCFI()
 }
 }
-void Compiler::unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat)
+void Compiler::unwindPushPopMaskCFI(regMaskOnlyOne regMask, bool isFloat)
 {
+ assert(IsOnlyOneRegMask(regMask));
+
 #if TARGET_ARM
- regNumber regNum = isFloat ? REG_PREV(REG_FP_LAST) : REG_INT_LAST;
- regMaskTP regBit = isFloat ? genRegMask(regNum) | genRegMask(REG_NEXT(regNum)) : genRegMask(regNum);
+ regNumber regNum = isFloat ? REG_PREV(REG_FP_LAST) : REG_INT_LAST;
+ regMaskOnlyOne regBit = isFloat ? genRegMask(regNum) | genRegMask(REG_NEXT(regNum)) : genRegMask(regNum);
 #else
- regNumber regNum = isFloat ? REG_FP_LAST : REG_INT_LAST;
- regMaskTP regBit = genRegMask(regNum);
+ regNumber regNum = isFloat ? REG_FP_LAST : REG_INT_LAST;
+ regMaskOnlyOne regBit = genRegMask(regNum);
 #endif
 for (; regMask != 0 && regBit != RBM_NONE;)
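The unwind.cpp hunk above replaces the single combined relOffsetMask test with a per-class decision: classify the pushed register, then consult only that class's callee-saved set. A standalone sketch of that decision in isolation, with simplified register numbering and masks that are not the JIT's REG_*/RBM_* values:

#include <cstdint>
#include <cstdio>

enum class RegClass { Gpr, Float, Predicate };

struct Target
{
    uint64_t gprCalleeSaved;
    uint64_t fltCalleeSaved;
    uint64_t mskCalleeSaved;
};

// Each class gets its own bit index space, mirroring the one-mask-per-class model.
bool needsCfiRelOffset(const Target& t, RegClass cls, unsigned indexInClass)
{
    const uint64_t bit = 1ull << indexInClass;
    switch (cls)
    {
        case RegClass::Gpr:       return (t.gprCalleeSaved & bit) != 0;
        case RegClass::Float:     return (t.fltCalleeSaved & bit) != 0;
        case RegClass::Predicate: return (t.mskCalleeSaved & bit) != 0;
    }
    return false;
}

int main()
{
    // Hypothetical target: GPRs 3-7 callee-saved, float regs 8-15, predicates 0-3.
    Target t{0xF8u, 0xFF00u, 0x0Fu};
    std::printf("gpr5  -> %d\n", needsCfiRelOffset(t, RegClass::Gpr, 5));       // 1
    std::printf("flt2  -> %d\n", needsCfiRelOffset(t, RegClass::Float, 2));     // 0
    std::printf("pred1 -> %d\n", needsCfiRelOffset(t, RegClass::Predicate, 1)); // 1
    return 0;
}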
diff --git a/src/coreclr/jit/unwindarmarch.cpp b/src/coreclr/jit/unwindarmarch.cpp
index 51af7f24889d1b..ccbdb4350aeabd 100644
--- a/src/coreclr/jit/unwindarmarch.cpp
+++ b/src/coreclr/jit/unwindarmarch.cpp
@@ -196,10 +196,10 @@ void Compiler::unwindEndEpilog()
 #if defined(TARGET_ARM)
-void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
+void Compiler::unwindPushPopMaskInt(regMaskGpr maskInt, bool useOpsize16)
 {
 // floating point registers cannot be specified in 'maskInt'
- assert((maskInt & RBM_ALLFLOAT) == 0);
+ assert(IsGprRegMask(maskInt));
 UnwindInfo* pu = &funCurrentFunc()->uwi;
@@ -213,8 +213,8 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
 if ((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0)
 {
- regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7);
- regMaskTP valMask = RBM_R4;
+ regMaskGpr matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7);
+ regMaskGpr valMask = RBM_R4;
 while (val < 4)
 {
 if (matchMask == valMask)
@@ -252,8 +252,8 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
 if (((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0) &&
 ((maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)) == (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)))
 {
- regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11);
- regMaskTP valMask = RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8;
+ regMaskGpr matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11);
+ regMaskGpr valMask = RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8;
 while (val < 4)
 {
 if (matchMask == valMask)
@@ -282,10 +282,10 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
 }
 }
-void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
+void Compiler::unwindPushPopMaskFloat(regMaskFloat maskFloat)
 {
 // Only floating pointer registers can be specified in 'maskFloat'
- assert((maskFloat & ~RBM_ALLFLOAT) == 0);
+ assert(IsFloatRegMask(maskFloat));
 // If the maskFloat is zero there is no unwind code to emit
 //
@@ -296,8 +296,8 @@ void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
 UnwindInfo* pu = &funCurrentFunc()->uwi;
- BYTE val = 0;
- regMaskTP valMask = (RBM_F16 | RBM_F17);
+ BYTE val = 0;
+ regMaskFloat valMask = (RBM_F16 | RBM_F17);
 while (maskFloat != valMask)
 {
@@ -317,8 +317,10 @@ void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
 pu->AddCode(0xE0 | val);
 }
-void Compiler::unwindPushMaskInt(regMaskTP maskInt)
+void Compiler::unwindPushMaskInt(regMaskGpr maskInt)
 {
+ assert(IsGprRegMask(maskInt));
+
 // Only r0-r12 and lr are supported
 assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11 | RBM_R12 | RBM_LR)) == 0);
@@ -336,8 +338,10 @@ void Compiler::unwindPushMaskInt(regMaskTP maskInt)
 unwindPushPopMaskInt(maskInt, useOpsize16);
 }
-void Compiler::unwindPushMaskFloat(regMaskTP maskFloat)
+void Compiler::unwindPushMaskFloat(regMaskFloat maskFloat)
 {
+ assert(IsFloatRegMask(maskFloat));
+
 // Only floating point registers should be in maskFloat
 assert((maskFloat & RBM_ALLFLOAT) == maskFloat);
@@ -352,8 +356,10 @@ void Compiler::unwindPushMaskFloat(regMaskTP maskFloat)
 unwindPushPopMaskFloat(maskFloat);
 }
-void Compiler::unwindPopMaskInt(regMaskTP maskInt)
+void Compiler::unwindPopMaskInt(regMaskGpr maskInt)
 {
+ assert(IsGprRegMask(maskInt));
+
 #if defined(FEATURE_CFI_SUPPORT)
 if (generateCFIUnwindCodes())
 {
@@ -379,8 +385,10 @@ void Compiler::unwindPopMaskInt(regMaskTP maskInt)
 unwindPushPopMaskInt(maskInt, useOpsize16);
 }
-void Compiler::unwindPopMaskFloat(regMaskTP maskFloat)
+void Compiler::unwindPopMaskFloat(regMaskFloat maskFloat)
 {
+ assert(IsFloatRegMask(maskFloat));
+
 #if defined(FEATURE_CFI_SUPPORT)
 if (generateCFIUnwindCodes())
 {
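The unwindarmarch.cpp changes above lean on IsGprRegMask/IsFloatRegMask asserts whose definitions are not part of this diff. A plausible standalone sketch, assuming a made-up bit layout, of what such class-purity checks amount to: a mask claimed to belong to one register class must not contain bits reserved for another class.

#include <cassert>
#include <cstdint>

// Assumed layout for this sketch only: GPR bits 0-15, float bits 16-47, predicate bits 48-63.
constexpr uint64_t ALL_GPR = 0x000000000000FFFFull;
constexpr uint64_t ALL_FLT = 0x0000FFFFFFFF0000ull;
constexpr uint64_t ALL_MSK = 0xFFFF000000000000ull;

constexpr bool IsGprRegMaskSketch(uint64_t mask)   { return (mask & ~ALL_GPR) == 0; }
constexpr bool IsFloatRegMaskSketch(uint64_t mask) { return (mask & ~ALL_FLT) == 0; }
constexpr bool IsMaskRegMaskSketch(uint64_t mask)  { return (mask & ~ALL_MSK) == 0; }

int main()
{
    uint64_t pushedGprs = (1ull << 4) | (1ull << 5); // e.g. r4 and r5
    assert(IsGprRegMaskSketch(pushedGprs));
    assert(!IsFloatRegMaskSketch(pushedGprs)); // mixing classes would trip checks like these
    static_assert(IsMaskRegMaskSketch(1ull << 48), "a predicate bit belongs to the mask class");
    (void)pushedGprs;
    return 0;
}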
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index ea33f1d14fb035..373532702d6346 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -111,6 +111,20 @@ extern const BYTE opcodeArgKinds[] = {
 /*****************************************************************************/
+const int regIndexForRegister(regNumber reg)
+{
+ static const BYTE _registerTypeIndex[] = {
+#ifdef TARGET_ARM64
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) regTypeTag,
+#else
+#define REGDEF(name, rnum, mask, sname, regTypeTag) regTypeTag,
+#endif
+#include "register.h"
+ };
+
+ return _registerTypeIndex[reg];
+}
+
 const char* varTypeName(var_types vt)
 {
 static const char* const varTypeNames[] = {
@@ -139,9 +153,9 @@ const char* getRegName(regNumber reg)
 static const char* const regNames[] = {
 #if defined(TARGET_ARM64)
-#define REGDEF(name, rnum, mask, xname, wname) xname,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) xname,
 #else
-#define REGDEF(name, rnum, mask, sname) sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
 #endif
 #include "register.h"
 };
@@ -227,7 +241,7 @@ const char* getRegNameFloat(regNumber reg, var_types type)
 #elif defined(TARGET_ARM64)
 static const char* regNamesFloat[] = {
-#define REGDEF(name, rnum, mask, xname, wname) xname,
+#define REGDEF(name, rnum, mask, xname, wname, regTypeTag) xname,
 #include "register.h"
 };
 assert((unsigned)reg < ArrLen(regNamesFloat));
@@ -237,7 +251,7 @@ const char* getRegNameFloat(regNumber reg, var_types type)
 #elif defined(TARGET_LOONGARCH64)
 static const char* regNamesFloat[] = {
-#define REGDEF(name, rnum, mask, sname) sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) sname,
 #include "register.h"
 };
@@ -247,16 +261,16 @@ const char* getRegNameFloat(regNumber reg, var_types type)
 #else
 static const char* regNamesFloat[] = {
-#define REGDEF(name, rnum, mask, sname) "x" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "x" sname,
 #include "register.h"
 };
 #ifdef FEATURE_SIMD
 static const char* regNamesYMM[] = {
-#define REGDEF(name, rnum, mask, sname) "y" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "y" sname,
 #include "register.h"
 };
 static const char* regNamesZMM[] = {
-#define REGDEF(name, rnum, mask, sname) "z" sname,
+#define REGDEF(name, rnum, mask, sname, regTypeTag) "z" sname,
 #include "register.h"
 };
 #endif // FEATURE_SIMD
@@ -282,9 +296,9 @@ const char* getRegNameFloat(regNumber reg, var_types type)
 * Displays a range of registers
 * -- This is a helper used by dspRegMask
 */
-const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regNumber regFirst, regNumber regLast)
+const char* dspRegRange(regMaskOnlyOne regMask, size_t& minSiz, const char* sep, regNumber regFirst, regNumber regLast)
 {
-#ifdef TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
 assert(((regFirst == REG_INT_FIRST) && (regLast == REG_INT_LAST)) ||
 ((regFirst == REG_FP_FIRST) && (regLast == REG_FP_LAST)) ||
 ((regFirst == REG_MASK_FIRST) && (regLast == REG_MASK_LAST)));
@@ -306,7 +320,7 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
 for (regNumber regNum = regFirst; regNum <= regLast; regNum = REG_NEXT(regNum))
 {
- regMaskTP regBit = genRegMask(regNum);
+ singleRegMask regBit = genRegMask(regNum);
 if ((regMask & regBit) != 0)
 {
@@ -433,18 +447,18 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
 * Displays a register set.
 * TODO-ARM64-Cleanup: don't allow ip0, ip1 as part of a range.
 */
-void dspRegMask(regMaskTP regMask, size_t minSiz)
+void dspRegMask(AllRegsMask mask, size_t minSiz)
 {
 const char* sep = "";
 printf("[");
- sep = dspRegRange(regMask, minSiz, sep, REG_INT_FIRST, REG_INT_LAST);
- sep = dspRegRange(regMask, minSiz, sep, REG_FP_FIRST, REG_FP_LAST);
+ sep = dspRegRange(mask.gprRegs(), minSiz, sep, REG_INT_FIRST, REG_INT_LAST);
+ sep = dspRegRange(mask.floatRegs(nullptr), minSiz, sep, REG_FP_FIRST, REG_FP_LAST);
-#ifdef TARGET_XARCH
- sep = dspRegRange(regMask, minSiz, sep, REG_MASK_FIRST, REG_MASK_LAST);
-#endif // TARGET_XARCH
+#ifdef FEATURE_MASKED_HW_INTRINSICS
+ sep = dspRegRange(mask.predicateRegs(nullptr), minSiz, sep, REG_MASK_FIRST, REG_MASK_LAST);
+#endif // FEATURE_MASKED_HW_INTRINSICS
 printf("]");