From 40b32cdf6f89755efbb532e55a2b5575ab48f11f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Apr 2024 07:34:01 +0200 Subject: [PATCH 01/24] JIT: Rewrite register parameter homing Generalize register parameter homing to handle float and integer parameters simultaneously, and to handle all parameters (including the Swift self register). Base it on the new ABI representation. The new algorithm constructs a graph in which nodes are the source and destination registers of all parameters. Edges in the graph indicate that (part of) a register has to be moved into (part of) another register. To home parameters we repeatedly pick a register (preferring nodes without any outgoing edges) and perform the reg-reg moves indicated by its incoming edges. If we pick a register that has any outgoing edges it means there is circularity, so we need a temporary register to save its value. --- src/coreclr/jit/abi.cpp | 53 + src/coreclr/jit/abi.h | 2 + src/coreclr/jit/codegen.h | 9 +- src/coreclr/jit/codegencommon.cpp | 1739 +++++------------------- src/coreclr/jit/codegenloongarch64.cpp | 3 +- src/coreclr/jit/codegenriscv64.cpp | 3 +- src/coreclr/jit/lsra.cpp | 15 - src/coreclr/jit/targetarm64.cpp | 2 +- 8 files changed, 437 insertions(+), 1389 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 3dd8fcec32fcc9..e139c2d5028298 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -54,6 +54,59 @@ unsigned ABIPassingSegment::GetStackOffset() const return m_stackOffset; } +//----------------------------------------------------------------------------- +// GetRegisterType: +// Return the smallest type larger or equal to Size that most naturally +// represents the register this segment is passed in. +// +// Return Value: +// A type that matches ABIPassingSegment::Size and the register. 
+// +var_types ABIPassingSegment::GetRegisterType() const +{ + assert(IsPassedInRegister()); + if (genIsValidFloatReg(m_register)) + { + switch (Size) + { + case 4: + return TYP_FLOAT; + case 8: + return TYP_DOUBLE; +#ifdef FEATURE_SIMD + case 16: + return TYP_SIMD16; +#endif + default: + assert(!"Unexpected size for floating point register"); + return TYP_UNDEF; + } + } + else + { + switch (Size) + { + case 1: + return TYP_UBYTE; + case 2: + return TYP_USHORT; + case 3: + case 4: + return TYP_INT; +#ifdef TARGET_64BIT + case 5: + case 6: + case 7: + case 8: + return TYP_LONG; +#endif + default: + assert(!"Unexpected size for integer register"); + return TYP_UNDEF; + } + } +} + //----------------------------------------------------------------------------- // InRegister: // Create an ABIPassingSegment representing that a segment is passed in a diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 27e53c27efc7e3..08c42a7bb3b550 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -29,6 +29,8 @@ class ABIPassingSegment // offset, relative to the first stack argument's offset. 
unsigned GetStackOffset() const; + var_types GetRegisterType() const; + static ABIPassingSegment InRegister(regNumber reg, unsigned offset, unsigned size); static ABIPassingSegment OnStack(unsigned stackOffset, unsigned offset, unsigned size); }; diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 7a2359c9fb5fc9..83ff3023b4b383 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -265,11 +265,10 @@ class CodeGen final : public CodeGenInterface // void genEstablishFramePointer(int delta, bool reportUnwindData); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - void genFnPrologCalleeRegArgs(); -#else - void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState); -#endif + void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); + + var_types genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg); + void genSpillOrAddRegisterParam(class RegGraph* graph, unsigned lclNum); void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 24f5a922f18878..8db766515df23c 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2832,1389 +2832,450 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -/***************************************************************************** - * - * Generates code for moving incoming register arguments to their - * assigned location, in the function prolog. 
- */ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif - -#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) -void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int"); - } -#endif - - unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg) - unsigned argNum; // current argNum, always in [0..argMax-1] - unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64) - unsigned regArgNum; // index into the regArgTab[] table - regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; - bool doingFloat = regState->rsIsFloat; - - // We should be generating the prolog block when we are called - assert(compiler->compGeneratingProlog); - - // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called. 
- noway_assert(regArgMaskLive != 0); - - // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2 - // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8 - // - // The regArgTab can always have unused entries, - // for example if an architecture always increments the arg register number but uses either - // an integer register or a floating point register to hold the next argument - // then with a mix of float and integer args you could have: - // - // sampleMethod(int i, float x, int j, float y, int k, float z); - // r0, r2 and r4 as valid integer arguments with argMax as 5 - // and f1, f3 and f5 and valid floating point arguments with argMax as 6 - // The first one is doingFloat==false and the second one is doingFloat==true - // - // If a fixed return buffer (in r8) was also present then the first one would become: - // r0, r2, r4 and r8 as valid integer arguments with argMax as 9 - // - - argMax = regState->rsCalleeRegArgCount; - fixedRetBufIndex = (unsigned)-1; // Invalid value - - // If necessary we will select a correct xtraReg for circular floating point args later. - if (doingFloat) - { - xtraReg = REG_NA; - noway_assert(argMax <= MAX_FLOAT_REG_ARG); - } - else // we are doing the integer registers - { - noway_assert(argMax <= MAX_REG_ARG); - if (hasFixedRetBuffReg(compiler->info.compCallConv)) - { - fixedRetBufIndex = theFixedRetBuffArgNum(compiler->info.compCallConv); - // We have an additional integer register argument when hasFixedRetBuffReg() is true - argMax = fixedRetBufIndex + 1; - assert(argMax == (MAX_REG_ARG + 1)); - } - } - - // - // Construct a table with the register arguments, for detecting circular and - // non-circular dependencies between the register arguments. A dependency is when - // an argument register Rn needs to be moved to register Rm that is also an argument - // register. 
The table is constructed in the order the arguments are passed in - // registers: the first register argument is in regArgTab[0], the second in - // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting - // at an even index. The regArgTab is indexed from 0 to argMax - 1. - // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg()) - // we have increased the allocated size of the regArgTab[] by one. - // - struct regArgElem - { - unsigned varNum; // index into compiler->lvaTable[] for this register argument - var_types type; // the Jit type of this regArgTab entry - unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. - // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to - // argument register number 'x'. Only used when circular = true. - char slot; // 0 means the register is not used for a register argument - // 1 means the first part of a register argument - // 2, 3 or 4 means the second,third or fourth part of a multireg argument - bool stackArg; // true if the argument gets homed to the stack - bool writeThru; // true if the argument gets homed to both stack and register - bool processed; // true after we've processed the argument (and it is in its final location) - bool circular; // true if this register participates in a circular dependency loop. - bool hfaConflict; // arg is part of an HFA that will end up in the same register - // but in a different slot (eg arg in s3 = v3.s[0], needs to end up in v3.s[3]) - } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {}; - - unsigned varNum; - LclVarDsc* varDsc; - - for (varNum = 0; varNum < compiler->lvaCount; ++varNum) - { - varDsc = compiler->lvaGetDesc(varNum); - - // Is this variable a register arg? 
- if (!varDsc->lvIsParam) - { - continue; - } - - if (!varDsc->lvIsRegArg) - { - continue; - } - - // When we have a promoted struct we have two possible LclVars that can represent the incoming argument - // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. - // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise - // use the original TYP_STRUCT argument. - // - if (varDsc->lvPromoted || varDsc->lvIsStructField) - { - LclVarDsc* parentVarDsc = varDsc; - if (varDsc->lvIsStructField) - { - assert(!varDsc->lvPromoted); - parentVarDsc = compiler->lvaGetDesc(varDsc->lvParentLcl); - } - - Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc); - - if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) - { - // For register arguments that are independent promoted structs we put the promoted field varNum in the - // regArgTab[] - if (varDsc->lvPromoted) - { - continue; - } - } - else - { - // For register arguments that are not independent promoted structs we put the parent struct varNum in - // the regArgTab[] - if (varDsc->lvIsStructField) - { - continue; - } - } - } - -#ifdef SWIFT_SUPPORT - // The Swift self parameter is passed in a callee save register and is - // not part of the arg register order that this function relies on to - // handle conflicts. For this reason we always mark it as DNER and - // handle it outside the normal register arguments. - // TODO-CQ: Fix this. 
- if (varNum == compiler->lvaSwiftSelfArg) - { - continue; - } -#endif - - var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet()); - // Change regType to the HFA type when we have a HFA argument - if (varDsc->lvIsHfaRegArg()) - { - if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && compiler->info.compIsVarArgs) - { - assert(!"Illegal incoming HFA arg encountered in Vararg method."); - } - regType = varDsc->GetHfaType(); - } - -#if defined(UNIX_AMD64_ABI) - if (!varTypeIsStruct(regType)) -#endif // defined(UNIX_AMD64_ABI) - { - bool isFloatReg = emitter::isFloatReg(varDsc->GetArgReg()); - - if (isFloatReg != doingFloat) - { - // A struct might be passed partially in XMM register for System V calls. - // So a single arg might use both register files. - continue; - } - else if (isFloatReg != varTypeUsesFloatArgReg(regType)) - { - if (regType == TYP_FLOAT) - { - regType = TYP_INT; - } - else - { - assert(regType == TYP_DOUBLE); - regType = TYP_LONG; - } - } - } - - int slots = 0; - -#if defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(varDsc)) - { - CORINFO_CLASS_HANDLE typeHnd; - if (varDsc->lvIsStructField) - { - // The only case we currently permit is a wrapped SIMD field, - // where we won't have the class handle available, so get it - // from the parent struct -- they will agree on ABI details. - LclVarDsc* parentDsc = compiler->lvaGetDesc(varDsc->lvParentLcl); - assert(varTypeIsSIMD(varDsc) && (parentDsc->lvFieldCnt == 1)); - typeHnd = parentDsc->GetLayout()->GetClassHandle(); - } - else - { - typeHnd = varDsc->GetLayout()->GetClassHandle(); - } - assert(typeHnd != nullptr); - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); - if (!structDesc.passedInRegisters) - { - // The var is not passed in registers. 
- continue; - } - - unsigned firstRegSlot = 0; - for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++) - { - regNumber regNum = varDsc->lvRegNumForSlot(slotCounter); - var_types slotRegType; - -#ifdef FEATURE_SIMD - // Assumption 1: - // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off - // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for - // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should - // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack. - // - // Assumption 2: - // RyuJit backend is making another implicit assumption that Vector3 type args when passed in - // registers or on stack, the upper most 4-bytes will be zero. - // - // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee - // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is - // invalid. - // - // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12 - // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and - // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason, - // there is no need to clear upper 4-bytes of Vector3 type args. - // - // RyuJIT x64 Unix: arguments are treated as passed by value and read/writen as if TYP_SIMD16. - // Vector3 return values are returned two return registers and Caller assembles them into a - // single xmm reg. 
Hence RyuJIT explicitly generates code to clears upper 4-bytes of Vector3 - // type args in prolog and Vector3 type return value of a call - - if (varDsc->lvType == TYP_SIMD12) - { - slotRegType = TYP_DOUBLE; - } - else -#endif - { - slotRegType = compiler->GetEightByteType(structDesc, slotCounter); - } - - regArgNum = genMapRegNumToRegArgNum(regNum, slotRegType, compiler->info.compCallConv); - - if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) || - (doingFloat && (structDesc.IsSseSlot(slotCounter)))) - { - // Store the reg for the first slot. - if (slots == 0) - { - firstRegSlot = regArgNum; - } - - // Bingo - add it to our table - noway_assert(regArgNum < argMax); - noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better - // not be multiple vars representing this argument - // register) - regArgTab[regArgNum].varNum = varNum; - regArgTab[regArgNum].slot = (char)(slotCounter + 1); - regArgTab[regArgNum].type = slotRegType; - slots++; - } - } - - if (slots == 0) - { - continue; // Nothing to do for this regState set. - } - - regArgNum = firstRegSlot; - } - else -#endif // defined(UNIX_AMD64_ABI) - { - // Bingo - add it to our table - regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType, compiler->info.compCallConv); - slots = 1; - - if (TargetArchitecture::IsArm32 || - (TargetOS::IsWindows && TargetArchitecture::IsArm64 && compiler->info.compIsVarArgs)) - { - int lclSize = compiler->lvaLclSize(varNum); - if (lclSize > REGSIZE_BYTES) - { - slots = lclSize / REGSIZE_BYTES; - } - } -#if FEATURE_MULTIREG_ARGS - else if (varDsc->lvIsMultiRegArg) - { - if (varDsc->lvIsHfaRegArg()) - { - // We have an HFA argument, set slots to the number of registers used - slots = varDsc->lvHfaSlots(); - } - else - { - // Currently all non-HFA multireg structs are two registers in size (i.e. 
two slots) - assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE)); - // We have a non-HFA multireg argument, set slots to two - slots = 2; - } - } -#endif // FEATURE_MULTIREG_ARGS - - // Handle args split between registers and stack. The arm64 fixed ret buf arg is never split. - if (compFeatureArgSplit() && (fixedRetBufIndex != regArgNum)) - { - unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG; - if (regArgNum + slots > maxRegArgNum) - { - JITDUMP("Splitting V%02u: %u registers, %u stack slots\n", varNum, maxRegArgNum - regArgNum, - regArgNum + slots - maxRegArgNum); - slots = maxRegArgNum - regArgNum; - } - } - - // Note that regArgNum + 1 represents an argument index not an actual argument register; - // see genMapRegArgNumToRegNum(). - - for (int i = 0; i < slots; i++) - { - noway_assert((regArgNum + i) < argMax); - - // We better not have added it already (there better not be multiple vars representing this argument - // register) - noway_assert(regArgTab[regArgNum + i].slot == 0); - - regArgTab[regArgNum + i].varNum = varNum; - regArgTab[regArgNum + i].slot = static_cast(i + 1); - - regArgTab[regArgNum + i].type = regType; // Set the register type. - } - } - - for (int i = 0; i < slots; i++) - { - regType = regArgTab[regArgNum + i].type; - regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType, compiler->info.compCallConv); - -#if !defined(UNIX_AMD64_ABI) - assert((i > 0) || (regNum == varDsc->GetArgReg())); -#endif // defined(UNIX_AMD64_ABI) - - // Is the arg dead on entry to the method ? - - if ((regArgMaskLive & genRegMask(regNum)) == 0) - { - if (varDsc->lvTrackedNonStruct()) - { - // We may now see some tracked locals with zero refs. - // See Lowering::DoPhase. Tolerate these. 
- if (varDsc->lvRefCnt() > 0) - { - noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)); - } - } - else - { -#ifdef TARGET_X86 - noway_assert(varDsc->lvType == TYP_STRUCT); -#else // !TARGET_X86 - // For LSRA, it may not be in regArgMaskLive if it has a zero - // refcnt. This is in contrast with the non-LSRA case in which all - // non-tracked args are assumed live on entry. - noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) || - (varDsc->IsAddressExposed() && compiler->info.compIsVarArgs) || - (varDsc->IsAddressExposed() && compiler->opts.compUseSoftFP)); -#endif // !TARGET_X86 - } - // Mark it as processed and be done with it - regArgTab[regArgNum + i].processed = true; - goto NON_DEP; - } - -#ifdef TARGET_ARM - // On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg - // could be equal to GetArgReg(). The pre-spilled registers are also not considered live either since - // they've already been spilled. - // - if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0) -#endif // TARGET_ARM - { -#if !defined(UNIX_AMD64_ABI) - noway_assert(xtraReg != (varDsc->GetArgReg() + i)); -#endif - noway_assert(regArgMaskLive & genRegMask(regNum)); - } - - regArgTab[regArgNum + i].processed = false; - regArgTab[regArgNum + i].writeThru = (varDsc->lvIsInReg() && varDsc->lvLiveInOutOfHndlr); - - /* mark stack arguments since we will take care of those first */ - regArgTab[regArgNum + i].stackArg = varDsc->lvIsInReg() ? 
false : true; - - /* If it goes on the stack or in a register that doesn't hold - * an argument anymore -> CANNOT form a circular dependency */ - - if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive)) - { - /* will trash another argument -> possible dependency - * We may need several passes after the table is constructed - * to decide on that */ - - /* Maybe the argument stays in the register (IDEAL) */ - - if ((i == 0) && (varDsc->GetRegNum() == regNum)) - { - goto NON_DEP; - } - -#if !defined(TARGET_64BIT) - if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->GetOtherReg() == regNum)) - { - goto NON_DEP; - } - if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->GetOtherReg() == regNum)) - { - goto NON_DEP; - } - - if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) && - (REG_NEXT(varDsc->GetRegNum()) == regNum)) - { - goto NON_DEP; - } -#endif // !defined(TARGET_64BIT) - regArgTab[regArgNum + i].circular = true; - } - else - { - NON_DEP: - regArgTab[regArgNum + i].circular = false; - - /* mark the argument register as free */ - regArgMaskLive &= ~genRegMask(regNum); - } - } - } - - /* Find the circular dependencies for the argument registers, if any. - * A circular dependency is a set of registers R1, R2, ..., Rn - * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */ - - bool change = true; - bool hasHfaConflict = false; - if (regArgMaskLive) - { - /* Possible circular dependencies still exist; the previous pass was not enough - * to filter them out. Use a "sieve" strategy to find all circular dependencies. 
*/ - - while (change) - { - change = false; - - for (argNum = 0; argNum < argMax; argNum++) - { - // If we already marked the argument as non-circular then continue - - if (!regArgTab[argNum].circular) - { - continue; - } - - if (regArgTab[argNum].slot == 0) // Not a register argument - { - continue; - } - - varNum = regArgTab[argNum].varNum; - varDsc = compiler->lvaGetDesc(varNum); - const var_types varRegType = varDsc->GetRegisterType(); - noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); - - /* cannot possibly have stack arguments */ - noway_assert(varDsc->lvIsInReg()); - noway_assert(!regArgTab[argNum].stackArg); - - var_types regType = regArgTab[argNum].type; - regNumber regNum = genMapRegArgNumToRegNum(argNum, regType, compiler->info.compCallConv); - - regNumber destRegNum = REG_NA; - if (varTypeIsPromotable(varDsc) && - (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT)) - { - assert(regArgTab[argNum].slot <= varDsc->lvFieldCnt); - LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + regArgTab[argNum].slot - 1); - destRegNum = fieldVarDsc->GetRegNum(); - } - else if (regArgTab[argNum].slot == 1) - { - destRegNum = varDsc->GetRegNum(); - } -#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) - else if (varDsc->lvIsHfa()) - { - // This must be a SIMD type that's fully enregistered, but is passed as an HFA. - // Each field will be inserted into the same destination register. - // - assert(varTypeIsSIMD(varDsc)); - assert(regArgTab[argNum].slot <= (int)varDsc->lvHfaSlots()); - assert(argNum > 0); - assert(regArgTab[argNum - 1].varNum == varNum); - - // If the field is passed in the same register as the destination, - // but is in the wrong part of the register, mark it specially so later - // we make sure to move it to the right spot before "freeing" the destination. 
- // - destRegNum = varDsc->GetRegNum(); - if (regNum == destRegNum) - { - // We only get here if the HFA part is not already in the right slot in - // the destination. That is, it is not slot-1. - // - const int slot = regArgTab[argNum].slot; - assert(slot != 1); - JITDUMP("HFA conflict; arg num %u needs to move from %s[%u] to %s[%u]\n", argNum, - getRegName(regNum), 0, getRegName(destRegNum), slot - 1); - regArgTab[argNum].hfaConflict = true; - - // We'll need to do a special pass later to resolve these - // - hasHfaConflict = true; - } - regArgMaskLive &= ~genRegMask(regNum); - regArgTab[argNum].circular = false; - change = true; - continue; - } -#elif defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) - else - { - assert(regArgTab[argNum].slot == 2); - assert(argNum > 0); - assert(regArgTab[argNum - 1].slot == 1); - assert(regArgTab[argNum - 1].varNum == varNum); - assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16)); - regArgMaskLive &= ~genRegMask(regNum); - regArgTab[argNum].circular = false; - change = true; - continue; - } -#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#if !defined(TARGET_64BIT) - else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG) - { - destRegNum = varDsc->GetOtherReg(); - } - else - { - assert(regArgTab[argNum].slot == 2); - assert(varDsc->TypeGet() == TYP_DOUBLE); - destRegNum = REG_NEXT(varDsc->GetRegNum()); - } -#endif // !defined(TARGET_64BIT) - noway_assert(destRegNum != REG_NA); - if (genRegMask(destRegNum) & regArgMaskLive) - { - /* we are trashing a live argument register - record it */ - unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType, compiler->info.compCallConv); - noway_assert(destRegArgNum < argMax); - regArgTab[destRegArgNum].trashBy = argNum; - } - else - { - /* argument goes to a free register */ - regArgTab[argNum].circular = false; - change = true; - - /* mark the argument register as free */ - regArgMaskLive &= ~genRegMask(regNum); - } - } 
- } - } - - /* At this point, everything that has the "circular" flag - * set to "true" forms a circular dependency */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (regArgMaskLive) - { - if (verbose) - { - printf("Circular dependencies found while home-ing the incoming arguments.\n"); - } - } -#endif - - // LSRA allocates registers to incoming parameters in order and will not overwrite - // a register still holding a live parameter. - - noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && - "Homing of float argument registers with circular dependencies not implemented."); - - // Now move the arguments to their locations. - // First consider ones that go on the stack since they may free some registers. - // Also home writeThru args, since they're also homed to the stack. - - regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start - for (argNum = 0; argNum < argMax; argNum++) - { - emitAttr size; - -#if defined(UNIX_AMD64_ABI) - // If this is the wrong register file, just continue. - if (regArgTab[argNum].type == TYP_UNDEF) - { - // This could happen if the reg in regArgTab[argNum] is of the other register file - - // for System V register passed structs where the first reg is GPR and the second an XMM reg. - // The next register file processing will process it. - continue; - } -#endif // defined(UNIX_AMD64_ABI) - - // If the arg is dead on entry to the method, skip it - - if (regArgTab[argNum].processed) - { - continue; - } - - if (regArgTab[argNum].slot == 0) // Not a register argument - { - continue; - } - - varNum = regArgTab[argNum].varNum; - varDsc = compiler->lvaGetDesc(varNum); - -#ifndef TARGET_64BIT - // If this arg is never on the stack, go to the next one. 
- if (varDsc->lvType == TYP_LONG) - { - if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru) - { - continue; - } - else if (varDsc->GetOtherReg() != REG_STK) - { - continue; - } - } - else -#endif // !TARGET_64BIT - { - // If this arg is never on the stack, go to the next one. - if (!regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru) - { - continue; - } - } - -#if defined(TARGET_ARM) - if (varDsc->lvType == TYP_DOUBLE) - { - if (regArgTab[argNum].slot == 2) - { - // We handled the entire double when processing the first half (slot == 1) - continue; - } - } -#endif - - noway_assert(regArgTab[argNum].circular == false); - - noway_assert(varDsc->lvIsParam); - noway_assert(varDsc->lvIsRegArg); - noway_assert(varDsc->lvIsInReg() == false || varDsc->lvLiveInOutOfHndlr || - (varDsc->lvType == TYP_LONG && varDsc->GetOtherReg() == REG_STK && regArgTab[argNum].slot == 2)); - - var_types storeType = TYP_UNDEF; - unsigned slotSize = TARGET_POINTER_SIZE; - - if (varTypeIsStruct(varDsc)) - { - storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer -#if FEATURE_MULTIREG_ARGS - // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers - noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); -#endif // FEATURE_MULTIREG_ARGS -#ifdef UNIX_AMD64_ABI - storeType = regArgTab[argNum].type; -#endif // !UNIX_AMD64_ABI - if (varDsc->lvIsHfaRegArg()) - { -#ifdef TARGET_ARM - // On ARM32 the storeType for HFA args is always TYP_FLOAT - storeType = TYP_FLOAT; - slotSize = (unsigned)emitActualTypeSize(storeType); -#else // TARGET_ARM64 - storeType = genActualType(varDsc->GetHfaType()); - slotSize = (unsigned)emitActualTypeSize(storeType); -#endif // TARGET_ARM64 - } - } - else // Not a struct type - { - storeType = genActualType(regArgTab[argNum].type); - } - size = emitActualTypeSize(storeType); -#ifdef TARGET_X86 - noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE); 
-#endif // TARGET_X86 - - regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType, compiler->info.compCallConv); - - // Stack argument - if the ref count is 0 don't care about it - - if (!varDsc->lvOnFrame) - { - noway_assert(varDsc->lvRefCnt() == 0); - } - else - { - // Since slot is typically 1, baseOffset is typically 0 - int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; - - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); - -#ifndef UNIX_AMD64_ABI - // Check if we are writing past the end of the struct - if (varTypeIsStruct(varDsc)) - { - assert(varDsc->lvSize() >= baseOffset + (unsigned)size); - } -#endif // !UNIX_AMD64_ABI - } - - // Mark the argument as processed, and set it as no longer live in srcRegNum, - // unless it is a writeThru var, in which case we home it to the stack, but - // don't mark it as processed until below. - if (!regArgTab[argNum].writeThru) - { - regArgTab[argNum].processed = true; - regArgMaskLive &= ~genRegMask(srcRegNum); - } - -#if defined(TARGET_ARM) - if ((storeType == TYP_DOUBLE) && !regArgTab[argNum].writeThru) - { - regArgTab[argNum + 1].processed = true; - regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum)); - } -#endif - } - - /* Process any circular dependencies */ - if (regArgMaskLive) - { - unsigned begReg, destReg, srcReg; - unsigned varNumDest, varNumSrc; - LclVarDsc* varDscDest; - LclVarDsc* varDscSrc; - instruction insCopy = INS_mov; - - if (doingFloat) - { -#ifndef UNIX_AMD64_ABI - if (GlobalJitOptions::compFeatureHfa) -#endif // !UNIX_AMD64_ABI - { - insCopy = ins_Copy(TYP_DOUBLE); - // Compute xtraReg here when we have a float argument - assert(xtraReg == REG_NA); - - regMaskTP fpAvailMask; - - fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive; - if (GlobalJitOptions::compFeatureHfa) - { - fpAvailMask &= RBM_ALLDOUBLE; - } - - if (fpAvailMask == RBM_NONE) - { - fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive; - if (GlobalJitOptions::compFeatureHfa) - { - 
fpAvailMask &= RBM_ALLDOUBLE; - } - } - - assert(fpAvailMask != RBM_NONE); - - // We pick the lowest avail register number - regMaskTP tempMask = genFindLowestBit(fpAvailMask); - xtraReg = genRegNumFromMask(tempMask); - } -#if defined(TARGET_X86) - // This case shouldn't occur on x86 since NYI gets converted to an assert - NYI("Homing circular FP registers via xtraReg"); -#endif - } - - for (argNum = 0; argNum < argMax; argNum++) - { - // If not a circular dependency then continue - if (!regArgTab[argNum].circular) - { - continue; - } - - // If already processed the dependency then continue - - if (regArgTab[argNum].processed) - { - continue; - } - - if (regArgTab[argNum].slot == 0) // Not a register argument - { - continue; - } - - destReg = begReg = argNum; - srcReg = regArgTab[argNum].trashBy; - - varNumDest = regArgTab[destReg].varNum; - varDscDest = compiler->lvaGetDesc(varNumDest); - noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg); - - noway_assert(srcReg < argMax); - varNumSrc = regArgTab[srcReg].varNum; - varDscSrc = compiler->lvaGetDesc(varNumSrc); - noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg); - - emitAttr size = EA_PTRSIZE; - -#ifdef TARGET_XARCH - // - // The following code relies upon the target architecture having an - // 'xchg' instruction which directly swaps the values held in two registers. - // On the ARM architecture we do not have such an instruction. - // - if (destReg == regArgTab[srcReg].trashBy) - { - /* only 2 registers form the circular dependency - use "xchg" */ - - varNum = regArgTab[argNum].varNum; - varDsc = compiler->lvaGetDesc(varNum); - noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); - - noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES); - - /* Set "size" to indicate GC if one and only one of - * the operands is a pointer - * RATIONALE: If both are pointers, nothing changes in - * the GC pointer tracking. 
If only one is a pointer we - * have to "swap" the registers in the GC reg pointer mask - */ - - if (varTypeIsGC(varDscSrc) != varTypeIsGC(varDscDest)) - { - size = EA_GCREF; - } - - noway_assert(varDscDest->GetArgReg() == varDscSrc->GetRegNum()); - - GetEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->GetRegNum(), varDscSrc->GetArgReg()); - regSet.verifyRegUsed(varDscSrc->GetRegNum()); - regSet.verifyRegUsed(varDscSrc->GetArgReg()); - - /* mark both arguments as processed */ - regArgTab[destReg].processed = true; - regArgTab[srcReg].processed = true; - - regArgMaskLive &= ~genRegMask(varDscSrc->GetArgReg()); - regArgMaskLive &= ~genRegMask(varDscDest->GetArgReg()); - } - else -#endif // TARGET_XARCH - { - var_types destMemType = varDscDest->TypeGet(); - -#ifdef TARGET_ARM - bool cycleAllDouble = true; // assume the best - - unsigned iter = begReg; - do - { - if (compiler->lvaGetDesc(regArgTab[iter].varNum)->TypeGet() != TYP_DOUBLE) - { - cycleAllDouble = false; - break; - } - iter = regArgTab[iter].trashBy; - } while (iter != begReg); - - // We may treat doubles as floats for ARM because we could have partial circular - // dependencies of a float with a lo/hi part of the double. We mark the - // trashBy values for each slot of the double, so let the circular dependency - // logic work its way out for floats rather than doubles. If a cycle has all - // doubles, then optimize so that instead of two vmov.f32's to move a double, - // we can use one vmov.f64. - // - if (!cycleAllDouble && destMemType == TYP_DOUBLE) - { - destMemType = TYP_FLOAT; - } -#endif // TARGET_ARM - - if (destMemType == TYP_REF) - { - size = EA_GCREF; - } - else if (destMemType == TYP_BYREF) - { - size = EA_BYREF; - } - else if (destMemType == TYP_DOUBLE) - { - size = EA_8BYTE; - } - else if (destMemType == TYP_FLOAT) - { - size = EA_4BYTE; - } - // HVA types...? 
- - /* move the dest reg (begReg) in the extra reg */ - - assert(xtraReg != REG_NA); - - regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType, compiler->info.compCallConv); - GetEmitter()->emitIns_Mov(insCopy, size, xtraReg, begRegNum, /* canSkip */ false); - assert(!genIsValidIntReg(xtraReg) || !genIsValidFloatReg(begRegNum)); - - regSet.verifyRegUsed(xtraReg); - - *pXtraRegClobbered = true; - /* start moving everything to its right place */ +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) +struct RegNode; - while (srcReg != begReg) - { - /* mov dest, src */ +struct RegNodeEdge +{ + RegNodeEdge* nextOutgoing; + RegNodeEdge* nextIncoming; + RegNode* from; + RegNode* to; + unsigned destOffset; + var_types type; +}; + +struct RegNode +{ + regNumber reg; + regNumber copiedReg; + var_types type; + RegNodeEdge* incoming; + RegNodeEdge* outgoing; + RegNode* next; +#ifdef DEBUG + bool wrote = false; +#endif +}; - regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType, compiler->info.compCallConv); - regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType, compiler->info.compCallConv); +class RegGraph +{ + Compiler* m_comp; + ArrayStack<RegNode*> m_nodes; - GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, srcRegNum, /* canSkip */ false); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(srcRegNum)); +public: + RegGraph(Compiler* compiler) : m_comp(compiler), m_nodes(compiler->getAllocator(CMK_Codegen)) + { + } - regSet.verifyRegUsed(destRegNum); + unsigned NumEdges = 0; - /* mark 'src' as processed */ - noway_assert(srcReg < argMax); - regArgTab[srcReg].processed = true; -#ifdef TARGET_ARM - if (size == EA_8BYTE) - regArgTab[srcReg + 1].processed = true; -#endif - regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType); + RegNode* GetOrAdd(regNumber reg, var_types type) + { + assert(type != TYP_STRUCT); - /* move to the next pair */ - destReg = srcReg; - srcReg = regArgTab[srcReg].trashBy; + for (int i = 0; i < 
m_nodes.Height(); i++) + { + RegNode* node = m_nodes.Bottom(i); + if (node->reg == reg) + { + return node; + } - varDscDest = varDscSrc; - destMemType = varDscDest->TypeGet(); #ifdef TARGET_ARM - if (!cycleAllDouble && destMemType == TYP_DOUBLE) - { - destMemType = TYP_FLOAT; - } + if ((node->type == TYP_DOUBLE) && (reg == REG_NEXT(node->reg))) + { + return node; + } #endif - varNumSrc = regArgTab[srcReg].varNum; - varDscSrc = compiler->lvaGetDesc(varNumSrc); - noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg); - - if (destMemType == TYP_REF) - { - size = EA_GCREF; - } - else if (destMemType == TYP_DOUBLE) - { - size = EA_8BYTE; - } - else - { - size = EA_4BYTE; - } - } + } - /* take care of the beginning register */ + RegNode* node = new (m_comp, CMK_Codegen) RegNode; + node->reg = reg; + node->copiedReg = REG_NA; + node->type = type; + node->incoming = nullptr; + node->outgoing = nullptr; - noway_assert(srcReg == begReg); + m_nodes.Push(node); + return node; + } - /* move the dest reg (begReg) in the extra reg */ + void AddEdge(RegNode* from, RegNode* to, var_types type, unsigned destOffset) + { + assert(type != TYP_STRUCT); + RegNodeEdge* edge = new (m_comp, CMK_Codegen) RegNodeEdge; + edge->from = from; + edge->to = to; + edge->type = type; + edge->destOffset = destOffset; - regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType, compiler->info.compCallConv); + edge->nextOutgoing = from->outgoing; + from->outgoing = edge; - GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, xtraReg, /* canSkip */ false); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(xtraReg)); + edge->nextIncoming = to->incoming; + to->incoming = edge; - regSet.verifyRegUsed(destRegNum); - /* mark the beginning register as processed */ + NumEdges++; + } - regArgTab[srcReg].processed = true; -#ifdef TARGET_ARM - if (size == EA_8BYTE) - regArgTab[srcReg + 1].processed = true; -#endif - regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType); + 
RegNodeEdge* FindEdgeToHandle() + { + RegNodeEdge* lastEdge = nullptr; + for (int i = 0; i < m_nodes.Height(); i++) + { + RegNode* reg = m_nodes.Bottom(i); + for (RegNodeEdge* edge = reg->outgoing; edge != nullptr; edge = edge->nextOutgoing) + { + lastEdge = edge; + // If going to a register without any conflicts then it's easy. + if (edge->to->outgoing == nullptr) + { + return edge; + } } } + + return lastEdge; } -#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) - // If we saw any hfa conflicts, handle those now. - // - if (hasHfaConflict) + void RemoveEdge(RegNodeEdge* edge) { - // Up above we noticed that there was at least one non-slot-1 HFA arg whose - // destination register was the same as the arg register. - // - // For example, say an HFA was passed as s0-s3 and the destination was v3. - // s3 is in the right register, but not in the right slot in the register. - // - // We handle this by first moving the conflicting part to the right slot - // in the destination (via pass 0 below), and then moving the remaining parts - // to their respective slots (via pass 1). - // - // Note the slot index in the register is one less than value of - // regArgTab[argNum].slot, so a slot-1 hfa arg goes into slot 0 of the destination). - // - // So for the above example, we'd first move the "slot-4" s3 (== v3.s[0]) to v3.s[3]. - // Then we can insert s0 to v3.s[0]) and so on. - // - // We can exempt slot-1 cases as the conflicting part is already in the - // right slot, and code lower down correctly handles populating the remaining slots. 
- // - for (argNum = 0; argNum < argMax; argNum++) + RegNodeEdge** slot = &edge->from->outgoing; + while ((*slot) != edge) { - if (!regArgTab[argNum].hfaConflict) - { - continue; - } + slot = &(*slot)->nextOutgoing; + } - varNum = regArgTab[argNum].varNum; - varDsc = compiler->lvaGetDesc(varNum); - const regNumber destRegNum = varDsc->GetRegNum(); - const var_types regType = regArgTab[argNum].type; - const unsigned firstArgNum = argNum - (regArgTab[argNum].slot - 1); - const unsigned lastArgNum = firstArgNum + varDsc->lvHfaSlots() - 1; + *slot = edge->nextOutgoing; - assert(varDsc->lvIsHfa()); - assert((argNum >= firstArgNum) && (argNum <= lastArgNum)); - assert(destRegNum == genMapRegArgNumToRegNum(argNum, regType, compiler->info.compCallConv)); + slot = &edge->to->incoming; + while ((*slot) != edge) + { + slot = &(*slot)->nextIncoming; + } - // Pass 0: move the conflicting part; Pass1: insert everything else - // - for (int pass = 0; pass <= 1; pass++) + *slot = edge->nextIncoming; + + NumEdges--; + } + +#ifdef DEBUG + void Dump() + { + printf("%d registers in register parameter interference graph\n", m_nodes.Height()); + for (int i = 0; i < m_nodes.Height(); i++) + { + RegNode* regNode = m_nodes.Bottom(i); + printf(" %s (%s)", getRegName(regNode->reg), varTypeName(regNode->type)); + for (RegNodeEdge* incoming = regNode->incoming; incoming != nullptr; incoming = incoming->nextIncoming) { - for (unsigned currentArgNum = firstArgNum; currentArgNum <= lastArgNum; currentArgNum++) + printf("\n <- %s (%s)", getRegName(incoming->from->reg), varTypeName(incoming->from->type)); + if (incoming->type != incoming->from->type) { - const regNumber regNum = - genMapRegArgNumToRegNum(currentArgNum, regType, compiler->info.compCallConv); - bool insertArg = - ((pass == 0) && (currentArgNum == argNum)) || ((pass == 1) && (currentArgNum != argNum)); + printf(" (edge type: %s)", varTypeName(incoming->type)); + } - if (insertArg) - { - assert(!regArgTab[currentArgNum].processed); - - 
// EA_4BYTE is probably wrong here (and below) - // todo -- suppress self move - GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, regNum, - regArgTab[currentArgNum].slot - 1, 0); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(regNum)); - regArgTab[currentArgNum].processed = true; - regArgMaskLive &= ~genRegMask(regNum); - } + if (incoming->destOffset != 0) + { + printf(" (offset: %d)", incoming->destOffset); } } + + printf("\n"); } } -#endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD) +#endif +}; - /* Finally take care of the remaining arguments that must be enregistered */ - while (regArgMaskLive) - { - regMaskTP regArgMaskLiveSave = regArgMaskLive; +// ----------------------------------------------------------------------------- +// genParamStackStoreType: Get the type to use to store part of a parameter +// passed in a register to stack. +// +// Parameters: +// dsc - The parameter +// seg - The segment that is being stored to stack +// +// Return Value: +// Suitable type for the store. 
+// +var_types CodeGen::genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg) +{ + assert(seg.IsPassedInRegister()); - for (argNum = 0; argNum < argMax; argNum++) + switch (dsc->TypeGet()) + { + case TYP_BYREF: + case TYP_REF: + assert((seg.Offset == 0) && (seg.Size == TARGET_POINTER_SIZE)); + return dsc->TypeGet(); + case TYP_STRUCT: { - /* If already processed go to the next one */ - if (regArgTab[argNum].processed) + if (genIsValidFloatReg(seg.GetRegister())) { - continue; + return seg.GetRegisterType(); } - if (regArgTab[argNum].slot == 0) - { // Not a register argument - continue; + ClassLayout* layout = dsc->GetLayout(); + assert(seg.Offset < layout->GetSize()); + if (((seg.Offset % TARGET_POINTER_SIZE) == 0) && (seg.Size == TARGET_POINTER_SIZE)) + { + return layout->GetGCPtrType(seg.Offset / TARGET_POINTER_SIZE); } - varNum = regArgTab[argNum].varNum; - varDsc = compiler->lvaGetDesc(varNum); - const var_types regType = regArgTab[argNum].type; - const regNumber regNum = genMapRegArgNumToRegNum(argNum, regType, compiler->info.compCallConv); - const var_types varRegType = varDsc->GetRegisterType(); +#ifdef TARGET_ARM64 + // We round struct sizes up to TYP_I_IMPL on the stack frame so we can + // always store the full register here. This allows us to use stp more + // often. + return TYP_I_IMPL; +#else + return genActualType(seg.GetRegisterType()); +#endif + } + default: + { + return genActualType(seg.GetRegisterType()); + } + } +} -#if defined(UNIX_AMD64_ABI) - if (regType == TYP_UNDEF) - { - // This could happen if the reg in regArgTab[argNum] is of the other register file - - // for System V register passed structs where the first reg is GPR and the second an XMM reg. - // The next register file processing will process it. 
- regArgMaskLive &= ~genRegMask(regNum); - continue; - } -#endif // defined(UNIX_AMD64_ABI) +// ----------------------------------------------------------------------------- +// genSpillOrAddRegisterParam: Handle a register parameter either by homing it +// to stack immediately, or by adding it to the register graph. +// +// Parameters: +// graph - The register graph to add register-to-register moves to +// lclNum - The local (a parameter or a promoted field of one) to handle +// +void CodeGen::genSpillOrAddRegisterParam(RegGraph* graph, unsigned lclNum) +{ + regMaskTP paramRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); - noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); -#ifdef TARGET_X86 - // On x86 we don't enregister args that are not pointer sized. - noway_assert(genTypeSize(varDsc->GetStackSlotHomeType()) == TARGET_POINTER_SIZE); -#endif // TARGET_X86 + unsigned baseOffset = varDsc->lvIsStructField ? varDsc->lvFldOffset : 0; + unsigned size = varDsc->lvExactSize(); - noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular); + unsigned paramLclNum = varDsc->lvIsStructField ? 
varDsc->lvParentLcl : lclNum; + LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); + const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[paramLclNum]; + for (unsigned i = 0; i < abiInfo.NumSegments; i++) + { + const ABIPassingSegment& seg = abiInfo.Segments[i]; + if (!seg.IsPassedInRegister() || ((paramRegs & genRegMask(seg.GetRegister())) == 0)) + { + continue; + } - /* Register argument - hopefully it stays in the same register */ - regNumber destRegNum = REG_NA; - var_types destMemType = varDsc->GetRegisterType(); + if (seg.Offset + seg.Size <= baseOffset) + { + continue; + } - if (regArgTab[argNum].slot == 1) - { - destRegNum = varDsc->GetRegNum(); + if (baseOffset + size <= seg.Offset) + { + continue; + } -#ifdef TARGET_ARM - if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed) - { - // The second half of the double has already been processed! Treat this as a single. - destMemType = TYP_FLOAT; - } -#endif // TARGET_ARM - } -#ifndef TARGET_64BIT - else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG) + if (varDsc->lvOnFrame && (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)) + { + var_types storeType = genParamStackStoreType(paramVarDsc, seg); + if ((varDsc->TypeGet() != TYP_STRUCT) && (genTypeSize(genActualType(varDsc)) < genTypeSize(storeType))) { - assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE); - if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) - { - destRegNum = regNum; - } - else - { - destRegNum = varDsc->GetOtherReg(); - } - - assert(destRegNum != REG_STK); + // Can happen for struct fields due to padding. + storeType = genActualType(varDsc); } - else - { - assert(regArgTab[argNum].slot == 2); - assert(destMemType == TYP_DOUBLE); - - // For doubles, we move the entire double using the argNum representing - // the first half of the double. 
There are two things we won't do: - // (1) move the double when the 1st half of the destination is free but the - // 2nd half is occupied, and (2) move the double when the 2nd half of the - // destination is free but the 1st half is occupied. Here we consider the - // case where the first half can't be moved initially because its target is - // still busy, but the second half can be moved. We wait until the entire - // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3, - // and F2 single moving to F16. When we process F0, its target F2 is busy, - // so we skip it on the first pass. When we process F1, its target F3 is - // available. However, we want to move F0/F1 all at once, so we skip it here. - // We process F2, which frees up F2. The next pass through, we process F0 and - // F2/F3 are empty, so we move it. Note that if half of a double is involved - // in a circularity with a single, then we will have already moved that half - // above, so we go ahead and move the remaining half as a single. - // Because there are no circularities left, we are guaranteed to terminate. - - assert(argNum > 0); - assert(regArgTab[argNum - 1].slot == 1); - - if (!regArgTab[argNum - 1].processed) - { - // The first half of the double hasn't been processed; try to be processed at the same time - continue; - } - // The first half of the double has been processed but the second half hasn't! - // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2. - // In that case, there is a F0/F2 loop that is not a double-only loop. The circular - // dependency logic above will move them as singles, leaving just F3 to move. Treat - // it as a single to finish the shuffling. 
+ GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), seg.GetRegister(), lclNum, + seg.Offset - baseOffset); + } - destMemType = TYP_FLOAT; - destRegNum = REG_NEXT(varDsc->GetRegNum()); - } -#endif // !TARGET_64BIT -#if (defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD) - else - { - assert(regArgTab[argNum].slot == 2); - assert(argNum > 0); - assert(regArgTab[argNum - 1].slot == 1); - assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16)); - destRegNum = varDsc->GetRegNum(); - noway_assert(regNum != destRegNum); - continue; - } -#endif // (defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD) - noway_assert(destRegNum != REG_NA); - if (destRegNum != regNum) - { - /* Cannot trash a currently live register argument. - * Skip this one until its target will be free - * which is guaranteed to happen since we have no circular dependencies. */ + if (!varDsc->lvIsInReg()) + { + continue; + } + + var_types varRegType = genActualType(varDsc->GetRegisterType()); + + var_types edgeType = varRegType; + // Some parameters can be passed in multiple registers but enregistered + // in a single one (e.g. SIMD types on arm64). In this case the edges + // we add here represent insertions of each element. + if (seg.Size < genTypeSize(edgeType)) + { + edgeType = seg.GetRegisterType(); + } + + RegNode* sourceReg = graph->GetOrAdd(seg.GetRegister(), seg.GetRegisterType()); + RegNode* destReg = graph->GetOrAdd(varDsc->GetRegNum(), varRegType); - regMaskTP destMask = genRegMask(destRegNum); #ifdef TARGET_ARM - // Don't process the double until both halves of the destination are clear. 
- if (genActualType(destMemType) == TYP_DOUBLE) - { - assert((destMask & RBM_ALLDOUBLE) != 0); - destMask |= genRegMask(REG_NEXT(destRegNum)); - } + if (destReg->reg != varDsc->GetRegNum()) + { + assert(varDsc->TypeGet() == TYP_FLOAT); + assert(varDsc->GetRegNum() == REG_NEXT(destReg->reg)); + graph->AddEdge(sourceReg, destReg, edgeType, 4); + continue; + } #endif - if (destMask & regArgMaskLive) - { - continue; - } + if ((sourceReg != destReg) || (baseOffset != seg.Offset)) + { + graph->AddEdge(sourceReg, destReg, edgeType, seg.Offset - baseOffset); + } + } +} + +// ----------------------------------------------------------------------------- +// genHomeRegisterParams: Move all register arguments to their initial assigned +// location. +// +// Parameters: +// initReg - A register that this method should communicate if it trashes +// initRegStillZeroed - [out] whether or not initReg is still zeroed +// +void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genHomeRegisterParams()\n"); + } +#endif - /* Move it to the new register */ + regMaskTP paramRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + if (compiler->opts.OptimizationDisabled()) + { + // All registers are going to frame + for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) + { + LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); - emitAttr size = emitActualTypeSize(destMemType); + if (!lclDsc->lvOnFrame) + { + continue; + } -#if defined(TARGET_ARM64) - if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2) + const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[lclNum]; + for (unsigned i = 0; i < abiInfo.NumSegments; i++) + { + const ABIPassingSegment& seg = abiInfo.Segments[i]; + if (seg.IsPassedInRegister() && ((paramRegs & genRegMask(seg.GetRegister())) != 0)) { - // For a SIMD type that is passed 
in two integer registers, - // Limit the copy below to the first 8 bytes from the first integer register. - // Handle the remaining 8 bytes from the second slot in the code further below - assert(EA_SIZE(size) >= 8); - size = EA_8BYTE; + var_types storeType = genParamStackStoreType(lclDsc, seg); + GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), seg.GetRegister(), + lclNum, seg.Offset); } -#endif - inst_Mov(destMemType, destRegNum, regNum, /* canSkip */ false, size); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(regNum)); } + } + + return; + } - /* mark the argument as processed */ + // We build an interference graph where each node represents a register, + // and an edge regX -> regY represents moving (part of) register X to (part + // of) register Y. Note that in the general case each register can have + // multiple incoming edges. Examples: + // 1. On arm32 float registers overlay the double registers. If a double is passed in d0 (i.e. s0 and s1) + // we can have two float params going into s0 and s1, giving two incoming edges + // 2. On arm64/SysV x64 SIMD types can be passed in multiple registers but + // enregistered in a single vector register + // Currently we never have multiple outgoing edges but one could imagine + // this situation if we allowed promotion when fields didn't fit exactly on + // top of the underlying registers. TODO-CQ: Lift this restriction and + // implement the support for this. + RegGraph graph(compiler); - assert(!regArgTab[argNum].processed); - regArgTab[argNum].processed = true; - regArgMaskLive &= ~genRegMask(regNum); -#if FEATURE_MULTIREG_ARGS - int argRegCount = 1; #ifdef TARGET_ARM - if (genActualType(destMemType) == TYP_DOUBLE) + // Float and double registers overlap on arm32. When we have double + // parameters, we want to represent those as double nodes in the graph. Pre + // create them here to ensure a previous float parameter doesn't create an + // overlapping float node. 
+ for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) + { + const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[lclNum]; + for (unsigned i = 0; i < abiInfo.NumSegments; i++) + { + const ABIPassingSegment& seg = abiInfo.Segments[i]; + + if (seg.IsPassedInRegister() && genIsValidFloatReg(seg.GetRegister()) && (seg.Size == 8)) { - argRegCount = 2; + graph.GetOrAdd(seg.GetRegister(), TYP_DOUBLE); } + } + } #endif -#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) - if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2) - { - argRegCount = 2; - int nextArgNum = argNum + 1; - regNumber nextRegNum = - genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type, compiler->info.compCallConv); - noway_assert(regArgTab[nextArgNum].varNum == varNum); - // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg - // and moves the 0th element of the src reg into the 1st element of the dest reg. - GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varRegType), destRegNum, nextRegNum, 0); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(nextRegNum)); - // Set destRegNum to regNum so that we skip the setting of the register below, - // but mark argNum as processed and clear regNum from the live mask. - destRegNum = regNum; - } -#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#ifdef TARGET_ARMARCH - if (varDsc->lvIsHfa()) - { - // This includes both fixed-size SIMD types that are independently promoted, as well - // as other HFA structs. - argRegCount = varDsc->lvHfaSlots(); - if (argNum < (argMax - argRegCount + 1)) - { - if (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT) - { - // For an HFA type that is passed in multiple registers and promoted, we copy each field to its - // destination register. 
- for (int i = 0; i < argRegCount; i++) - { - int nextArgNum = argNum + i; - LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); - regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type, - compiler->info.compCallConv); - destRegNum = fieldVarDsc->GetRegNum(); - noway_assert(regArgTab[nextArgNum].varNum == varNum); - noway_assert(genIsValidFloatReg(nextRegNum)); - noway_assert(genIsValidFloatReg(destRegNum)); - GetEmitter()->emitIns_Mov(INS_mov, EA_8BYTE, destRegNum, nextRegNum, /* canSkip */ false); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(nextRegNum)); - } - } -#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) - else - { - // For a SIMD type that is passed in multiple registers but enregistered as a vector, - // the code above copies the first argument register into the lower 4 or 8 bytes - // of the target register. Here we must handle the subsequent fields by - // inserting them into the upper bytes of the target SIMD floating point register. - argRegCount = varDsc->lvHfaSlots(); - for (int i = 1; i < argRegCount; i++) - { - int nextArgNum = argNum + i; - regArgElem* nextArgElem = &regArgTab[nextArgNum]; - var_types nextArgType = nextArgElem->type; - regNumber nextRegNum = - genMapRegArgNumToRegNum(nextArgNum, nextArgType, compiler->info.compCallConv); - noway_assert(nextArgElem->varNum == varNum); - noway_assert(genIsValidFloatReg(nextRegNum)); - noway_assert(genIsValidFloatReg(destRegNum)); - GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, nextRegNum, i, 0); - assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(nextRegNum)); - } - } -#endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD) - } - } -#endif // TARGET_ARMARCH - // Mark the rest of the argument registers corresponding to this multi-reg type as - // being processed and no longer live. 
- for (int regSlot = 1; regSlot < argRegCount; regSlot++) + for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) + { + LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); + + if (compiler->lvaGetPromotionType(lclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + for (unsigned fld = 0; fld < lclDsc->lvFieldCnt; fld++) { - int nextArgNum = argNum + regSlot; - assert(!regArgTab[nextArgNum].processed); - regArgTab[nextArgNum].processed = true; - regNumber nextRegNum = - genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type, compiler->info.compCallConv); - regArgMaskLive &= ~genRegMask(nextRegNum); + unsigned fieldLclNum = lclDsc->lvFieldLclStart + fld; + genSpillOrAddRegisterParam(&graph, fieldLclNum); } -#endif // FEATURE_MULTIREG_ARGS + } + else + { + genSpillOrAddRegisterParam(&graph, lclNum); + } + } + + DBEXEC(VERBOSE, graph.Dump()); + + // TODO: + // - handle per register node instead of per edge to ensure destOffset == 0 + // case always happens first below + // - arm32 insertions when a float is allocated to the second register of a double + // - cycle handling + // None of these problems seem to be hit in any tests, so we need to add + // some stress modes for this. + while (graph.NumEdges > 0) + { + RegNodeEdge* edge = graph.FindEdgeToHandle(); + assert(edge != nullptr); + if ((edge->to->outgoing != nullptr) && (edge->to->copiedReg == REG_NA)) + { + assert(!"Circular arg"); + } + + regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; + + if (edge->destOffset != 0) + { + INDEBUG(edge->to->wrote = true); +#if defined(TARGET_ARM64) + // On arm64 SIMD parameters are HFAs and passed in multiple float + // registers while we can enregister them as single registers. 
+ GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(edge->type), edge->to->reg, sourceReg, + edge->destOffset / genTypeSize(edge->type), 0); +#elif defined(UNIX_AMD64_ABI) + // For SysV x64 the only insertion we should have is to offset 8, + // which happens for example for Vector3 which can be passed in + // xmm0[0..8), xmm1[8..12) but enregistered in a single register. + noway_assert(edge->destOffset == 8); + // The shufpd here picks the first 8 bytes from the dest register + // to go in the lower half, and the first 8 bytes from the source + // register to go in the upper half. + GetEmitter()->emitIns_R_R_I(INS_shufpd, EA_16BYTE, edge->to->reg, sourceReg, 0); +#else + noway_assert(!"Insertion into register is not supported"); +#endif + } + else + { + assert(!edge->to->wrote); + instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), edge->to->reg, sourceReg, + /* canSkip */ true); + INDEBUG(edge->to->wrote = true); } - noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop + graph.RemoveEdge(edge); } } -#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64 - -#ifdef _PREFAST_ -#pragma warning(pop) #endif /***************************************************************************** @@ -6148,14 +5209,6 @@ void CodeGen::genFnProlog() intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM; } -#ifdef SWIFT_SUPPORT - if ((compiler->lvaSwiftSelfArg != BAD_VAR_NUM) && ((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_SELF) != 0)) - { - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SWIFT_SELF, compiler->lvaSwiftSelfArg, 0); - intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_SELF; - } -#endif - // // Zero out the frame as needed // @@ -6271,56 +5324,10 @@ void CodeGen::genFnProlog() { compiler->lvaUpdateArgsWithInitialReg(); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if 
(intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn) + if ((intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn) != RBM_NONE) { - initRegZeroed = false; - genFnPrologCalleeRegArgs(); + genHomeRegisterParams(initReg, &initRegZeroed); } -#else - auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) { - if (regState->rsCalleeRegArgMaskLiveIn) - { - // If we need an extra register to shuffle around the incoming registers - // we will use xtraReg (initReg) and set the xtraRegClobbered flag, - // if we don't need to use the xtraReg then this flag will stay false - // - regNumber xtraReg; - bool xtraRegClobbered = false; - - if (genRegMask(initReg) & RBM_ARG_REGS) - { - xtraReg = initReg; - } - else - { - xtraReg = REG_SCRATCH; - initRegZeroed = false; - } - - genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState); - - if (xtraRegClobbered) - { - initRegZeroed = false; - } - } - }; - -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) - // Handle float parameters first; in the presence of struct promotion - // we can have parameters that are homed into float registers but - // passed in integer registers. So make sure we get those out of the - // integer registers before we potentially override those as part of - // handling integer parameters. - - assignIncomingRegisterArgs(&floatRegState); - assignIncomingRegisterArgs(&intRegState); -#else - assignIncomingRegisterArgs(&intRegState); -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 - -#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 // Home the incoming arguments. 
genEnregisterIncomingStackArgs(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index a99199aedc634c..cc001198bd9074 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -7984,9 +7984,10 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } } -void CodeGen::genFnPrologCalleeRegArgs() +void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) { assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); + *initRegStillZeroed = false; regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 87745fabe3e04b..3d11d05627abbc 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -8064,8 +8064,9 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } } -void CodeGen::genFnPrologCalleeRegArgs() +void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) { + *initRegStillZeroed = false; assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b2d37b9becad9d..3dcf0bbb12b029 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1681,21 +1681,6 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) return false; } - // Avoid allocating parameters that are passed in float regs into integer - // registers. We currently home float registers before integer registers, - // so that kind of enregistration can trash integer registers containing - // other parameters. - // We assume that these cases will be homed to float registers if they are - // promoted. 
- // TODO-CQ: Combine integer and float register homing to handle these kinds - // of conflicts. - if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvIsRegArg && !varDsc->lvPromoted && - varTypeUsesIntReg(varDsc->GetRegisterType()) && genIsValidFloatReg(varDsc->GetArgReg())) - { - compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::IsStructArg)); - return false; - } - // Are we not optimizing and we have exception handlers? // if so mark all args and locals as volatile, so that they // won't ever get enregistered. diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp index f48cfae542cd34..1a464a119ba767 100644 --- a/src/coreclr/jit/targetarm64.cpp +++ b/src/coreclr/jit/targetarm64.cpp @@ -151,7 +151,7 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, { info.NumSegments = slots; info.Segments = new (comp, CMK_ABI) ABIPassingSegment[slots]; - unsigned slotSize = varTypeIsStruct(type) ? TARGET_POINTER_SIZE : genTypeSize(type); + unsigned slotSize = min(passedSize, TARGET_POINTER_SIZE); info.Segments[0] = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize); if (slots == 2) { From bf4308946613385090180fbd8b4793a08547a7e0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Apr 2024 08:15:58 +0200 Subject: [PATCH 02/24] Remove more unnecessary stuff --- src/coreclr/jit/compiler.cpp | 5 ----- src/coreclr/jit/compiler.h | 2 -- src/coreclr/jit/lclvars.cpp | 5 ----- 3 files changed, 12 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 0e75f2073dc5e0..85eb1ac88ed36b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -10912,10 +10912,6 @@ void Compiler::EnregisterStats::RecordLocal(const LclVarDsc* varDsc) m_simdUserForcesDep++; break; - case DoNotEnregisterReason::NonStandardParameter: - m_nonStandardParameter++; - break; - default: unreached(); break; @@ -11043,7 +11039,6 @@ void Compiler::EnregisterStats::Dump(FILE* fout) 
const PRINT_STATS(m_returnSpCheck, notEnreg); PRINT_STATS(m_callSpCheck, notEnreg); PRINT_STATS(m_simdUserForcesDep, notEnreg); - PRINT_STATS(m_nonStandardParameter, notEnreg); fprintf(fout, "\nAddr exposed details:\n"); if (m_addrExposed == 0) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8615784b6ab1d8..f6cb39129d8844 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -464,7 +464,6 @@ enum class DoNotEnregisterReason CallSpCheck, // the local is used to do SP check on every call SimdUserForcesDep, // a promoted struct was used by a SIMD/HWI node; it must be dependently promoted HiddenBufferStructArg, // the argument is a hidden return buffer passed to a method. - NonStandardParameter, // local is a parameter that is passed in a register unhandled by genFnPrologCalleeRegArgs }; enum class AddressExposedReason @@ -10668,7 +10667,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX unsigned m_returnSpCheck; unsigned m_callSpCheck; unsigned m_simdUserForcesDep; - unsigned m_nonStandardParameter; unsigned m_liveInOutHndlr; unsigned m_depField; unsigned m_noRegVars; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 50997980ca7488..b69fcc669d26e0 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1401,7 +1401,6 @@ bool Compiler::lvaInitSpecialSwiftParam(InitVarDscInfo* varDscInfo, CorInfoType compArgSize += TARGET_POINTER_SIZE; lvaSwiftSelfArg = varDscInfo->varNum; - lvaSetVarDoNotEnregister(lvaSwiftSelfArg DEBUGARG(DoNotEnregisterReason::NonStandardParameter)); return true; } @@ -3003,10 +3002,6 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregister JITDUMP("Promoted struct used by a SIMD/HWI node\n"); break; - case DoNotEnregisterReason::NonStandardParameter: - JITDUMP("Non-standard parameter\n"); - break; - default: unreached(); break; From 803b86c18499d81c9db338225ec8262ddb70708b Mon Sep 17 00:00:00 
2001 From: Jakob Botsch Nielsen Date: Wed, 3 Apr 2024 08:24:53 +0200 Subject: [PATCH 03/24] Clean up --- src/coreclr/jit/codegencommon.cpp | 10 +++++----- src/coreclr/jit/codegenloongarch64.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8db766515df23c..2d5180b7034618 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3044,10 +3044,10 @@ var_types CodeGen::genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegmen // to stack immediately, or by adding it to the register graph. // // Parameters: -// initReg - A register that this method should communicate if it trashes -// initRegStillZeroed - [out] whether or not xtraReg is still zeroed +// lclNum - Parameter local (or field of it) +// graph - The register graph to add to // -void CodeGen::genSpillOrAddRegisterParam(RegGraph* graph, unsigned lclNum) +void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) { regMaskTP paramRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); @@ -3214,12 +3214,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) for (unsigned fld = 0; fld < lclDsc->lvFieldCnt; fld++) { unsigned fieldLclNum = lclDsc->lvFieldLclStart + fld; - genSpillOrAddRegisterParam(&graph, fieldLclNum); + genSpillOrAddRegisterParam(fieldLclNum, &graph); } } else { - genSpillOrAddRegisterParam(&graph, lclNum); + genSpillOrAddRegisterParam(lclNum, &graph); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index cc001198bd9074..9dee368b6fc583 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -7986,8 +7986,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) { 
- assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); *initRegStillZeroed = false; + assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; From 843c1c8c07f56820e73308b3513c70581d33b43d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Apr 2024 08:42:50 +0200 Subject: [PATCH 04/24] Handle by register instead of edge --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegencommon.cpp | 99 +++++++++++++++++-------------- 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 83ff3023b4b383..2a09b1f3d19f93 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -268,7 +268,7 @@ class CodeGen final : public CodeGenInterface void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); var_types genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg); - void genSpillOrAddRegisterParam(class RegGraph* graph, unsigned lclNum); + void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph); void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2d5180b7034618..ffe9efb464b4a1 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2868,8 +2868,6 @@ class RegGraph { } - unsigned NumEdges = 0; - RegNode* GetOrAdd(regNumber reg, var_types type) { assert(type != TYP_STRUCT); @@ -2915,49 +2913,45 @@ class RegGraph edge->nextIncoming = to->incoming; to->incoming = edge; - - NumEdges++; } - RegNodeEdge* FindEdgeToHandle() + RegNode* FindNodeToHandle() { - RegNodeEdge* lastEdge = nullptr; + RegNode* 
lastNode = nullptr; for (int i = 0; i < m_nodes.Height(); i++) { RegNode* reg = m_nodes.Bottom(i); - for (RegNodeEdge* edge = reg->outgoing; edge != nullptr; edge = edge->nextOutgoing) + if (reg->incoming == nullptr) { - lastEdge = edge; - // If going to a register without any conflicts then it's easy. - if (edge->to->outgoing == nullptr) - { - return edge; - } + continue; } + + if (reg->outgoing == nullptr) + { + return reg; + } + + lastNode = reg; } - return lastEdge; + return lastNode; } - void RemoveEdge(RegNodeEdge* edge) + void RemoveIncomingEdges(RegNode* node) { - RegNodeEdge** slot = &edge->from->outgoing; - while ((*slot) != edge) + for (RegNodeEdge* edge = node->incoming; edge != nullptr; edge = edge->nextIncoming) { - slot = &(*slot)->nextOutgoing; - } - - *slot = edge->nextOutgoing; + // Unlink from source. + RegNodeEdge** slot = &edge->from->outgoing; + while ((*slot) != edge) + { + slot = &(*slot)->nextOutgoing; + } - slot = &edge->to->incoming; - while ((*slot) != edge) - { - slot = &(*slot)->nextIncoming; + *slot = edge->nextOutgoing; } - *slot = edge->nextIncoming; - - NumEdges--; + node->incoming = nullptr; } #ifdef DEBUG @@ -3232,20 +3226,44 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // - cycle handling // None of these problems seem to be hit in any tests, so we need to add // some stress modes for this. - while (graph.NumEdges > 0) + while (true) { - RegNodeEdge* edge = graph.FindEdgeToHandle(); - assert(edge != nullptr); - if ((edge->to->outgoing != nullptr) && (edge->to->copiedReg == REG_NA)) + RegNode* node = graph.FindNodeToHandle(); + if (node == nullptr) + { + break; + } + + if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA)) { assert(!"Circular arg"); } - regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; + // First handle edges that aren't insertions. We clobber the full register for these edges. 
+ for (RegNodeEdge* edge = node->incoming; edge != nullptr; edge = edge->nextIncoming) + { + if (edge->destOffset != 0) + { + continue; + } + + regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; + instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), edge->to->reg, sourceReg, + /* canSkip */ true); + break; + } - if (edge->destOffset != 0) + // Next handle all insertions. + for (RegNodeEdge* edge = node->incoming; edge != nullptr; edge = edge->nextIncoming) { - INDEBUG(edge->to->wrote = true); + if (edge->destOffset == 0) + { + continue; + } + + regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; + #if defined(TARGET_ARM64) // On arm64 SIMD parameters are HFAs and passed in multiple float // registers while we can enregister them as single registers. @@ -3256,6 +3274,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // which happens for example for Vector3 which can be passed in // xmm0[0..8), xmm1[8..12) but enregistered in a single register. noway_assert(edge->destOffset == 8); + assert(genIsValidFloatReg(edge->to->reg)); // The shufpd here picks the first 8 bytes from the dest register // to go in the lower half, and the second 8 bytes from the source // register to go in the upper half. 
@@ -3264,16 +3283,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) noway_assert(!"Insertion into register is not supported"); #endif } - else - { - assert(!edge->to->wrote); - instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); - GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), edge->to->reg, sourceReg, - /* canSkip */ true); - INDEBUG(edge->to->wrote = true); - } - graph.RemoveEdge(edge); + graph.RemoveIncomingEdges(node); } } #endif From 0b445fb4fa348be46aceab81c3490bc074ad452e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Apr 2024 18:23:05 +0200 Subject: [PATCH 05/24] Handle circularity v1 --- src/coreclr/jit/codegencommon.cpp | 36 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ffe9efb464b4a1..69917598e912b9 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2837,7 +2837,6 @@ struct RegNode; struct RegNodeEdge { - RegNodeEdge* nextOutgoing; RegNodeEdge* nextIncoming; RegNode* from; RegNode* to; @@ -2853,9 +2852,6 @@ struct RegNode RegNodeEdge* incoming; RegNodeEdge* outgoing; RegNode* next; -#ifdef DEBUG - bool wrote = false; -#endif }; class RegGraph @@ -2908,7 +2904,8 @@ class RegGraph edge->type = type; edge->destOffset = destOffset; - edge->nextOutgoing = from->outgoing; + // We currently never have multiple outgoing edges. + assert(from->outgoing == nullptr); from->outgoing = edge; edge->nextIncoming = to->incoming; @@ -2937,18 +2934,13 @@ class RegGraph return lastNode; } - void RemoveIncomingEdges(RegNode* node) + void RemoveIncomingEdges(RegNode* node, regMaskTP* busyRegs) { for (RegNodeEdge* edge = node->incoming; edge != nullptr; edge = edge->nextIncoming) { // Unlink from source. 
- RegNodeEdge** slot = &edge->from->outgoing; - while ((*slot) != edge) - { - slot = &(*slot)->nextOutgoing; - } - - *slot = edge->nextOutgoing; + assert(edge->from->outgoing == edge); + edge->from->outgoing = nullptr; } node->incoming = nullptr; @@ -3226,6 +3218,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // - cycle handling // None of these problems seem to be hit in any tests, so we need to add // some stress modes for this. + regMaskTP busyRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; while (true) { RegNode* node = graph.FindNodeToHandle(); @@ -3236,7 +3229,19 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA)) { - assert(!"Circular arg"); + var_types copyType = node->outgoing->type; + if (varTypeUsesFloatReg(copyType)) + { + node->copiedReg = genFirstRegNumFromMask(RBM_FLT_CALLEE_TRASH & ~busyRegs); + } + else + { + node->copiedReg = genFirstRegNumFromMask(RBM_INT_CALLEE_TRASH & ~busyRegs); + } + + busyRegs |= genRegMask(node->copiedReg); + instruction ins = ins_Copy(node->reg, copyType); + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false); } // First handle edges that aren't insertions. We clobber the full register for these edges. 
@@ -3284,7 +3289,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) #endif } - graph.RemoveIncomingEdges(node); + graph.RemoveIncomingEdges(node, &busyRegs); + busyRegs |= genRegMask(node->reg); } } #endif From b04c7a140b67a45cbe3ad91e840f8a58095203ca Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Apr 2024 18:53:08 +0200 Subject: [PATCH 06/24] Simplify, remove args when no longer busy --- src/coreclr/jit/codegencommon.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 69917598e912b9..2cc722d5fee7fb 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2941,6 +2941,7 @@ class RegGraph // Unlink from source. assert(edge->from->outgoing == edge); edge->from->outgoing = nullptr; + *busyRegs &= ~genRegMask(edge->from->reg); } node->incoming = nullptr; @@ -3254,7 +3255,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); - GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), edge->to->reg, sourceReg, + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, /* canSkip */ true); break; } @@ -3272,18 +3273,18 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) #if defined(TARGET_ARM64) // On arm64 SIMD parameters are HFAs and passed in multiple float // registers while we can enregister them as single registers. 
- GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(edge->type), edge->to->reg, sourceReg, + GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(edge->type), node->reg, sourceReg, edge->destOffset / genTypeSize(edge->type), 0); #elif defined(UNIX_AMD64_ABI) // For SysV x64 the only insertions we should have is to offset 8, // which happens for example for Vector3 which can be passed in // xmm0[0..8), xmm1[8..12) but enregistered in a single register. noway_assert(edge->destOffset == 8); - assert(genIsValidFloatReg(edge->to->reg)); + assert(genIsValidFloatReg(node->reg)); // The shufpd here picks the first 8 bytes from the dest register // to go in the lower half, and the second 8 bytes from the source // register to go in the upper half. - GetEmitter()->emitIns_R_R_I(INS_shufpd, EA_16BYTE, edge->to->reg, sourceReg, 0); + GetEmitter()->emitIns_R_R_I(INS_shufpd, EA_16BYTE, node->reg, sourceReg, 0); #else noway_assert(!"Insertion into register is not supported"); #endif From d3ea5d33afaa9b416aef4f8801f6320d150e9a99 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Apr 2024 14:53:02 +0200 Subject: [PATCH 07/24] Handle init reg properly --- src/coreclr/jit/codegencommon.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2cc722d5fee7fb..ff59a7c6e4310c 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3243,6 +3243,10 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false); + if (node->copiedReg == initReg) + { + *initRegStillZeroed = false; + } } // First handle edges that aren't insertions. We clobber the full register for these edges. 
@@ -3292,6 +3296,11 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) graph.RemoveIncomingEdges(node, &busyRegs); busyRegs |= genRegMask(node->reg); + + if (node->reg == initReg) + { + *initRegStillZeroed = false; + } } } #endif From fb3a7fe8cbeadf23bfd440d24e99365516bd8fbc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Apr 2024 22:53:03 +0200 Subject: [PATCH 08/24] Add stress mode --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsra.h | 5 ++++ src/coreclr/jit/lsrabuild.cpp | 51 ++++++++++++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 869db1c5ea3655..647418ada2860d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10128,6 +10128,7 @@ class Compiler STRESS_MODE(PHYSICAL_PROMOTION_COST) \ STRESS_MODE(UNWIND) /* stress unwind info; e.g., create function fragments */ \ STRESS_MODE(OPT_REPEAT) /* stress JitOptRepeat */ \ + STRESS_MODE(INITIAL_PARAM_REG) /* Stress initial register assigned to parameters */ \ \ /* After COUNT_VARN, stress level 2 does all of these all the time */ \ \ diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 7c9bcf8a7e4fe6..06f880a3d79868 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5507,7 +5507,7 @@ void LinearScan::allocateRegisters() Interval* currentInterval = &interval; currentInterval->recentRefPosition = nullptr; currentInterval->isActive = false; - if (currentInterval->isLocalVar) + if (currentInterval->isLocalVar && !stressInitialParamReg()) { LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 2705a93188dad6..1cd5a89407b2ea 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -963,6 +963,11 @@ class LinearScan : 
public LinearScanInterface return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0); } + bool stressInitialParamReg() + { + return compiler->compStressCompile(Compiler::STRESS_INITIAL_PARAM_REG, 25); + } + // Dump support void dumpDefList(); void lsraDumpIntervals(const char* msg); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 86abef939a9d8c..0bfbeadddf114f 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2321,7 +2321,7 @@ void LinearScan::buildIntervals() Interval* interval = getIntervalForLocalVar(varIndex); const var_types regType = argDsc->GetRegisterType(); regMaskTP mask = allRegs(regType); - if (argDsc->lvIsRegArg) + if (argDsc->lvIsRegArg && !stressInitialParamReg()) { // Set this interval as currently assigned to that register regNumber inArgReg = argDsc->GetArgReg(); @@ -2394,6 +2394,55 @@ void LinearScan::buildIntervals() intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; } + if (stressInitialParamReg()) + { + CLRRandom rng; + rng.Init(compiler->info.compMethodHash()); + regMaskTP intRegs = intRegState->rsCalleeRegArgMaskLiveIn; + regMaskTP floatRegs = floatRegState->rsCalleeRegArgMaskLiveIn; + + for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) + { + LclVarDsc* argDsc = compiler->lvaGetDescByTrackedIndex(varIndex); + + if (!argDsc->lvIsParam || !isCandidateVar(argDsc)) + { + continue; + } + + Interval* interval = getIntervalForLocalVar(varIndex); + + regMaskTP* regs; + if (interval->registerType == FloatRegisterType) + { + regs = &floatRegs; + } + else + { + regs = &intRegs; + } + + // Select a random register from all possible parameter registers + // (of the right type). Preference this parameter to that register. 
+ unsigned numBits = BitOperations::PopCount(*regs); + if (numBits == 0) + { + continue; + } + + int bitIndex = rng.Next((int)numBits); + regNumber prefReg = REG_NA; + regMaskTP regsLeft = *regs; + for (int i = 0; i <= bitIndex; i++) + { + prefReg = genFirstRegNumFromMaskAndToggle(regsLeft); + } + + *regs &= ~genRegMask(prefReg); + interval->mergeRegisterPreferences(genRegMask(prefReg)); + } + } + numPlacedArgLocals = 0; placedArgRegs = RBM_NONE; From 7926f1e74639755a945ed09141b10f4453357920 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Apr 2024 23:05:37 +0200 Subject: [PATCH 09/24] Fix after merge --- src/coreclr/jit/targetarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp index 9f6ec768a53f66..f95663202456b8 100644 --- a/src/coreclr/jit/targetarm64.cpp +++ b/src/coreclr/jit/targetarm64.cpp @@ -154,7 +154,7 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, { info.NumSegments = slots; info.Segments = new (comp, CMK_ABI) ABIPassingSegment[slots]; - unsigned slotSize = min(passedSize, TARGET_POINTER_SIZE); + unsigned slotSize = min(passedSize, (unsigned)TARGET_POINTER_SIZE); info.Segments[0] = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize); if (slots == 2) { From 705090d58c185c8df3d67702a82783ccb8868d42 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Apr 2024 23:22:05 +0200 Subject: [PATCH 10/24] Enable it always for CI run --- src/coreclr/jit/lsra.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 1cd5a89407b2ea..ed54facc040f5f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -965,7 +965,7 @@ class LinearScan : public LinearScanInterface bool stressInitialParamReg() { - return compiler->compStressCompile(Compiler::STRESS_INITIAL_PARAM_REG, 25); + return true; // 
compiler->compStressCompile(Compiler::STRESS_INITIAL_PARAM_REG, 25); } // Dump support From fcaa40a8578105a4ff7cae6ea450370c07547ca1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 5 Apr 2024 11:19:11 +0200 Subject: [PATCH 11/24] Fix after merge --- src/coreclr/jit/codegencommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 106733f9c01442..bf89635ea8ed71 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4079,7 +4079,7 @@ void CodeGen::genHomeSwiftStructParameters(bool handleStack) if ((regState->rsCalleeRegArgMaskLiveIn & regs) != RBM_NONE) { - var_types storeType = seg.GetRegisterStoreType(); + var_types storeType = seg.GetRegisterType(); assert(storeType != TYP_UNDEF); GetEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), seg.GetRegister(), lclNum, seg.Offset); From 0b04efc08566141ca65c77697354677a0cfdac7e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 12:35:51 +0200 Subject: [PATCH 12/24] Make multiple edges for arm32 doubles --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegencommon.cpp | 170 +++++++++++++++++++----------- 2 files changed, 107 insertions(+), 64 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 4e6219fc5453d1..0f5c8e76631d85 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -263,6 +263,7 @@ class CodeGen final : public CodeGenInterface void genEstablishFramePointer(int delta, bool reportUnwindData); void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); + regMaskTP genGetParameterHomingTempRegisterCandidates(); var_types genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg); void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 
bf89635ea8ed71..db6b1624137dc2 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2821,7 +2821,6 @@ struct RegNode { regNumber reg; regNumber copiedReg; - var_types type; RegNodeEdge* incoming; RegNodeEdge* outgoing; RegNode* next; @@ -2837,10 +2836,8 @@ class RegGraph { } - RegNode* GetOrAdd(regNumber reg, var_types type) + RegNode* Get(regNumber reg) { - assert(type != TYP_STRUCT); - for (int i = 0; i < m_nodes.Height(); i++) { RegNode* node = m_nodes.Bottom(i); @@ -2848,23 +2845,23 @@ class RegGraph { return node; } - -#ifdef TARGET_ARM - if ((node->type == TYP_DOUBLE) && (reg == REG_NEXT(node->reg))) - { - return node; - } -#endif } - RegNode* node = new (m_comp, CMK_Codegen) RegNode; - node->reg = reg; - node->copiedReg = REG_NA; - node->type = type; - node->incoming = nullptr; - node->outgoing = nullptr; + return nullptr; + } - m_nodes.Push(node); + RegNode* GetOrAdd(regNumber reg) + { + RegNode* node = Get(reg); + if (node == nullptr) + { + node = new (m_comp, CMK_Codegen) RegNode; + node->reg = reg; + node->copiedReg = REG_NA; + node->incoming = nullptr; + node->outgoing = nullptr; + m_nodes.Push(node); + } return node; } @@ -2888,6 +2885,9 @@ class RegGraph RegNode* FindNodeToHandle() { RegNode* lastNode = nullptr; + + // Prefer a node with no outgoing edges meaning that its value does not + // need to be saved. for (int i = 0; i < m_nodes.Height(); i++) { RegNode* reg = m_nodes.Bottom(i); @@ -2904,6 +2904,30 @@ class RegGraph lastNode = reg; } + if (lastNode == nullptr) + { + return lastNode; + } + + // All remaining nodes have an outgoing edge, so we will need to save + // the value of the register we pick. Prefer to pick one whose outgoing + // edge is the sole incoming edge of the target. This means we can + // always continue with that node after this one, and only need one + // temporary register. 
+ for (int i = 0; i < m_nodes.Height(); i++) + { + RegNode* reg = m_nodes.Bottom(i); + if (reg->incoming == nullptr) + { + continue; + } + + if (reg->outgoing->to->incoming->nextIncoming == nullptr) + { + return reg; + } + } + return lastNode; } @@ -2914,7 +2938,12 @@ class RegGraph // Unlink from source. assert(edge->from->outgoing == edge); edge->from->outgoing = nullptr; - *busyRegs &= ~genRegMask(edge->from->reg); + + // Source no longer has outgoing edges, so its value is no longer + // needed for anything. Make the registers it was occupying + // available. + regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; + *busyRegs &= ~genRegMask(sourceReg); } node->incoming = nullptr; @@ -2927,14 +2956,10 @@ class RegGraph for (int i = 0; i < m_nodes.Height(); i++) { RegNode* regNode = m_nodes.Bottom(i); - printf(" %s (%s)", getRegName(regNode->reg), varTypeName(regNode->type)); + printf(" %s", getRegName(regNode->reg)); for (RegNodeEdge* incoming = regNode->incoming; incoming != nullptr; incoming = incoming->nextIncoming) { - printf("\n <- %s (%s)", getRegName(incoming->from->reg), varTypeName(incoming->from->type)); - if (incoming->type != incoming->from->type) - { - printf(" (edge type: %s)", varTypeName(incoming->type)); - } + printf("\n <- %s", getRegName(incoming->from->reg), varTypeName(incoming->type)); if (incoming->destOffset != 0) { @@ -3046,7 +3071,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) } GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), seg.GetRegister(), lclNum, - seg.Offset - baseOffset); + seg.Offset - baseOffset); } if (!varDsc->lvIsInReg()) @@ -3054,9 +3079,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) continue; } - var_types varRegType = genActualType(varDsc->GetRegisterType()); - - var_types edgeType = varRegType; + var_types edgeType = genActualType(varDsc->GetRegisterType()); // Some parameters can 
be passed in multiple registers but enregistered // in a single one (e.g. SIMD types on arm64). In this case the edges // we add here represent insertions of each element. @@ -3065,21 +3088,22 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) edgeType = seg.GetRegisterType(); } - RegNode* sourceReg = graph->GetOrAdd(seg.GetRegister(), seg.GetRegisterType()); - RegNode* destReg = graph->GetOrAdd(varDsc->GetRegNum(), varRegType); - -#ifdef TARGET_ARM - if (destReg->reg != varDsc->GetRegNum()) - { - assert(varDsc->TypeGet() == TYP_FLOAT); - assert(varDsc->GetRegNum() == REG_NEXT(destReg->reg)); - graph->AddEdge(sourceReg, destReg, edgeType, 4); - continue; - } -#endif + RegNode* sourceReg = graph->GetOrAdd(seg.GetRegister()); + RegNode* destReg = graph->GetOrAdd(varDsc->GetRegNum()); if ((sourceReg != destReg) || (baseOffset != seg.Offset)) { +#ifdef TARGET_ARM + if (edgeType == TYP_DOUBLE) + { + graph->AddEdge(sourceReg, destReg, TYP_FLOAT, seg.Offset - baseOffset); + + sourceReg = graph->GetOrAdd(REG_NEXT(sourceReg->reg)); + destReg = graph->GetOrAdd(REG_NEXT(destReg->reg)); + graph->AddEdge(sourceReg, destReg, TYP_FLOAT, seg.Offset - baseOffset); + continue; + } +#endif graph->AddEdge(sourceReg, destReg, edgeType, seg.Offset - baseOffset); } } @@ -3145,26 +3169,6 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // implement the support for this. RegGraph graph(compiler); -#ifdef TARGET_ARM - // Float and double registers overlap on arm32. When we have double - // parameters, we want to represent those as double nodes in the graph. Pre - // create them here to ensure a previous float parameter doesn't create an - // overlapping float node. 
- for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) - { - const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[lclNum]; - for (unsigned i = 0; i < abiInfo.NumSegments; i++) - { - const ABIPassingSegment& seg = abiInfo.Segments[i]; - - if (seg.IsPassedInRegister() && genIsValidFloatReg(seg.GetRegister()) && (seg.Size == 8)) - { - graph.GetOrAdd(seg.GetRegister(), TYP_DOUBLE); - } - } - } -#endif - for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); @@ -3204,16 +3208,27 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA)) { var_types copyType = node->outgoing->type; + regMaskTP tempRegCandidates = genGetParameterHomingTempRegisterCandidates(); + + tempRegCandidates &= ~busyRegs; + if (varTypeUsesFloatReg(copyType)) { - node->copiedReg = genFirstRegNumFromMask(RBM_FLT_CALLEE_TRASH & ~busyRegs); + regMaskTP availRegs = tempRegCandidates & RBM_ALLFLOAT; + // We should have ensured temporary registers are available in + // genFinalizeFrame. 
+ noway_assert(availRegs != RBM_NONE); + node->copiedReg = genFirstRegNumFromMask(availRegs); + busyRegs |= genRegMaskFloat(node->copiedReg); } else { - node->copiedReg = genFirstRegNumFromMask(RBM_INT_CALLEE_TRASH & ~busyRegs); + regMaskTP availRegs = tempRegCandidates & RBM_ALLINT; + noway_assert(availRegs != RBM_NONE); + node->copiedReg = genFirstRegNumFromMask(availRegs); + busyRegs |= genRegMask(node->copiedReg); } - busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false); if (node->copiedReg == initReg) @@ -3276,6 +3291,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) } } } + +regMaskTP CodeGen::genGetParameterHomingTempRegisterCandidates() +{ + return RBM_CALLEE_TRASH | intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn | regSet.rsGetModifiedRegsMask(); +} + #endif /***************************************************************************** @@ -4544,6 +4565,27 @@ void CodeGen::genFinalizeFrame() regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); } + // Parameter homing may need an additional register to handle conflicts if + // all callee trash registers are used by parameters. + regMaskTP homingCandidates = genGetParameterHomingTempRegisterCandidates(); + if (((homingCandidates & ~intRegState.rsCalleeRegArgMaskLiveIn) & RBM_ALLINT) == RBM_NONE) + { + regMaskTP extraRegMask = RBM_ALLINT & ~homingCandidates; + assert(extraRegMask != RBM_NONE); + regNumber extraReg = genFirstRegNumFromMask(extraRegMask); + JITDUMP("No temporary registers are available for integer parameter homing. 
Adding %s\n", getRegName(extraReg)); + regSet.rsSetRegsModified(genRegMask(extraReg)); + } + + if (((homingCandidates & ~floatRegState.rsCalleeRegArgMaskLiveIn) & RBM_ALLFLOAT) == RBM_NONE) + { + regMaskTP extraRegMask = RBM_ALLFLOAT & ~homingCandidates; + assert(extraRegMask != RBM_NONE); + regNumber extraReg = genFirstRegNumFromMask(extraRegMask); + JITDUMP("No temporary registers are available for float parameter homing. Adding %s\n", getRegName(extraReg)); + regSet.rsSetRegsModified(genRegMask(extraReg)); + } + #ifdef UNIX_AMD64_ABI // On Unix x64 we also save R14 and R15 for ELT profiler hook generation. if (compiler->compIsProfilerHookNeeded()) From 0e20159d37447739666e45e5b0f53b0f36c6517f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 12:36:47 +0200 Subject: [PATCH 13/24] Simplify --- src/coreclr/jit/codegencommon.cpp | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index db6b1624137dc2..57581641e3c5f4 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2904,30 +2904,6 @@ class RegGraph lastNode = reg; } - if (lastNode == nullptr) - { - return lastNode; - } - - // All remaining nodes have an outgoing edge, so we will need to save - // the value of the register we pick. Prefer to pick one whose outgoing - // edge is the sole incoming edge of the target. This means we can - // always continue with that node after this one, and only need one - // temporary register. 
- for (int i = 0; i < m_nodes.Height(); i++) - { - RegNode* reg = m_nodes.Bottom(i); - if (reg->incoming == nullptr) - { - continue; - } - - if (reg->outgoing->to->incoming->nextIncoming == nullptr) - { - return reg; - } - } - return lastNode; } @@ -3189,13 +3165,6 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) DBEXEC(VERBOSE, graph.Dump()); - // TODO: - // - handle per register node instead of per edge to ensure destOffset == 0 - // case always happens first below - // - arm32 insertions when a float is allocated to the second register of a double - // - cycle handling - // None of these problems seem to be hit in any tests, so we need to add - // some stress modes for this. regMaskTP busyRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; while (true) { From 753bdd5cbcc33e02e04bcf74f70207320f8791a0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 12:44:49 +0200 Subject: [PATCH 14/24] Further simplification --- src/coreclr/jit/codegencommon.cpp | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 57581641e3c5f4..f2390f94095b25 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3072,11 +3072,12 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) #ifdef TARGET_ARM if (edgeType == TYP_DOUBLE) { - graph->AddEdge(sourceReg, destReg, TYP_FLOAT, seg.Offset - baseOffset); + assert(seg.Offset == baseOffset); + graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); sourceReg = graph->GetOrAdd(REG_NEXT(sourceReg->reg)); destReg = graph->GetOrAdd(REG_NEXT(destReg->reg)); - graph->AddEdge(sourceReg, destReg, TYP_FLOAT, seg.Offset - baseOffset); + graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); continue; } #endif @@ -3178,25 +3179,16 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* 
initRegStillZeroed) { var_types copyType = node->outgoing->type; regMaskTP tempRegCandidates = genGetParameterHomingTempRegisterCandidates(); - tempRegCandidates &= ~busyRegs; - if (varTypeUsesFloatReg(copyType)) - { - regMaskTP availRegs = tempRegCandidates & RBM_ALLFLOAT; - // We should have ensured temporary registers are available in - // genFinalizeFrame. - noway_assert(availRegs != RBM_NONE); - node->copiedReg = genFirstRegNumFromMask(availRegs); - busyRegs |= genRegMaskFloat(node->copiedReg); - } - else - { - regMaskTP availRegs = tempRegCandidates & RBM_ALLINT; - noway_assert(availRegs != RBM_NONE); - node->copiedReg = genFirstRegNumFromMask(availRegs); - busyRegs |= genRegMask(node->copiedReg); - } + regMaskTP regTypeMask = varTypeUsesFloatReg(copyType) ? RBM_ALLFLOAT : RBM_ALLINT; + regMaskTP availRegs = tempRegCandidates & regTypeMask; + + // We should have ensured temporary registers are available in + // genFinalizeFrame. + noway_assert(availRegs != RBM_NONE); + node->copiedReg = genFirstRegNumFromMask(availRegs); + busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false); From 993e88374448ccca57b4ba061941c696c991bc2f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 12:45:52 +0200 Subject: [PATCH 15/24] Run jit-format --- src/coreclr/jit/codegen.h | 8 ++++---- src/coreclr/jit/codegencommon.cpp | 34 +++++++++++++++++-------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 0f5c8e76631d85..c78f39fe29b94d 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -261,13 +261,13 @@ class CodeGen final : public CodeGenInterface // Prolog functions and data (there are a few exceptions for more generally used things) // - void genEstablishFramePointer(int delta, bool reportUnwindData); - void 
genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); + void genEstablishFramePointer(int delta, bool reportUnwindData); + void genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed); regMaskTP genGetParameterHomingTempRegisterCandidates(); var_types genParamStackStoreType(LclVarDsc* dsc, const ABIPassingSegment& seg); - void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph); - void genEnregisterIncomingStackArgs(); + void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph); + void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); #else diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index f2390f94095b25..127f5d5a701d92 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2832,7 +2832,9 @@ class RegGraph ArrayStack m_nodes; public: - RegGraph(Compiler* compiler) : m_comp(compiler), m_nodes(compiler->getAllocator(CMK_Codegen)) + RegGraph(Compiler* compiler) + : m_comp(compiler) + , m_nodes(compiler->getAllocator(CMK_Codegen)) { } @@ -2855,11 +2857,11 @@ class RegGraph RegNode* node = Get(reg); if (node == nullptr) { - node = new (m_comp, CMK_Codegen) RegNode; - node->reg = reg; + node = new (m_comp, CMK_Codegen) RegNode; + node->reg = reg; node->copiedReg = REG_NA; - node->incoming = nullptr; - node->outgoing = nullptr; + node->incoming = nullptr; + node->outgoing = nullptr; m_nodes.Push(node); } return node; @@ -2876,7 +2878,7 @@ class RegGraph // We currently never have multiple outgoing edges. 
assert(from->outgoing == nullptr); - from->outgoing = edge; + from->outgoing = edge; edge->nextIncoming = to->incoming; to->incoming = edge; @@ -3047,7 +3049,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) } GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), seg.GetRegister(), lclNum, - seg.Offset - baseOffset); + seg.Offset - baseOffset); } if (!varDsc->lvIsInReg()) @@ -3076,7 +3078,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); sourceReg = graph->GetOrAdd(REG_NEXT(sourceReg->reg)); - destReg = graph->GetOrAdd(REG_NEXT(destReg->reg)); + destReg = graph->GetOrAdd(REG_NEXT(destReg->reg)); graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); continue; } @@ -3177,12 +3179,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA)) { - var_types copyType = node->outgoing->type; + var_types copyType = node->outgoing->type; regMaskTP tempRegCandidates = genGetParameterHomingTempRegisterCandidates(); tempRegCandidates &= ~busyRegs; regMaskTP regTypeMask = varTypeUsesFloatReg(copyType) ? RBM_ALLFLOAT : RBM_ALLINT; - regMaskTP availRegs = tempRegCandidates & regTypeMask; + regMaskTP availRegs = tempRegCandidates & regTypeMask; // We should have ensured temporary registers are available in // genFinalizeFrame. 
@@ -3191,7 +3193,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); - GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false); + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, + /* canSkip */ false); if (node->copiedReg == initReg) { *initRegStillZeroed = false; @@ -3206,10 +3209,10 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) continue; } - regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; - instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); + regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; + instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, - /* canSkip */ true); + /* canSkip */ true); break; } @@ -3255,7 +3258,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) regMaskTP CodeGen::genGetParameterHomingTempRegisterCandidates() { - return RBM_CALLEE_TRASH | intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn | regSet.rsGetModifiedRegsMask(); + return RBM_CALLEE_TRASH | intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn | + regSet.rsGetModifiedRegsMask(); } #endif From 85fd6675b531d8517651d03233d1e1db9efbd196 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 12:50:31 +0200 Subject: [PATCH 16/24] Fix release build --- src/coreclr/jit/lsra.h | 4 ++++ src/coreclr/jit/lsrabuild.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ed54facc040f5f..ce42edf8be7e9b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1023,6 
+1023,10 @@ class LinearScan : public LinearScanInterface { return false; } + bool stressInitialParamReg() + { + return false; + } #endif // !DEBUG public: diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index abbd61fd339459..02ca427a97543e 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2420,6 +2420,7 @@ void LinearScan::buildIntervals() intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; } +#ifdef DEBUG if (stressInitialParamReg()) { CLRRandom rng; @@ -2468,6 +2469,7 @@ void LinearScan::buildIntervals() interval->mergeRegisterPreferences(genRegMask(prefReg)); } } +#endif numPlacedArgLocals = 0; placedArgRegs = RBM_NONE; From 196c7d78a37b068b08cda3b55442b55f6c8c0b30 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 13:54:07 +0200 Subject: [PATCH 17/24] Fix RISC-V build --- src/coreclr/jit/codegencommon.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 127f5d5a701d92..12744593fadab5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3256,14 +3256,24 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) } } +#endif + +// ----------------------------------------------------------------------------- +// genGetParameterHomingTempRegisterCandidates: Get the registers that are +// usable during register homing. +// +// Remarks: +// Register homing is expected to take into account that values in some of +// these registers may still be needed. For example because it is the final +// destination register of a parameter, or because a value passed in one of +// these registers is still needed. 
+// regMaskTP CodeGen::genGetParameterHomingTempRegisterCandidates() { return RBM_CALLEE_TRASH | intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn | regSet.rsGetModifiedRegsMask(); } -#endif - /***************************************************************************** * If any incoming stack arguments live in registers, load them. */ From f271cc51e58b24a4f1060b4b45e329ab60c36074 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 14:41:33 +0200 Subject: [PATCH 18/24] Move new stress logic into its own function in LSRA --- src/coreclr/jit/lsra.h | 2 + src/coreclr/jit/lsrabuild.cpp | 104 +++++++++++++++++++--------------- 2 files changed, 61 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ce42edf8be7e9b..9ecef34db3679d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1609,6 +1609,8 @@ class LinearScan : public LinearScanInterface RegisterScore registerScore = NONE); void validateIntervals(); + + void stressSetRandomParameterPreferences(); #endif // DEBUG #if TRACK_LSRA_STATS diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 02ca427a97543e..4ec80f3f9d217f 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2423,51 +2423,7 @@ void LinearScan::buildIntervals() #ifdef DEBUG if (stressInitialParamReg()) { - CLRRandom rng; - rng.Init(compiler->info.compMethodHash()); - regMaskTP intRegs = intRegState->rsCalleeRegArgMaskLiveIn; - regMaskTP floatRegs = floatRegState->rsCalleeRegArgMaskLiveIn; - - for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) - { - LclVarDsc* argDsc = compiler->lvaGetDescByTrackedIndex(varIndex); - - if (!argDsc->lvIsParam || !isCandidateVar(argDsc)) - { - continue; - } - - Interval* interval = getIntervalForLocalVar(varIndex); - - regMaskTP* regs; - if (interval->registerType == FloatRegisterType) - { - regs = &floatRegs; - } - else - { - 
regs = &intRegs; - } - - // Select a random register from all possible parameter registers - // (of the right type). Preference this parameter to that register. - unsigned numBits = BitOperations::PopCount(*regs); - if (numBits == 0) - { - continue; - } - - int bitIndex = rng.Next((int)numBits); - regNumber prefReg = REG_NA; - regMaskTP regsLeft = *regs; - for (int i = 0; i <= bitIndex; i++) - { - prefReg = genFirstRegNumFromMaskAndToggle(regsLeft); - } - - *regs &= ~genRegMask(prefReg); - interval->mergeRegisterPreferences(genRegMask(prefReg)); - } + stressSetRandomParameterPreferences(); } #endif @@ -2920,6 +2876,64 @@ void LinearScan::buildIntervals() } #ifdef DEBUG + +//------------------------------------------------------------------------ +// stressSetRandomParameterPreferences: Randomize preferences of parameter +// intervals. +// +// Remarks: +// The intention of this stress is to make the parameter homing logic in +// genHomeRegisterParams see harder cases. +// +void LinearScan::stressSetRandomParameterPreferences() +{ + CLRRandom rng; + rng.Init(compiler->info.compMethodHash()); + regMaskTP intRegs = intRegState->rsCalleeRegArgMaskLiveIn; + regMaskTP floatRegs = floatRegState->rsCalleeRegArgMaskLiveIn; + + for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) + { + LclVarDsc* argDsc = compiler->lvaGetDescByTrackedIndex(varIndex); + + if (!argDsc->lvIsParam || !isCandidateVar(argDsc)) + { + continue; + } + + Interval* interval = getIntervalForLocalVar(varIndex); + + regMaskTP* regs; + if (interval->registerType == FloatRegisterType) + { + regs = &floatRegs; + } + else + { + regs = &intRegs; + } + + // Select a random register from all possible parameter registers + // (of the right type). Preference this parameter to that register. 
+ unsigned numBits = BitOperations::PopCount(*regs); + if (numBits == 0) + { + continue; + } + + int bitIndex = rng.Next((int)numBits); + regNumber prefReg = REG_NA; + regMaskTP regsLeft = *regs; + for (int i = 0; i <= bitIndex; i++) + { + prefReg = genFirstRegNumFromMaskAndToggle(regsLeft); + } + + *regs &= ~genRegMask(prefReg); + interval->mergeRegisterPreferences(genRegMask(prefReg)); + } +} + //------------------------------------------------------------------------ // validateIntervals: A DEBUG-only method that checks that: // - the lclVar RefPositions do not reflect uses of undefined values From 9d7bbbef4cce43fe1898f3c6d4626d836552238c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 17:13:26 +0200 Subject: [PATCH 19/24] Disable stress mode by default --- src/coreclr/jit/lsra.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 9ecef34db3679d..797c9d69c91d8f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -965,7 +965,7 @@ class LinearScan : public LinearScanInterface bool stressInitialParamReg() { - return true; // compiler->compStressCompile(Compiler::STRESS_INITIAL_PARAM_REG, 25); + return compiler->compStressCompile(Compiler::STRESS_INITIAL_PARAM_REG, 25); } // Dump support From 22acb2a4637c105a4c1e8dcda9caa103ed2443b6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 6 Apr 2024 18:12:36 +0200 Subject: [PATCH 20/24] Fix build, hopefully last cleanup --- src/coreclr/jit/codegencommon.cpp | 22 +++++++--------------- src/coreclr/jit/lsrabuild.cpp | 4 ++-- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 12744593fadab5..e4f0265b84bfe6 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2838,7 +2838,7 @@ class RegGraph { } - RegNode* Get(regNumber reg) + RegNode* GetOrAdd(regNumber reg) { for (int i = 0; i < 
m_nodes.Height(); i++) { @@ -2849,21 +2849,13 @@ class RegGraph } } - return nullptr; - } + RegNode* node = new (m_comp, CMK_Codegen) RegNode; + node->reg = reg; + node->copiedReg = REG_NA; + node->incoming = nullptr; + node->outgoing = nullptr; - RegNode* GetOrAdd(regNumber reg) - { - RegNode* node = Get(reg); - if (node == nullptr) - { - node = new (m_comp, CMK_Codegen) RegNode; - node->reg = reg; - node->copiedReg = REG_NA; - node->incoming = nullptr; - node->outgoing = nullptr; - m_nodes.Push(node); - } + m_nodes.Push(node); return node; } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 4ec80f3f9d217f..c64faa9c194744 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2889,8 +2889,8 @@ void LinearScan::stressSetRandomParameterPreferences() { CLRRandom rng; rng.Init(compiler->info.compMethodHash()); - regMaskTP intRegs = intRegState->rsCalleeRegArgMaskLiveIn; - regMaskTP floatRegs = floatRegState->rsCalleeRegArgMaskLiveIn; + regMaskTP intRegs = compiler->codeGen->intRegState.rsCalleeRegArgMaskLiveIn; + regMaskTP floatRegs = compiler->codeGen->floatRegState.rsCalleeRegArgMaskLiveIn; for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) { From 335500299f3b500135116830a6472124b6f7dd80 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 7 Apr 2024 00:01:07 +0200 Subject: [PATCH 21/24] Add function headers and fix a comment --- src/coreclr/jit/codegencommon.cpp | 60 +++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e4f0265b84bfe6..7e1afdb5b38626 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2838,6 +2838,16 @@ class RegGraph { } + // ----------------------------------------------------------------------------- + // GetOrAdd: Find (or create) the node representing a register. 
+ // + // Parameters: + // reg - Register + // + // Returns: + // Node in the graph that represents "reg". If no node exists it is + // created. + // RegNode* GetOrAdd(regNumber reg) { for (int i = 0; i < m_nodes.Height(); i++) @@ -2859,6 +2869,16 @@ class RegGraph return node; } + // ----------------------------------------------------------------------------- + // AddEdge: Add an edge to the graph, indicating that data needs to be + // moved from one register to another. + // + // Parameters: + // from - The source register node + // to - The destination register node + // type - The type of the data that is being moved from the source into the destination + // destOffset - The offset in the destination register where the data should be put + // void AddEdge(RegNode* from, RegNode* to, var_types type, unsigned destOffset) { assert(type != TYP_STRUCT); @@ -2876,6 +2896,13 @@ class RegGraph to->incoming = edge; } + // ----------------------------------------------------------------------------- + // FindNodeToHandle: Find the next register node to handle incoming moves to. + // + // Returns: + // A register node to handle, based on heuristics that try to reduce the + // amount of shuffling that needs to happen. + // RegNode* FindNodeToHandle() { RegNode* lastNode = nullptr; @@ -2898,9 +2925,22 @@ class RegGraph lastNode = reg; } + // Otherwise we'll need to save some value regardless, so any node will + // do. return lastNode; } + // ----------------------------------------------------------------------------- + // RemoveIncomingEdges: Mark that the incoming edges of a register nodes + // have been handled by deleting all its incoming edges from the graph. + // + // Parameters: + // node - The register node that has been handled and now contains its correct value + // busyRegs - [in, out] Pointer to register mask of registers that have live values we may need. 
+ // This function may remove registers from this set since the source registers of the + // incoming edges no longer have outgoing edges and thus do not need to have their values + // preserved. + // void RemoveIncomingEdges(RegNode* node, regMaskTP* busyRegs) { for (RegNodeEdge* edge = node->incoming; edge != nullptr; edge = edge->nextIncoming) @@ -2920,6 +2960,9 @@ class RegGraph } #ifdef DEBUG + // ----------------------------------------------------------------------------- + // Dump: Dump a textual representation of the graph to jitstdout. + // void Dump() { printf("%d registers in register parameter interference graph\n", m_nodes.Height()); @@ -3081,11 +3124,11 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) } // ----------------------------------------------------------------------------- -// genHomeIncomingRegisters: Move all register arguments to their initial assigned -// location. +// genHomeRegisterParams: Move all register parameters to their initial +// assigned location. // // Parameters: -// initReg - A register that this method should communicate if it trashes +// initReg - A register that this method should communicate if it becomes non-zero // initRegStillZeroed - [out] whether or not initReg is still zeroed // void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) @@ -3129,15 +3172,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // We build an interference graph where each node represents a register, // and an edge regX -> regY represents moving (part of) register X to (part // of) register Y. Note that in the general case each register can have - // multiple incoming edges. Examples: - // 1. On arm32 float registers overlay the double registers. If a double is passed in d0 (i.e. s0 and s1) - // we can have two float params going into s0 and s1, giving two incoming edges - // 2. 
On arm64/SysV x64 SIMD types can be passed in multiple registers but - // enregistered in a single vector register + // multiple incoming edges. For example, on arm64/SysV x64 SIMD types can + // be passed in multiple registers but enregistered in a single vector + // register. // Currently we never have multiple outgoing edges but one could imagine // this situation if we allowed promotion when fields didn't fit exactly on - // top of the underlying registers. TODO-CQ: Lift this restriction and - // implement the support for this. + // top of the underlying registers. RegGraph graph(compiler); for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) From bbdc3d086b2bee0c552df308071c28b6ecfff35c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 7 Apr 2024 12:57:04 +0200 Subject: [PATCH 22/24] Add arm32 double move optimization --- src/coreclr/jit/codegencommon.cpp | 81 +++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 7e1afdb5b38626..e49e8d79634510 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2839,16 +2839,16 @@ class RegGraph } // ----------------------------------------------------------------------------- - // GetOrAdd: Find (or create) the node representing a register. + // GetOrAdd: Find the node representing a register. // // Parameters: // reg - Register // // Returns: - // Node in the graph that represents "reg". If no node exists it is - // created. + // Node in the graph that represents "reg". Returns nullptr if no such + // node exists. 
// - RegNode* GetOrAdd(regNumber reg) + RegNode* Get(regNumber reg) { for (int i = 0; i < m_nodes.Height(); i++) { @@ -2859,13 +2859,33 @@ class RegGraph } } - RegNode* node = new (m_comp, CMK_Codegen) RegNode; - node->reg = reg; - node->copiedReg = REG_NA; - node->incoming = nullptr; - node->outgoing = nullptr; + return nullptr; + } + + // ----------------------------------------------------------------------------- + // GetOrAdd: Find (or create) the node representing a register. + // + // Parameters: + // reg - Register + // + // Returns: + // Node in the graph that represents "reg". If no node exists it is + // created. + // + RegNode* GetOrAdd(regNumber reg) + { + RegNode* node = Get(reg); + + if (node == nullptr) + { + node = new (m_comp, CMK_Codegen) RegNode; + node->reg = reg; + node->copiedReg = REG_NA; + node->incoming = nullptr; + node->outgoing = nullptr; + m_nodes.Push(node); + } - m_nodes.Push(node); return node; } @@ -3209,6 +3229,47 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) break; } + assert(node->incoming != nullptr); + +#ifdef TARGET_ARM + // As an optimization on arm32 we handle the easy double move cases in + // a single move. 
+ if (genIsValidFloatReg(node->reg) && (node->incoming->nextIncoming == nullptr) && (node->outgoing == nullptr) && + (node->incoming->from->copiedReg == REG_NA)) + { + RegNode* otherReg; + RegNode* lowReg; + RegNode* highReg; + + if (genIsValidDoubleReg(node->reg)) + { + otherReg = graph.Get(REG_NEXT(node->reg)); + lowReg = node; + highReg = otherReg; + } + else + { + otherReg = graph.Get(REG_PREV(node->reg)); + lowReg = otherReg; + highReg = node; + } + + if ((otherReg != nullptr) && (otherReg->incoming != nullptr) && + (otherReg->incoming->nextIncoming == nullptr) && (otherReg->incoming->from->copiedReg == REG_NA) && + (otherReg->outgoing == nullptr) && genIsValidDoubleReg(lowReg->incoming->from->reg) && + (highReg->incoming->from->reg == REG_NEXT(lowReg->incoming->from->reg))) + { + instruction ins = ins_Copy(lowReg->incoming->from->reg, TYP_DOUBLE); + GetEmitter()->emitIns_Mov(ins, EA_8BYTE, lowReg->reg, lowReg->incoming->from->reg, false); + graph.RemoveIncomingEdges(lowReg, &busyRegs); + graph.RemoveIncomingEdges(highReg, &busyRegs); + busyRegs |= genRegMask(lowReg->reg) | genRegMask(highReg->reg); + assert((lowReg->reg != initReg) && (highReg->reg != initReg)); + continue; + } + } +#endif + if ((node->outgoing != nullptr) && (node->copiedReg == REG_NA)) { var_types copyType = node->outgoing->type; From 6ab674995869c4820e47bea888cf0d1643ccda26 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 8 Apr 2024 22:20:33 +0200 Subject: [PATCH 23/24] Rename FindNodeToHandle -> FindNodeToProcess --- src/coreclr/jit/codegencommon.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e49e8d79634510..b78d96cae0ac54 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2839,7 +2839,7 @@ class RegGraph } // ----------------------------------------------------------------------------- - // GetOrAdd: Find the node representing 
a register. + // Get: Find the node representing a register. // // Parameters: // reg - Register @@ -2917,13 +2917,13 @@ class RegGraph } // ----------------------------------------------------------------------------- - // FindNodeToHandle: Find the next register node to handle incoming moves to. + // FindNodeToProcess: Find the next register node to process incoming moves to. // // Returns: - // A register node to handle, based on heuristics that try to reduce the + // A register node to process, based on heuristics that try to reduce the // amount of shuffling that needs to happen. // - RegNode* FindNodeToHandle() + RegNode* FindNodeToProcess() { RegNode* lastNode = nullptr; @@ -3223,7 +3223,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) regMaskTP busyRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; while (true) { - RegNode* node = graph.FindNodeToHandle(); + RegNode* node = graph.FindNodeToProcess(); if (node == nullptr) { break; From 75126d72893ccaa9e3a2507db46c6e39c846b9f9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 9 Apr 2024 11:00:39 +0200 Subject: [PATCH 24/24] Switch lvaParameterPassingInfo accesses to lvaGetParameterABIInfo --- src/coreclr/jit/codegencommon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ad8341fd726eaf..1edfb2ea124a72 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3073,7 +3073,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) unsigned paramLclNum = varDsc->lvIsStructField ? 
varDsc->lvParentLcl : lclNum; LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); - const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[paramLclNum]; + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(paramLclNum); for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& seg = abiInfo.Segments[i]; @@ -3171,7 +3171,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) continue; } - const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[lclNum]; + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& seg = abiInfo.Segments[i];