diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 60c8041555930c..576b39f748fbfc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6801,11 +6801,6 @@ class Compiler optMethodFlags |= OMF_HAS_GUARDEDDEVIRT; } - void clearMethodHasGuardedDevirtualization() - { - optMethodFlags &= ~OMF_HAS_GUARDEDDEVIRT; - } - void considerGuardedDevirtualization(GenTreeCall* call, IL_OFFSET ilOffset, bool isInterface, @@ -7300,10 +7295,20 @@ class Compiler struct LoopCloneVisitorInfo { LoopCloneContext* context; - unsigned loopNum; Statement* stmt; - LoopCloneVisitorInfo(LoopCloneContext* context, unsigned loopNum, Statement* stmt) - : context(context), loopNum(loopNum), stmt(nullptr) + const unsigned loopNum; + const bool cloneForArrayBounds; + const bool cloneForTypeTests; + LoopCloneVisitorInfo(LoopCloneContext* context, + unsigned loopNum, + Statement* stmt, + bool cloneForArrayBounds, + bool cloneForTypeTests) + : context(context) + , stmt(nullptr) + , loopNum(loopNum) + , cloneForArrayBounds(cloneForArrayBounds) + , cloneForTypeTests(cloneForTypeTests) { } }; diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 52ad8090f314e2..14242b8f718b90 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -1204,7 +1204,6 @@ Compiler::fgWalkResult Compiler::fgDebugCheckForTransformableIndirectCalls(GenTr void Compiler::CheckNoTransformableIndirectCallsRemain() { assert(!doesMethodHaveFatPointer()); - assert(!doesMethodHaveGuardedDevirtualization()); assert(!doesMethodHaveExpRuntimeLookup()); for (BasicBlock* const block : Blocks()) @@ -1244,7 +1243,6 @@ PhaseStatus Compiler::fgTransformIndirectCalls() } clearMethodHasFatPointer(); - clearMethodHasGuardedDevirtualization(); clearMethodHasExpRuntimeLookup(); } else diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 29e977cf1d8c9b..e125dfa41f5efb 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -42,6 +42,8 @@ CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0) CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts CONFIG_INTEGER(JitBreakOnUnsafeCode, W("JitBreakOnUnsafeCode"), 0) CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations. +CONFIG_INTEGER(JitCloneLoopsWithTypeTests, W("JitCloneLoopsWithTypeTests"), 1) // If 0, don't clone loops based on + // invariant type tests CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning // optimizations are performed on the fast path. CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 1713db4979f552..459f577f02dbf8 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -129,11 +129,19 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) assert(constant <= INT32_MAX); return comp->gtNewIconNode(constant); case Var: - return comp->gtNewLclvNode(constant, comp->lvaTable[constant].lvType); + return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType); case ArrLen: return arrLen.ToGenTree(comp, bb); case Null: return comp->gtNewIconNode(0, TYP_REF); + case ClassHandle: + return comp->gtNewIconHandleNode((size_t)clsHnd, GTF_ICON_CLASS_HDL); + case Indir: + { + GenTree* const indir = comp->gtNewIndir(TYP_I_IMPL, comp->gtNewLclvNode(lclNum, TYP_REF)); + indir->gtFlags |= GTF_IND_INVARIANT; + return indir; + } default: assert(!"Could not convert LC_Ident to GenTree"); unreached(); @@ -353,21 +361,39 @@ JitExpandArrayStack* LoopCloneContext::GetConditions(unsigned loop } //-------------------------------------------------------------------------------------------------- -// EnsureDerefs - Ensure an array of dereferences is created if it doesn't exist. +// EnsureArrayDerefs - Ensure an array of array dereferences is created if it doesn't exist. // // Arguments: // loopNum the loop index. // // Return Values: -// The array of dereferences for the loop. +// The array of array dereferences for the loop. // -JitExpandArrayStack* LoopCloneContext::EnsureDerefs(unsigned loopNum) +JitExpandArrayStack* LoopCloneContext::EnsureArrayDerefs(unsigned loopNum) { - if (derefs[loopNum] == nullptr) + if (arrayDerefs[loopNum] == nullptr) { - derefs[loopNum] = new (alloc) JitExpandArrayStack(alloc, 4); + arrayDerefs[loopNum] = new (alloc) JitExpandArrayStack(alloc, 4); } - return derefs[loopNum]; + return arrayDerefs[loopNum]; +} + +//-------------------------------------------------------------------------------------------------- +// EnsureObjDerefs - Ensure an array of object dereferences is created if it doesn't exist. +// +// Arguments: +// loopNum the loop index. +// +// Return Values: +// The array of object dereferences for the loop. +// +JitExpandArrayStack* LoopCloneContext::EnsureObjDerefs(unsigned loopNum) +{ + if (objDerefs[loopNum] == nullptr) + { + objDerefs[loopNum] = new (alloc) JitExpandArrayStack(alloc, 4); + } + return objDerefs[loopNum]; } //-------------------------------------------------------------------------------------------------- @@ -862,7 +888,7 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* // Return Values: // The local variable in the node's level. // -unsigned LC_Deref::Lcl() +unsigned LC_ArrayDeref::Lcl() { unsigned lvl = level; if (lvl == 0) @@ -882,7 +908,7 @@ unsigned LC_Deref::Lcl() // Return Values: // Return true if children are present. // -bool LC_Deref::HasChildren() +bool LC_ArrayDeref::HasChildren() { return children != nullptr && children->Size() > 0; } @@ -901,7 +927,7 @@ bool LC_Deref::HasChildren() // Return Values: // None // -void LC_Deref::DeriveLevelConditions(JitExpandArrayStack*>* conds) +void LC_ArrayDeref::DeriveLevelConditions(JitExpandArrayStack*>* conds) { if (level == 0) { @@ -943,11 +969,11 @@ void LC_Deref::DeriveLevelConditions(JitExpandArrayStack(alloc); + children = new (alloc) JitExpandArrayStack(alloc); } } @@ -960,7 +986,7 @@ void LC_Deref::EnsureChildren(CompAllocator alloc) // Return Values: // The child node if found or nullptr. // -LC_Deref* LC_Deref::Find(unsigned lcl) +LC_ArrayDeref* LC_ArrayDeref::Find(unsigned lcl) { return Find(children, lcl); } @@ -977,7 +1003,7 @@ LC_Deref* LC_Deref::Find(unsigned lcl) // // static -LC_Deref* LC_Deref::Find(JitExpandArrayStack* children, unsigned lcl) +LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children, unsigned lcl) { if (children == nullptr) { @@ -1022,200 +1048,249 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext JITDUMP("------------------------------------------------------------\n"); JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loopNum); - LoopDsc* loop = &optLoopTable[loopNum]; - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); - bool isIncreasingLoop = loop->lpIsIncreasingLoop(); + LoopDsc* loop = &optLoopTable[loopNum]; + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + assert(optInfos->Size() > 0); + + // We only need to check for iteration behavior if we have array checks. + // + bool checkIterationBehavior = false; + + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* const optInfo = optInfos->Get(i); + switch (optInfo->GetOptType()) + { + case LcOptInfo::LcJaggedArray: + case LcOptInfo::LcMdArray: + checkIterationBehavior = true; + break; + + case LcOptInfo::LcTypeTest: + { + LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo(); + LC_Ident objDeref = LC_Ident(ttInfo->lclNum, LC_Ident::Indir); + LC_Ident methodTable = LC_Ident(ttInfo->clsHnd, LC_Ident::ClassHandle); + LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); + context->EnsureObjDerefs(loopNum)->Push(objDeref); + context->EnsureConditions(loopNum)->Push(cond); + break; + } + + default: + JITDUMP("Unknown opt\n"); + return false; + } + } + + if (!checkIterationBehavior) + { + // No array conditions here, so we're done + // + JITDUMP("Conditions: "); + DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("\n"); + return true; + } + + // Note we see cases where the test oper is NE (array.Len) which we could handle + // with some extra care. + // + if (!GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + { + // We can't reason about how this loop iterates + return false; + } + + const bool isIncreasingLoop = loop->lpIsIncreasingLoop(); assert(isIncreasingLoop || loop->lpIsDecreasingLoop()); - if (GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + // We already know that this is either increasing or decreasing loop and the + // stride is (> 0) or (< 0). Here, just take the abs() value and check if it + // is beyond the limit. + int stride = abs(loop->lpIterConst()); + + if (stride >= 58) { - // We already know that this is either increasing or decreasing loop and the - // stride is (> 0) or (< 0). Here, just take the abs() value and check if it - // is beyond the limit. - int stride = abs(loop->lpIterConst()); + // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure + // the stride increment doesn't overflow or underflow the index. Hence, + // the maximum stride limit is set to + // (int.MaxValue - (Array.MaxLength - 1) + 1), which is + // (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58. + return false; + } - if (stride >= 58) + LC_Ident ident; + // Init conditions + if (loop->lpFlags & LPFLG_CONST_INIT) + { + // Only allowing non-negative const init at this time. + // This is because the variable initialized with this constant will be used as an array index, + // and array indices must be non-negative. + if (loop->lpConstInit < 0) { - // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure - // the stride increment doesn't overflow or underflow the index. Hence, - // the maximum stride limit is set to - // (int.MaxValue - (Array.MaxLength - 1) + 1), which is - // (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58. + JITDUMP("> Init %d is invalid\n", loop->lpConstInit); return false; } - LC_Ident ident; - // Init conditions - if (loop->lpFlags & LPFLG_CONST_INIT) + if (!isIncreasingLoop) { - // Only allowing non-negative const init at this time. - // This is because the variable initialized with this constant will be used as an array index, - // and array indices must be non-negative. - if (loop->lpConstInit < 0) - { - JITDUMP("> Init %d is invalid\n", loop->lpConstInit); - return false; - } - - if (!isIncreasingLoop) - { - // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(static_cast(loop->lpConstInit), LC_Ident::Const); - } + // For decreasing loop, the init value needs to be checked against the array length + ident = LC_Ident(static_cast(loop->lpConstInit), LC_Ident::Const); } - else + } + else + { + // iterVar >= 0 + const unsigned initLcl = loop->lpIterVar(); + if (!genActualTypeIsInt(lvaGetDesc(initLcl))) { - // iterVar >= 0 - const unsigned initLcl = loop->lpIterVar(); - if (!genActualTypeIsInt(lvaGetDesc(initLcl))) - { - JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); - return false; - } - - LC_Condition geZero; - if (isIncreasingLoop) - { - geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), - LC_Expr(LC_Ident(0, LC_Ident::Const))); - } - else - { - // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(initLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); - } - context->EnsureConditions(loopNum)->Push(geZero); + JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); + return false; } - // Limit Conditions - if (loop->lpFlags & LPFLG_CONST_LIMIT) + LC_Condition geZero; + if (isIncreasingLoop) { - int limit = loop->lpConstLimit(); - if (limit < 0) - { - JITDUMP("> limit %d is invalid\n", limit); - return false; - } - - if (isIncreasingLoop) - { - // For increasing loop, thelimit value needs to be checked against the array length - ident = LC_Ident(static_cast(limit), LC_Ident::Const); - } + geZero = + LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } - else if (loop->lpFlags & LPFLG_VAR_LIMIT) + else { - const unsigned limitLcl = loop->lpVarLimit(); - if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) - { - JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); - return false; - } + // For decreasing loop, the init value needs to be checked against the array length + ident = LC_Ident(initLcl, LC_Ident::Var); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + } + context->EnsureConditions(loopNum)->Push(geZero); + } - LC_Condition geZero; - if (isIncreasingLoop) - { - // For increasing loop, thelimit value needs to be checked against the array length - ident = LC_Ident(limitLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); - } - else - { - geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), - LC_Expr(LC_Ident(0, LC_Ident::Const))); - } + // Limit Conditions + if (loop->lpFlags & LPFLG_CONST_LIMIT) + { + int limit = loop->lpConstLimit(); + if (limit < 0) + { + JITDUMP("> limit %d is invalid\n", limit); + return false; + } - context->EnsureConditions(loopNum)->Push(geZero); + if (isIncreasingLoop) + { + // For increasing loop, thelimit value needs to be checked against the array length + ident = LC_Ident(static_cast(limit), LC_Ident::Const); } - else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + } + else if (loop->lpFlags & LPFLG_VAR_LIMIT) + { + const unsigned limitLcl = loop->lpVarLimit(); + if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) { - ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); - if (!loop->lpArrLenLimit(this, index)) - { - JITDUMP("> ArrLen not matching\n"); - return false; - } - ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); + JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); + return false; + } - // Ensure that this array must be dereference-able, before executing the actual condition. - LC_Array array(LC_Array::Jagged, index, LC_Array::None); - context->EnsureDerefs(loopNum)->Push(array); + LC_Condition geZero; + if (isIncreasingLoop) + { + // For increasing loop, thelimit value needs to be checked against the array length + ident = LC_Ident(limitLcl, LC_Ident::Var); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } else { - JITDUMP("> Undetected limit\n"); - return false; + geZero = + LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } - // Increasing loops - // GT_LT loop test: (start < end) ==> (end <= arrLen) - // GT_LE loop test: (start <= end) ==> (end < arrLen) - // - // Decreasing loops - // GT_GT loop test: (end > start) ==> (end <= arrLen) - // GT_GE loop test: (end >= start) ==> (end < arrLen) - genTreeOps opLimitCondition; - switch (loop->lpTestOper()) - { - case GT_LT: - case GT_GT: - opLimitCondition = GT_LE; - break; - case GT_LE: - case GT_GE: - opLimitCondition = GT_LT; - break; - default: - unreached(); + context->EnsureConditions(loopNum)->Push(geZero); + } + else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + { + ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); + if (!loop->lpArrLenLimit(this, index)) + { + JITDUMP("> ArrLen not matching\n"); + return false; } + ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); - for (unsigned i = 0; i < optInfos->Size(); ++i) + // Ensure that this array must be dereference-able, before executing the actual condition. + LC_Array array(LC_Array::Jagged, index, LC_Array::None); + context->EnsureArrayDerefs(loopNum)->Push(array); + } + else + { + JITDUMP("> Undetected limit\n"); + return false; + } + + // Increasing loops + // GT_LT loop test: (start < end) ==> (end <= arrLen) + // GT_LE loop test: (start <= end) ==> (end < arrLen) + // + // Decreasing loops + // GT_GT loop test: (end > start) ==> (end <= arrLen) + // GT_GE loop test: (end >= start) ==> (end < arrLen) + genTreeOps opLimitCondition; + switch (loop->lpTestOper()) + { + case GT_LT: + case GT_GT: + opLimitCondition = GT_LE; + break; + case GT_LE: + case GT_GE: + opLimitCondition = GT_LT; + break; + default: + unreached(); + } + + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* optInfo = optInfos->Get(i); + switch (optInfo->GetOptType()) { - LcOptInfo* optInfo = optInfos->Get(i); - switch (optInfo->GetOptType()) + case LcOptInfo::LcJaggedArray: { - case LcOptInfo::LcJaggedArray: - { - LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); - LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); - LC_Ident arrLenIdent = LC_Ident(arrLen); - LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); - - // Ensure that this array must be dereference-able, before executing the actual condition. - LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); - context->EnsureDerefs(loopNum)->Push(array); - } - break; - case LcOptInfo::LcMdArray: - { - LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); - LC_Array arrLen(LC_Array(LC_Array::MdArray, - mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)), mdArrInfo->dim, - LC_Array::None)); - LC_Ident arrLenIdent = LC_Ident(arrLen); - LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); - - // TODO: ensure array is dereference-able? - } + LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); + LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); + LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); + context->EnsureConditions(loopNum)->Push(cond); + + // Ensure that this array must be dereference-able, before executing the actual condition. + LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); + context->EnsureArrayDerefs(loopNum)->Push(array); + } + break; + case LcOptInfo::LcMdArray: + { + LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); + LC_Array arrLen(LC_Array(LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)), + mdArrInfo->dim, LC_Array::None)); + LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); + context->EnsureConditions(loopNum)->Push(cond); + + // TODO: ensure array is dereference-able? + } + break; + case LcOptInfo::LcTypeTest: + // handled above break; - default: - JITDUMP("Unknown opt\n"); - return false; - } + default: + JITDUMP("Unknown opt\n"); + return false; } - - JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); - JITDUMP("\n"); - - return true; } - return false; + JITDUMP("Conditions: "); + DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("\n"); + + return true; } //------------------------------------------------------------------------------------ @@ -1325,26 +1400,34 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) { - JitExpandArrayStack nodes(getAllocator(CMK_LoopClone)); - int maxRank = -1; + // Get the dereference-able arrays and objects. + JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loopNum); + JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loopNum); - // Get the dereference-able arrays. - JitExpandArrayStack* deref = context->EnsureDerefs(loopNum); + // We currently expect to have at least one of these. + // + assert((arrayDeref->Size() != 0) || (objDeref->Size() != 0)); + // Generate the array dereference checks. + // // For each array in the dereference list, construct a tree, - // where the nodes are array and index variables and an edge 'u-v' + // where the arrayDerefNodes are array and index variables and an edge 'u-v' // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge // 'u-v-w' transitively if u[v][w] occurs. - for (unsigned i = 0; i < deref->Size(); ++i) + // + JitExpandArrayStack arrayDerefNodes(getAllocator(CMK_LoopClone)); + int maxRank = -1; + + for (unsigned i = 0; i < arrayDeref->Size(); ++i) { - LC_Array& array = (*deref)[i]; + LC_Array& array = (*arrayDeref)[i]; // First populate the array base variable. - LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); + LC_ArrayDeref* node = LC_ArrayDeref::Find(&arrayDerefNodes, array.arrIndex->arrLcl); if (node == nullptr) { - node = new (getAllocator(CMK_LoopClone)) LC_Deref(array, 0 /*level*/); - nodes.Push(node); + node = new (getAllocator(CMK_LoopClone)) LC_ArrayDeref(array, 0 /*level*/); + arrayDerefNodes.Push(node); } // For each dimension (level) for the array, populate the tree with the variable @@ -1353,10 +1436,10 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con for (unsigned i = 0; i < rank; ++i) { node->EnsureChildren(getAllocator(CMK_LoopClone)); - LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]); + LC_ArrayDeref* tmp = node->Find(array.arrIndex->indLcls[i]); if (tmp == nullptr) { - tmp = new (getAllocator(CMK_LoopClone)) LC_Deref(array, node->level + 1); + tmp = new (getAllocator(CMK_LoopClone)) LC_ArrayDeref(array, node->level + 1); node->children->Push(tmp); } @@ -1371,45 +1454,68 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con #ifdef DEBUG if (verbose) { - printf("Deref condition tree:\n"); - for (unsigned i = 0; i < nodes.Size(); ++i) + if (arrayDerefNodes.Size() > 0) { - nodes[i]->Print(); - printf("\n"); + printf("Array deref condition tree:\n"); + for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) + { + arrayDerefNodes[i]->Print(); + printf("\n"); + } + } + else + { + printf("No array deref conditions\n"); } } #endif - if (maxRank == -1) + if (arrayDeref->Size() > 0) { - JITDUMP("> maxRank undefined\n"); - return false; - } - - // First level will always yield the null-check, since it is made of the array base variables. - // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) - // So add 1 after rank * 2. - unsigned condBlocks = (unsigned)maxRank * 2 + 1; + // If we have array derefs we should have set maxRank. + // + assert(maxRank != -1); + + // First level will always yield the null-check, since it is made of the array base variables. + // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) + // So add 1 after rank * 2. + const unsigned condBlocks = (unsigned)maxRank * 2 + 1; + + // Heuristic to not create too many blocks. Defining as 3 allows, effectively, loop cloning on + // doubly-nested loops. + // REVIEW: make this based on a COMPlus configuration, at least for debug? + const unsigned maxAllowedCondBlocks = 3; + if (condBlocks > maxAllowedCondBlocks) + { + JITDUMP("> Too many condition blocks (%u > %u)\n", condBlocks, maxAllowedCondBlocks); + return false; + } - // Heuristic to not create too many blocks. Defining as 3 allows, effectively, loop cloning on - // doubly-nested loops. - // REVIEW: make this based on a COMPlus configuration, at least for debug? - const unsigned maxAllowedCondBlocks = 3; - if (condBlocks > maxAllowedCondBlocks) - { - JITDUMP("> Too many condition blocks (%u > %u)\n", condBlocks, maxAllowedCondBlocks); - return false; + // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] + JitExpandArrayStack*>* levelCond = + context->EnsureBlockConditions(loopNum, condBlocks); + for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) + { + arrayDerefNodes[i]->DeriveLevelConditions(levelCond); + } } - // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] - JitExpandArrayStack*>* levelCond = - context->EnsureBlockConditions(loopNum, condBlocks); - for (unsigned i = 0; i < nodes.Size(); ++i) + if (objDeref->Size() > 0) { - nodes[i]->DeriveLevelConditions(levelCond); + JitExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, 1); + + for (unsigned i = 0; i < objDeref->Size(); ++i) + { + // ObjDeref array has indir(lcl), we want lcl. + // + LC_Ident& mtIndirIdent = (*objDeref)[i]; + LC_Ident ident(mtIndirIdent.LclNum(), LC_Ident::Var); + (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident(LC_Ident::Null)))); + } } DBEXEC(verbose, context->PrintBlockConditions(loopNum)); + return true; } @@ -1523,6 +1629,10 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; + case LcOptInfo::LcTypeTest: + // We could optimize here. For now, let downstream opts clean this up. + break; + default: break; } @@ -1545,9 +1655,10 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // bool Compiler::optIsLoopClonable(unsigned loopInd) { - const LoopDsc& loop = optLoopTable[loopInd]; + const LoopDsc& loop = optLoopTable[loopInd]; + const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); - if (!(loop.lpFlags & LPFLG_ITER)) + if (requireIterable && !(loop.lpFlags & LPFLG_ITER)) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". No LPFLG_ITER flag.\n", loopInd); return false; @@ -1628,12 +1739,15 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) return false; } - unsigned ivLclNum = loop.lpIterVar(); - if (lvaVarAddrExposed(ivLclNum)) + if (requireIterable) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", - loopInd, ivLclNum); - return false; + const unsigned ivLclNum = loop.lpIterVar(); + if (lvaVarAddrExposed(ivLclNum)) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", + loopInd, ivLclNum); + return false; + } } BasicBlock* top = loop.lpTop; @@ -1651,39 +1765,45 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) return false; } - if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && - (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) + if (requireIterable) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop limit is neither constant, variable or array length.\n", - loopInd); - return false; - } + if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && + (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP + ". Loop limit is neither constant, variable or array length.\n", + loopInd); + return false; + } - // TODO-CQ: Handle other loops like: - // - The ones whose limit operator is "==" or "!=" - // - The incrementing operator is multiple and divide - // - The ones that are inverted are not handled here for cases like "i *= 2" because - // they are converted to "i + i". - if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", - loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); - return false; - } + // TODO-CQ: Handle other loops like: + // - The ones whose limit operator is "==" or "!=" + // - The incrementing operator is multiple and divide + // - The ones that are inverted are not handled here for cases like "i *= 2" because + // they are converted to "i + i". + if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP + ". Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", + loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); + return false; + } - if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop inversion NOT present, loop test [%06u] may not protect " - "entry from head.\n", - loopInd, loop.lpTestTree->gtTreeID); - return false; - } + if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP + ". Loop inversion NOT present, loop test [%06u] may not protect " + "entry from head.\n", + loopInd, loop.lpTestTree->gtTreeID); + return false; + } #ifdef DEBUG - GenTree* op1 = loop.lpIterator(); - assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); + const unsigned ivLclNum = loop.lpIterVar(); + GenTree* const op1 = loop.lpIterator(); + assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); #endif + } // Otherwise, we're going to add those return blocks. fgReturnCount += loopRetCount; @@ -1725,12 +1845,15 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, assert(slowHead != nullptr); assert(insertAfter->bbJumpKind == BBJ_NONE); - JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); - for (unsigned i = 0; i < levelCond->Size(); ++i) + if (context->HasBlockConditions(loopNum)) { - JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); - DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); - insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); + JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); + for (unsigned i = 0; i < levelCond->Size(); ++i) + { + JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); + DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); + insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); + } } // Finally insert cloning conditions after all deref conditions have been inserted. @@ -2097,7 +2220,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // ... // slowHead -?> e2 (slowHead) branch or fall-through to e2 // - // We should always have block conditions; at the minimum, the array should be deref-able. + // We should always have block conditions. + assert(context->HasBlockConditions(loopInd)); if (h->bbJumpKind == BBJ_NONE) @@ -2495,6 +2619,9 @@ bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result) // array index var in some dimension. Also ensure other index vars before the identified // dimension are loop invariant. // +// If the loop has invariant type tests, check if they will succeed often enough that +// they should inspire cloning (on their own, or in conjunction with array bounds checks). +// // Return Value: // Skip sub trees if the optimization candidate is identified or else continue walking // @@ -2503,7 +2630,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop ArrIndex arrIndex(getAllocator(CMK_LoopClone)); // Check if array index can be optimized. - if (optReconstructArrIndex(tree, &arrIndex)) + // + if (info->cloneForArrayBounds && optReconstructArrIndex(tree, &arrIndex)) { assert(tree->gtOper == GT_COMMA); @@ -2562,11 +2690,156 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop } return WALK_SKIP_SUBTREES; } - else if (tree->gtOper == GT_ARR_ELEM) + + if (info->cloneForTypeTests && tree->OperIs(GT_JTRUE)) { - // TODO-CQ: CLONE: Implement. - return WALK_SKIP_SUBTREES; + JITDUMP("...TT considering [%06u]\n", dspTreeID(tree)); + // Look for invariant type tests. + // + GenTree* const relop = tree->AsOp()->gtGetOp1(); + + // Must be an equality comparison of some kind. + // + if (!relop->OperIs(GT_EQ, GT_NE)) + { + return WALK_CONTINUE; + } + + GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); + GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); + + // One side or the other must be an indir + // The other must be a loop invariant. + // Currently, we'll just look for a constant. + // + bool match = false; + if (relopOp1->OperIs(GT_IND) && relopOp2->IsIntegralConst()) + { + match = true; + } + else if (relopOp2->OperIs(GT_IND) && relopOp1->IsIntegralConst()) + { + std::swap(relopOp1, relopOp2); + match = true; + } + + if (!match) + { + return WALK_CONTINUE; + } + + // The indir addr must be loop invariant TYP_REF local + // + GenTree* const indirAddr = relopOp1->AsIndir()->Addr(); + + if (!indirAddr->TypeIs(TYP_REF)) + { + return WALK_CONTINUE; + } + + if (!indirAddr->OperIs(GT_LCL_VAR)) + { + return WALK_CONTINUE; + } + + if (!relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) + { + return WALK_CONTINUE; + } + + GenTreeLclVarCommon* const indirAddrLcl = indirAddr->AsLclVarCommon(); + const unsigned lclNum = indirAddrLcl->GetLclNum(); + + JITDUMP("... right form, V%02u\n", lclNum); + + if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + { + JITDUMP("... but not invariant\n"); + return WALK_CONTINUE; + } + + // Looks like we found an invariant type test. + // + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u ... ", info->loopNum, dspTreeID(tree), lclNum); + + // We only want this type test to inspire cloning if + // + // (1) we have profile data + // (2) the loop iterates frequently each time the method is called + // (3) the type test is frequently hit during the loop iteration + // (4) the type test is biased and highly likely to succeed + // + const LoopDsc& loopDsc = optLoopTable[info->loopNum]; + BasicBlock* const loopEntry = loopDsc.lpEntry; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; + + // Check for (1) + // + if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + { + JITDUMP(" but loop does not have profile data.\n"); + return WALK_CONTINUE; + } + + // Check for (2) + // + if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + { + JITDUMP(" but loop does not iterate often enough.\n"); + return WALK_CONTINUE; + } + + // Check for (3) + // + if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + { + JITDUMP(" but type test does not execute often enough within the loop.\n"); + return WALK_CONTINUE; + } + + // Check for (4) + // + BasicBlock* const hotSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbJumpDest : typeTestBlock->bbNext; + BasicBlock* const coldSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbNext : typeTestBlock->bbJumpDest; + + if (!hotSuccessor->hasProfileWeight() || !coldSuccessor->hasProfileWeight()) + { + JITDUMP(" but type test successor blocks were not profiled.\n"); + return WALK_CONTINUE; + } + + if (hotSuccessor->bbWeight == BB_ZERO_WEIGHT) + { + JITDUMP(" but hot successor block " FMT_BB " is rarely run.\n", hotSuccessor->bbNum); + return WALK_CONTINUE; + } + + if (coldSuccessor->bbWeight > BB_ZERO_WEIGHT) + { + const weight_t bias = coldSuccessor->bbWeight / (hotSuccessor->bbWeight + coldSuccessor->bbWeight); + + if (bias > typeTestBias) + { + JITDUMP(" but type test not sufficiently biased: failure likelihood is " FMT_WT " > " FMT_WT "\n", bias, + typeTestBias); + return WALK_CONTINUE; + } + } + + JITDUMP(" passed profile screening\n"); + + // Update the loop context. + // + assert(relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)); + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); + + info->context->EnsureLoopOptInfo(info->loopNum) + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd)); } + return WALK_CONTINUE; } @@ -2596,11 +2869,28 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) { - JITDUMP("Checking loop " FMT_LP " for optimization candidates\n", loopNum); - const LoopDsc& loop = optLoopTable[loopNum]; + const bool canCloneForArrayBounds = + ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && ((loop.lpFlags & LPFLG_ITER) != 0); + const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); + + if (!canCloneForArrayBounds && !canCloneForTypeTests) + { + JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loopNum); + return false; + } + + bool shouldCloneForArrayBounds = canCloneForArrayBounds; + bool shouldCloneForTypeTests = canCloneForTypeTests; + +#ifdef DEBUG + shouldCloneForTypeTests &= JitConfig.JitCloneLoopsWithTypeTests() != 0; +#endif + + JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, + shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForTypeTests ? " (type tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr); + LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForTypeTests); for (BasicBlock* const block : loop.LoopBlocks()) { compCurBB = block; diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 13bd2fa5e090c5..460a88490ffbc3 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -316,6 +316,21 @@ struct LcJaggedArrayOptInfo : public LcOptInfo } }; +// Optimization info for a type test +// +struct LcTypeTestOptInfo : public LcOptInfo +{ + // local whose method table is tested + unsigned lclNum; + // handle being tested for + CORINFO_CLASS_HANDLE clsHnd; + + LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) + : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) + { + } +}; + /** * * Symbolic representation of a.length, or a[i][j].length or a[i,j].length and so on. @@ -407,11 +422,11 @@ struct LC_Array GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); }; -/** - * - * Symbolic representation of either a constant like 1 or 2, or a variable like V02 or V03, or an "LC_Array", - * or the null constant. - */ +//------------------------------------------------------------------------ +// LC_Ident: symbolic representation of either a constant like 1 or 2, +// or a variable like V02 or V03, or an "LC_Array", or the null constant, +// or a class handle, or an indir of a variable like *V02. +// struct LC_Ident { enum IdentType @@ -421,30 +436,55 @@ struct LC_Ident Var, ArrLen, Null, + ClassHandle, + Indir, + }; + +private: + union { + unsigned constant; + unsigned lclNum; + LC_Array arrLen; + CORINFO_CLASS_HANDLE clsHnd; }; - LC_Array arrLen; // The LC_Array if the type is "ArrLen" - unsigned constant; // The constant value if this node is of type "Const", or the lcl num if "Var" - IdentType type; // The type of this object +public: + // The type of this object + IdentType type; // Equality operator bool operator==(const LC_Ident& that) const { + if (type != that.type) + { + return false; + } + switch (type) { case Const: + return (constant == that.constant); + case ClassHandle: + return (clsHnd == that.clsHnd); case Var: - return (type == that.type) && (constant == that.constant); + case Indir: + return (lclNum == that.lclNum); case ArrLen: - return (type == that.type) && (arrLen == that.arrLen); + return (arrLen == that.arrLen); case Null: - return (type == that.type); + return true; default: assert(!"Unknown LC_Ident type"); unreached(); } } + unsigned LclNum() const + { + assert((type == Var) || (type == Indir)); + return lclNum; + } + #ifdef DEBUG void Print() { @@ -454,7 +494,13 @@ struct LC_Ident printf("%u", constant); break; case Var: - printf("V%02d", constant); + printf("V%02u", lclNum); + break; + case Indir: + printf("*V%02u", lclNum); + break; + case ClassHandle: + printf("%p", clsHnd); break; case ArrLen: arrLen.Print(); @@ -472,12 +518,40 @@ struct LC_Ident LC_Ident() : type(Invalid) { } - LC_Ident(unsigned constant, IdentType type) : constant(constant), type(type) + + explicit LC_Ident(unsigned val, IdentType type) : type(type) { + if (type == Const) + { + constant = val; + } + else if ((type == Var) || (type == Indir)) + { + lclNum = val; + } + else + { + unreached(); + } } + + explicit LC_Ident(CORINFO_CLASS_HANDLE val, IdentType type) : type(type) + { + if (type == ClassHandle) + { + clsHnd = val; + } + else + { + unreached(); + } + } + explicit LC_Ident(IdentType type) : type(type) { + assert(type == Null); } + explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen) { } @@ -597,24 +671,24 @@ struct LC_Condition * i => {} * } */ -struct LC_Deref +struct LC_ArrayDeref { - const LC_Array array; - JitExpandArrayStack* children; + const LC_Array array; + JitExpandArrayStack* children; unsigned level; - LC_Deref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level) + LC_ArrayDeref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level) { } - LC_Deref* Find(unsigned lcl); + LC_ArrayDeref* Find(unsigned lcl); unsigned Lcl(); bool HasChildren(); void EnsureChildren(CompAllocator alloc); - static LC_Deref* Find(JitExpandArrayStack* children, unsigned lcl); + static LC_ArrayDeref* Find(JitExpandArrayStack* children, unsigned lcl); void DeriveLevelConditions(JitExpandArrayStack*>* len); @@ -635,7 +709,7 @@ struct LC_Deref #ifdef _MSC_VER (*children)[i]->Print(indent + 1); #else // _MSC_VER - (*((JitExpandArray*)children))[i]->Print(indent + 1); + (*((JitExpandArray*)children))[i]->Print(indent + 1); #endif // _MSC_VER } } @@ -669,18 +743,22 @@ struct LoopCloneContext // The array of conditions that influence which path to take for each loop. (loop x cloning-conditions) jitstd::vector*> conditions; - // The array of dereference conditions found in each loop. (loop x deref-conditions) - jitstd::vector*> derefs; + // The array of array dereference conditions found in each loop. (loop x deref-conditions) + jitstd::vector*> arrayDerefs; + + // The array of object dereference conditions found in each loop. + jitstd::vector*> objDerefs; // The array of block levels of conditions for each loop. (loop x level x conditions) jitstd::vector*>*> blockConditions; LoopCloneContext(unsigned loopCount, CompAllocator alloc) - : alloc(alloc), optInfo(alloc), conditions(alloc), derefs(alloc), blockConditions(alloc) + : alloc(alloc), optInfo(alloc), conditions(alloc), arrayDerefs(alloc), objDerefs(alloc), blockConditions(alloc) { optInfo.resize(loopCount, nullptr); conditions.resize(loopCount, nullptr); - derefs.resize(loopCount, nullptr); + arrayDerefs.resize(loopCount, nullptr); + objDerefs.resize(loopCount, nullptr); blockConditions.resize(loopCount, nullptr); } @@ -709,8 +787,11 @@ struct LoopCloneContext // Get the conditions for loop. No allocation is performed. JitExpandArrayStack* GetConditions(unsigned loopNum); - // Ensure that the "deref" conditions array is allocated. - JitExpandArrayStack* EnsureDerefs(unsigned loopNum); + // Ensure that the array "deref" conditions array is allocated. + JitExpandArrayStack* EnsureArrayDerefs(unsigned loopNum); + + // Ensure that the obj "deref" conditions array is allocated. + JitExpandArrayStack* EnsureObjDerefs(unsigned loopNum); // Get block conditions for each loop, no allocation is performed. JitExpandArrayStack*>* GetBlockConditions(unsigned loopNum); diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h index f17b915c830a69..2df5e7baf63b5f 100644 --- a/src/coreclr/jit/loopcloningopts.h +++ b/src/coreclr/jit/loopcloningopts.h @@ -11,5 +11,6 @@ // Types of Loop Cloning based optimizations. LC_OPT(LcMdArray) LC_OPT(LcJaggedArray) +LC_OPT(LcTypeTest) #undef LC_OPT diff --git a/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs new file mode 100644 index 00000000000000..8aa2bb14457fd3 --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// PGO enables an invariant GDV type test in a loop. +// We then clone the loop based on this test. +// +// COMPlus_TieredPGO=1 + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; + +class CloningForIEnumerable +{ + [MethodImpl(MethodImplOptions.NoInlining)] + public static int Sum(IEnumerable e) + { + int r = 0; + foreach(int i in e) + { + r += i; + } + return r; + } + + public static int Main() + { + List list = new List { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + + int r = 0; + + for (int i = 0; i < 30; i++) + { + r += Sum(list); + Thread.Sleep(15); + } + + Thread.Sleep(50); + + for (int i = 0; i < 70; i++) + { + r += Sum(list); + } + + return r - 5400; + } +} diff --git a/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj new file mode 100644 index 00000000000000..6630219f047cd0 --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj @@ -0,0 +1,22 @@ + + + Exe + + True + + + + + + + + + diff --git a/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs new file mode 100644 index 00000000000000..4795cf91fd81b4 --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs @@ -0,0 +1,84 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// PGO enables an invariant GDV type test in a loop. +// We then clone the loop based on this test. +// +// COMPlus_TieredPGO=1 + +using System; +using System.Runtime.CompilerServices; +using System.Threading; + +interface I +{ + public int F(int x, int y); +} + +class Add : I +{ + int I.F(int x, int y) => x + y; +} + +class Mul : I +{ + int I.F(int x, int y) => x * y; +} + +class CloningForTypeTests +{ + [MethodImpl(MethodImplOptions.NoInlining)] + public static int BothTypeAndArray(I m, int[] xs, int[] ys, int from, int to) + { + int r = 0; + + // Cloning conditions for this loop should include a type test + // + for (int i = from; i < to; i++) + { + r += m.F(xs[i], ys[i]); + } + return r; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static int JustType(I m, int from, int to) + { + int r = 0; + + // Cloning conditions for this loop should only include a type test + // + for (int i = from; i < to; i++) + { + r += m.F(1, 1); + } + return r; + } + + public static int Main() + { + int[] xs = new int[] { 1, 2, 3, 4 }; + int[] ys = new int[] { 4, 3, 2, 1 }; + I m = new Add(); + + int r0 = 0; + int r1 = 0; + + for (int i = 0; i < 30; i++) + { + r0 += BothTypeAndArray(m, xs, ys, 0, 3); + r1 += JustType(m, 0, 3); + Thread.Sleep(15); + } + + Thread.Sleep(50); + + for (int i = 0; i < 70; i++) + { + r0 += BothTypeAndArray(m, xs, ys, 0, 3); + r1 += JustType(m, 0, 3); + } + + return (r0 + r1) / 21; + } +} diff --git a/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj new file mode 100644 index 00000000000000..6630219f047cd0 --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj @@ -0,0 +1,22 @@ + + + Exe + + True + + + + + + + + +