From d0a03736c6e93d91183b8ac3fba9c80a704d9d55 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Feb 2025 19:51:05 +0100 Subject: [PATCH 1/4] Handle overlapped groups of bounds checks --- src/coreclr/jit/rangecheckcloning.cpp | 113 +++++++++++++++++++------- src/coreclr/jit/rangecheckcloning.h | 9 +- 2 files changed, 92 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/rangecheckcloning.cpp b/src/coreclr/jit/rangecheckcloning.cpp index eac1768eb03532..c7ad1dc89ec5b3 100644 --- a/src/coreclr/jit/rangecheckcloning.cpp +++ b/src/coreclr/jit/rangecheckcloning.cpp @@ -48,17 +48,18 @@ // Arguments: // comp - The compiler instance // statement - The statement containing the bounds check -// bndChkNode - The bounds check node -// bndChkParentNode - The parent node of the bounds check node (either null or COMMA) +// statementIdx - The index of the statement in the block +// bndChk - The bounds check node (its use edge) // // Return Value: // true if the initialization was successful, false otherwise. // -bool BoundsCheckInfo::Initialize(const Compiler* comp, Statement* statement, GenTree** bndChk) +bool BoundsCheckInfo::Initialize(const Compiler* comp, Statement* statement, int statementIdx, GenTree** bndChk) { assert((bndChk != nullptr) && ((*bndChk) != nullptr)); stmt = statement; + stmtIdx = statementIdx; bndChkUse = bndChk; idxVN = comp->vnStore->VNConservativeNormalValue(BndChk()->GetIndex()->gtVNPair); lenVN = comp->vnStore->VNConservativeNormalValue(BndChk()->GetArrayLength()->gtVNPair); @@ -98,16 +99,26 @@ bool BoundsCheckInfo::Initialize(const Compiler* comp, Statement* statement, Gen // RemoveBoundsChk - Remove the given bounds check from the statement and the block. // // Arguments: -// comp - compiler instance -// check - bounds check node to remove -// comma - check's parent node (either null or COMMA) -// stmt - statement containing the bounds check +// comp - compiler instance +// treeUse - the bounds check node to remove (its use edge) +// stmt - the statement containing the bounds check // static void RemoveBoundsChk(Compiler* comp, GenTree** treeUse, Statement* stmt) { JITDUMP("Before RemoveBoundsChk:\n"); DISPTREE(*treeUse); + // If we deal with GT_ARR_LEN(lcl), we can unmark the array length node + // as having an NRE. Technically, this should be done by the assertion prop, + // but not today, see https://github.com/dotnet/runtime/pull/93531 + // + GenTreeBoundsChk* bndsChk = (*treeUse)->AsBoundsChk(); + GenTree* arrObj = bndsChk->GetArray(); + if ((arrObj != nullptr) && ((arrObj->gtFlags & GTF_ALL_EFFECT) == 0)) + { + bndsChk->GetArrayLength()->gtFlags &= ~GTF_EXCEPT; + } + GenTree* sideEffList = nullptr; comp->gtExtractSideEffList(*treeUse, &sideEffList, GTF_SIDE_EFFECT, /*ignoreRoot*/ true); *treeUse = (sideEffList != nullptr) ? sideEffList : comp->gtNewNothingNode(); @@ -155,18 +166,21 @@ static void RemoveBoundsChk(Compiler* comp, GenTree** treeUse, Statement* stmt) // comp - The compiler instance // block - The block to clone // bndChkStack - The stack of bounds checks to clone +// lastStmt - The last statement in the block (the block is split after this statement) // // Return Value: // The block containing the fast path. // -static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, BasicBlock* block, BoundsCheckInfoStack* bndChkStack) +static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, + BasicBlock* block, + BoundsCheckInfoStack* bndChkStack, + Statement* lastStmt) { assert(block != nullptr); assert(bndChkStack->Height() > 0); // The bound checks are in the execution order (top of the stack is the last check) BoundsCheckInfo firstCheck = bndChkStack->Bottom(); - BoundsCheckInfo lastCheck = bndChkStack->Top(); BasicBlock* prevBb = block; // First, split the block at the first bounds check using gtSplitTree (via fgSplitBlockBeforeTree): @@ -187,7 +201,7 @@ static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, BasicBlock* bloc // Now split the block at the last bounds check using fgSplitBlockAfterStatement: // TODO-RangeCheckCloning: call gtSplitTree for lastBndChkStmt as well, to cut off // the stuff we don't have to clone. - BasicBlock* lastBb = comp->fgSplitBlockAfterStatement(fastpathBb, lastCheck.stmt); + BasicBlock* lastBb = comp->fgSplitBlockAfterStatement(fastpathBb, lastStmt); DebugInfo debugInfo = fastpathBb->firstStmt()->GetDebugInfo(); @@ -351,7 +365,7 @@ static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, BasicBlock* bloc assert(BasicBlock::sameEHRegion(prevBb, fallbackBb)); assert(BasicBlock::sameEHRegion(prevBb, lastBb)); - return fastpathBb; + return fastpathBb->Prev(); } // A visitor to record all the bounds checks in a statement in the execution order @@ -359,6 +373,7 @@ class BoundsChecksVisitor final : public GenTreeVisitor { Statement* m_stmt; ArrayStack* m_boundsChks; + int m_stmtIdx; public: enum @@ -368,10 +383,14 @@ class BoundsChecksVisitor final : public GenTreeVisitor UseExecutionOrder = true }; - BoundsChecksVisitor(Compiler* compiler, Statement* stmt, ArrayStack* bndChkLocations) + BoundsChecksVisitor(Compiler* compiler, + Statement* stmt, + int stmtIdx, + ArrayStack* bndChkLocations) : GenTreeVisitor(compiler) , m_stmt(stmt) , m_boundsChks(bndChkLocations) + , m_stmtIdx(stmtIdx) { } @@ -389,7 +408,7 @@ class BoundsChecksVisitor final : public GenTreeVisitor { if ((*use)->OperIs(GT_BOUNDS_CHECK)) { - m_boundsChks->Push(BoundCheckLocation(m_stmt, use)); + m_boundsChks->Push(BoundCheckLocation(m_stmt, use, m_stmtIdx)); } return fgWalkResult::WALK_CONTINUE; } @@ -401,6 +420,7 @@ class BoundsChecksVisitor final : public GenTreeVisitor // the bounds checks. // // Arguments: +// comp - The compiler instance // bndChks - The stack of bounds checks // // Return Value: @@ -499,8 +519,10 @@ PhaseStatus Compiler::optRangeCheckCloning() bndChkLocations.Reset(); bndChkMap.RemoveAll(); + int stmtIdx = -1; for (Statement* const stmt : block->Statements()) { + stmtIdx++; if (block->HasTerminator() && (stmt == block->lastStmt())) { // TODO-RangeCheckCloning: Splitting these blocks at the last statements @@ -510,7 +532,7 @@ PhaseStatus Compiler::optRangeCheckCloning() // Now just record all the bounds checks in the block (in the execution order) // - BoundsChecksVisitor visitor(this, stmt, &bndChkLocations); + BoundsChecksVisitor visitor(this, stmt, stmtIdx, &bndChkLocations); visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); } @@ -528,7 +550,7 @@ PhaseStatus Compiler::optRangeCheckCloning() { BoundCheckLocation loc = bndChkLocations.Bottom(i); BoundsCheckInfo bci{}; - if (bci.Initialize(this, loc.stmt, loc.bndChkUse)) + if (bci.Initialize(this, loc.stmt, loc.stmtIdx, loc.bndChkUse)) { IdxLenPair key(bci.idxVN, bci.lenVN); BoundsCheckInfoStack** value = bndChkMap.LookupPointerOrAdd(key, nullptr); @@ -552,34 +574,69 @@ PhaseStatus Compiler::optRangeCheckCloning() } // Now choose the largest group of bounds checks (the one with the most checks) - BoundsCheckInfoStack* largestGroup = nullptr; + // BoundsCheckInfoStack* largestGroup = nullptr; + ArrayStack groups(getAllocator(CMK_RangeCheckCloning)); + for (BoundsCheckInfoMap::Node* keyValuePair : BoundsCheckInfoMap::KeyValueIteration(&bndChkMap)) { ArrayStack* value = keyValuePair->GetValue(); - if ((largestGroup == nullptr) || (value->Height() > largestGroup->Height())) + if ((value->Height() >= MIN_CHECKS_PER_GROUP) && !DoesComplexityExceed(this, value)) { - if (DoesComplexityExceed(this, value)) - { - continue; - } - largestGroup = value; + groups.Push(value); } } - if (largestGroup == nullptr) + if (groups.Height() == 0) { JITDUMP("No suitable group of bounds checks in the block - bail out.\n"); continue; } - if (largestGroup->Height() < MIN_CHECKS_PER_GROUP) + // We have multiple groups of bounds checks in the block. + // let's pick a group that appears first in the block and the one whose last bounds check + // appears last in the block. + // + BoundsCheckInfoStack* firstGroup = groups.Top(); + BoundsCheckInfoStack* lastGroup = groups.Top(); + for (int i = 0; i < groups.Height(); i++) { - JITDUMP("Not enough bounds checks in the largest group - bail out.\n"); - continue; + BoundsCheckInfoStack* group = groups.Bottom(i); + int firstStmt = group->Bottom().stmtIdx; + int secondStmt = group->Top().stmtIdx; + if (firstStmt < firstGroup->Bottom().stmtIdx) + { + firstGroup = group; + } + if (secondStmt > lastGroup->Top().stmtIdx) + { + lastGroup = group; + } } - JITDUMP("Cloning bounds checks in " FMT_BB "\n", block->bbNum); - block = optRangeCheckCloning_DoClone(this, block, largestGroup); + // We're going to clone for the first group. + // But let's see if we can extend the end of the group so future iterations + // can fit more groups in the same block. + // + Statement* lastStmt = firstGroup->Top().stmt; + + int firstGroupStarts = firstGroup->Bottom().stmtIdx; + int firstGroupEnds = firstGroup->Top().stmtIdx; + int lastGroupStarts = lastGroup->Bottom().stmtIdx; + int lastGroupEnds = lastGroup->Top().stmtIdx; + + // The only requirement is that both groups must overlap - we don't want to + // end up cloning unrelated statements between them (not a correctness issue, + // just a heuristic to avoid cloning too much). + // + if (firstGroupEnds < lastGroupEnds && firstGroupEnds >= lastGroupStarts) + { + lastStmt = lastGroup->Top().stmt; + } + + JITDUMP("Cloning bounds checks in " FMT_BB " from " FMT_STMT " to " FMT_STMT "\n", block->bbNum, + firstGroup->Bottom().stmt->GetID(), lastStmt->GetID()); + + block = optRangeCheckCloning_DoClone(this, block, firstGroup, lastStmt); modified = true; } diff --git a/src/coreclr/jit/rangecheckcloning.h b/src/coreclr/jit/rangecheckcloning.h index d6f2903e4dbce1..724bbe19f77bd8 100644 --- a/src/coreclr/jit/rangecheckcloning.h +++ b/src/coreclr/jit/rangecheckcloning.h @@ -22,15 +22,18 @@ struct BoundCheckLocation { Statement* stmt; GenTree** bndChkUse; + int stmtIdx; - BoundCheckLocation(Statement* stmt, GenTree** bndChkUse) + BoundCheckLocation(Statement* stmt, GenTree** bndChkUse, int stmtIdx) : stmt(stmt) , bndChkUse(bndChkUse) + , stmtIdx(stmtIdx) { assert(stmt != nullptr); assert((bndChkUse != nullptr)); assert((*bndChkUse) != nullptr); assert((*bndChkUse)->OperIs(GT_BOUNDS_CHECK)); + assert(stmtIdx >= 0); } }; @@ -41,6 +44,7 @@ struct BoundsCheckInfo ValueNum lenVN; ValueNum idxVN; int offset; + int stmtIdx; BoundsCheckInfo() : stmt(nullptr) @@ -48,10 +52,11 @@ struct BoundsCheckInfo , lenVN(ValueNumStore::NoVN) , idxVN(ValueNumStore::NoVN) , offset(0) + , stmtIdx(0) { } - bool Initialize(const Compiler* comp, Statement* statement, GenTree** bndChkUse); + bool Initialize(const Compiler* comp, Statement* statement, int statementIdx, GenTree** bndChkUse); GenTreeBoundsChk* BndChk() const { From fd5cd2edba594bacb359f9310ed85b77133c60ad Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 20 Feb 2025 11:04:41 +0100 Subject: [PATCH 2/4] Remove unused array length node handling --- src/coreclr/jit/rangecheckcloning.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/coreclr/jit/rangecheckcloning.cpp b/src/coreclr/jit/rangecheckcloning.cpp index c7ad1dc89ec5b3..78507b03c34084 100644 --- a/src/coreclr/jit/rangecheckcloning.cpp +++ b/src/coreclr/jit/rangecheckcloning.cpp @@ -108,17 +108,6 @@ static void RemoveBoundsChk(Compiler* comp, GenTree** treeUse, Statement* stmt) JITDUMP("Before RemoveBoundsChk:\n"); DISPTREE(*treeUse); - // If we deal with GT_ARR_LEN(lcl), we can unmark the array length node - // as having an NRE. Technically, this should be done by the assertion prop, - // but not today, see https://github.com/dotnet/runtime/pull/93531 - // - GenTreeBoundsChk* bndsChk = (*treeUse)->AsBoundsChk(); - GenTree* arrObj = bndsChk->GetArray(); - if ((arrObj != nullptr) && ((arrObj->gtFlags & GTF_ALL_EFFECT) == 0)) - { - bndsChk->GetArrayLength()->gtFlags &= ~GTF_EXCEPT; - } - GenTree* sideEffList = nullptr; comp->gtExtractSideEffList(*treeUse, &sideEffList, GTF_SIDE_EFFECT, /*ignoreRoot*/ true); *treeUse = (sideEffList != nullptr) ? sideEffList : comp->gtNewNothingNode(); From b11011a07f462269fc1ed7e7f8d58dfed3e975ff Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 20 Feb 2025 11:11:40 +0100 Subject: [PATCH 3/4] Update src/coreclr/jit/rangecheckcloning.cpp Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/rangecheckcloning.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/rangecheckcloning.cpp b/src/coreclr/jit/rangecheckcloning.cpp index 78507b03c34084..a3d8c95d6dc118 100644 --- a/src/coreclr/jit/rangecheckcloning.cpp +++ b/src/coreclr/jit/rangecheckcloning.cpp @@ -563,7 +563,6 @@ PhaseStatus Compiler::optRangeCheckCloning() } // Now choose the largest group of bounds checks (the one with the most checks) - // BoundsCheckInfoStack* largestGroup = nullptr; ArrayStack groups(getAllocator(CMK_RangeCheckCloning)); for (BoundsCheckInfoMap::Node* keyValuePair : BoundsCheckInfoMap::KeyValueIteration(&bndChkMap)) From 28c62b5618332327d5e957e9b6ec78553cdb0f71 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 20 Feb 2025 11:35:05 +0100 Subject: [PATCH 4/4] feedback --- src/coreclr/jit/rangecheckcloning.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/rangecheckcloning.cpp b/src/coreclr/jit/rangecheckcloning.cpp index a3d8c95d6dc118..4e0460ab4ce6f9 100644 --- a/src/coreclr/jit/rangecheckcloning.cpp +++ b/src/coreclr/jit/rangecheckcloning.cpp @@ -158,7 +158,7 @@ static void RemoveBoundsChk(Compiler* comp, GenTree** treeUse, Statement* stmt) // lastStmt - The last statement in the block (the block is split after this statement) // // Return Value: -// The block containing the fast path. +// The next block to visit after the cloning. // static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, BasicBlock* block, @@ -354,7 +354,7 @@ static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, assert(BasicBlock::sameEHRegion(prevBb, fallbackBb)); assert(BasicBlock::sameEHRegion(prevBb, lastBb)); - return fastpathBb->Prev(); + return fastpathBb; } // A visitor to record all the bounds checks in a statement in the execution order @@ -624,7 +624,11 @@ PhaseStatus Compiler::optRangeCheckCloning() JITDUMP("Cloning bounds checks in " FMT_BB " from " FMT_STMT " to " FMT_STMT "\n", block->bbNum, firstGroup->Bottom().stmt->GetID(), lastStmt->GetID()); - block = optRangeCheckCloning_DoClone(this, block, firstGroup, lastStmt); + BasicBlock* nextBbToVisit = optRangeCheckCloning_DoClone(this, block, firstGroup, lastStmt); + assert(nextBbToVisit != nullptr); + // optRangeCheckCloning_DoClone wants us to visit nextBbToVisit next + block = nextBbToVisit->Prev(); + assert(block != nullptr); modified = true; }