Skip to content

Commit a9d67ec

Browse files
hez2010AndyAyersMS
andauthored
JIT: Enable inlining for late devirtualization (#110827)
* Enable inlining for late devirt * Pass correct IL offset * Only creates RET_EXPR if the node is not top-level or not returning void * Do not try inlining if BBF_INTERNAL is set * Ensure the type matches * Set inliner context correctly * Address review feedbacks * Fix non inline candidate marking * Handle calls with retbuf * Handle BBF_INTERNAL * Get real type from local * Always set IL offset * Add comments * Oops * Use gtReturnType instead * Remove unused ilOffset * Use genActualType * Remove unnecessary spillTemp * Handle nested call correctly * Don't promote compCurStmt * Remove unused ilOffset * Handle BBF_INTERNAL * Use correct return type * Use bbInCatchHandlerBBRange and bbInFilterBBRange * Cleanup fncRetType * Add a runtime check to prevent accidental execution order change * Format jit * Revert some changes * Remove unused local * Check whether a call can be spilled without side effect * Get rid of BAD_VAR_NUM * Add comments for CanSpillCallWithoutSideEffect * Use ancestors to estimate whether a call can be spilled or not * Reset m_ancestors before walking * Nit * Fix assertion * Limit to GT_STORE_LCL_VAR only * Oops * Inline the check * Remove leftovers * Hoist the check * Make sure the store parent is the statement root Co-authored-by: Andy Ayers <[email protected]> * Format JIT * Check side effects before trying inlining * Fix * Nit * Make lvHasLdAddrOp check optional * Rename to early * Split effects if necessary * Use gtSplitTree * Teach gtSplitTree to support early use * Cleanup * ClearInlineInfo is not needed --------- Co-authored-by: Andy Ayers <[email protected]>
1 parent 21ab780 commit a9d67ec

File tree

4 files changed

+102
-20
lines changed

4 files changed

+102
-20
lines changed

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3790,7 +3790,7 @@ class Compiler
37903790
bool ignoreRoot = false);
37913791

37923792
bool gtSplitTree(
3793-
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse);
3793+
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse, bool early = false);
37943794

37953795
bool gtStoreDefinesField(
37963796
LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize);
@@ -5114,6 +5114,8 @@ class Compiler
51145114
SpillCliqueSucc
51155115
};
51165116

5117+
friend class SubstitutePlaceholdersAndDevirtualizeWalker;
5118+
51175119
// Abstract class for receiving a callback while walking a spill clique
51185120
class SpillCliqueWalker
51195121
{

src/coreclr/jit/fginline.cpp

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,9 @@ bool Compiler::TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, i
204204

205205
class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<SubstitutePlaceholdersAndDevirtualizeWalker>
206206
{
207-
bool m_madeChanges = false;
207+
bool m_madeChanges = false;
208+
Statement* m_curStmt = nullptr;
209+
Statement* m_firstNewStmt = nullptr;
208210

209211
public:
210212
enum
@@ -219,11 +221,29 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
219221
{
220222
}
221223

222-
bool MadeChanges()
224+
bool MadeChanges() const
223225
{
224226
return m_madeChanges;
225227
}
226228

229+
// ------------------------------------------------------------------------
230+
// WalkStatement: Walk the tree of a statement, and return the first newly
231+
// added statement if any, otherwise return the original statement.
232+
//
233+
// Arguments:
234+
// stmt - the statement to walk.
235+
//
236+
// Return Value:
237+
// The first newly added statement if any, or the original statement.
238+
//
239+
Statement* WalkStatement(Statement* stmt)
240+
{
241+
m_curStmt = stmt;
242+
m_firstNewStmt = nullptr;
243+
WalkTree(m_curStmt->GetRootNodePointer(), nullptr);
244+
return m_firstNewStmt == nullptr ? m_curStmt : m_firstNewStmt;
245+
}
246+
227247
fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
228248
{
229249
GenTree* tree = *use;
@@ -586,8 +606,59 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
586606
}
587607

588608
CORINFO_CONTEXT_HANDLE contextInput = context;
609+
context = nullptr;
589610
m_compiler->impDevirtualizeCall(call, nullptr, &method, &methodFlags, &contextInput, &context,
590611
isLateDevirtualization, explicitTailCall);
612+
613+
if (!call->IsVirtual())
614+
{
615+
assert(context != nullptr);
616+
CORINFO_CALL_INFO callInfo = {};
617+
callInfo.hMethod = method;
618+
callInfo.methodFlags = methodFlags;
619+
m_compiler->impMarkInlineCandidate(call, context, false, &callInfo);
620+
621+
if (call->IsInlineCandidate())
622+
{
623+
Statement* newStmt = nullptr;
624+
GenTree** callUse = nullptr;
625+
if (m_compiler->gtSplitTree(m_compiler->compCurBB, m_curStmt, call, &newStmt, &callUse, true))
626+
{
627+
if (m_firstNewStmt == nullptr)
628+
{
629+
m_firstNewStmt = newStmt;
630+
}
631+
}
632+
633+
// If the call is the root expression in a statement, and it returns void,
634+
// we can inline it directly without creating a RET_EXPR.
635+
if (parent != nullptr || call->gtReturnType != TYP_VOID)
636+
{
637+
Statement* stmt = m_compiler->gtNewStmt(call);
638+
m_compiler->fgInsertStmtBefore(m_compiler->compCurBB, m_curStmt, stmt);
639+
if (m_firstNewStmt == nullptr)
640+
{
641+
m_firstNewStmt = stmt;
642+
}
643+
644+
GenTreeRetExpr* retExpr =
645+
m_compiler->gtNewInlineCandidateReturnExpr(call->AsCall(),
646+
genActualType(call->TypeGet()));
647+
call->GetSingleInlineCandidateInfo()->retExpr = retExpr;
648+
649+
JITDUMP("Creating new RET_EXPR for [%06u]:\n", call->gtTreeID);
650+
DISPTREE(retExpr);
651+
652+
*pTree = retExpr;
653+
}
654+
655+
call->GetSingleInlineCandidateInfo()->exactContextHandle = context;
656+
INDEBUG(call->GetSingleInlineCandidateInfo()->inlinersContext = call->gtInlineContext);
657+
658+
JITDUMP("New inline candidate due to late devirtualization:\n");
659+
DISPTREE(call);
660+
}
661+
}
591662
m_madeChanges = true;
592663
}
593664
}
@@ -730,17 +801,10 @@ PhaseStatus Compiler::fgInline()
730801
do
731802
{
732803
// Make the current basic block address available globally
733-
compCurBB = block;
734-
735-
for (Statement* const stmt : block->Statements())
804+
compCurBB = block;
805+
Statement* stmt = block->firstStmt();
806+
while (stmt != nullptr)
736807
{
737-
738-
#if defined(DEBUG)
739-
// In debug builds we want the inline tree to show all failed
740-
// inlines. Some inlines may fail very early and never make it to
741-
// candidate stage. So scan the tree looking for those early failures.
742-
fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt);
743-
#endif
744808
// See if we need to replace some return value place holders.
745809
// Also, see if this replacement enables further devirtualization.
746810
//
@@ -755,7 +819,7 @@ PhaseStatus Compiler::fgInline()
755819
// possible further optimization, as the (now complete) GT_RET_EXPR
756820
// replacement may have enabled optimizations by providing more
757821
// specific types for trees or variables.
758-
walker.WalkTree(stmt->GetRootNodePointer(), nullptr);
822+
stmt = walker.WalkStatement(stmt);
759823

760824
GenTree* expr = stmt->GetRootNode();
761825

@@ -805,6 +869,13 @@ PhaseStatus Compiler::fgInline()
805869
madeChanges = true;
806870
stmt->SetRootNode(expr->AsOp()->gtOp1);
807871
}
872+
873+
#if defined(DEBUG)
874+
// In debug builds we want the inline tree to show all failed
875+
// inlines.
876+
fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt);
877+
#endif
878+
stmt = stmt->GetNextStmt();
808879
}
809880

810881
block = block->Next();

src/coreclr/jit/gentree.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17005,6 +17005,8 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S
1700517005
// firstNewStmt - [out] The first new statement that was introduced.
1700617006
// [firstNewStmt..stmt) are the statements added by this function.
1700717007
// splitNodeUse - The use of the tree to split at.
17008+
// early - The run is in the early phase where we still don't have valid
17009+
// GTF_GLOB_REF yet.
1700817010
//
1700917011
// Notes:
1701017012
// This method turns all non-invariant nodes that would be executed before
@@ -17025,14 +17027,19 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S
1702517027
// Returns:
1702617028
// True if any changes were made; false if nothing needed to be done to split the tree.
1702717029
//
17028-
bool Compiler::gtSplitTree(
17029-
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse)
17030+
bool Compiler::gtSplitTree(BasicBlock* block,
17031+
Statement* stmt,
17032+
GenTree* splitPoint,
17033+
Statement** firstNewStmt,
17034+
GenTree*** splitNodeUse,
17035+
bool early)
1703017036
{
1703117037
class Splitter final : public GenTreeVisitor<Splitter>
1703217038
{
1703317039
BasicBlock* m_bb;
1703417040
Statement* m_splitStmt;
1703517041
GenTree* m_splitNode;
17042+
bool m_early;
1703617043

1703717044
struct UseInfo
1703817045
{
@@ -17049,11 +17056,12 @@ bool Compiler::gtSplitTree(
1704917056
UseExecutionOrder = true
1705017057
};
1705117058

17052-
Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode)
17059+
Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool early)
1705317060
: GenTreeVisitor(compiler)
1705417061
, m_bb(bb)
1705517062
, m_splitStmt(stmt)
1705617063
, m_splitNode(splitNode)
17064+
, m_early(early)
1705717065
, m_useStack(compiler->getAllocator(CMK_ArrayStack))
1705817066
{
1705917067
}
@@ -17195,7 +17203,8 @@ bool Compiler::gtSplitTree(
1719517203
return;
1719617204
}
1719717205

17198-
if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed())
17206+
if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed() &&
17207+
!(m_early && m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->lvHasLdAddrOp))
1719917208
{
1720017209
// The splitting we do here should always guarantee that we
1720117210
// only introduce locals for the tree edges that overlap the
@@ -17278,7 +17287,7 @@ bool Compiler::gtSplitTree(
1727817287
}
1727917288
};
1728017289

17281-
Splitter splitter(this, block, stmt, splitPoint);
17290+
Splitter splitter(this, block, stmt, splitPoint, early);
1728217291
splitter.WalkTree(stmt->GetRootNodePointer(), nullptr);
1728317292
*firstNewStmt = splitter.FirstStatement;
1728417293
*splitNodeUse = splitter.SplitNodeUse;

src/coreclr/jit/importercalls.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
329329
assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
330330
(sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
331331

332-
call = gtNewIndCallNode(stubAddr, callRetTyp);
332+
call = gtNewIndCallNode(stubAddr, callRetTyp, di);
333333

334334
call->gtFlags |= GTF_EXCEPT | (stubAddr->gtFlags & GTF_GLOB_EFFECT);
335335
call->gtFlags |= GTF_CALL_VIRT_STUB;

0 commit comments

Comments
 (0)