From 015a7ce65192793d07d7faff5ca90be42bff7b1b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 20 Jan 2021 21:58:40 -0800 Subject: [PATCH 01/14] Enable EhWriteThry for SingleDef --- src/coreclr/jit/jitconfigvalues.h | 2 +- src/coreclr/jit/lsra.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 8019c61e0496db..77d1b627a44a6a 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -274,7 +274,7 @@ CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Enable POPCNT CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) #endif // !defined(TARGET_AMD64) && !defined(TARGET_X86) -CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 0) // Enable the register allocator to support EH-write thru: +CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru: // partial enregistration of vars exposed on EH boundaries CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Enable the enregistration of locals that are // defined or used in a multireg context. 
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 31127e89afd758..68f8f470f02960 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1781,7 +1781,7 @@ void LinearScan::identifyCandidates() if (varDsc->lvLiveInOutOfHndlr) { - newInt->isWriteThru = true; + newInt->isWriteThru = varDsc->lvSingleDef; setIntervalAsSpilled(newInt); } From 71b5c52821dc491991e6b6d3e3a5a2e3e141186e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 20 Jan 2021 22:06:27 -0800 Subject: [PATCH 02/14] If EhWriteThru is enabled, DoNotEnregister if variable is not singleDef --- src/coreclr/jit/lclvars.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index ea51815de2e030..9d645d9888da51 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2614,14 +2614,16 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) { noway_assert(lvaTable[i].lvIsStructField); lvaTable[i].lvLiveInOutOfHndlr = 1; - if (!lvaEnregEHVars) + // For now, only enregister an EH Var if it is a single def. + if (!lvaEnregEHVars || !lvaTable[i].lvSingleDef) { lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); } } } - if (!lvaEnregEHVars) + // For now, only enregister an EH Var if it is a single def. 
+ if (!lvaEnregEHVars || !varDsc->lvSingleDef) { lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); } @@ -7314,7 +7316,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r { printf("V"); } - if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr) + if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr && varDsc->lvSingleDef) { printf("H"); } From ca3935961c696cb666640615091fe97e447a735a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 27 Jan 2021 12:53:15 -0800 Subject: [PATCH 03/14] Revert code in ExecutionContext.RunInternal --- .../src/System/Threading/ExecutionContext.cs | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs index e1950bd48587d2..fb5c5e41055440 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs @@ -149,27 +149,16 @@ public static void Run(ExecutionContext executionContext, ContextCallback callba internal static void RunInternal(ExecutionContext? executionContext, ContextCallback callback, object? state) { - // Note: ExecutionContext.RunInternal is an extremely hot function and used by every await, ThreadPool execution, etc. - // Note: Manual enregistering may be addressed by "Exception Handling Write Through Optimization" - // https://github.com/dotnet/runtime/blob/main/docs/design/features/eh-writethru.md - - // Enregister variables with 0 post-fix so they can be used in registers without EH forcing them to stack - // Capture references to Thread Contexts - Thread currentThread0 = Thread.CurrentThread; - Thread currentThread = currentThread0; - ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext; + Thread currentThread = Thread.CurrentThread; + ExecutionContext? 
previousExecutionCtx0 = currentThread._executionContext; if (previousExecutionCtx0 != null && previousExecutionCtx0.m_isDefault) { // Default is a null ExecutionContext internally previousExecutionCtx0 = null; } - // Store current ExecutionContext and SynchronizationContext as "previousXxx". - // This allows us to restore them and undo any Context changes made in callback.Invoke - // so that they won't "leak" back into caller. - // These variables will cross EH so be forced to stack ExecutionContext? previousExecutionCtx = previousExecutionCtx0; - SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext; + SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext; if (executionContext != null && executionContext.m_isDefault) { @@ -177,9 +166,9 @@ internal static void RunInternal(ExecutionContext? executionContext, ContextCall executionContext = null; } - if (previousExecutionCtx0 != executionContext) + if (previousExecutionCtx != executionContext) { - RestoreChangedContextToThread(currentThread0, executionContext, previousExecutionCtx0); + RestoreChangedContextToThread(currentThread, executionContext, previousExecutionCtx); } ExceptionDispatchInfo? edi = null; @@ -195,21 +184,17 @@ internal static void RunInternal(ExecutionContext? executionContext, ContextCall edi = ExceptionDispatchInfo.Capture(ex); } - // Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack - SynchronizationContext? previousSyncCtx1 = previousSyncCtx; - Thread currentThread1 = currentThread; // The common case is that these have not changed, so avoid the cost of a write barrier if not needed. 
- if (currentThread1._synchronizationContext != previousSyncCtx1) + if (currentThread._synchronizationContext != previousSyncCtx) { // Restore changed SynchronizationContext back to previous - currentThread1._synchronizationContext = previousSyncCtx1; + currentThread._synchronizationContext = previousSyncCtx; } - ExecutionContext? previousExecutionCtx1 = previousExecutionCtx; - ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext; - if (currentExecutionCtx1 != previousExecutionCtx1) + ExecutionContext? currentExecutionCtx = currentThread._executionContext; + if (currentExecutionCtx != previousExecutionCtx) { - RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1); + RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx); } // If exception was thrown by callback, rethrow it now original contexts are restored From f51bc7cf3ebf0d3f74e587a9e3b230eb062da6bf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 27 Jan 2021 13:29:41 -0800 Subject: [PATCH 04/14] Revert code in AsyncMethodBuildCore.Start() --- .../AsyncMethodBuilderCore.cs | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs index 6d598e99296216..19b3d826866716 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs @@ -25,17 +25,13 @@ public static void Start(ref TStateMachine stateMachine) where TS ThrowHelper.ThrowArgumentNullException(ExceptionArgument.stateMachine); } - // enregistrer variables with 0 post-fix so they can be used in registers without EH forcing them to stack - // Capture references to Thread Contexts - 
Thread currentThread0 = Thread.CurrentThread; - Thread currentThread = currentThread0; - ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext; + Thread currentThread = Thread.CurrentThread; // Store current ExecutionContext and SynchronizationContext as "previousXxx". // This allows us to restore them and undo any Context changes made in stateMachine.MoveNext // so that they won't "leak" out of the first await. - ExecutionContext? previousExecutionCtx = previousExecutionCtx0; - SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext; + ExecutionContext? previousExecutionCtx = currentThread._executionContext; + SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext; try { @@ -43,21 +39,17 @@ public static void Start(ref TStateMachine stateMachine) where TS } finally { - // Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack - SynchronizationContext? previousSyncCtx1 = previousSyncCtx; - Thread currentThread1 = currentThread; // The common case is that these have not changed, so avoid the cost of a write barrier if not needed. - if (previousSyncCtx1 != currentThread1._synchronizationContext) + if (previousSyncCtx != currentThread._synchronizationContext) { // Restore changed SynchronizationContext back to previous - currentThread1._synchronizationContext = previousSyncCtx1; + currentThread._synchronizationContext = previousSyncCtx; } - ExecutionContext? previousExecutionCtx1 = previousExecutionCtx; - ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext; - if (previousExecutionCtx1 != currentExecutionCtx1) + ExecutionContext? 
currentExecutionCtx = currentThread._executionContext; + if (previousExecutionCtx != currentExecutionCtx) { - ExecutionContext.RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1); + ExecutionContext.RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx); } } } From e434daabf1e970f56860ff0d45008e333c9476a6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 28 Jan 2021 18:52:00 -0800 Subject: [PATCH 05/14] Make sure we do not reset lvSingleDef --- src/coreclr/jit/lclvars.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 9d645d9888da51..485c810dd6fd51 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4180,6 +4180,8 @@ void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute) Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { + // TODO: Stop passing isRecompute once we are sure that this assert is never hit. + assert(!m_isRecompute); m_compiler->lvaMarkLclRefs(*use, m_block, m_stmt, m_isRecompute); return WALK_CONTINUE; } @@ -4439,7 +4441,12 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) // Set initial value for lvSingleDef for explicit and implicit // argument locals as they are "defined" on entry. - varDsc->lvSingleDef = varDsc->lvIsParam; + // However, if we are just recomputing the ref counts, retain the value + // that was set by past phases. 
+ if (!isRecompute) + { + varDsc->lvSingleDef = varDsc->lvIsParam; + } } // Remember current state of generic context use, and prepare @@ -7316,7 +7323,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r { printf("V"); } - if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr && varDsc->lvSingleDef) + if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr) { printf("H"); } From a85efd2a90b77a6bb5c1eab496508c685cfe15e6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 23 Feb 2021 23:21:10 -0800 Subject: [PATCH 06/14] Consitent display of frame offset misc change in superpmi.py --- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/scripts/superpmi.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 485c810dd6fd51..7180f4ec5a32e4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -7203,7 +7203,7 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum) baseReg = EBPbased ? REG_FPBASE : REG_SPBASE; #endif - printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); + printf("[%2s%1s%02XH] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? 
-offset : offset)); } /***************************************************************************** diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 66beb3f1213d5c..5f1edd95270ea6 100755 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1346,7 +1346,7 @@ def save_repro_mc_files(temp_location, coreclr_args, repro_base_command_line): shutil.copy2(item, repro_location) logging.info("") - logging.info("Repro .mc files created for failures:") + logging.info("Repro {} .mc file(s) created for failures:".format(len(repro_files))) for item in repro_files: logging.info(item) From aff80dea82e0cc397701ceba56f9fed3083348e4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 2 Mar 2021 11:25:32 -0800 Subject: [PATCH 07/14] Use lvEHWriteThruCandidate --- src/coreclr/jit/compiler.h | 4 ++++ src/coreclr/jit/lclvars.cpp | 27 +++++++++++++++++++++++++-- src/coreclr/jit/lsra.cpp | 26 +++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a38071f31a3696..1c848d2608b213 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -454,6 +454,10 @@ class LclVarDsc // before lvaMarkLocalVars: identifies ref type locals that can get type updates // after lvaMarkLocalVars: identifies locals that are suitable for optAddCopies + unsigned char lvEhWriteThruCandidate : 1; + + unsigned char lvDisqualifyForEhWriteThru : 1; + #if ASSERTION_PROP unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization unsigned char lvVolatileHint : 1; // hint for AssertionProp diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 7180f4ec5a32e4..9d37fbe30eb794 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2615,7 +2615,7 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) noway_assert(lvaTable[i].lvIsStructField); 
lvaTable[i].lvLiveInOutOfHndlr = 1; // For now, only enregister an EH Var if it is a single def. - if (!lvaEnregEHVars || !lvaTable[i].lvSingleDef) + if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate) { lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); } @@ -2623,7 +2623,7 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) } // For now, only enregister an EH Var if it is a single def. - if (!lvaEnregEHVars || !varDsc->lvSingleDef) + if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate) { lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); } @@ -4077,6 +4077,28 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, BlockSetOps::AddElemD(this, varDsc->lvRefBlks, block->bbNum); } } + + if (!varDsc->lvDisqualifyForEhWriteThru) + { + if (tree->gtFlags & GTF_VAR_DEF) + { + bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; + bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; + bool needsExplicitZeroInit = fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn); + + if ((varDsc->lvEhWriteThruCandidate == true) || (needsExplicitZeroInit == true) || + (tree->gtFlags & GTF_COLON_COND) || (tree->gtFlags & GTF_VAR_USEASG)) + { + varDsc->lvEhWriteThruCandidate = false; + varDsc->lvDisqualifyForEhWriteThru = true; + } + else + { + varDsc->lvEhWriteThruCandidate = true; + } + } + } + #endif // ASSERTION_PROP bool allowStructs = false; @@ -4446,6 +4468,7 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) if (!isRecompute) { varDsc->lvSingleDef = varDsc->lvIsParam; + varDsc->lvEhWriteThruCandidate = varDsc->lvIsParam; } } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 68f8f470f02960..78b418003954cf 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -874,6 +874,7 @@ void LinearScan::setBlockSequence() } } + if (!block->isBBCallAlwaysPairTail() && (predBlock->hasEHBoundaryOut() || predBlock->isBBCallAlwaysPairTail())) { @@ 
-1781,7 +1782,18 @@ void LinearScan::identifyCandidates() if (varDsc->lvLiveInOutOfHndlr) { - newInt->isWriteThru = varDsc->lvSingleDef; + newInt->isWriteThru = varDsc->lvEhWriteThruCandidate; +#ifdef DEBUG + if (newInt->isWriteThru) + { + JITDUMP("Marking Interal %d as writeThru because V%02u is a single def.\n", newInt->intervalIndex, lclNum); + } + else + { + JITDUMP("Skipping Interal %d as writeThru because V%02u is not a single def.\n", + newInt->intervalIndex, lclNum); + } +#endif setIntervalAsSpilled(newInt); } @@ -7949,6 +7961,18 @@ void LinearScan::insertMove( // These block kinds don't have a branch at the end. assert((lastNode == nullptr) || (!lastNode->OperIsConditionalJump() && !lastNode->OperIs(GT_SWITCH_TABLE, GT_SWITCH, GT_RETURN, GT_RETFILT))); + + /*if (lastNode != nullptr && lastNode->OperIs(GT_STORE_LCL_VAR)) + { + regNumber prevToReg = lastNode->GetReg(); + GenTree* op1 = lastNode->gtGetOp1(); + regNumber prevFromReg = op1->GetReg(); + if (op1->TypeGet() == varDsc->TypeGet() && (fromReg == prevToReg) && (toReg == prevFromReg)) + { + JITDUMP("Skipping redundant resoltion"); + return; + } + }*/ blockRange.InsertAtEnd(std::move(treeRange)); } } From 24b0af6c38b3016af687728d7357817eda7eb28d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 3 Mar 2021 00:15:40 -0800 Subject: [PATCH 08/14] Do not enregister EH Var that has single use --- src/coreclr/jit/lclvars.cpp | 4 ++-- src/coreclr/jit/lsra.cpp | 11 ----------- src/coreclr/scripts/superpmi.py | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 9d37fbe30eb794..47196a08db44d1 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2615,7 +2615,7 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) noway_assert(lvaTable[i].lvIsStructField); lvaTable[i].lvLiveInOutOfHndlr = 1; // For now, only enregister an EH Var if it is a single def. 
- if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate) + if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate || lvaTable[i].lvRefCnt() <= 1) { lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); } @@ -2623,7 +2623,7 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) } // For now, only enregister an EH Var if it is a single def. - if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate) + if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate || varDsc->lvRefCnt() <= 1) { lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 78b418003954cf..f6e99c7614f28c 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1783,17 +1783,6 @@ void LinearScan::identifyCandidates() if (varDsc->lvLiveInOutOfHndlr) { newInt->isWriteThru = varDsc->lvEhWriteThruCandidate; -#ifdef DEBUG - if (newInt->isWriteThru) - { - JITDUMP("Marking Interal %d as writeThru because V%02u is a single def.\n", newInt->intervalIndex, lclNum); - } - else - { - JITDUMP("Skipping Interal %d as writeThru because V%02u is not a single def.\n", - newInt->intervalIndex, lclNum); - } -#endif setIntervalAsSpilled(newInt); } diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 5f1edd95270ea6..ef3dcbd8eab6a6 100755 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1565,7 +1565,7 @@ def replay_with_asm_diffs(self): "COMPlus_JitDiffableDasm": "1", "COMPlus_JitEnableNoWayAssert": "1", "COMPlus_JitNoForceFallback": "1", - "COMPlus_JitDisasmWithGC": "1" } + "COMPlus_JitDisasmWithGC": "0" } if self.coreclr_args.gcinfo: asm_complus_vars.update({ From ba579577682388107af799dbf4015095d82ba127 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Mar 2021 15:38:24 -0800 Subject: [PATCH 09/14] do not enregister simdtype --- src/coreclr/jit/compiler.h | 18 ++++++++++++++++++ src/coreclr/jit/lclvars.cpp | 13 ++++++++++--- 
src/coreclr/jit/lsra.cpp | 23 +++++------------------ src/coreclr/jit/lsra.h | 15 +-------------- src/coreclr/jit/lsrabuild.cpp | 13 ++++++------- 5 files changed, 40 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1c848d2608b213..c070804767e16f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7373,6 +7373,24 @@ class Compiler void raMarkStkVars(); +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +#if defined(TARGET_AMD64) + static bool varTypeNeedsPartialCalleeSave(var_types type) + { + return (type == TYP_SIMD32); + } +#elif defined(TARGET_ARM64) + static bool varTypeNeedsPartialCalleeSave(var_types type) + { + // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes + // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. + return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); + } +#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) +#error("Unknown target architecture for FEATURE_SIMD") +#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + protected: // Some things are used by both LSRA and regpredict allocators. diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 47196a08db44d1..e576e26dcf996a 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4089,12 +4089,19 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, if ((varDsc->lvEhWriteThruCandidate == true) || (needsExplicitZeroInit == true) || (tree->gtFlags & GTF_COLON_COND) || (tree->gtFlags & GTF_VAR_USEASG)) { - varDsc->lvEhWriteThruCandidate = false; + varDsc->lvEhWriteThruCandidate = false; varDsc->lvDisqualifyForEhWriteThru = true; } else { - varDsc->lvEhWriteThruCandidate = true; +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + // TODO-CQ: If the varType needs partial callee save, conservatively do not enregister + // such variable. 
In future, need to enable enregisteration for such variables. + if (!varTypeNeedsPartialCalleeSave(varDsc->lvType)) +#endif + { + varDsc->lvEhWriteThruCandidate = true; + } } } } @@ -4467,7 +4474,7 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) // that was set by past phases. if (!isRecompute) { - varDsc->lvSingleDef = varDsc->lvIsParam; + varDsc->lvSingleDef = varDsc->lvIsParam; varDsc->lvEhWriteThruCandidate = varDsc->lvIsParam; } } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index f6e99c7614f28c..71dd2e759dcecb 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -874,7 +874,6 @@ void LinearScan::setBlockSequence() } } - if (!block->isBBCallAlwaysPairTail() && (predBlock->hasEHBoundaryOut() || predBlock->isBBCallAlwaysPairTail())) { @@ -1797,7 +1796,7 @@ void LinearScan::identifyCandidates() // Additionally, when we are generating code for a target with partial SIMD callee-save // (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the // LargeVectorType vars. - if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) + if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)) { largeVectorVarCount++; VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex); @@ -5051,7 +5050,7 @@ void LinearScan::processBlockEndLocations(BasicBlock* currentBlock) } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE // Ensure that we have no partially-spilled large vector locals. 
- assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled); + assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB)); @@ -6923,7 +6922,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, } LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum; - assert(varTypeNeedsPartialCalleeSave(varDsc->lvType)); + assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)); // On Arm64, we must always have a register to save the upper half, // while on x86 we can spill directly to memory. @@ -7004,7 +7003,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, // lclVar as spilled). assert(lclVarReg != REG_NA); LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum; - assert(varTypeNeedsPartialCalleeSave(varDsc->lvType)); + assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)); GenTree* restoreLcl = nullptr; restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType); @@ -7069,7 +7068,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, } else { - assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); + assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS || block->bbJumpKind == BBJ_THROW); blockRange.InsertAtEnd(LIR::SeqTree(compiler, simdNode)); } } @@ -7950,18 +7949,6 @@ void LinearScan::insertMove( // These block kinds don't have a branch at the end. 
assert((lastNode == nullptr) || (!lastNode->OperIsConditionalJump() && !lastNode->OperIs(GT_SWITCH_TABLE, GT_SWITCH, GT_RETURN, GT_RETFILT))); - - /*if (lastNode != nullptr && lastNode->OperIs(GT_STORE_LCL_VAR)) - { - regNumber prevToReg = lastNode->GetReg(); - GenTree* op1 = lastNode->gtGetOp1(); - regNumber prevFromReg = op1->GetReg(); - if (op1->TypeGet() == varDsc->TypeGet() && (fromReg == prevToReg) && (toReg == prevFromReg)) - { - JITDUMP("Skipping redundant resoltion"); - return; - } - }*/ blockRange.InsertAtEnd(std::move(treeRange)); } } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 345cbb451f788d..cd5740a294ab18 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -988,7 +988,7 @@ class LinearScan : public LinearScanInterface void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); - void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc); + void buildRefPositionsForNode(GenTree* tree, LsraLocation loc); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet); @@ -1500,23 +1500,10 @@ class LinearScan : public LinearScanInterface #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE #if defined(TARGET_AMD64) - static bool varTypeNeedsPartialCalleeSave(var_types type) - { - return (type == TYP_SIMD32); - } static const var_types LargeVectorSaveType = TYP_SIMD16; #elif defined(TARGET_ARM64) - static bool varTypeNeedsPartialCalleeSave(var_types type) - { - // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes - // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. 
- return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); - } static const var_types LargeVectorSaveType = TYP_DOUBLE; -#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) -#error("Unknown target architecture for FEATURE_SIMD") #endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) - // Set of large vector (TYP_SIMD32 on AVX) variables. VARSET_TP largeVectorVars; // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers. diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 88430f70fe771a..7d21d34e4a68bf 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1160,7 +1160,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) + if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)) { if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) { @@ -1424,7 +1424,7 @@ void LinearScan::buildInternalRegisterUses() void LinearScan::makeUpperVectorInterval(unsigned varIndex) { Interval* lclVarInterval = getIntervalForLocalVar(varIndex); - assert(varTypeNeedsPartialCalleeSave(lclVarInterval->registerType)); + assert(Compiler::varTypeNeedsPartialCalleeSave(lclVarInterval->registerType)); Interval* newInt = newInterval(LargeVectorSaveType); newInt->relatedInterval = lclVarInterval; newInt->isUpperVector = true; @@ -1506,7 +1506,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end; listNode = listNode->Next()) { - if (varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet())) + if (Compiler::varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet())) { // In the rare case where such an interval is live across nested calls, we don't need to 
insert another. if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave) @@ -1637,10 +1637,9 @@ int LinearScan::ComputeAvailableSrcCount(GenTree* node) // // Arguments: // tree - The node for which we are building RefPositions -// block - The BasicBlock in which the node resides // currentLoc - The LsraLocation of the given node // -void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc) +void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc) { // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. // GT_CLS_VAR nodes should have been eliminated by rationalizer. @@ -2351,7 +2350,7 @@ void LinearScan::buildIntervals() node->SetRegNum(node->GetRegNum()); #endif - buildRefPositionsForNode(node, block, currentLoc); + buildRefPositionsForNode(node, currentLoc); #ifdef DEBUG if (currentLoc > maxNodeLocation) @@ -3232,7 +3231,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, def->regOptional = true; } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType)) + if (Compiler::varTypeNeedsPartialCalleeSave(varDefInterval->registerType)) { varDefInterval->isPartiallySpilled = false; } From 061f8e0440266e1b69b5276faeea1ddb66322f0c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 5 Mar 2021 14:51:27 -0800 Subject: [PATCH 10/14] add missing comments --- src/coreclr/jit/compiler.h | 5 +++-- src/coreclr/jit/lclvars.cpp | 4 ++-- src/coreclr/scripts/superpmi.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c070804767e16f..e49a16f8c42a3e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -454,9 +454,10 @@ class LclVarDsc // before lvaMarkLocalVars: identifies ref type locals that can get type updates // after lvaMarkLocalVars: identifies locals that are suitable for optAddCopies - unsigned char 
lvEhWriteThruCandidate : 1; + unsigned char lvEhWriteThruCandidate : 1; // variable has a single def and hence is a register candidate if + // if it is an EH variable - unsigned char lvDisqualifyForEhWriteThru : 1; + unsigned char lvDisqualifyForEhWriteThru : 1; // tracks variable that are disqualified from register candidancy #if ASSERTION_PROP unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index e576e26dcf996a..3a409115d4259f 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2614,7 +2614,7 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) { noway_assert(lvaTable[i].lvIsStructField); lvaTable[i].lvLiveInOutOfHndlr = 1; - // For now, only enregister an EH Var if it is a single def. + // For now, only enregister an EH Var if it is a single def and whose refCnt > 1. if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate || lvaTable[i].lvRefCnt() <= 1) { lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); @@ -2622,7 +2622,7 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) } } - // For now, only enregister an EH Var if it is a single def. + // For now, only enregister an EH Var if it is a single def and whose refCnt > 1. 
if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate || varDsc->lvRefCnt() <= 1) { lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index ef3dcbd8eab6a6..5f1edd95270ea6 100755 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1565,7 +1565,7 @@ def replay_with_asm_diffs(self): "COMPlus_JitDiffableDasm": "1", "COMPlus_JitEnableNoWayAssert": "1", "COMPlus_JitNoForceFallback": "1", - "COMPlus_JitDisasmWithGC": "0" } + "COMPlus_JitDisasmWithGC": "1" } if self.coreclr_args.gcinfo: asm_complus_vars.update({ From 802424abb5befe621f681aecd310aecde136a1e4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 7 Mar 2021 19:23:19 -0800 Subject: [PATCH 11/14] jit format --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e49a16f8c42a3e..7fae2166e8e292 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -457,7 +457,7 @@ class LclVarDsc unsigned char lvEhWriteThruCandidate : 1; // variable has a single def and hence is a register candidate if // if it is an EH variable - unsigned char lvDisqualifyForEhWriteThru : 1; // tracks variable that are disqualified from register candidancy + unsigned char lvDisqualifyForEhWriteThru : 1; // tracks variables that are disqualified from register candidacy #if ASSERTION_PROP unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization From c28e628ce0d0c20fd2f8de667a0a9f641057e046 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 8 Mar 2021 08:42:34 -0800 Subject: [PATCH 12/14] revert an unintended change --- src/coreclr/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 71dd2e759dcecb..cd4a48bcdfe5de 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@
-7068,7 +7068,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, } else { - assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS || block->bbJumpKind == BBJ_THROW); + assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); blockRange.InsertAtEnd(LIR::SeqTree(compiler, simdNode)); } } From fc6823e4ae02a9bddbdf7b5b98e7c1a5c422970e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 8 Mar 2021 14:24:27 -0800 Subject: [PATCH 13/14] jit format --- src/coreclr/jit/lsra.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index cd5740a294ab18..7e41701aca5e04 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1502,7 +1502,7 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_AMD64) static const var_types LargeVectorSaveType = TYP_SIMD16; #elif defined(TARGET_ARM64) - static const var_types LargeVectorSaveType = TYP_DOUBLE; + static const var_types LargeVectorSaveType = TYP_DOUBLE; #endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) // Set of large vector (TYP_SIMD32 on AVX) variables. 
VARSET_TP largeVectorVars; From a3a996706e87a4d358b29848beccf684523e7cbc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 8 Mar 2021 23:20:56 -0800 Subject: [PATCH 14/14] Add missing comments --- src/coreclr/jit/lclvars.cpp | 9 ++++----- .../src/System/Threading/ExecutionContext.cs | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 3a409115d4259f..ca6e34a083e1ca 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4042,7 +4042,7 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, /* Record if the variable has a single def or not */ - if (!varDsc->lvDisqualify) // If this variable is already disqualified we can skip this + if (!varDsc->lvDisqualify) // If this variable is already disqualified, we can skip this { if (tree->gtFlags & GTF_VAR_DEF) // Is this is a def of our variable { @@ -4078,16 +4078,15 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, } } - if (!varDsc->lvDisqualifyForEhWriteThru) + if (!varDsc->lvDisqualifyForEhWriteThru) // If this EH var is already disqualified, we can skip this { - if (tree->gtFlags & GTF_VAR_DEF) + if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable { bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; bool needsExplicitZeroInit = fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn); - if ((varDsc->lvEhWriteThruCandidate == true) || (needsExplicitZeroInit == true) || - (tree->gtFlags & GTF_COLON_COND) || (tree->gtFlags & GTF_VAR_USEASG)) + if (varDsc->lvEhWriteThruCandidate || needsExplicitZeroInit) { varDsc->lvEhWriteThruCandidate = false; varDsc->lvDisqualifyForEhWriteThru = true; diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs index
fb5c5e41055440..30b6f9e2561e3f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs @@ -149,6 +149,12 @@ public static void Run(ExecutionContext executionContext, ContextCallback callba internal static void RunInternal(ExecutionContext? executionContext, ContextCallback callback, object? state) { + // Note: ExecutionContext.RunInternal is an extremely hot function and used by every await, ThreadPool execution, etc. + // Note: Manual enregistering may be addressed by "Exception Handling Write Through Optimization" + // https://github.com/dotnet/runtime/blob/main/docs/design/features/eh-writethru.md + + // Enregister previousExecutionCtx0 so it can be used in registers without EH forcing it to stack + Thread currentThread = Thread.CurrentThread; ExecutionContext? previousExecutionCtx0 = currentThread._executionContext; if (previousExecutionCtx0 != null && previousExecutionCtx0.m_isDefault)