From 52c1022c47e3c733c8c00db69e2ec418fa730447 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Tue, 25 Apr 2023 16:07:15 +0200
Subject: [PATCH 01/37] JIT: Generalize assignment decomposition in physical
 promotion

Generalize assignment decomposition to handle arbitrary combinations of
physically promoted structs. Do this by introducing a DecompositionPlan
class that keeps track of the copies to be performed that involve
replacement fields. The first step is then to fill out this plan. In the
general case, where both the source and the destination are physically
promoted, this involves iterating their replacements in lockstep. For
promotions that map exactly, a direct copy between their locals is queued
into the plan; in other cases (e.g. partial overlap) it may involve
writing the source back to the struct local.

The plan is used to generate the IR and to figure out whether the writes
of all the replacement fields cover the destination, in which case the
original struct copy can be omitted.

Also unnest StatementList and rename it to DecompositionStatementList,
as the old name conflicted with another class.
---
 src/coreclr/jit/compiler.h                    |   3 +
 src/coreclr/jit/promotion.cpp                 | 965 ++++++++++++------
 .../physicalpromotion/physicalpromotion.cs    |  70 +-
 3 files changed, 735 insertions(+), 303 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 718967d62e89a1..d4c4985e3033bc 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -6145,8 +6145,11 @@ class Compiler
     bool gtTreeContainsOper(GenTree* tree, genTreeOps op);
     ExceptionSetFlags gtCollectExceptions(GenTree* tree);
 
+public:
     bool fgIsBigOffset(size_t offset);
 
+private:
+
     bool fgNeedReturnSpillTemp();
 
 /*
diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp
index 4cab8522818b58..da96afa26ebd66 100644
--- a/src/coreclr/jit/promotion.cpp
+++ b/src/coreclr/jit/promotion.cpp
@@ -147,8 +147,6 @@ struct Replacement
     // a basic block, i.e. all predecessors would have read the replacement
     // back before transferring control if necessary.
     bool NeedsReadBack = false;
-    // Arbitrary flag bit used e.g. by decomposition. Assumed to be false.
-    bool Handled = false;
 #ifdef DEBUG
     const char* Name;
 #endif
@@ -608,6 +606,7 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
         return m_uses[lclNum];
     }
 
+    //------------------------------------------------------------------------
     // ClassifyLocalAccess:
     //   Given a local use and its user, classify information about it.
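For context, a minimal standalone model (invented Node and StatementList
types; this is not JIT code) of the statement-list scheme the next hunk
introduces: statements are prepended to a singly linked list, and the list
is later folded into a right-leaning chain of comma nodes so that the
statements evaluate in the order they were added.

    #include <cstdio>
    #include <string>

    // Node stands in for GenTree; next models the gtNext link.
    struct Node
    {
        std::string text;
        Node*       next = nullptr;
    };

    struct StatementList
    {
        Node* head = nullptr;

        // Models AddStatement: prepend, so head is the most recently added.
        void Add(Node* n)
        {
            n->next = head;
            head    = n;
        }

        // Models ToCommaTree: fold so that earlier statements become the
        // left (first-evaluated) operand of each comma node.
        std::string ToCommaTree() const
        {
            if (head == nullptr)
                return "nop";

            std::string tree = head->text;
            for (Node* cur = head->next; cur != nullptr; cur = cur->next)
                tree = "comma(" + cur->text + ", " + tree + ")";

            return tree;
        }
    };

    int main()
    {
        Node a{"stmt1"}, b{"stmt2"}, c{"stmt3"};
        StatementList list;
        list.Add(&a);
        list.Add(&b);
        list.Add(&c);
        // Prints: comma(stmt1, comma(stmt2, stmt3))
        printf("%s\n", list.ToCommaTree().c_str());
    }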
@@ -675,6 +674,505 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
     }
 };
 
+class DecompositionStatementList
+{
+    GenTree* m_head = nullptr;
+
+public:
+    void AddStatement(GenTree* stmt)
+    {
+        stmt->gtNext = m_head;
+        m_head       = stmt;
+    }
+
+    GenTree* ToCommaTree(Compiler* comp)
+    {
+        if (m_head == nullptr)
+        {
+            return comp->gtNewNothingNode();
+        }
+
+        GenTree* tree = m_head;
+
+        for (GenTree* cur = m_head->gtNext; cur != nullptr; cur = cur->gtNext)
+        {
+            tree = comp->gtNewOperNode(GT_COMMA, TYP_VOID, cur, tree);
+        }
+
+        return tree;
+    }
+};
+
+class DecompositionPlan
+{
+    struct Entry
+    {
+        unsigned  ToLclNum;
+        unsigned  FromLclNum;
+        unsigned  Offset;
+        var_types Type;
+    };
+
+    Compiler*         m_compiler;
+    ArrayStack<Entry> m_entries;
+    GenTree*          m_dst;
+    GenTree*          m_src;
+
+public:
+    DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src)
+        : m_compiler(comp), m_entries(comp->getAllocator(CMK_Promotion)), m_dst(dst), m_src(src)
+    {
+    }
+
+    //------------------------------------------------------------------------
+    // CopyBetweenReplacements:
+    //   Add an entry specifying to copy from a local into another local.
+    //
+    // Parameters:
+    //   dstLcl - The destination local to write.
+    //   srcLcl - The source local.
+    //   offset - The offset this covers in the struct copy.
+    //   type   - The type of copy.
+    //
+    // Remarks:
+    //   This may be used for cases where the destination or source is a
+    //   regularly promoted field.
+    //
+    void CopyBetweenReplacements(unsigned dstLcl, unsigned srcLcl, unsigned offset, var_types type)
+    {
+        m_entries.Push(Entry{dstLcl, srcLcl, offset, type});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyToReplacement:
+    //   Add an entry specifying to copy from the source into a replacement local.
+    //
+    // Parameters:
+    //   dstLcl - The destination local to write.
+    //   offset - The relative offset into the source.
+    //   type   - The type of copy.
+    //
+    void CopyToReplacement(unsigned dstLcl, unsigned offset, var_types type)
+    {
+        m_entries.Push(Entry{dstLcl, BAD_VAR_NUM, offset, type});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyFromReplacement:
+    //   Add an entry specifying to copy from a replacement local into the destination.
+    //
+    // Parameters:
+    //   srcLcl - The source local to copy from.
+    //   offset - The relative offset into the destination to write.
+    //   type   - The type of copy.
+    //
+    void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type)
+    {
+        m_entries.Push(Entry{BAD_VAR_NUM, srcLcl, offset, type});
+    }
+
+    //------------------------------------------------------------------------
+    // InitReplacement:
+    //   Add an entry specifying that a specified replacement local should be
+    //   constant initialized.
+    //
+    // Parameters:
+    //   dstLcl - The destination local.
+    //   offset - The offset covered by this initialization.
+    //   type   - The type to initialize.
+    //
+    void InitReplacement(unsigned dstLcl, unsigned offset, var_types type)
+    {
+        m_entries.Push(Entry{dstLcl, BAD_VAR_NUM, offset, type});
+    }
+
+    //------------------------------------------------------------------------
+    // Finalize:
+    //   Create IR to perform the full decomposed struct copy as specified by
+    //   the entries that were added to the decomposition plan. Add the
+    //   statements to the specified list.
+    //
+    // Parameters:
+    //   statements - The list of statements to add to.
+    //
+    void Finalize(DecompositionStatementList* statements)
+    {
+        if (m_src->IsConstInitVal())
+        {
+            FinalizeInit(statements);
+        }
+        else
+        {
+            FinalizeCopy(statements);
+        }
+    }
+
+private:
+    //------------------------------------------------------------------------
+    // CoversDestination:
+    //   Check if the destination is fully defined by the entries that have
+    //   been added to the current plan.
+    //
+    // Returns:
+    //   True if so.
+    //
+    // Remarks:
+    //   When the plan fully covers the destination the remaining struct
+    //   copy/initialization can be omitted.
+    //
+    bool CoversDestination()
+    {
+        unsigned prevEnd    = 0;
+        unsigned dstLclOffs = 0;
+        if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+        {
+            dstLclOffs = m_dst->AsLclVarCommon()->GetLclOffs();
+        }
+
+        for (int i = 0; i < m_entries.Height(); i++)
+        {
+            const Entry& entry = m_entries.BottomRef(i);
+
+            unsigned writeBegin = entry.Offset;
+            unsigned writeSize  = genTypeSize(entry.Type);
+
+            assert(writeBegin >= prevEnd);
+
+            if (writeBegin > prevEnd)
+            {
+                // Uncovered hole from [prevEnd..writeBegin).
+                // TODO-CQ: In many cases it's more efficient to "plug" the holes. However,
+                // it is made more complicated by the fact that the holes can contain GC pointers in them and
+                // we cannot (yet) represent custom class layouts with GC pointers in them.
+                // TODO-CQ: Many of these cases are just padding. We should handle structs with insignificant
+                // padding here.
+                return false;
+            }
+
+            prevEnd = writeBegin + writeSize;
+        }
+
+        return prevEnd == m_dst->GetLayout(m_compiler)->GetSize();
+    }
+
+    //------------------------------------------------------------------------
+    // FinalizeInit:
+    //   Create IR to perform the decomposed initialization.
+    //
+    // Parameters:
+    //   statements - List to add statements to.
+    //
+    void FinalizeInit(DecompositionStatementList* statements)
+    {
+        GenTree* cns         = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src;
+        int64_t  initPattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL;
+
+        for (int i = 0; i < m_entries.Height(); i++)
+        {
+            const Entry& entry = m_entries.BottomRef(i);
+            GenTree*     srcVal;
+            switch (entry.Type)
+            {
+                case TYP_BOOL:
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                case TYP_INT:
+                {
+                    int64_t mask = (int64_t(1) << (genTypeSize(entry.Type) * 8)) - 1;
+                    srcVal       = m_compiler->gtNewIconNode(static_cast<int32_t>(initPattern & mask));
+                    break;
+                }
+                case TYP_LONG:
+                    srcVal = m_compiler->gtNewLconNode(initPattern);
+                    break;
+                case TYP_FLOAT:
+                    float floatPattern;
+                    memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
+                    srcVal = m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT);
+                    break;
+                case TYP_DOUBLE:
+                    double doublePattern;
+                    memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
+                    srcVal = m_compiler->gtNewDconNode(doublePattern);
+                    break;
+                case TYP_REF:
+                case TYP_BYREF:
+#ifdef FEATURE_SIMD
+                case TYP_SIMD8:
+                case TYP_SIMD12:
+                case TYP_SIMD16:
+#if defined(TARGET_XARCH)
+                case TYP_SIMD32:
+                case TYP_SIMD64:
+#endif // TARGET_XARCH
+#endif // FEATURE_SIMD
+                {
+                    assert(initPattern == 0);
+                    srcVal = m_compiler->gtNewZeroConNode(entry.Type);
+                    break;
+                }
+                default:
+                    unreached();
+            }
+
+            assert(entry.ToLclNum != BAD_VAR_NUM);
+            GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type);
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, srcVal));
+        }
+
+        if (!CoversDestination())
+        {
+            GenTree* asg = m_compiler->gtNewBlkOpNode(m_dst, cns);
+            statements->AddStatement(asg);
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // FinalizeCopy:
+    //   Create IR to perform the decomposed copy.
+    //
+    // Parameters:
+    //   statements - List to add statements to.
+    //
+    void FinalizeCopy(DecompositionStatementList* statements)
+    {
+        assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) &&
+               m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD));
+        bool coversDestination = CoversDestination();
+
+        GenTree* addr         = nullptr;
+        unsigned addrBaseOffs = 0;
+
+        if (m_dst->OperIs(GT_BLK, GT_FIELD))
+        {
+            addr = m_dst->gtGetOp1();
+
+            if (m_dst->OperIs(GT_FIELD))
+            {
+                addrBaseOffs = m_dst->AsField()->gtFldOffset;
+            }
+        }
+        else if (m_src->OperIs(GT_BLK, GT_FIELD))
+        {
+            addr = m_src->gtGetOp1();
+
+            if (m_src->OperIs(GT_FIELD))
+            {
+                addrBaseOffs = m_src->AsField()->gtFldOffset;
+            }
+        }
+
+        int numAddrUses = addr == nullptr ? 0 : (m_entries.Height() + (coversDestination ? 0 : 1));
+
+        // If the destination is fully covered we may need a null check for the GT_FIELD case.
+        // If the destination is not covered then the initial struct copy is enough.
+        bool needsNullCheck = coversDestination && (addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr);
+
+        if (needsNullCheck)
+        {
+            // See if our first indirection will subsume the null check (usual case).
+            assert(m_entries.Height() > 0);
+            const Entry& entry = m_entries.BottomRef(0);
+
+            assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM));
+            needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset);
+        }
+
+        if (needsNullCheck)
+        {
+            numAddrUses++;
+        }
+
+        if ((addr != nullptr) && (numAddrUses > 1))
+        {
+            if (addr->OperIsLocal() && (!m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ||
+                                        (addr->AsLclVarCommon()->GetLclNum() != m_dst->AsLclVarCommon()->GetLclNum())))
+            {
+                // We will introduce more uses of the address local, so it is
+                // no longer dying here.
+                addr->gtFlags &= ~GTF_VAR_DEATH;
+            }
+            else if (addr->IsInvariant())
+            {
+                // Fall through
+            }
+            else
+            {
+                unsigned addrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling address for field-by-field copy"));
+                statements->AddStatement(m_compiler->gtNewTempAssign(addrLcl, addr));
+                addr = m_compiler->gtNewLclvNode(addrLcl, addr->TypeGet());
+                UpdateEarlyRefCount(m_compiler, addr);
+            }
+        }
+
+        auto grabAddr = [&numAddrUses, addr, this](unsigned offs) {
+            assert(numAddrUses > 0);
+            numAddrUses--;
+
+            GenTree* addrUse;
+            if (numAddrUses == 0)
+            {
+                // Last use of the address, reuse the node.
+                addrUse = addr;
+            }
+            else
+            {
+                addrUse = m_compiler->gtCloneExpr(addr);
+                UpdateEarlyRefCount(m_compiler, addrUse);
+            }
+
+            if (offs != 0)
+            {
+                var_types addrType = varTypeIsGC(addrUse) ? TYP_BYREF : TYP_I_IMPL;
+                addrUse            = m_compiler->gtNewOperNode(GT_ADD, addrType, addrUse,
+                                                    m_compiler->gtNewIconNode((ssize_t)offs, TYP_I_IMPL));
+            }
+
+            return addrUse;
+        };
+
+        if (!coversDestination)
+        {
+            // Note that this does not handle partially overlapping copies,
+            // but that is left undefined (and normal block copies do not
+            // handle this either).
+            if (m_src->OperIs(GT_BLK, GT_FIELD))
+            {
+                // Note that we should use 0 instead of addrBaseOffs here
+                // since this ends up as the address of the GT_FIELD node
+                // that already has the field offset.
+                m_src->AsUnOp()->gtOp1 = grabAddr(0);
+            }
+            else if (m_dst->OperIs(GT_BLK, GT_FIELD))
+            {
+                // Like above, use 0 intentionally here.
+                m_dst->AsUnOp()->gtOp1 = grabAddr(0);
+            }
+
+            statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src));
+        }
+
+        if (needsNullCheck)
+        {
+            statements->AddStatement(m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)));
+        }
+
+        for (int i = 0; i < m_entries.Height(); i++)
+        {
+            const Entry& entry = m_entries.BottomRef(i);
+
+            GenTree* dst;
+            if (entry.ToLclNum != BAD_VAR_NUM)
+            {
+                dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type);
+
+                if (m_compiler->lvaGetDesc(entry.ToLclNum)->lvIsStructField)
+                    UpdateEarlyRefCount(m_compiler, dst);
+            }
+            else
+            {
+                assert(entry.FromLclNum != BAD_VAR_NUM);
+
+                if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+                {
+                    unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset;
+                    // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX.
+                    noway_assert(FitsIn<uint16_t>(offs));
+                    dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+                    m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
+                                                             DEBUGARG(DoNotEnregisterReason::LocalField));
+                    UpdateEarlyRefCount(m_compiler, dst);
+                }
+                else
+                {
+                    GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
+                    dst           = m_compiler->gtNewIndir(entry.Type, addr);
+                }
+            }
+
+            GenTree* src;
+            if (entry.FromLclNum != BAD_VAR_NUM)
+            {
+                src = m_compiler->gtNewLclvNode(entry.FromLclNum, entry.Type);
+
+                if (m_compiler->lvaGetDesc(entry.FromLclNum)->lvIsStructField)
+                    UpdateEarlyRefCount(m_compiler, src);
+            }
+            else
+            {
+                assert(entry.ToLclNum != BAD_VAR_NUM);
+                if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+                {
+                    unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
+                    noway_assert(FitsIn<uint16_t>(offs));
+                    src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+                    m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
+                                                             DEBUGARG(DoNotEnregisterReason::LocalField));
+                    UpdateEarlyRefCount(m_compiler, src);
+                }
+                else
+                {
+                    GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
+                    src           = m_compiler->gtNewIndir(entry.Type, addr);
+                }
+            }
+
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+        }
+
+        assert(numAddrUses == 0);
+    }
+
+    //------------------------------------------------------------------------
+    // UpdateEarlyRefCount:
+    //   Update early ref counts if necessary for the specified IR node.
+    //
+    // Parameters:
+    //   comp      - compiler instance
+    //   candidate - the IR node that may be a local that should have its early
+    //               ref counts updated.
+    //
+    static void UpdateEarlyRefCount(Compiler* comp, GenTree* candidate)
+    {
+        if (!candidate->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR))
+        {
+            return;
+        }
+
+        IncrementRefCount(comp, candidate->AsLclVarCommon()->GetLclNum());
+
+        LclVarDsc* varDsc = comp->lvaGetDesc(candidate->AsLclVarCommon());
+        if (varDsc->lvIsStructField)
+        {
+            IncrementRefCount(comp, varDsc->lvParentLcl);
+        }
+
+        if (varDsc->lvPromoted)
+        {
+            for (unsigned fldLclNum = varDsc->lvFieldLclStart; fldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+                 fldLclNum++)
+            {
+                IncrementRefCount(comp, fldLclNum);
+            }
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // IncrementRefCount:
+    //   Increment the ref count for the specified local.
+    //
+    // Parameters:
+    //   comp   - compiler instance
+    //   lclNum - the local
+    //
+    static void IncrementRefCount(Compiler* comp, unsigned lclNum)
+    {
+        LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
+        varDsc->incLvRefCntSaturating(1, RCS_EARLY);
+    }
+};
+
 class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
 {
     Promotion* m_prom;
@@ -749,35 +1247,6 @@ class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
         return fgWalkResult::WALK_CONTINUE;
     }
 
-    class StatementList
-    {
-        GenTree* m_head = nullptr;
-
-    public:
-        void AddStatement(GenTree* stmt)
-        {
-            stmt->gtNext = m_head;
-            m_head       = stmt;
-        }
-
-        GenTree* ToCommaTree(Compiler* comp)
-        {
-            if (m_head == nullptr)
-            {
-                return comp->gtNewNothingNode();
-            }
-
-            GenTree* tree = m_head;
-
-            for (GenTree* cur = m_head->gtNext; cur != nullptr; cur = cur->gtNext)
-            {
-                tree = comp->gtNewOperNode(GT_COMMA, tree->TypeGet(), cur, tree);
-            }
-
-            return tree;
-        }
-    };
-
     //------------------------------------------------------------------------
     // DecomposeAssignment:
     //   Handle an assignment that may be between struct locals with replacements.
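Before the rewritten DecomposeAssignment below, a standalone model
(invented Rep type and Walk function; this is not JIT code) of the
lockstep walk over the two sorted replacement lists that the commit
message describes and that the new CopyBetweenFields later in this patch
implements. Each step classifies the leading pair of fields the same way
the pass does: source field ends first, destination field ends first,
exact match, or partial overlap.

    #include <cstdio>
    #include <vector>

    // Rep models a replacement field; offsets are relative to the copied
    // range, and each field covers [offset, offset + size).
    struct Rep
    {
        unsigned offset;
        unsigned size;
    };

    static void Walk(const std::vector<Rep>& dst, const std::vector<Rep>& src)
    {
        size_t d = 0, s = 0;
        while ((d < dst.size()) || (s < src.size()))
        {
            if ((d < dst.size()) && (s < src.size()))
            {
                if (src[s].offset + src[s].size < dst[d].offset)
                {
                    // Source field ends before the next destination field starts.
                    printf("copy src rep %zu into the destination struct\n", s++);
                }
                else if (dst[d].offset + dst[d].size < src[s].offset)
                {
                    // Destination field ends before the next source field starts.
                    printf("copy from the source struct into dst rep %zu\n", d++);
                }
                else if ((dst[d].offset == src[s].offset) && (dst[d].size == src[s].size))
                {
                    // Exact match: direct scalar copy between replacement locals.
                    printf("direct copy src rep %zu -> dst rep %zu\n", s, d);
                    d++;
                    s++;
                }
                else
                {
                    // Partial overlap: write the source field back to its
                    // struct; the destination field is handled on a later step.
                    printf("write src rep %zu back to its struct\n", s++);
                }
            }
            else if (d < dst.size())
            {
                printf("copy from the source struct into dst rep %zu\n", d++);
            }
            else
            {
                printf("copy src rep %zu into the destination struct\n", s++);
            }
        }
    }

    int main()
    {
        // One exactly matching pair and one partially overlapping pair.
        Walk({{0, 4}, {4, 4}}, {{0, 4}, {6, 2}});
    }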
@@ -797,17 +1266,17 @@ class ReplaceVisitor : public GenTreeVisitor GenTree* dst = asg->gtGetOp1(); assert(!dst->OperIs(GT_COMMA)); - GenTree* src = asg->gtGetOp2()->gtEffectiveVal(); - Replacement* dstFirstRep = nullptr; - Replacement* dstEndRep = nullptr; - bool dstInvolvesReplacements = asg->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD) && - OverlappingReplacements(dst->AsLclVarCommon(), &dstFirstRep, &dstEndRep); - Replacement* srcFirstRep = nullptr; - Replacement* srcEndRep = nullptr; - bool srcInvolvesReplacements = asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD) && - OverlappingReplacements(src->AsLclVarCommon(), &srcFirstRep, &srcEndRep); + GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; + GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; + + Replacement* dstFirstRep = nullptr; + Replacement* dstEndRep = nullptr; + bool dstInvolvesReplacements = (dstLcl != nullptr) && OverlappingReplacements(dstLcl, &dstFirstRep, &dstEndRep); + Replacement* srcFirstRep = nullptr; + Replacement* srcEndRep = nullptr; + bool srcInvolvesReplacements = (srcLcl != nullptr) && OverlappingReplacements(srcLcl, &srcFirstRep, &srcEndRep); if (!dstInvolvesReplacements && !srcInvolvesReplacements) { @@ -816,37 +1285,19 @@ class ReplaceVisitor : public GenTreeVisitor JITDUMP("Processing block operation [%06u] that involves replacements\n", Compiler::dspTreeID(asg)); - if (dstInvolvesReplacements && (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) || src->IsConstInitVal())) + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) || src->IsConstInitVal()) { - StatementList result; + DecompositionStatementList result; EliminateCommasInBlockOp(asg, &result); - if (dstInvolvesReplacements && srcInvolvesReplacements) - { - JITDUMP("Copy [%06u] is between two physically promoted locals with replacements\n", - Compiler::dspTreeID(asg)); - JITDUMP("*** Conservative: Phys<->phys copies not yet supported; inserting conservative write-back\n"); - for (Replacement* rep = srcFirstRep; rep < srcEndRep; rep++) - { - if (rep->NeedsWriteBack) - { - result.AddStatement(CreateWriteBack(src->AsLclVarCommon()->GetLclNum(), *rep)); - rep->NeedsWriteBack = false; - } - } - - srcInvolvesReplacements = false; - } - if (dstInvolvesReplacements) { - GenTreeLclVarCommon* dstLcl = dst->AsLclVarCommon(); - unsigned dstLclOffs = dstLcl->GetLclOffs(); - unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); - + unsigned dstLclOffs = dstLcl->GetLclOffs(); + unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); if (dstFirstRep->Offset < dstLclOffs) { - JITDUMP("*** Block operation partially overlaps with %s. Write and read-backs are necessary.\n", + JITDUMP("*** Block operation partially overlaps with destination %s. Write and read-backs are " + "necessary.\n", dstFirstRep->Name); // The value of the replacement will be partially assembled from its old value and this struct // operation. @@ -864,7 +1315,8 @@ class ReplaceVisitor : public GenTreeVisitor Replacement* dstLastRep = dstEndRep - 1; if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) { - JITDUMP("*** Block operation partially overlaps with %s. Write and read-backs are necessary.\n", + JITDUMP("*** Block operation partially overlaps with destination %s. 
Write and read-backs are " + "necessary.\n", dstLastRep->Name); result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); @@ -873,71 +1325,54 @@ class ReplaceVisitor : public GenTreeVisitor dstEndRep--; } } + } - if (src->IsConstInitVal()) - { - GenTree* cns = src->OperIsInitVal() ? src->gtGetOp1() : src; - InitFieldByField(dstFirstRep, dstEndRep, static_cast(cns->AsIntCon()->IconValue()), - &result); - } - else - { - CopyIntoFields(dstFirstRep, dstEndRep, dstLcl, src, &result); - } - - // At this point all replacements that have Handled = true contain their correct value. - // Check if these cover the entire block operation. - unsigned prevEnd = dstLclOffs; - bool covered = true; + if (srcInvolvesReplacements) + { + unsigned srcLclOffs = srcLcl->GetLclOffs(); + unsigned srcLclSize = srcLcl->GetLayout(m_compiler)->GetSize(); - for (Replacement* rep = dstFirstRep; rep < dstEndRep; rep++) + if (srcFirstRep->Offset < srcLclOffs) { - if (!rep->Handled) - { - covered = false; - break; - } + JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", + srcFirstRep->Name); - assert(rep->Offset >= prevEnd); - if (rep->Offset != prevEnd) - { - // Uncovered hole from [lastEnd..rep->Offset). - // TODO-CQ: In many cases it's more efficient to "plug" the holes. However, - // it is made more complicated by the fact that the holes can contain GC pointers in them and - // we cannot (yet) represent custom class layouts with GC pointers in them. - // TODO-CQ: Many of these cases are just padding. We should handle structs with insignificant - // padding here. - covered = false; - break; - } + result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcFirstRep)); - prevEnd = rep->Offset + genTypeSize(rep->AccessType); + srcFirstRep->NeedsWriteBack = false; + srcFirstRep++; } - covered &= prevEnd == dstLclOffs + dstLclSize; - - if (!covered) + if (srcEndRep > srcFirstRep) { - JITDUMP("Struct operation is not fully covered by replaced fields. Keeping struct operation.\n"); - result.AddStatement(asg); + Replacement* srcLastRep = srcEndRep - 1; + if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) + { + JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", + srcLastRep->Name); + + result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcLastRep)); + srcLastRep->NeedsWriteBack = false; + srcEndRep--; + } } + } - // For unhandled replacements, mark that they will require a read back before their next access. - // Conversely, the replacements we handled above are now up to date and should not be read back. - // We also keep the invariant that Replacement::Handled == false, so reset it here as well. + DecompositionPlan plan(m_compiler, dst, src); - for (Replacement* rep = dstFirstRep; rep < dstEndRep; rep++) - { - rep->NeedsReadBack = !rep->Handled; - rep->NeedsWriteBack = rep->Handled; - rep->Handled = false; - } + if (src->IsConstInitVal()) + { + GenTree* cns = src->OperIsInitVal() ? 
src->gtGetOp1() : src; + InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, + static_cast(cns->AsIntCon()->IconValue()), &plan); } else { - assert(srcInvolvesReplacements); + CopyBetweenFields(dst, dstFirstRep, dstEndRep, src, srcFirstRep, srcEndRep, &result, &plan); } + plan.Finalize(&result); + *use = result.ToCommaTree(m_compiler); m_madeChanges = true; } @@ -960,8 +1395,9 @@ class ReplaceVisitor : public GenTreeVisitor } //------------------------------------------------------------------------ - // InitFieldByField: - // Initialize the specified replacements with a specified pattern. + // InitFields: + // Add entries into the plan specifying which replacements can be + // directly inited, and mark the other ones as requiring read back. // // Parameters: // firstRep - The first replacement. @@ -973,231 +1409,162 @@ class ReplaceVisitor : public GenTreeVisitor // Sets Replacement::Handled if the replacement was handled and IR was // created to initialize it with the correct value. // - void InitFieldByField(Replacement* firstRep, Replacement* endRep, unsigned char initVal, StatementList* result) + void InitFields(GenTreeLclVarCommon* dst, + Replacement* firstRep, + Replacement* endRep, + unsigned char initVal, + DecompositionPlan* plan) { - int64_t initPattern = int64_t(initVal) * 0x0101010101010101LL; - for (Replacement* rep = firstRep; rep < endRep; rep++) { - assert(!rep->Handled); - - GenTree* srcVal; - if ((initPattern != 0) && (varTypeIsSIMD(rep->AccessType) || varTypeIsGC(rep->AccessType))) + if ((initVal != 0) && (varTypeIsSIMD(rep->AccessType) || varTypeIsGC(rep->AccessType))) { - // Leave unhandled, we will do this via a read back on the next access. + // We will need to read this one back after initing the struct. + rep->NeedsWriteBack = false; + rep->NeedsReadBack = true; continue; } - switch (rep->AccessType) - { - case TYP_BOOL: - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - case TYP_INT: - { - int64_t mask = (int64_t(1) << (genTypeSize(rep->AccessType) * 8)) - 1; - srcVal = m_compiler->gtNewIconNode(static_cast(initPattern & mask)); - break; - } - case TYP_LONG: - srcVal = m_compiler->gtNewLconNode(initPattern); - break; - case TYP_FLOAT: - float floatPattern; - memcpy(&floatPattern, &initPattern, sizeof(floatPattern)); - srcVal = m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT); - break; - case TYP_DOUBLE: - double doublePattern; - memcpy(&doublePattern, &initPattern, sizeof(doublePattern)); - srcVal = m_compiler->gtNewDconNode(doublePattern); - break; - case TYP_REF: - case TYP_BYREF: -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: -#endif // TARGET_XARCH -#endif // FEATURE_SIMD - { - assert(initPattern == 0); - srcVal = m_compiler->gtNewZeroConNode(rep->AccessType); - break; - } - default: - unreached(); - } - - GenTree* lcl = m_compiler->gtNewLclvNode(rep->LclNum, rep->AccessType); - GenTree* asg = m_compiler->gtNewAssignNode(lcl, srcVal); - result->AddStatement(asg); - rep->Handled = true; + plan->InitReplacement(rep->LclNum, rep->Offset - dst->GetLclOffs(), rep->AccessType); + rep->NeedsWriteBack = true; + rep->NeedsReadBack = false; } } //------------------------------------------------------------------------ - // CopyIntoFields: - // Copy from a specified block source into the specified replacements. + // CopyBetweenFields: + // Copy between two struct locals that may involve replacements. 
// // Parameters: - // firstRep - The first replacement. - // endRep - End of the replacements. - // dst - Local containing the replacements. - // src - The block source. - // result - Statement list to add resulting statements to. + // dst - Destination node + // dstFirstRep - First replacement of the destination or nullptr if destination is not a promoted local. + // dstEndRep - One past last replacement of the destination. + // src - Source node + // srcFirstRep - First replacement of the source or nullptr if source is not a promoted local. + // srcEndRep - One past last replacement of the source. + // statements - Statement list to add potential "init" statements to. + // plan - Data structure that tracks the specific copies to be done. // - void CopyIntoFields( - Replacement* firstRep, Replacement* endRep, GenTreeLclVarCommon* dst, GenTree* src, StatementList* result) + void CopyBetweenFields(GenTree* dst, + Replacement* dstFirstRep, + Replacement* dstEndRep, + GenTree* src, + Replacement* srcFirstRep, + Replacement* srcEndRep, + DecompositionStatementList* statements, + DecompositionPlan* plan) { assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); - if (src->OperIs(GT_BLK, GT_FIELD)) - { - GenTree* addr = src->gtGetOp1(); + GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; + GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; + unsigned dstBaseOffs = dstLcl != nullptr ? dstLcl->GetLclOffs() : 0; + unsigned srcBaseOffs = srcLcl != nullptr ? srcLcl->GetLclOffs() : 0; - if (addr->OperIsLocal() && (addr->AsLclVarCommon()->GetLclNum() != dst->GetLclNum())) - { - // We will introduce more uses of the address local, so it is - // no longer dying here. - addr->gtFlags &= ~GTF_VAR_DEATH; - } - else if (addr->IsInvariant()) - { - // Fall through - } - else - { - // TODO-CQ: Avoid this local if we only use the address once? A - // bit complicated since our caller may use the address too. - unsigned addrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling address for field-by-field copy")); - result->AddStatement(m_compiler->gtNewTempAssign(addrLcl, addr)); - src->AsUnOp()->gtOp1 = m_compiler->gtNewLclvNode(addrLcl, addr->TypeGet()); - } - } + LclVarDsc* dstDsc = dstLcl != nullptr ? m_compiler->lvaGetDesc(dstLcl) : nullptr; + LclVarDsc* srcDsc = srcLcl != nullptr ? m_compiler->lvaGetDesc(srcLcl) : nullptr; - LclVarDsc* srcDsc = - src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? m_compiler->lvaGetDesc(src->AsLclVarCommon()) : nullptr; + Replacement* dstRep = dstFirstRep; + Replacement* srcRep = srcFirstRep; - for (Replacement* rep = firstRep; rep < endRep; rep++) + while ((dstRep < dstEndRep) || (srcRep < srcEndRep)) { - assert(!rep->Handled); - assert(rep->Offset >= dst->GetLclOffs()); + if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) + { + if (srcRep->Offset - srcBaseOffs + genTypeSize(srcRep->AccessType) < dstRep->Offset - dstBaseOffs) + { + // This source replacement ends before the next destination replacement starts. + // Write it directly to the destination struct local. + plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); + srcRep++; + continue; + } - unsigned srcOffs = rep->Offset - dst->GetLclOffs(); + if (dstRep->Offset - dstBaseOffs + genTypeSize(dstRep->AccessType) < srcRep->Offset - srcBaseOffs) + { + // Destination replacement ends before the next source replacement starts. + // Read it directly from the source struct local. 
+ plan->CopyToReplacement(dstRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + continue; + } - GenTree* dstLcl = m_compiler->gtNewLclvNode(rep->LclNum, rep->AccessType); - GenTree* srcFld = nullptr; - if (srcDsc != nullptr) - { - srcOffs += src->AsLclVarCommon()->GetLclOffs(); + // Overlap. Check for exact match of replacements. + // TODO-CQ: Allow copies between small types of different signs, and between TYP_I_IMPL/TYP_BYREF? + if (((dstRep->Offset - dstBaseOffs) == (srcRep->Offset - srcBaseOffs)) && + (dstRep->AccessType == srcRep->AccessType)) + { + plan->CopyBetweenReplacements(dstRep->LclNum, srcRep->LclNum, dstRep->Offset - dstBaseOffs, + dstRep->AccessType); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + srcRep++; + continue; + } + + // Partial overlap. Write source back to the struct local. We + // will handle the destination replacement in a future + // iteration of the loop. + statements->AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcRep)); + srcRep++; + continue; + } - if (srcDsc->lvPromoted) + if (dstRep < dstEndRep) + { + if ((srcDsc != nullptr) && srcDsc->lvPromoted) { + unsigned srcOffs = srcLcl->GetLclOffs() + (dstRep->Offset - dstBaseOffs); unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); LclVarDsc* fieldLclDsc = m_compiler->lvaGetDesc(fieldLcl); - if (fieldLclDsc->lvType == rep->AccessType) + if (fieldLclDsc->lvType == dstRep->AccessType) { - srcFld = m_compiler->gtNewLclvNode(fieldLcl, fieldLclDsc->lvType); + plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, dstRep->Offset - dstBaseOffs, + dstRep->AccessType); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + continue; } } - if (srcFld == nullptr) - { - srcFld = m_compiler->gtNewLclFldNode(src->AsLclVarCommon()->GetLclNum(), rep->AccessType, srcOffs); - // TODO-CQ: This may be better left as a read back if the - // source is non-physically promoted. - m_compiler->lvaSetVarDoNotEnregister(src->AsLclVarCommon()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::LocalField)); - } - - UpdateEarlyRefCount(srcFld); + // TODO-CQ: If the source is promoted then this will result in + // DNER'ing it. Alternatively we could copy the promoted field + // directly the destination's struct local and the overlapping + // fields as needing read back to avoid this DNER. + plan->CopyToReplacement(dstRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; } else { - if (src->OperIs(GT_FIELD)) + assert(srcRep < srcEndRep); + if ((dstDsc != nullptr) && dstDsc->lvPromoted) { - srcOffs += src->AsField()->gtFldOffset; - } - - if ((rep == firstRep) && m_compiler->fgIsBigOffset(srcOffs) && - m_compiler->fgAddrCouldBeNull(src->gtGetOp1())) - { - GenTree* addrForNullCheck = m_compiler->gtCloneExpr(src->gtGetOp1()); - result->AddStatement(m_compiler->gtNewIndir(TYP_BYTE, addrForNullCheck)); - UpdateEarlyRefCount(addrForNullCheck); - } + unsigned dstOffs = dstLcl->GetLclOffs() + (srcRep->Offset - srcBaseOffs); + unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); + LclVarDsc* fieldLclDsc = m_compiler->lvaGetDesc(fieldLcl); - GenTree* addr = m_compiler->gtCloneExpr(src->gtGetOp1()); - UpdateEarlyRefCount(addr); - if (srcOffs != 0) - { - var_types addrType = varTypeIsGC(addr) ? 
TYP_BYREF : TYP_I_IMPL; - addr = m_compiler->gtNewOperNode(GT_ADD, addrType, addr, - m_compiler->gtNewIconNode(srcOffs, TYP_I_IMPL)); + if (fieldLclDsc->lvType == srcRep->AccessType) + { + plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, srcRep->Offset - srcBaseOffs, + srcRep->AccessType); + srcRep++; + continue; + } } - GenTree* dstLcl = m_compiler->gtNewLclvNode(rep->LclNum, rep->AccessType); - srcFld = m_compiler->gtNewIndir(rep->AccessType, addr, src->gtFlags & GTF_IND_VOLATILE); - srcFld->gtFlags |= GTF_GLOB_REF; + plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); + srcRep++; } - - result->AddStatement(m_compiler->gtNewAssignNode(dstLcl, srcFld)); - rep->Handled = true; } } - //------------------------------------------------------------------------ - // UpdateEarlyRefCount: - // Update early ref counts if necessary for the specified IR node. - // - // Parameters: - // candidate - the IR node that may be a local that should have its early ref counts updated. - // - void UpdateEarlyRefCount(GenTree* candidate) - { - if (!candidate->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR)) - { - return; - } - - IncrementRefCount(candidate->AsLclVarCommon()->GetLclNum()); - - LclVarDsc* varDsc = m_compiler->lvaGetDesc(candidate->AsLclVarCommon()); - if (varDsc->lvIsStructField) - { - IncrementRefCount(varDsc->lvParentLcl); - } - - if (varDsc->lvPromoted) - { - for (unsigned fldLclNum = varDsc->lvFieldLclStart; fldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; - fldLclNum++) - { - IncrementRefCount(fldLclNum); - } - } - } - - //------------------------------------------------------------------------ - // IncrementRefCount: - // Increment the ref count for the specified local. - // - // Parameters: - // lclNum - the local - // - void IncrementRefCount(unsigned lclNum) - { - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - varDsc->incLvRefCntSaturating(1, RCS_EARLY); - } - //------------------------------------------------------------------------ // EliminateCommasInBlockOp: // Ensure that the sources of a block op are not commas by extracting side effects. @@ -1209,7 +1576,7 @@ class ReplaceVisitor : public GenTreeVisitor // Remarks: // Works similarly to MorphInitBlockHelper::EliminateCommas. 
// - void EliminateCommasInBlockOp(GenTreeOp* asg, StatementList* result) + void EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionStatementList* result) { bool any = false; GenTree* lhs = asg->gtGetOp1(); diff --git a/src/tests/JIT/Directed/physicalpromotion/physicalpromotion.cs b/src/tests/JIT/Directed/physicalpromotion/physicalpromotion.cs index 6d06cedbbcc0c4..0e334903e75f10 100644 --- a/src/tests/JIT/Directed/physicalpromotion/physicalpromotion.cs +++ b/src/tests/JIT/Directed/physicalpromotion/physicalpromotion.cs @@ -10,7 +10,7 @@ public class PhysicalPromotion { [Fact] - public static unsafe void PartialOverlap1() + public static void PartialOverlap1() { S s = default; s.A = 0x10101010; @@ -23,7 +23,7 @@ public static unsafe void PartialOverlap1() private static S s_static = new S { A = 0x10101010, B = 0x20202020 }; [Fact] - public static unsafe void CopyFromLocalVar() + public static void CopyFromLocalVar() { S src = s_static; S dst; @@ -36,7 +36,7 @@ public static unsafe void CopyFromLocalVar() } [Fact] - public static unsafe void CopyFromLocalField() + public static void CopyFromLocalField() { SWithInner src; src.S = s_static; @@ -50,7 +50,7 @@ public static unsafe void CopyFromLocalField() } [Fact] - public static unsafe void CopyFromBlk() + public static void CopyFromBlk() { S dst; dst = s_static; @@ -61,6 +61,47 @@ public static unsafe void CopyFromBlk() Assert.Equal(0x20202020U, dst.B); } + [Fact] + public static void CopyToBlk() + { + S s = default; + CopyToBlkInner(ref s); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void CopyToBlkInner(ref S mutate) + { + S src = s_static; + src.A = src.B + 3; + src.B = 0x20202020; + mutate = src; + Assert.Equal(0x20202023U, mutate.A); + Assert.Equal(0x20202020U, mutate.B); + } + + private static VeryOverlapping _overlappy1 = new VeryOverlapping { F0 = 0x12345678, F4 = 0xdeadbeef }; + private static VeryOverlapping _overlappy2 = new VeryOverlapping { F1 = 0xde, F2 = 0x1357, F5 = 0x17, F7 = 0x42 }; + + [Fact] + public static void Overlappy() + { + VeryOverlapping lcl1 = _overlappy1; + VeryOverlapping lcl2 = _overlappy2; + VeryOverlapping lcl3 = _overlappy1; + + lcl1.F0 = lcl3.F0 + 3; + lcl1.F4 = lcl3.F0 + lcl3.F4; + + lcl3 = lcl1; + + lcl2.F1 = (byte)(lcl2.F2 + lcl2.F5 + lcl2.F7); + lcl1 = lcl2; + + Consume(lcl1); + Consume(lcl2); + Consume(lcl3); + } + [MethodImpl(MethodImplOptions.NoInlining)] private static void Consume(T val) { @@ -82,4 +123,25 @@ private struct SWithInner public int Field; public S S; } + + [StructLayout(LayoutKind.Explicit)] + private struct VeryOverlapping + { + [FieldOffset(0)] + public uint F0; + [FieldOffset(1)] + public byte F1; + [FieldOffset(2)] + public ushort F2; + [FieldOffset(3)] + public byte F3; + [FieldOffset(4)] + public uint F4; + [FieldOffset(5)] + public byte F5; + [FieldOffset(6)] + public ushort F6; + [FieldOffset(7)] + public byte F7; + } } From 09db21e901c2040441ed11f4c489d806ddc0188f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 17:59:02 +0200 Subject: [PATCH 02/37] Handle no promoted field --- src/coreclr/jit/promotion.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index da96afa26ebd66..eaaa1a60e8c5fc 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1519,9 +1519,8 @@ class ReplaceVisitor : public GenTreeVisitor { unsigned srcOffs = srcLcl->GetLclOffs() + (dstRep->Offset - dstBaseOffs); unsigned fieldLcl = 
m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); - LclVarDsc* fieldLclDsc = m_compiler->lvaGetDesc(fieldLcl); - if (fieldLclDsc->lvType == dstRep->AccessType) + if ((fieldLcl != BAD_VAR_NUM) && m_compiler->lvaGetDesc(fieldLcl)->lvType == dstRep->AccessType) { plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, dstRep->Offset - dstBaseOffs, dstRep->AccessType); @@ -1550,7 +1549,7 @@ class ReplaceVisitor : public GenTreeVisitor unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); LclVarDsc* fieldLclDsc = m_compiler->lvaGetDesc(fieldLcl); - if (fieldLclDsc->lvType == srcRep->AccessType) + if ((fieldLcl != BAD_VAR_NUM) && (m_compiler->lvaGetDesc(fieldLcl)->lvType == srcRep->AccessType)) { plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); From 813fa83879f8bed9fd0654522784acb171e687e5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 17:59:10 +0200 Subject: [PATCH 03/37] Add test for no promoted field --- .../physicalpromotion/mixedpromotion.cs | 71 ++++++++++++++++--- 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/src/tests/JIT/Directed/physicalpromotion/mixedpromotion.cs b/src/tests/JIT/Directed/physicalpromotion/mixedpromotion.cs index a61402a3f097ad..35e16a0023474b 100644 --- a/src/tests/JIT/Directed/physicalpromotion/mixedpromotion.cs +++ b/src/tests/JIT/Directed/physicalpromotion/mixedpromotion.cs @@ -9,41 +9,85 @@ public class PhysicalPromotion { - private static S s_static = new S { A = 0x10101010, B = 0x20202020 }; + private static S s_static = new S { A = 0xdeadbeef, B = 0xcafebabe }; + private static S2 s_static2 = new S2 { A = 0x12, B = 0x34, C = 0x56, D = 0x78, E = 0x9A, F = 0xBC, G = 0xDE, H = 0xF0 }; [Fact] - public static unsafe void FromPhysicalToOld() + public static void FromPhysicalToOld() { SWithInner src; src.S = s_static; src.S.A = src.S.B + 3; - src.S.B = 0x20202020; + src.S.B = 0x21222324; S dst; dst = src.S; dst.A = dst.B + 3; - dst.B = 0x10101010; + dst.B = 0x11121314; Consume(dst); - Assert.Equal(0x20202023U, dst.A); - Assert.Equal(0x10101010U, dst.B); + Assert.Equal(0x21222327U, dst.A); + Assert.Equal(0x11121314U, dst.B); } [Fact] - public static unsafe void FromOldToPhysical() + public static void FromOldToPhysical() { S src; src = s_static; src.A = src.B + 3; - src.B = 0x20202020; + src.B = 0x21222324; SWithInner dst; dst.Field = 0; dst.S = src; dst.S.A = dst.S.B + 3; - dst.S.B = 0x10101010; + dst.S.B = 0x11121314; Consume(dst); - Assert.Equal(0x20202023U, dst.S.A); - Assert.Equal(0x10101010U, dst.S.B); + Assert.Equal(0x21222327U, dst.S.A); + Assert.Equal(0x11121314U, dst.S.B); + } + + [Fact] + public static unsafe void FromOldToPhysicalMismatched() + { + S src = s_static; + src.A = src.B + 3; + src.B = 0x21222324; + + S2 dst = s_static2; + dst.A = (byte)(dst.B + 2); + dst.B = (byte)(dst.C + 2); + dst.C = (byte)(dst.D + 2); + dst.D = (byte)(dst.E + 2); + dst.E = (byte)(dst.F + 2); + dst.F = (byte)(dst.G + 2); + dst.G = (byte)(dst.H + 2); + dst.H = (byte)(dst.A + 2); + Consume(dst); + + Assert.Equal(0xcafebac1U, src.A); + Assert.Equal(0x21222324U, src.B); + + Assert.Equal(0x36, dst.A); + Assert.Equal(0x58, dst.B); + Assert.Equal(0x7A, dst.C); + Assert.Equal(0x9C, dst.D); + Assert.Equal(0xBE, dst.E); + Assert.Equal(0xE0, dst.F); + Assert.Equal(0xF2, dst.G); + Assert.Equal(0x38, dst.H); + + dst = *(S2*)&src; + Consume(dst); + + Assert.Equal(0xc1, dst.A); + Assert.Equal(0xba, dst.B); + Assert.Equal(0xfe, dst.C); + Assert.Equal(0xca, dst.D); + 
Assert.Equal(0x24, dst.E); + Assert.Equal(0x23, dst.F); + Assert.Equal(0x22, dst.G); + Assert.Equal(0x21, dst.H); } [MethodImpl(MethodImplOptions.NoInlining)] @@ -57,6 +101,11 @@ private struct S public uint B; } + private struct S2 + { + public byte A, B, C, D, E, F, G, H; + } + private struct SWithInner { public int Field; From 5a372afbd65ebb91dd6a0d854567200b31a04889 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 19:16:33 +0200 Subject: [PATCH 04/37] Source may no longer be dying --- src/coreclr/jit/promotion.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index eaaa1a60e8c5fc..c3bdc21594949e 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1051,6 +1051,13 @@ class DecompositionPlan } statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); + + if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + // We will introduce uses of the source below so this struct + // copy is no longer the last use if it was before. + m_src->gtFlags &= ~GTF_VAR_DEATH; + } } if (needsNullCheck) @@ -1517,8 +1524,8 @@ class ReplaceVisitor : public GenTreeVisitor { if ((srcDsc != nullptr) && srcDsc->lvPromoted) { - unsigned srcOffs = srcLcl->GetLclOffs() + (dstRep->Offset - dstBaseOffs); - unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); + unsigned srcOffs = srcLcl->GetLclOffs() + (dstRep->Offset - dstBaseOffs); + unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); if ((fieldLcl != BAD_VAR_NUM) && m_compiler->lvaGetDesc(fieldLcl)->lvType == dstRep->AccessType) { From 5e548631884bdd7a63197a19aeda76cd5a8c321c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 21:05:04 +0200 Subject: [PATCH 05/37] Fix a possible out of bounds in OverlappingReplacements --- src/coreclr/jit/promotion.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index c3bdc21594949e..d5abd9ea74e10f 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1673,6 +1673,11 @@ class ReplaceVisitor : public GenTreeVisitor // Overlap with last entry starting before offs. firstIndex--; } + else if (firstIndex >= replacements.size()) + { + // Starts after last replacement ends. 
+ return false; + } } const Replacement& first = replacements[firstIndex]; @@ -1683,7 +1688,7 @@ class ReplaceVisitor : public GenTreeVisitor } } - assert(replacements[firstIndex].Overlaps(offs, size)); + assert((firstIndex < replacements.size()) && replacements[firstIndex].Overlaps(offs, size)); *firstReplacement = &replacements[firstIndex]; if (endReplacement != nullptr) From 22aa3f7d42d530823354f594089ad3652359226b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 22:59:56 +0200 Subject: [PATCH 06/37] Remove leftover code --- src/coreclr/jit/promotion.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index d5abd9ea74e10f..edfb4ddbde2739 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1527,7 +1527,7 @@ class ReplaceVisitor : public GenTreeVisitor unsigned srcOffs = srcLcl->GetLclOffs() + (dstRep->Offset - dstBaseOffs); unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); - if ((fieldLcl != BAD_VAR_NUM) && m_compiler->lvaGetDesc(fieldLcl)->lvType == dstRep->AccessType) + if ((fieldLcl != BAD_VAR_NUM) && (m_compiler->lvaGetDesc(fieldLcl)->lvType == dstRep->AccessType)) { plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, dstRep->Offset - dstBaseOffs, dstRep->AccessType); @@ -1554,7 +1554,6 @@ class ReplaceVisitor : public GenTreeVisitor { unsigned dstOffs = dstLcl->GetLclOffs() + (srcRep->Offset - srcBaseOffs); unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); - LclVarDsc* fieldLclDsc = m_compiler->lvaGetDesc(fieldLcl); if ((fieldLcl != BAD_VAR_NUM) && (m_compiler->lvaGetDesc(fieldLcl)->lvType == srcRep->AccessType)) { From 87e55d6195d4d88d9bf1a459a45af22a5842d77a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 23:52:43 +0200 Subject: [PATCH 07/37] Handle that source replacements may need a read back --- src/coreclr/jit/promotion.cpp | 74 +++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index edfb4ddbde2739..5f54251e6d6470 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1303,17 +1303,21 @@ class ReplaceVisitor : public GenTreeVisitor unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); if (dstFirstRep->Offset < dstLclOffs) { - JITDUMP("*** Block operation partially overlaps with destination %s. Write and read-backs are " - "necessary.\n", - dstFirstRep->Name); - // The value of the replacement will be partially assembled from its old value and this struct - // operation. - // We accomplish this by an initial write back, the struct copy, followed by a later read back. - // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. - result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstFirstRep)); - - dstFirstRep->NeedsWriteBack = false; - dstFirstRep->NeedsReadBack = true; + if (dstFirstRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with destination %s. Write and read-backs are " + "necessary.\n", + dstFirstRep->Name); + // The value of the replacement will be partially assembled from its old value and this struct + // operation. + // We accomplish this by an initial write back, the struct copy, followed by a later read back. + // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. 
+ result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstFirstRep)); + + dstFirstRep->NeedsWriteBack = false; + dstFirstRep->NeedsReadBack = true; + } + dstFirstRep++; } @@ -1322,13 +1326,18 @@ class ReplaceVisitor : public GenTreeVisitor Replacement* dstLastRep = dstEndRep - 1; if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) { - JITDUMP("*** Block operation partially overlaps with destination %s. Write and read-backs are " + if (dstLastRep->NeedsWriteBack) + { + JITDUMP( + "*** Block operation partially overlaps with destination %s. Write and read-backs are " "necessary.\n", dstLastRep->Name); - result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); + result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); + + dstLastRep->NeedsWriteBack = false; + dstLastRep->NeedsReadBack = true; + } - dstLastRep->NeedsWriteBack = false; - dstLastRep->NeedsReadBack = true; dstEndRep--; } } @@ -1341,12 +1350,16 @@ class ReplaceVisitor : public GenTreeVisitor if (srcFirstRep->Offset < srcLclOffs) { - JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", - srcFirstRep->Name); + if (srcFirstRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", + srcFirstRep->Name); + + result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcFirstRep)); - result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcFirstRep)); + srcFirstRep->NeedsWriteBack = false; + } - srcFirstRep->NeedsWriteBack = false; srcFirstRep++; } @@ -1355,11 +1368,15 @@ class ReplaceVisitor : public GenTreeVisitor Replacement* srcLastRep = srcEndRep - 1; if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) { - JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", - srcLastRep->Name); + if (srcLastRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with source %s. 
Write back is necessary.\n", + srcLastRep->Name); + + result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcLastRep)); + srcLastRep->NeedsWriteBack = false; + } - result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcLastRep)); - srcLastRep->NeedsWriteBack = false; srcEndRep--; } } @@ -1476,6 +1493,13 @@ class ReplaceVisitor : public GenTreeVisitor while ((dstRep < dstEndRep) || (srcRep < srcEndRep)) { + if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) + { + assert(srcLcl != nullptr); + statements->AddStatement(CreateReadBack(srcLcl->GetLclNum(), *srcRep)); + srcRep->NeedsReadBack = false; + } + if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) { if (srcRep->Offset - srcBaseOffs + genTypeSize(srcRep->AccessType) < dstRep->Offset - dstBaseOffs) @@ -1552,8 +1576,8 @@ class ReplaceVisitor : public GenTreeVisitor assert(srcRep < srcEndRep); if ((dstDsc != nullptr) && dstDsc->lvPromoted) { - unsigned dstOffs = dstLcl->GetLclOffs() + (srcRep->Offset - srcBaseOffs); - unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); + unsigned dstOffs = dstLcl->GetLclOffs() + (srcRep->Offset - srcBaseOffs); + unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); if ((fieldLcl != BAD_VAR_NUM) && (m_compiler->lvaGetDesc(fieldLcl)->lvType == srcRep->AccessType)) { From 70defdcdc2567175a61c3b60f35e0b3cd9513da7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 25 Apr 2023 23:56:58 +0200 Subject: [PATCH 08/37] Always read back partially overlapping destination --- src/coreclr/jit/promotion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 5f54251e6d6470..27d16a56ec7a63 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1315,9 +1315,9 @@ class ReplaceVisitor : public GenTreeVisitor result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstFirstRep)); dstFirstRep->NeedsWriteBack = false; - dstFirstRep->NeedsReadBack = true; } + dstFirstRep->NeedsReadBack = true; dstFirstRep++; } @@ -1335,9 +1335,9 @@ class ReplaceVisitor : public GenTreeVisitor result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); dstLastRep->NeedsWriteBack = false; - dstLastRep->NeedsReadBack = true; } + dstLastRep->NeedsReadBack = true; dstEndRep--; } } From fc2e53a7acbf60e377e9c7b1eeb0fc947fe19341 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 26 Apr 2023 15:18:04 +0200 Subject: [PATCH 09/37] JIT: Change GTF_ICON_INITCLASS -> GTF_IND_INITCLASS The JIT has a flag GTF_ICON_INITCLASS that represents that accesses off that address are cctor dependent. Hoisting uses this to avoid hoisting cctor dependent indirections unless all cctors are also hoisted. However, local constant prop/VN-based constant prop do not handle this flag, so we could run into cases where addresses with GTF_ICON_INITCLASS were propagated and then subsequently hoisted incorrectly. This change moves the flag to an OperIsIndir() flag instead of being a flag on the constant. After some digging, I found that the original reason the flag was not an indir flag was simply that there were no more indir flags available, but we do have available flags today. This fix is much simpler than the alternatives which would be to teach VN/local copy prop to propagate this GTF_ICON_INITCLASS flag. Also remove GTF_FLD_INITCLASS which is never set. 
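To make the hazard concrete, a standalone mini-IR sketch (invented Node
type and flag constant; this is not JIT code) of why moving the flag from
the address constant to the indirection helps: constant propagation
replaces the address operand with a fresh node of equal value, so per-node
flags on the operand are silently dropped, while flags on the consuming
indirection survive because propagation never replaces that node.

    #include <cstdio>

    // Node stands in for GenTree: a flags word, a value, and one operand.
    struct Node
    {
        unsigned flags = 0;
        long     value = 0;
        Node*    op    = nullptr;
    };

    const unsigned FLAG_INITCLASS = 0x1;

    int main()
    {
        Node addr;
        addr.value = 0x1000;         // models a GT_CNS_INT static field address
        addr.flags = FLAG_INITCLASS; // old scheme: flag lives on the constant

        Node indir;                  // models the GT_IND reading the static
        indir.op = &addr;

        // Constant prop: substitute a new node with the same value; the
        // old operand's flags do not come along.
        Node propagated;
        propagated.value = addr.value;
        indir.op         = &propagated;

        printf("operand flag after prop: %u (lost)\n", indir.op->flags);

        // New scheme: the flag is a property of the indirection itself,
        // so it is preserved across propagation.
        indir.flags |= FLAG_INITCLASS;
        printf("indir flag after prop:   %u (kept)\n", indir.flags);
    }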
--- src/coreclr/jit/compiler.cpp | 4 ++++ src/coreclr/jit/gentree.cpp | 22 +++++++++------------- src/coreclr/jit/gentree.h | 11 ++--------- src/coreclr/jit/importer.cpp | 18 ++++++++++-------- src/coreclr/jit/morph.cpp | 7 ------- src/coreclr/jit/optimizer.cpp | 8 +------- 6 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 6cf88c87b0a237..53357d0ecb7404 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -9766,6 +9766,10 @@ void cTreeFlags(Compiler* comp, GenTree* tree) { chars += printf("[IND_NONNULL]"); } + if (tree->gtFlags & GTF_IND_INITCLASS) + { + chars += printf("[IND_INITCLASS]"); + } break; case GT_MUL: diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7083a26534bccc..6ca64529ccac9f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -10597,6 +10597,12 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ --msgLength; break; } + if (tree->gtFlags & GTF_IND_INITCLASS) + { + printf("I"); + --msgLength; + break; + } if (tree->gtFlags & GTF_IND_INVARIANT) { printf("#"); @@ -10770,19 +10776,9 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ case GT_CNS_INT: if (tree->IsIconHandle()) { - if ((tree->gtFlags & GTF_ICON_INITCLASS) != 0) - { - printf("I"); // Static Field handle with INITCLASS requirement - --msgLength; - break; - } - else - { - // Some other handle - printf("H"); - --msgLength; - break; - } + printf("H"); + --msgLength; + break; } goto DASH; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 87bd94b46b20d7..2dcbaec452457e 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -476,7 +476,6 @@ enum GenTreeFlags : unsigned int GTF_FLD_TLS = 0x80000000, // GT_FIELD_ADDR -- field address is a Windows x86 TLS reference GTF_FLD_VOLATILE = 0x40000000, // GT_FIELD -- same as GTF_IND_VOLATILE - GTF_FLD_INITCLASS = 0x20000000, // GT_FIELD/GT_FIELD_ADDR -- field access requires preceding class/static init helper GTF_FLD_TGT_HEAP = 0x10000000, // GT_FIELD -- same as GTF_IND_TGT_HEAP GTF_INX_RNGCHK = 0x80000000, // GT_INDEX_ADDR -- this array address should be range-checked @@ -494,9 +493,10 @@ enum GenTreeFlags : unsigned int GTF_IND_UNALIGNED = 0x02000000, // OperIsIndir() -- the load or store is unaligned (we assume worst case alignment of 1 byte) GTF_IND_INVARIANT = 0x01000000, // GT_IND -- the target is invariant (a prejit indirection) GTF_IND_NONNULL = 0x00400000, // GT_IND -- the indirection never returns null (zero) + GTF_IND_INITCLASS = 0x00200000, // OperIsIndir() -- the indirection requires preceding static cctor GTF_IND_FLAGS = GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INVARIANT | - GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP, + GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP | GTF_IND_INITCLASS, GTF_ADDRMODE_NO_CSE = 0x80000000, // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex // addressing mode @@ -547,13 +547,6 @@ enum GenTreeFlags : unsigned int // GTF_ICON_REUSE_REG_VAL = 0x00800000 // GT_CNS_INT -- GTF_REUSE_REG_VAL, defined above GTF_ICON_SIMD_COUNT = 0x00200000, // GT_CNS_INT -- constant is Vector.Count - GTF_ICON_INITCLASS = 0x00100000, // GT_CNS_INT -- Constant is used to access a static that requires preceding - // class/static init helper. 
In some cases, the constant is - // the address of the static field itself, and in other cases - // there's an extra layer of indirection and it is the address - // of the cell that the runtime will fill in with the address - // of the static field; in both of those cases, the constant - // is what gets flagged. GTF_OVERFLOW = 0x10000000, // Supported for: GT_ADD, GT_SUB, GT_MUL and GT_CAST. // Requires an overflow check. Use gtOverflow(Ex)() to check this flag. diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 5a82719562fed6..a68d52a872539c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4167,10 +4167,11 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT outerFldSeq = nullptr; } - bool isHoistable = false; - bool isStaticReadOnlyInitedRef = false; - unsigned typeIndex = 0; - GenTree* op1; + bool isHoistable = false; + bool isStaticReadOnlyInitedRef = false; + GenTreeFlags indirFlags = GTF_EMPTY; + unsigned typeIndex = 0; + GenTree* op1; switch (pFieldInfo->fieldAccessor) { case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER: @@ -4348,7 +4349,7 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT INDEBUG(op1->AsIntCon()->gtTargetHandle = reinterpret_cast(pResolvedToken->hField)); if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_INITCLASS) { - op1->gtFlags |= GTF_ICON_INITCLASS; + indirFlags |= GTF_IND_INITCLASS; } break; } @@ -4356,8 +4357,9 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT if (isBoxedStatic) { - op1 = gtNewIndir(TYP_REF, op1, GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL); - op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1, gtNewIconNode(TARGET_POINTER_SIZE, outerFldSeq)); + op1 = gtNewIndir(TYP_REF, op1, GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL | indirFlags); + indirFlags = GTF_EMPTY; + op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1, gtNewIconNode(TARGET_POINTER_SIZE, outerFldSeq)); } if (!(access & CORINFO_ACCESS_ADDRESS)) @@ -4366,7 +4368,7 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT lclTyp = TypeHandleToVarType(pFieldInfo->fieldType, pFieldInfo->structType, &layout); // TODO-CQ: mark the indirections non-faulting. - op1 = (lclTyp == TYP_STRUCT) ? gtNewBlkIndir(layout, op1) : gtNewIndir(lclTyp, op1); + op1 = (lclTyp == TYP_STRUCT) ? 
gtNewBlkIndir(layout, op1, indirFlags) : gtNewIndir(lclTyp, op1, indirFlags); if (isStaticReadOnlyInitedRef) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ea5838af5870db..459edc6dfed42b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5347,13 +5347,6 @@ GenTree* Compiler::fgMorphExpandTlsFieldAddr(GenTree* tree) // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); - // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS - if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) - { - tree->gtFlags &= ~GTF_FLD_INITCLASS; - tlsRef->gtFlags |= GTF_ICON_INITCLASS; - } - tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); if (dllRef != nullptr) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 58f5c549a8b505..c32ab445e2794d 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7502,13 +7502,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo return fgWalkResult::WALK_CONTINUE; } - // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees. - // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is - // truly dependent on the cctor. So a more precise approach would be to separately propagate - // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for - // simplicity/throughput; the constant itself would be considered non-hoistable anyway, since - // optIsCSEcandidate returns false for constants. - bool treeIsCctorDependent = tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0); + bool treeIsCctorDependent = tree->OperIsIndir() && ((tree->gtFlags & GTF_IND_INITCLASS) != 0); bool treeIsInvariant = true; bool treeHasHoistableChildren = false; int childCount; From 2b7440a8c6174fc9851c9037506ba1481a454edc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 26 Apr 2023 16:01:40 +0200 Subject: [PATCH 10/37] Propagate indir flags --- src/coreclr/jit/promotion.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 27d16a56ec7a63..bed43d0381002f 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -941,8 +941,9 @@ class DecompositionPlan m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); bool coversDestination = CoversDestination(); - GenTree* addr = nullptr; - unsigned addrBaseOffs = 0; + GenTree* addr = nullptr; + unsigned addrBaseOffs = 0; + GenTreeFlags indirFlags = GTF_EMPTY; if (m_dst->OperIs(GT_BLK, GT_FIELD)) { @@ -961,6 +962,9 @@ class DecompositionPlan { addrBaseOffs = m_src->AsField()->gtFldOffset; } + + indirFlags = + m_src->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); } int numAddrUses = addr == nullptr ? 0 : (m_entries.Height() + (coversDestination ? 
0 : 1)); @@ -1062,7 +1066,9 @@ class DecompositionPlan if (needsNullCheck) { - statements->AddStatement(m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs))); + GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)); + indir->gtFlags |= indirFlags & ~GTF_IND_UNALIGNED; + statements->AddStatement(indir); } for (int i = 0; i < m_entries.Height(); i++) @@ -1095,6 +1101,7 @@ class DecompositionPlan { GenTree* addr = grabAddr(addrBaseOffs + entry.Offset); dst = m_compiler->gtNewIndir(entry.Type, addr); + dst->gtFlags |= indirFlags; } } @@ -1122,6 +1129,7 @@ class DecompositionPlan { GenTree* addr = grabAddr(addrBaseOffs + entry.Offset); src = m_compiler->gtNewIndir(entry.Type, addr); + src->gtFlags |= indirFlags; } } From 05c848c6e2f01368111ba0c84146f8cb1d837b66 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 26 Apr 2023 16:14:28 +0200 Subject: [PATCH 11/37] More correct propagation of flags --- src/coreclr/jit/promotion.cpp | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index bed43d0381002f..12e35f909e66ba 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -953,6 +953,8 @@ class DecompositionPlan { addrBaseOffs = m_dst->AsField()->gtFldOffset; } + + indirFlags = GetPropagatedIndirFlags(m_dst); } else if (m_src->OperIs(GT_BLK, GT_FIELD)) { @@ -963,8 +965,7 @@ class DecompositionPlan addrBaseOffs = m_src->AsField()->gtFldOffset; } - indirFlags = - m_src->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); + indirFlags = GetPropagatedIndirFlags(m_src); } int numAddrUses = addr == nullptr ? 0 : (m_entries.Height() + (coversDestination ? 0 : 1)); @@ -1067,7 +1068,7 @@ class DecompositionPlan if (needsNullCheck) { GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)); - indir->gtFlags |= indirFlags & ~GTF_IND_UNALIGNED; + PropagateIndirFlags(indir, indirFlags); statements->AddStatement(indir); } @@ -1101,7 +1102,7 @@ class DecompositionPlan { GenTree* addr = grabAddr(addrBaseOffs + entry.Offset); dst = m_compiler->gtNewIndir(entry.Type, addr); - dst->gtFlags |= indirFlags; + PropagateIndirFlags(dst, indirFlags); } } @@ -1129,7 +1130,7 @@ class DecompositionPlan { GenTree* addr = grabAddr(addrBaseOffs + entry.Offset); src = m_compiler->gtNewIndir(entry.Type, addr); - src->gtFlags |= indirFlags; + PropagateIndirFlags(src, indirFlags); } } @@ -1139,6 +1140,28 @@ class DecompositionPlan assert(numAddrUses == 0); } + GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) + { + assert(indir->OperIs(GT_BLK, GT_FIELD)); + if (indir->OperIs(GT_BLK)) + { + return indir->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); + } + + static_assert_no_msg(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); + return indir->gtFlags & GTF_IND_VOLATILE; + } + + void PropagateIndirFlags(GenTree* indir, GenTreeFlags flags) + { + if (genTypeSize(indir) == 1) + { + flags &= ~GTF_IND_UNALIGNED; + } + + indir->gtFlags |= flags; + } + //------------------------------------------------------------------------ // UpdateEarlyRefCount: // Update early ref counts if necessary for the specified IR node. 
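For reference, a minimal sketch of how the two helpers added above compose
during decomposition; the scaffolding around them is assumed, but the helper
names and semantics match the patch.

    // Capture the propagatable flags once from the parent GT_BLK/GT_FIELD.
    // A GT_FIELD only carries volatility over; a GT_BLK also carries
    // GTF_IND_NONFAULTING, GTF_IND_UNALIGNED and GTF_IND_INITCLASS.
    GenTreeFlags indirFlags = GetPropagatedIndirFlags(m_src);

    // Apply them to each derived field-sized load. PropagateIndirFlags
    // strips GTF_IND_UNALIGNED for single-byte accesses, which can never
    // be misaligned.
    GenTree* src = m_compiler->gtNewIndir(entry.Type, grabAddr(addrBaseOffs + entry.Offset));
    PropagateIndirFlags(src, indirFlags);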
From 58c9d82e4b8e8c2d71a64372f6beaf9dd04b4994 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 26 Apr 2023 16:17:38 +0200 Subject: [PATCH 12/37] Rename scenarios --- eng/pipelines/common/templates/runtimes/run-test-job.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 025649429c385a..5311529ed52d47 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -583,8 +583,8 @@ jobs: - jitpartialcompilation - jitpartialcompilation_pgo - jitobjectstackallocation - - jitgeneralizedpromotion - - jitgeneralizedpromotion_full + - jitphysicalpromotion + - jitphysicalpromotion_full ${{ if in(parameters.testGroup, 'jit-cfg') }}: scenarios: From 043df4642a14b91a6952941bf48849becd1148e5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 27 Apr 2023 11:19:37 +0200 Subject: [PATCH 13/37] Do struct op last when possible --- src/coreclr/jit/promotion.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 12e35f909e66ba..442dfe262e6b4b 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -717,10 +717,11 @@ class DecompositionPlan ArrayStack m_entries; GenTree* m_dst; GenTree* m_src; + bool m_srcInvolvesReplacements; public: - DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src) - : m_compiler(comp), m_entries(comp->getAllocator(CMK_Promotion)), m_dst(dst), m_src(src) + DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src, bool srcInvolvesReplacements) + : m_compiler(comp), m_entries(comp->getAllocator(CMK_Promotion)), m_dst(dst), m_src(src), m_srcInvolvesReplacements(srcInvolvesReplacements) { } @@ -1054,7 +1055,14 @@ class DecompositionPlan // Like above, use 0 intentionally here. m_dst->AsUnOp()->gtOp1 = grabAddr(0); } + } + // If the source involves replacements then do the struct op first -- + // otherwise we would overwrite the destination with stale bits. + // If the source does not involve replacements then CQ analysis shows + // that it's best to do it last. 
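+    // Illustration (field layout assumed): if the source has a promoted field
+    // src.A whose up-to-date value lives only in its replacement local, we
+    // emit "dst = src" first and then "dst.A = src.A-replacement" on top of
+    // it; emitting the block copy last would overwrite the freshly written
+    // field with the stale bits still sitting in the source struct local.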
+ if (!coversDestination && m_srcInvolvesReplacements) + { statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) @@ -1137,6 +1145,11 @@ class DecompositionPlan statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); } + if (!coversDestination && !m_srcInvolvesReplacements) + { + statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); + } + assert(numAddrUses == 0); } @@ -1413,7 +1426,7 @@ class ReplaceVisitor : public GenTreeVisitor } } - DecompositionPlan plan(m_compiler, dst, src); + DecompositionPlan plan(m_compiler, dst, src, srcInvolvesReplacements); if (src->IsConstInitVal()) { From 33b899b1bdc220544ed5deaa71a2f6c10933c5b8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 27 Apr 2023 13:50:16 +0200 Subject: [PATCH 14/37] Add an assert, fix a comment --- src/coreclr/jit/promotion.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 442dfe262e6b4b..71a0211082d250 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1542,6 +1542,7 @@ class ReplaceVisitor : public GenTreeVisitor assert(srcLcl != nullptr); statements->AddStatement(CreateReadBack(srcLcl->GetLclNum(), *srcRep)); srcRep->NeedsReadBack = false; + assert(!srcRep->NeedsWriteBack); } if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) @@ -1608,8 +1609,8 @@ class ReplaceVisitor : public GenTreeVisitor // TODO-CQ: If the source is promoted then this will result in // DNER'ing it. Alternatively we could copy the promoted field - // directly the destination's struct local and the overlapping - // fields as needing read back to avoid this DNER. + // directly to the destination's struct local and mark the + // overlapping fields as needing read back to avoid this DNER. 
plan->CopyToReplacement(dstRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; From 3b0d92eed12e25a88934834e8c804f6197af6529 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 27 Apr 2023 23:30:16 +0200 Subject: [PATCH 15/37] Handle remainders --- src/coreclr/jit/jitstd/vector.h | 1 - src/coreclr/jit/promotion.cpp | 455 ++++++++++++++++++++++++++++++-- src/coreclr/jit/utils.cpp | 13 + src/coreclr/jit/utils.h | 6 + 4 files changed, 454 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index 268ce3a0c43e85..e73314150ee476 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -479,7 +479,6 @@ typename vector::iterator assert(last.m_pElem >= m_pArray); assert(first.m_pElem <= m_pArray + m_nSize); assert(last.m_pElem <= m_pArray + m_nSize); - assert(last.m_pElem > first.m_pElem); pointer fptr = first.m_pElem; pointer lptr = last.m_pElem; diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 71a0211082d250..897796f0e05c09 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -703,6 +703,208 @@ class DecompositionStatementList } }; +class StructSegments +{ +public: + struct Interval + { + unsigned Start = 0; + unsigned End = 0; + + Interval() + { + } + + Interval(unsigned start, unsigned end) + : Start(start), End(end) + { + } + + bool IntersectsInclusive(const Interval& other) const + { + if (End < other.Start) + { + return false; + } + + if (other.End < Start) + { + return false; + } + + return true; + } + + bool Contains(const Interval& other) const + { + return other.Start >= Start && other.End <= End; + } + + void Merge(const Interval& other) + { + Start = min(Start, other.Start); + End = max(End, other.End); + } + }; + +private: + jitstd::vector m_intervals; + +public: + StructSegments(CompAllocator allocator) : m_intervals(allocator) + { + } + + void Add(const Interval& interval) + { + size_t index = BinarySearch(m_intervals, interval.Start); + + if ((ssize_t)index < 0) + { + index = ~index; + } + + m_intervals.insert(m_intervals.begin() + index, interval); + size_t endIndex; + for (endIndex = index + 1; endIndex < m_intervals.size(); endIndex++) + { + if (!m_intervals[index].IntersectsInclusive(m_intervals[endIndex])) + { + break; + } + + m_intervals[index].Merge(m_intervals[endIndex]); + } + + m_intervals.erase(m_intervals.begin() + index + 1, m_intervals.begin() + endIndex); + } + + void Subtract(const Interval& interval) + { + size_t index = BinarySearch(m_intervals, interval.Start); + if ((ssize_t)index < 0) + { + index = ~index; + } + else + { + // Start == interval[index].End, which makes it non-interesting. + index++; + } + + if (index >= m_intervals.size()) + { + return; + } + + // Here we know Start < interval[index].End. Do they not intersect at all? + if (m_intervals[index].Start >= interval.End) + { + // Does not intersect any segment. 
+ return; + } + + assert(m_intervals[index].IntersectsInclusive(interval)); + + if (m_intervals[index].Contains(interval)) + { + if (interval.Start > m_intervals[index].Start) + { + // New interval (existing.Start, interval.Start) + if (interval.End < m_intervals[index].End) + { + m_intervals.insert(m_intervals.begin() + index, Interval(m_intervals[index].Start, interval.Start)); + + // And new interval (interval.End, existing.End) + m_intervals[index + 1].Start = interval.End; + return; + } + + m_intervals[index].End = interval.Start; + return; + } + if (interval.End < m_intervals[index].End) + { + // New interval (interval.End, existing.End) + m_intervals[index].Start = interval.End; + return; + } + + // Full interval is being removed + m_intervals.erase(m_intervals.begin() + index); + return; + } + + if (interval.Start > m_intervals[index].Start) + { + m_intervals[index].End = interval.Start; + index++; + } + + size_t endIndex = BinarySearch(m_intervals, interval.End); + if ((ssize_t)endIndex >= 0) + { + m_intervals.erase(m_intervals.begin() + index, m_intervals.begin() + endIndex + 1); + return; + } + + endIndex = ~endIndex; + if (endIndex == m_intervals.size()) + { + m_intervals.erase(m_intervals.begin() + index, m_intervals.end()); + return; + } + + if (interval.End > m_intervals[endIndex].Start) + { + m_intervals[endIndex].Start = interval.End; + } + + m_intervals.erase(m_intervals.begin() + index, m_intervals.begin() + endIndex); + } + +#ifdef DEBUG + void Check(FixedBitVect* vect) + { + bool first = true; + unsigned last = 0; + for (const Interval& interval : m_intervals) + { + assert(first || (last < interval.Start)); + assert(interval.End <= vect->bitVectGetSize()); + + for (unsigned i = last; i < interval.Start; i++) + assert(!vect->bitVectTest(i)); + + for (unsigned i = interval.Start; i < interval.End; i++) + assert(vect->bitVectTest(i)); + + first = false; + last = interval.End; + } + + for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++) + assert(!vect->bitVectTest(i)); + } +#endif + + bool IsEmpty() + { + return m_intervals.size() == 0; + } + + bool IsSingleInterval(Interval* result) + { + if (m_intervals.size() == 1) + { + *result = m_intervals[0]; + return true; + } + + return false; + } +}; + class DecompositionPlan { struct Entry @@ -822,6 +1024,10 @@ class DecompositionPlan // bool CoversDestination() { + StructSegments segments = GetRemainder(); + + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + unsigned prevEnd = 0; unsigned dstLclOffs = 0; if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) @@ -855,6 +1061,164 @@ class DecompositionPlan return prevEnd == m_dst->GetLayout(m_compiler)->GetSize(); } + StructSegments GetRemainder() + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + + StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); + + INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); + + COMP_HANDLE compHnd = m_compiler->info.compCompHnd; + + bool significantPadding; + if (dstLayout->IsBlockLayout()) + { + significantPadding = true; + } + else + { + uint32_t attribs = compHnd->getClassAttribs(dstLayout->GetClassHandle()); + if ((attribs & (CORINFO_FLG_INDEXABLE_FIELDS | CORINFO_FLG_DONT_DIG_FIELDS)) != 0) + { + significantPadding = true; + } + else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) + { + significantPadding = true; + } + else + { + significantPadding = false; + } + } + + if (significantPadding) + { + 
segments.Add(StructSegments::Interval(0, dstLayout->GetSize())); + +#ifdef DEBUG + for (unsigned i = 0; i < dstLayout->GetSize(); i++) + segmentBitVect->bitVectSet(i); +#endif + } + else + { + unsigned numFields = compHnd->getClassNumInstanceFields(dstLayout->GetClassHandle()); + for (unsigned i = 0; i < numFields; i++) + { + CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); + unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); + CORINFO_CLASS_HANDLE fieldClassHandle; + CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); + var_types varType = JITtype2varType(corType); + unsigned size = genTypeSize(varType); + if (size == 0) + { + // TODO-CQ: Recursively handle padding in sub structures + // here. Might be better to introduce a single JIT-EE call + // to query the significant segments -- that would also be + // usable by R2R even outside the version bubble in many + // cases. + size = compHnd->getClassSize(fieldClassHandle); + assert(size != 0); + } + + segments.Add(StructSegments::Interval(fldOffset, fldOffset + size)); +#ifdef DEBUG + for (unsigned i = 0; i < size; i++) + segmentBitVect->bitVectSet(fldOffset + i); +#endif + } + } + + for (int i = 0; i < m_entries.Height(); i++) + { + const Entry& entry = m_entries.BottomRef(i); + + segments.Subtract(StructSegments::Interval(entry.Offset, entry.Offset + genTypeSize(entry.Type))); + +#ifdef DEBUG + for (unsigned i = 0; i < genTypeSize(entry.Type); i++) + segmentBitVect->bitVectClear(entry.Offset + i); +#endif + } + + INDEBUG(segments.Check(segmentBitVect)); + + return segments; + } + + struct RemainderStrategy + { + enum + { + NoRemainder, + Primitive, + FullBlock, + }; + + int Type; + unsigned PrimitiveOffset; + var_types PrimitiveType; + + RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF) + : Type(type), PrimitiveOffset(primitiveOffset), PrimitiveType(primitiveType) + { + } + }; + + RemainderStrategy DetermineRemainderStrategy() + { + StructSegments remainder = GetRemainder(); + if (remainder.IsEmpty()) + { + return RemainderStrategy(RemainderStrategy::NoRemainder); + } + + StructSegments::Interval interval; + // See if we can "plug the hole" with a single primitive. For LCL_VAR + // destinations do not do this as it will essentially add a use of the + // local due to the partial def -- so it is better to prefer the full + // def that DCE might be able to get rid of. + if (remainder.IsSingleInterval(&interval) && !m_dst->OperIs(GT_LCL_VAR)) + { + var_types primitiveType = TYP_UNDEF; + unsigned size = interval.End - interval.Start; + switch (size) + { + case 1: + primitiveType = TYP_UBYTE; + break; + case 2: + primitiveType = TYP_USHORT; + break; +#ifdef TARGET_64BIT + case 4: + primitiveType = TYP_INT; + break; +#endif + case TARGET_POINTER_SIZE: + primitiveType = TYP_I_IMPL; + if ((interval.Start % TARGET_POINTER_SIZE) == 0) + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + primitiveType = dstLayout->GetGCPtrType(interval.Start / TARGET_POINTER_SIZE); + } + break; + + // TODO-CQ: SIMD sizes + } + + if (primitiveType != TYP_UNDEF) + { + return RemainderStrategy(RemainderStrategy::Primitive, interval.Start, primitiveType); + } + } + + return RemainderStrategy(RemainderStrategy::FullBlock); + } + //------------------------------------------------------------------------ // FinalizeInit: // Create IR to perform the decomposed initialization. 
@@ -940,7 +1304,8 @@ class DecompositionPlan { assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) && m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); - bool coversDestination = CoversDestination(); + + RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); GenTree* addr = nullptr; unsigned addrBaseOffs = 0; @@ -969,20 +1334,31 @@ class DecompositionPlan indirFlags = GetPropagatedIndirFlags(m_src); } - int numAddrUses = addr == nullptr ? 0 : (m_entries.Height() + (coversDestination ? 0 : 1)); + int numAddrUses = 0; - // If the destination is fully covered we may need a null check for the GT_FIELD case. - // If the destination is not covered then the initial struct copy is enough. - bool needsNullCheck = coversDestination && (addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr); - - if (needsNullCheck) + if (addr != nullptr) { - // See if our first indirection will subsume the null check (usual case). - assert(m_entries.Height() > 0); - const Entry& entry = m_entries.BottomRef(0); + numAddrUses += m_entries.Height(); + + if (remainderStrategy.Type != RemainderStrategy::NoRemainder) + numAddrUses++; + } - assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); - needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + bool needsNullCheck = false; + if ((addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr)) + { + switch (remainderStrategy.Type) + { + case RemainderStrategy::NoRemainder: + case RemainderStrategy::Primitive: + // See if our first indirection will subsume the null check (usual case). + assert(m_entries.Height() > 0); + const Entry& entry = m_entries.BottomRef(0); + + assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); + needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + break; + } } if (needsNullCheck) @@ -1038,11 +1414,10 @@ class DecompositionPlan return addrUse; }; - if (!coversDestination) + if (remainderStrategy.Type == RemainderStrategy::FullBlock) { - // Note that this does not handle partially overlapping copies, - // but that is left undefined (and normal block copies do not - // handle this either). + // We will reuse the existing block op's operands. Rebase the + // address off of the new local we created. if (m_src->OperIs(GT_BLK, GT_FIELD)) { // Note that we should use 0 instead of addrBaseOffs here @@ -1061,7 +1436,7 @@ class DecompositionPlan // otherwise we would overwrite the destination with stale bits. // If the source does not involve replacements then CQ analysis shows // that it's best to do it last. 
- if (!coversDestination && m_srcInvolvesReplacements) + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements) { statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); @@ -1145,11 +1520,40 @@ class DecompositionPlan statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); } - if (!coversDestination && !m_srcInvolvesReplacements) + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements) { statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); } + if (remainderStrategy.Type == RemainderStrategy::Primitive) + { + GenTree* dst; + if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), remainderStrategy.PrimitiveType, remainderStrategy.PrimitiveOffset); + m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + else + { + dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, grabAddr(remainderStrategy.PrimitiveOffset)); + PropagateIndirFlags(dst, indirFlags); + } + + GenTree* src; + if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), remainderStrategy.PrimitiveType, remainderStrategy.PrimitiveOffset); + m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + else + { + src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, grabAddr(remainderStrategy.PrimitiveOffset)); + PropagateIndirFlags(src, indirFlags); + } + + statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); + } + assert(numAddrUses == 0); } @@ -1551,7 +1955,12 @@ class ReplaceVisitor : public GenTreeVisitor { // This source replacement ends before the next destination replacement starts. // Write it directly to the destination struct local. 
- plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); + //plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); + if (srcRep->NeedsWriteBack) + { + statements->AddStatement(CreateWriteBack(src->AsLclVarCommon()->GetLclNum(), *srcRep)); + srcRep->NeedsWriteBack = false; + } srcRep++; continue; } @@ -1633,7 +2042,13 @@ class ReplaceVisitor : public GenTreeVisitor } } - plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); + //plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); + if (srcRep->NeedsWriteBack) + { + statements->AddStatement(CreateWriteBack(src->AsLclVarCommon()->GetLclNum(), *srcRep)); + srcRep->NeedsWriteBack = false; + } + srcRep++; } } diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 5dcb315af0a35d..22c7a847fc7fb5 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1054,6 +1054,19 @@ void FixedBitVect::bitVectSet(UINT bitNum) bitVect[index] |= bitNumToBit(bitNum); } +// bitVectClear() - Clears the given bit +void FixedBitVect::bitVectClear(UINT bitNum) +{ + UINT index; + + assert(bitNum <= bitVectSize); + + index = bitNum / bitChunkSize(); + bitNum -= index * bitChunkSize(); + + bitVect[index] &= ~bitNumToBit(bitNum); +} + // bitVectTest() - Tests the given bit bool FixedBitVect::bitVectTest(UINT bitNum) { diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 7fd3e7d10f8840..46a411a3991dfe 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -264,9 +264,15 @@ class FixedBitVect // bitVectInit() - Initializes a bit vector of a given size static FixedBitVect* bitVectInit(UINT size, Compiler* comp); + // bitVectGetSize() - Get number of bits in the bit set + UINT bitVectGetSize() { return bitVectSize; } + // bitVectSet() - Sets the given bit void bitVectSet(UINT bitNum); + // bitVectClear() - Clears the given bit + void bitVectClear(UINT bitNum); + // bitVectTest() - Tests the given bit bool bitVectTest(UINT bitNum); From 63ec4fbcbbf0308ce0971aa4f430115a50c09765 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 28 Apr 2023 15:06:55 +0200 Subject: [PATCH 16/37] Add logging, add docs, generalize to initialization --- src/coreclr/jit/promotion.cpp | 634 ++++++++++++++++++++++------------ 1 file changed, 411 insertions(+), 223 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 897796f0e05c09..31e90e0735751c 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -148,15 +148,15 @@ struct Replacement // back before transferring control if necessary. bool NeedsReadBack = false; #ifdef DEBUG - const char* Name; + const char* Description; #endif - Replacement(unsigned offset, var_types accessType, unsigned lclNum DEBUGARG(const char* name)) + Replacement(unsigned offset, var_types accessType, unsigned lclNum DEBUGARG(const char* description)) : Offset(offset) , AccessType(accessType) , LclNum(lclNum) #ifdef DEBUG - , Name(name) + , Description(description) #endif { } @@ -674,6 +674,7 @@ class LocalsUseVisitor : public GenTreeVisitor } }; +// Represents a list of statements; this is the result of assignment decomposition. class DecompositionStatementList { GenTree* m_head = nullptr; @@ -703,24 +704,29 @@ class DecompositionStatementList } }; +// Represents significant segments of a struct operation. 
+// +// Essentially a segment tree (but not stored as a tree) that supports boolean +// Add/Subtract operations of segments. Used to compute the remainder after +// replacements have been handled as part of a decomposed block operation. class StructSegments { public: - struct Interval + struct Segment { unsigned Start = 0; unsigned End = 0; - Interval() + Segment() { } - Interval(unsigned start, unsigned end) + Segment(unsigned start, unsigned end) : Start(start), End(end) { } - bool IntersectsInclusive(const Interval& other) const + bool IntersectsInclusive(const Segment& other) const { if (End < other.Start) { @@ -735,12 +741,12 @@ class StructSegments return true; } - bool Contains(const Interval& other) const + bool Contains(const Segment& other) const { return other.Start >= Start && other.End <= End; } - void Merge(const Interval& other) + void Merge(const Segment& other) { Start = min(Start, other.Start); End = max(End, other.End); @@ -748,163 +754,231 @@ class StructSegments }; private: - jitstd::vector m_intervals; + jitstd::vector m_segments; public: - StructSegments(CompAllocator allocator) : m_intervals(allocator) + StructSegments(CompAllocator allocator) : m_segments(allocator) { } - void Add(const Interval& interval) + //------------------------------------------------------------------------ + // Add: + // Add a segment to the data structure. + // + // Parameters: + // segment - The segment to add. + // + void Add(const Segment& segment) { - size_t index = BinarySearch(m_intervals, interval.Start); + size_t index = BinarySearch(m_segments, segment.Start); if ((ssize_t)index < 0) { index = ~index; } - m_intervals.insert(m_intervals.begin() + index, interval); + m_segments.insert(m_segments.begin() + index, segment); size_t endIndex; - for (endIndex = index + 1; endIndex < m_intervals.size(); endIndex++) + for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++) { - if (!m_intervals[index].IntersectsInclusive(m_intervals[endIndex])) + if (!m_segments[index].IntersectsInclusive(m_segments[endIndex])) { break; } - m_intervals[index].Merge(m_intervals[endIndex]); + m_segments[index].Merge(m_segments[endIndex]); } - m_intervals.erase(m_intervals.begin() + index + 1, m_intervals.begin() + endIndex); + m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex); } - void Subtract(const Interval& interval) + //------------------------------------------------------------------------ + // Subtract: + // Subtract a segment from the data structure. + // + // Parameters: + // segment - The segment to subtract. + // + void Subtract(const Segment& segment) { - size_t index = BinarySearch(m_intervals, interval.Start); + size_t index = BinarySearch(m_segments, segment.Start); if ((ssize_t)index < 0) { index = ~index; } else { - // Start == interval[index].End, which makes it non-interesting. + // Start == segment[index].End, which makes it non-interesting. index++; } - if (index >= m_intervals.size()) + if (index >= m_segments.size()) { return; } - // Here we know Start < interval[index].End. Do they not intersect at all? - if (m_intervals[index].Start >= interval.End) + // Here we know Start < segment[index].End. Do they not intersect at all? + if (m_segments[index].Start >= segment.End) { // Does not intersect any segment. 
return; } - assert(m_intervals[index].IntersectsInclusive(interval)); + assert(m_segments[index].IntersectsInclusive(segment)); - if (m_intervals[index].Contains(interval)) + if (m_segments[index].Contains(segment)) { - if (interval.Start > m_intervals[index].Start) + if (segment.Start > m_segments[index].Start) { - // New interval (existing.Start, interval.Start) - if (interval.End < m_intervals[index].End) + // New segment (existing.Start, segment.Start) + if (segment.End < m_segments[index].End) { - m_intervals.insert(m_intervals.begin() + index, Interval(m_intervals[index].Start, interval.Start)); + m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start)); - // And new interval (interval.End, existing.End) - m_intervals[index + 1].Start = interval.End; + // And new segment (segment.End, existing.End) + m_segments[index + 1].Start = segment.End; return; } - m_intervals[index].End = interval.Start; + m_segments[index].End = segment.Start; return; } - if (interval.End < m_intervals[index].End) + if (segment.End < m_segments[index].End) { - // New interval (interval.End, existing.End) - m_intervals[index].Start = interval.End; + // New segment (segment.End, existing.End) + m_segments[index].Start = segment.End; return; } - // Full interval is being removed - m_intervals.erase(m_intervals.begin() + index); + // Full segment is being removed + m_segments.erase(m_segments.begin() + index); return; } - if (interval.Start > m_intervals[index].Start) + if (segment.Start > m_segments[index].Start) { - m_intervals[index].End = interval.Start; + m_segments[index].End = segment.Start; index++; } - size_t endIndex = BinarySearch(m_intervals, interval.End); + size_t endIndex = BinarySearch(m_segments, segment.End); if ((ssize_t)endIndex >= 0) { - m_intervals.erase(m_intervals.begin() + index, m_intervals.begin() + endIndex + 1); + m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1); return; } endIndex = ~endIndex; - if (endIndex == m_intervals.size()) + if (endIndex == m_segments.size()) { - m_intervals.erase(m_intervals.begin() + index, m_intervals.end()); + m_segments.erase(m_segments.begin() + index, m_segments.end()); return; } - if (interval.End > m_intervals[endIndex].Start) + if (segment.End > m_segments[endIndex].Start) { - m_intervals[endIndex].Start = interval.End; + m_segments[endIndex].Start = segment.End; } - m_intervals.erase(m_intervals.begin() + index, m_intervals.begin() + endIndex); + m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex); + } + + //------------------------------------------------------------------------ + // IsEmpty: + // Check if the segment tree is empty. + // + // Returns: + // True if so. + // + bool IsEmpty() + { + return m_segments.size() == 0; + } + + //------------------------------------------------------------------------ + // IsSingleSegment: + // Check if the segment tree contains only a single segment, and return + // it if so. + // + // Parameters: + // result - [out] The single segment. Only valid if the method returns true. + // + // Returns: + // True if so. + // + bool IsSingleSegment(Segment* result) + { + if (m_segments.size() == 1) + { + *result = m_segments[0]; + return true; + } + + return false; } #ifdef DEBUG + //------------------------------------------------------------------------ + // Check: + // Validate that the data structure is normalized and that it equals a + // specific fixed bit vector. 
+ // + // Parameters: + // vect - The bit vector + // + // Remarks: + // This validates that the internal representation is normalized (i.e. + // all adjacent intervals are merged) and that it contains an index iff + // the specified vector contains that index. + // void Check(FixedBitVect* vect) { bool first = true; unsigned last = 0; - for (const Interval& interval : m_intervals) + for (const Segment& segment : m_segments) { - assert(first || (last < interval.Start)); - assert(interval.End <= vect->bitVectGetSize()); + assert(first || (last < segment.Start)); + assert(segment.End <= vect->bitVectGetSize()); - for (unsigned i = last; i < interval.Start; i++) + for (unsigned i = last; i < segment.Start; i++) assert(!vect->bitVectTest(i)); - for (unsigned i = interval.Start; i < interval.End; i++) + for (unsigned i = segment.Start; i < segment.End; i++) assert(vect->bitVectTest(i)); first = false; - last = interval.End; + last = segment.End; } for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++) assert(!vect->bitVectTest(i)); } -#endif - bool IsEmpty() - { - return m_intervals.size() == 0; - } - - bool IsSingleInterval(Interval* result) + //------------------------------------------------------------------------ + // Dump: + // Dump a string representation of the segment tree to stdout. + // + void Dump() { - if (m_intervals.size() == 1) + if (m_segments.size() == 0) { - *result = m_intervals[0]; - return true; + printf(""); + } + else + { + const char* sep = ""; + for (const Segment& segment : m_segments) + { + printf("%s[%03x..%03x)", sep, segment.Start, segment.End); + sep = " "; + } } - - return false; } +#endif }; +// Represents a plan for decomposing a block operation into direct treatment of +// replacement fields and the remainder. class DecompositionPlan { struct Entry @@ -999,7 +1073,7 @@ class DecompositionPlan // void Finalize(DecompositionStatementList* statements) { - if (m_src->IsConstInitVal()) + if (IsInit()) { FinalizeInit(statements); } @@ -1009,83 +1083,99 @@ class DecompositionPlan } } -private: //------------------------------------------------------------------------ - // CoversDestination: - // Check if the destination is fully defined by the entries that have - // been added to the current plan. + // CanInitPrimitive: + // Check if we can handle initializing a primitive of the specified type. + // For example, we cannot directly initialize SIMD types to non-zero + // constants. + // + // Parameters: + // type - The primitive type // // Returns: // True if so. // - // Remarks: - // When the plan fully covers the destination the remaining struct - // copy/initialization can be omitted. - // - bool CoversDestination() + bool CanInitPrimitive(var_types type) { - StructSegments segments = GetRemainder(); - - ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - - unsigned prevEnd = 0; - unsigned dstLclOffs = 0; - if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + assert(IsInit()); + if (varTypeIsGC(type) || varTypeIsSIMD(type)) { - dstLclOffs = m_dst->AsLclVarCommon()->GetLclOffs(); + return GetInitPattern() == 0; } - for (int i = 0; i < m_entries.Height(); i++) - { - const Entry& entry = m_entries.BottomRef(i); - - unsigned writeBegin = entry.Offset; - unsigned writeSize = genTypeSize(entry.Type); - - assert(writeBegin >= prevEnd); - - if (writeBegin > prevEnd) - { - // Uncovered hole from [prevEnd..writeBegin). - // TODO-CQ: In many cases it's more efficient to "plug" the holes. 
However, - // it is made more complicated by the fact that the holes can contain GC pointers in them and - // we cannot (yet) represent custom class layouts with GC pointers in them. - // TODO-CQ: Many of these cases are just padding. We should handle structs with insignificant - // padding here. - return false; - } + return true; + } - prevEnd = writeBegin + writeSize; - } +private: + //------------------------------------------------------------------------ + // IsInit: + // Check if this is an init block operation. + // + // Returns: + // True if so. + // + bool IsInit() + { + return m_src->IsConstInitVal(); + } - return prevEnd == m_dst->GetLayout(m_compiler)->GetSize(); + //------------------------------------------------------------------------ + // GetInitPattern: + // For an init block operation, get the pattern to init with. + // + // Returns: + // Byte pattern broadcast into every byte of a 64-bit int. + // + int64_t GetInitPattern() + { + assert(IsInit()); + GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; + int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; + return pattern; } - StructSegments GetRemainder() + //------------------------------------------------------------------------ + // ComputeRemainder: + // Compute the remainder of the block operation that needs to be inited + // or copied after the replacements stored in the plan have been handled. + // + // Returns: + // Segments representing the remainder. + // + // Remarks: + // This function takes into account that insignificant padding does not + // need to be considered part of the remainder. For example, the last 4 + // bytes of Span on 64-bit are not returned as the remainder. + // + StructSegments ComputeRemainder() { ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); - - INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); - COMP_HANDLE compHnd = m_compiler->info.compCompHnd; bool significantPadding; if (dstLayout->IsBlockLayout()) { significantPadding = true; + JITDUMP(" Block op has significant padding due to block layout\n"); } else { uint32_t attribs = compHnd->getClassAttribs(dstLayout->GetClassHandle()); - if ((attribs & (CORINFO_FLG_INDEXABLE_FIELDS | CORINFO_FLG_DONT_DIG_FIELDS)) != 0) + if ((attribs & CORINFO_FLG_INDEXABLE_FIELDS) != 0) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to indexable fields\n"); + } + else if ((attribs & CORINFO_FLG_DONT_DIG_FIELDS) != 0) { significantPadding = true; + JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); } else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) { significantPadding = true; + JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); } else { @@ -1093,9 +1183,14 @@ class DecompositionPlan } } + StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); + + // Validate with "obviously correct" but less scalable fixed bit vector implementation. 
+ INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); + if (significantPadding) { - segments.Add(StructSegments::Interval(0, dstLayout->GetSize())); + segments.Add(StructSegments::Segment(0, dstLayout->GetSize())); #ifdef DEBUG for (unsigned i = 0; i < dstLayout->GetSize(); i++) @@ -1124,7 +1219,7 @@ class DecompositionPlan assert(size != 0); } - segments.Add(StructSegments::Interval(fldOffset, fldOffset + size)); + segments.Add(StructSegments::Segment(fldOffset, fldOffset + size)); #ifdef DEBUG for (unsigned i = 0; i < size; i++) segmentBitVect->bitVectSet(fldOffset + i); @@ -1132,11 +1227,14 @@ class DecompositionPlan } } + // TODO-TP: Cache above StructSegments per class layout and just clone + // it there before the following subtract operations. + for (int i = 0; i < m_entries.Height(); i++) { const Entry& entry = m_entries.BottomRef(i); - segments.Subtract(StructSegments::Interval(entry.Offset, entry.Offset + genTypeSize(entry.Type))); + segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type))); #ifdef DEBUG for (unsigned i = 0; i < genTypeSize(entry.Type); i++) @@ -1144,11 +1242,22 @@ class DecompositionPlan #endif } - INDEBUG(segments.Check(segmentBitVect)); +#ifdef DEBUG + segments.Check(segmentBitVect); + + if (m_compiler->verbose) + { + printf(" Remainder: "); + segments.Dump(); + printf("\n"); + } +#endif return segments; } + // Represents the strategy for handling the remainder part of the block + // operation. struct RemainderStrategy { enum @@ -1168,23 +1277,36 @@ class DecompositionPlan } }; + //------------------------------------------------------------------------ + // DetermineRemainderStrategy: + // Determine the strategy to use to handle the remaining parts of the struct + // once replacements have been handled. + // + // Returns: + // Type describing how it should be handled; for example, by a full block + // copy (that may be redundant with some of the replacements, but covers + // the rest of the remainder); or by handling a specific 'hole' as a + // primitive. + // RemainderStrategy DetermineRemainderStrategy() { - StructSegments remainder = GetRemainder(); + StructSegments remainder = ComputeRemainder(); if (remainder.IsEmpty()) { + JITDUMP(" => remainder strategy: do nothing\n"); return RemainderStrategy(RemainderStrategy::NoRemainder); } - StructSegments::Interval interval; + StructSegments::Segment segment; // See if we can "plug the hole" with a single primitive. For LCL_VAR // destinations do not do this as it will essentially add a use of the // local due to the partial def -- so it is better to prefer the full // def that DCE might be able to get rid of. - if (remainder.IsSingleInterval(&interval) && !m_dst->OperIs(GT_LCL_VAR)) + // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. 
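+        // Illustration (64-bit layout assumed): for a struct { object o; int i }
+        // where only 'o' has a replacement, the remainder is the single segment
+        // [8..12), which can be plugged with one TYP_INT copy instead of
+        // retaining a full block copy of the struct.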
+ if (remainder.IsSingleSegment(&segment) && !m_dst->OperIs(GT_LCL_VAR)) { var_types primitiveType = TYP_UNDEF; - unsigned size = interval.End - interval.Start; + unsigned size = segment.End - segment.Start; switch (size) { case 1: @@ -1200,10 +1322,10 @@ class DecompositionPlan #endif case TARGET_POINTER_SIZE: primitiveType = TYP_I_IMPL; - if ((interval.Start % TARGET_POINTER_SIZE) == 0) + if ((segment.Start % TARGET_POINTER_SIZE) == 0) { ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - primitiveType = dstLayout->GetGCPtrType(interval.Start / TARGET_POINTER_SIZE); + primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); } break; @@ -1212,10 +1334,19 @@ class DecompositionPlan if (primitiveType != TYP_UNDEF) { - return RemainderStrategy(RemainderStrategy::Primitive, interval.Start, primitiveType); + if (!IsInit() || CanInitPrimitive(primitiveType)) + { + JITDUMP(" => remainder strategy: %s at %03x\n", varTypeName(primitiveType), segment.Start); + return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); + } + else + { + JITDUMP("Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); + } } } + JITDUMP(" => remainder strategy: retain a full block op\n"); return RemainderStrategy(RemainderStrategy::FullBlock); } @@ -1229,68 +1360,92 @@ class DecompositionPlan void FinalizeInit(DecompositionStatementList* statements) { GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; - int64_t initPattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; + int64_t initPattern = GetInitPattern(); for (int i = 0; i < m_entries.Height(); i++) { const Entry& entry = m_entries.BottomRef(i); - GenTree* srcVal; - switch (entry.Type) - { - case TYP_BOOL: - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - case TYP_INT: - { - int64_t mask = (int64_t(1) << (genTypeSize(entry.Type) * 8)) - 1; - srcVal = m_compiler->gtNewIconNode(static_cast(initPattern & mask)); - break; - } - case TYP_LONG: - srcVal = m_compiler->gtNewLconNode(initPattern); - break; - case TYP_FLOAT: - float floatPattern; - memcpy(&floatPattern, &initPattern, sizeof(floatPattern)); - srcVal = m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT); - break; - case TYP_DOUBLE: - double doublePattern; - memcpy(&doublePattern, &initPattern, sizeof(doublePattern)); - srcVal = m_compiler->gtNewDconNode(doublePattern); - break; - case TYP_REF: - case TYP_BYREF: -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: -#endif // TARGET_XARCH -#endif // FEATURE_SIMD - { - assert(initPattern == 0); - srcVal = m_compiler->gtNewZeroConNode(entry.Type); - break; - } - default: - unreached(); - } assert(entry.ToLclNum != BAD_VAR_NUM); + GenTree* src = CreateInitValue(entry.Type, initPattern); GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); - statements->AddStatement(m_compiler->gtNewAssignNode(dst, srcVal)); + statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); } - if (!CoversDestination()) + RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); + if (remainderStrategy.Type == RemainderStrategy::FullBlock) { GenTree* asg = m_compiler->gtNewBlkOpNode(m_dst, cns); statements->AddStatement(asg); } + else if (remainderStrategy.Type == RemainderStrategy::Primitive) + { + GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern); + GenTreeLclVarCommon* dstLcl = 
m_dst->AsLclVarCommon();
+            GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+            m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // CreateInitValue:
+    //   Create an IR node representing a constant value with the specified init pattern.
+    //
+    // Parameters:
+    //   type        - The primitive type
+    //   initPattern - Pattern to init with
+    //
+    // Returns:
+    //   A constant.
+    //
+    // Remarks:
+    //   Should only be called when that pattern can actually be represented;
+    //   for example, SIMD types and GC pointers only support an init pattern
+    //   of zero.
+    //
+    GenTree* CreateInitValue(var_types type, int64_t initPattern)
+    {
+        switch (type)
+        {
+            case TYP_BOOL:
+            case TYP_BYTE:
+            case TYP_UBYTE:
+            case TYP_SHORT:
+            case TYP_USHORT:
+            case TYP_INT:
+            {
+                int64_t mask = (int64_t(1) << (genTypeSize(type) * 8)) - 1;
+                return m_compiler->gtNewIconNode(static_cast<int32_t>(initPattern & mask));
+            }
+            case TYP_LONG:
+                return m_compiler->gtNewLconNode(initPattern);
+            case TYP_FLOAT:
+                float floatPattern;
+                memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
+                return m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT);
+            case TYP_DOUBLE:
+                double doublePattern;
+                memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
+                return m_compiler->gtNewDconNode(doublePattern);
+            case TYP_REF:
+            case TYP_BYREF:
+#ifdef FEATURE_SIMD
+            case TYP_SIMD8:
+            case TYP_SIMD12:
+            case TYP_SIMD16:
+#if defined(TARGET_XARCH)
+            case TYP_SIMD32:
+            case TYP_SIMD64:
+#endif // TARGET_XARCH
+#endif // FEATURE_SIMD
+            {
+                assert(initPattern == 0);
+                return m_compiler->gtNewZeroConNode(type);
+            }
+            default:
+                unreached();
+        }
     }

     //------------------------------------------------------------------------
@@ -1530,8 +1685,9 @@ class DecompositionPlan
             GenTree* dst;
             if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
             {
-                dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), remainderStrategy.PrimitiveType, remainderStrategy.PrimitiveOffset);
-                m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+                GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon();
+                dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+                m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
             }
             else
             {
@@ -1542,8 +1698,9 @@ class DecompositionPlan
             GenTree* src;
             if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
             {
-                src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), remainderStrategy.PrimitiveType, remainderStrategy.PrimitiveOffset);
-                m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+                GenTreeLclVarCommon* srcLcl = m_src->AsLclVarCommon();
+                src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType, srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+                m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
             }
             else
             {
@@ -1557,6 +1714,17 @@ class DecompositionPlan
         assert(numAddrUses == 0);
     }

+    //------------------------------------------------------------------------
+    // GetPropagatedIndirFlags:
// Convert GT_BLK or GT_FIELD indir flags into flags that should be + // propagated to derived GT_IND nodes. + // + // Parameters: + // indir - The indirection + // + // Returns: + // Flags to propagate to created derived GT_IND nodes. + // GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) { assert(indir->OperIs(GT_BLK, GT_FIELD)); @@ -1569,6 +1737,14 @@ class DecompositionPlan return indir->gtFlags & GTF_IND_VOLATILE; } + //------------------------------------------------------------------------ + // PropagateIndirFlags: + // Propagate the specified flags to a GT_IND node. + // + // Parameters: + // indir - The indirection to apply flags to + // flags - The specified indirection flags. + // void PropagateIndirFlags(GenTree* indir, GenTreeFlags flags) { if (genTypeSize(indir) == 1) @@ -1753,9 +1929,9 @@ class ReplaceVisitor : public GenTreeVisitor { if (dstFirstRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with destination %s. Write and read-backs are " + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and read-backs are " "necessary.\n", - dstFirstRep->Name); + dstFirstRep->LclNum, dstFirstRep->Description); // The value of the replacement will be partially assembled from its old value and this struct // operation. // We accomplish this by an initial write back, the struct copy, followed by a later read back. @@ -1777,9 +1953,9 @@ class ReplaceVisitor : public GenTreeVisitor if (dstLastRep->NeedsWriteBack) { JITDUMP( - "*** Block operation partially overlaps with destination %s. Write and read-backs are " + "*** Block operation partially overlaps with destination V%02u (%s). Write and read-backs are " "necessary.\n", - dstLastRep->Name); + dstLastRep->LclNum, dstLastRep->Description); result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); dstLastRep->NeedsWriteBack = false; @@ -1800,8 +1976,8 @@ class ReplaceVisitor : public GenTreeVisitor { if (srcFirstRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", - srcFirstRep->Name); + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is necessary.\n", + srcFirstRep->LclNum, srcFirstRep->Description); result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcFirstRep)); @@ -1818,8 +1994,8 @@ class ReplaceVisitor : public GenTreeVisitor { if (srcLastRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with source %s. Write back is necessary.\n", - srcLastRep->Name); + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is necessary.\n", + srcLastRep->LclNum, srcLastRep->Description); result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcLastRep)); srcLastRep->NeedsWriteBack = false; @@ -1834,9 +2010,7 @@ class ReplaceVisitor : public GenTreeVisitor if (src->IsConstInitVal()) { - GenTree* cns = src->OperIsInitVal() ? src->gtGetOp1() : src; - InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, - static_cast(cns->AsIntCon()->IconValue()), &plan); + InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, &plan); } else { @@ -1872,31 +2046,30 @@ class ReplaceVisitor : public GenTreeVisitor // directly inited, and mark the other ones as requiring read back. // // Parameters: + // dst - Destination local that involves replacement. // firstRep - The first replacement. // endRep - End of the replacements. - // initVal - byte pattern to init with - // result - Statement list to add resulting statements to. 
- // - // Remarks: - // Sets Replacement::Handled if the replacement was handled and IR was - // created to initialize it with the correct value. + // plan - Decomposition plan to add initialization entries into. // void InitFields(GenTreeLclVarCommon* dst, Replacement* firstRep, Replacement* endRep, - unsigned char initVal, DecompositionPlan* plan) { for (Replacement* rep = firstRep; rep < endRep; rep++) { - if ((initVal != 0) && (varTypeIsSIMD(rep->AccessType) || varTypeIsGC(rep->AccessType))) + if (!plan->CanInitPrimitive(rep->AccessType)) { + JITDUMP(" Unsupported init of %s %s. Will init as struct and read back.\n", + varTypeName(rep->AccessType), rep->Description); + // We will need to read this one back after initing the struct. rep->NeedsWriteBack = false; rep->NeedsReadBack = true; continue; } + JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); plan->InitReplacement(rep->LclNum, rep->Offset - dst->GetLclOffs(), rep->AccessType); rep->NeedsWriteBack = true; rep->NeedsReadBack = false; @@ -1943,6 +2116,9 @@ class ReplaceVisitor : public GenTreeVisitor { if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) { + JITDUMP(" Source replacement V%02u (%s) is stale. Will read it back before copy.\n", + srcRep->LclNum, srcRep->Description); + assert(srcLcl != nullptr); statements->AddStatement(CreateReadBack(srcLcl->GetLclNum(), *srcRep)); srcRep->NeedsReadBack = false; @@ -1955,12 +2131,9 @@ class ReplaceVisitor : public GenTreeVisitor { // This source replacement ends before the next destination replacement starts. // Write it directly to the destination struct local. - //plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); - if (srcRep->NeedsWriteBack) - { - statements->AddStatement(CreateWriteBack(src->AsLclVarCommon()->GetLclNum(), *srcRep)); - srcRep->NeedsWriteBack = false; - } + unsigned offs = srcRep->Offset - srcBaseOffs; + plan->CopyFromReplacement(srcRep->LclNum, offs, srcRep->AccessType); + JITDUMP(" dst+%03x <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); srcRep++; continue; } @@ -1969,7 +2142,9 @@ class ReplaceVisitor : public GenTreeVisitor { // Destination replacement ends before the next source replacement starts. // Read it directly from the source struct local. - plan->CopyToReplacement(dstRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); + unsigned offs = dstRep->Offset - dstBaseOffs; + plan->CopyToReplacement(dstRep->LclNum, offs, dstRep->AccessType); + JITDUMP(" V%02u (%s) <- src+%03x\n", dstRep->LclNum, dstRep->Description, offs); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; dstRep++; @@ -1983,6 +2158,8 @@ class ReplaceVisitor : public GenTreeVisitor { plan->CopyBetweenReplacements(dstRep->LclNum, srcRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); + dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; dstRep++; @@ -1994,25 +2171,34 @@ class ReplaceVisitor : public GenTreeVisitor // will handle the destination replacement in a future // iteration of the loop. statements->AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcRep)); + JITDUMP(" Partial overlap of V%02u (%s) <- V%02u (%s). 
Will read source back before copy\n", + dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); srcRep++; continue; } if (dstRep < dstEndRep) { + unsigned offs = dstRep->Offset - dstBaseOffs; + if ((srcDsc != nullptr) && srcDsc->lvPromoted) { - unsigned srcOffs = srcLcl->GetLclOffs() + (dstRep->Offset - dstBaseOffs); + unsigned srcOffs = srcLcl->GetLclOffs() + offs; unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); - if ((fieldLcl != BAD_VAR_NUM) && (m_compiler->lvaGetDesc(fieldLcl)->lvType == dstRep->AccessType)) + if (fieldLcl != BAD_VAR_NUM) { - plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, dstRep->Offset - dstBaseOffs, - dstRep->AccessType); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; - dstRep++; - continue; + LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); + if (dsc->lvType == dstRep->AccessType) + { + plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, offs, + dstRep->AccessType); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + continue; + } } } @@ -2020,7 +2206,8 @@ class ReplaceVisitor : public GenTreeVisitor // DNER'ing it. Alternatively we could copy the promoted field // directly to the destination's struct local and mark the // overlapping fields as needing read back to avoid this DNER. - plan->CopyToReplacement(dstRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); + plan->CopyToReplacement(dstRep->LclNum, offs, dstRep->AccessType); + JITDUMP(" V%02u (%s) <- src+%03x\n", dstRep->LclNum, dstRep->Description, offs); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; dstRep++; @@ -2028,27 +2215,28 @@ class ReplaceVisitor : public GenTreeVisitor else { assert(srcRep < srcEndRep); + unsigned offs = srcRep->Offset - srcBaseOffs; if ((dstDsc != nullptr) && dstDsc->lvPromoted) { - unsigned dstOffs = dstLcl->GetLclOffs() + (srcRep->Offset - srcBaseOffs); + unsigned dstOffs = dstLcl->GetLclOffs() + offs; unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); - if ((fieldLcl != BAD_VAR_NUM) && (m_compiler->lvaGetDesc(fieldLcl)->lvType == srcRep->AccessType)) + if (fieldLcl != BAD_VAR_NUM) { - plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, srcRep->Offset - srcBaseOffs, - srcRep->AccessType); - srcRep++; - continue; + LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); + if (dsc->lvType == srcRep->AccessType) + { + plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, offs, + srcRep->AccessType); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum, srcRep->Description); + srcRep++; + continue; + } } } - //plan->CopyFromReplacement(srcRep->LclNum, srcRep->Offset - srcBaseOffs, srcRep->AccessType); - if (srcRep->NeedsWriteBack) - { - statements->AddStatement(CreateWriteBack(src->AsLclVarCommon()->GetLclNum(), *srcRep)); - srcRep->NeedsWriteBack = false; - } - + plan->CopyFromReplacement(srcRep->LclNum, offs, srcRep->AccessType); + JITDUMP(" dst+%03x <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); srcRep++; } } From 6c12a740b2589d75cdb0d07c774d060acb22047d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 28 Apr 2023 15:10:31 +0200 Subject: [PATCH 17/37] Run jit-format --- src/coreclr/jit/promotion.cpp | 154 ++++++++++++++++++---------------- src/coreclr/jit/utils.h | 5 +- 2 files changed, 85 insertions(+), 74 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp 
index 31e90e0735751c..9d24440dc0c792 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -715,14 +715,13 @@ class StructSegments struct Segment { unsigned Start = 0; - unsigned End = 0; + unsigned End = 0; Segment() { } - Segment(unsigned start, unsigned end) - : Start(start), End(end) + Segment(unsigned start, unsigned end) : Start(start), End(end) { } @@ -749,7 +748,7 @@ class StructSegments void Merge(const Segment& other) { Start = min(Start, other.Start); - End = max(End, other.End); + End = max(End, other.End); } }; @@ -933,8 +932,8 @@ class StructSegments // void Check(FixedBitVect* vect) { - bool first = true; - unsigned last = 0; + bool first = true; + unsigned last = 0; for (const Segment& segment : m_segments) { assert(first || (last < segment.Start)); @@ -947,7 +946,7 @@ class StructSegments assert(vect->bitVectTest(i)); first = false; - last = segment.End; + last = segment.End; } for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++) @@ -997,7 +996,11 @@ class DecompositionPlan public: DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src, bool srcInvolvesReplacements) - : m_compiler(comp), m_entries(comp->getAllocator(CMK_Promotion)), m_dst(dst), m_src(src), m_srcInvolvesReplacements(srcInvolvesReplacements) + : m_compiler(comp) + , m_entries(comp->getAllocator(CMK_Promotion)) + , m_dst(dst) + , m_src(src) + , m_srcInvolvesReplacements(srcInvolvesReplacements) { } @@ -1129,8 +1132,8 @@ class DecompositionPlan int64_t GetInitPattern() { assert(IsInit()); - GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; - int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; + GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; + int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; return pattern; } @@ -1202,12 +1205,12 @@ class DecompositionPlan unsigned numFields = compHnd->getClassNumInstanceFields(dstLayout->GetClassHandle()); for (unsigned i = 0; i < numFields; i++) { - CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); - unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); + CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); + unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); CORINFO_CLASS_HANDLE fieldClassHandle; - CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); - var_types varType = JITtype2varType(corType); - unsigned size = genTypeSize(varType); + CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); + var_types varType = JITtype2varType(corType); + unsigned size = genTypeSize(varType); if (size == 0) { // TODO-CQ: Recursively handle padding in sub structures @@ -1267,8 +1270,8 @@ class DecompositionPlan FullBlock, }; - int Type; - unsigned PrimitiveOffset; + int Type; + unsigned PrimitiveOffset; var_types PrimitiveType; RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF) @@ -1306,30 +1309,30 @@ class DecompositionPlan if (remainder.IsSingleSegment(&segment) && !m_dst->OperIs(GT_LCL_VAR)) { var_types primitiveType = TYP_UNDEF; - unsigned size = segment.End - segment.Start; + unsigned size = segment.End - segment.Start; switch (size) { - case 1: - primitiveType = TYP_UBYTE; - break; - case 2: - primitiveType = TYP_USHORT; - break; + case 1: + primitiveType = TYP_UBYTE; + break; + case 2: + primitiveType = TYP_USHORT; + break; #ifdef TARGET_64BIT - case 
4: - primitiveType = TYP_INT; - break; + case 4: + primitiveType = TYP_INT; + break; #endif - case TARGET_POINTER_SIZE: - primitiveType = TYP_I_IMPL; - if ((segment.Start % TARGET_POINTER_SIZE) == 0) - { - ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); - } - break; + case TARGET_POINTER_SIZE: + primitiveType = TYP_I_IMPL; + if ((segment.Start % TARGET_POINTER_SIZE) == 0) + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); + } + break; - // TODO-CQ: SIMD sizes + // TODO-CQ: SIMD sizes } if (primitiveType != TYP_UNDEF) @@ -1380,9 +1383,10 @@ class DecompositionPlan } else if (remainderStrategy.Type == RemainderStrategy::Primitive) { - GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern); + GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern); GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon(); - GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); + GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, + dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); } @@ -1504,15 +1508,15 @@ class DecompositionPlan { switch (remainderStrategy.Type) { - case RemainderStrategy::NoRemainder: - case RemainderStrategy::Primitive: - // See if our first indirection will subsume the null check (usual case). - assert(m_entries.Height() > 0); - const Entry& entry = m_entries.BottomRef(0); + case RemainderStrategy::NoRemainder: + case RemainderStrategy::Primitive: + // See if our first indirection will subsume the null check (usual case). 
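// A minimal standalone sketch of the subsumption rule stated above, assuming
// a single unmapped guard page around address null (the real check,
// Compiler::fgIsBigOffset, is target-dependent):
//
//     bool accessSubsumesNullCheck(size_t offs)
//     {
//         // Dereferencing [base + offs] on a null base still faults while
//         // offs stays inside the unmapped region around zero, so the first
//         // decomposed field access can double as the null check; larger
//         // offsets require an explicit GT_NULLCHECK on the base address.
//         const size_t guardRegionSize = 0x1000; // illustrative assumption
//         return offs < guardRegionSize;
//     }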
+ assert(m_entries.Height() > 0); + const Entry& entry = m_entries.BottomRef(0); - assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); - needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); - break; + assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); + needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + break; } } @@ -1686,12 +1690,14 @@ class DecompositionPlan if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { GenTreeLclVarCommon* dstLcl = dst->AsLclVarCommon(); - dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); + dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, + dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); } else { - dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, grabAddr(remainderStrategy.PrimitiveOffset)); + dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, + grabAddr(remainderStrategy.PrimitiveOffset)); PropagateIndirFlags(dst, indirFlags); } @@ -1699,12 +1705,14 @@ class DecompositionPlan if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { GenTreeLclVarCommon* srcLcl = src->AsLclVarCommon(); - src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType, srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); + src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType, + srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); } else { - src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, grabAddr(remainderStrategy.PrimitiveOffset)); + src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, + grabAddr(remainderStrategy.PrimitiveOffset)); PropagateIndirFlags(src, indirFlags); } @@ -1929,7 +1937,8 @@ class ReplaceVisitor : public GenTreeVisitor { if (dstFirstRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and read-backs are " + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " + "read-backs are " "necessary.\n", dstFirstRep->LclNum, dstFirstRep->Description); // The value of the replacement will be partially assembled from its old value and this struct @@ -1952,10 +1961,10 @@ class ReplaceVisitor : public GenTreeVisitor { if (dstLastRep->NeedsWriteBack) { - JITDUMP( - "*** Block operation partially overlaps with destination V%02u (%s). Write and read-backs are " - "necessary.\n", - dstLastRep->LclNum, dstLastRep->Description); + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " + "read-backs are " + "necessary.\n", + dstLastRep->LclNum, dstLastRep->Description); result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); dstLastRep->NeedsWriteBack = false; @@ -1976,8 +1985,9 @@ class ReplaceVisitor : public GenTreeVisitor { if (srcFirstRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is necessary.\n", - srcFirstRep->LclNum, srcFirstRep->Description); + JITDUMP( + "*** Block operation partially overlaps with source V%02u (%s). 
Write back is necessary.\n", + srcFirstRep->LclNum, srcFirstRep->Description); result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcFirstRep)); @@ -1994,7 +2004,8 @@ class ReplaceVisitor : public GenTreeVisitor { if (srcLastRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is necessary.\n", + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is " + "necessary.\n", srcLastRep->LclNum, srcLastRep->Description); result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcLastRep)); @@ -2051,17 +2062,14 @@ class ReplaceVisitor : public GenTreeVisitor // endRep - End of the replacements. // plan - Decomposition plan to add initialization entries into. // - void InitFields(GenTreeLclVarCommon* dst, - Replacement* firstRep, - Replacement* endRep, - DecompositionPlan* plan) + void InitFields(GenTreeLclVarCommon* dst, Replacement* firstRep, Replacement* endRep, DecompositionPlan* plan) { for (Replacement* rep = firstRep; rep < endRep; rep++) { if (!plan->CanInitPrimitive(rep->AccessType)) { JITDUMP(" Unsupported init of %s %s. Will init as struct and read back.\n", - varTypeName(rep->AccessType), rep->Description); + varTypeName(rep->AccessType), rep->Description); // We will need to read this one back after initing the struct. rep->NeedsWriteBack = false; @@ -2116,8 +2124,8 @@ class ReplaceVisitor : public GenTreeVisitor { if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) { - JITDUMP(" Source replacement V%02u (%s) is stale. Will read it back before copy.\n", - srcRep->LclNum, srcRep->Description); + JITDUMP(" Source replacement V%02u (%s) is stale. Will read it back before copy.\n", srcRep->LclNum, + srcRep->Description); assert(srcLcl != nullptr); statements->AddStatement(CreateReadBack(srcLcl->GetLclNum(), *srcRep)); @@ -2158,7 +2166,8 @@ class ReplaceVisitor : public GenTreeVisitor { plan->CopyBetweenReplacements(dstRep->LclNum, srcRep->LclNum, dstRep->Offset - dstBaseOffs, dstRep->AccessType); - JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, + srcRep->Description); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; @@ -2172,7 +2181,7 @@ class ReplaceVisitor : public GenTreeVisitor // iteration of the loop. statements->AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcRep)); JITDUMP(" Partial overlap of V%02u (%s) <- V%02u (%s). 
Will read source back before copy\n", - dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); + dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); srcRep++; continue; } @@ -2191,11 +2200,10 @@ class ReplaceVisitor : public GenTreeVisitor LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); if (dsc->lvType == dstRep->AccessType) { - plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, offs, - dstRep->AccessType); + plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, offs, dstRep->AccessType); JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason); dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; + dstRep->NeedsReadBack = false; dstRep++; continue; } @@ -2226,9 +2234,9 @@ class ReplaceVisitor : public GenTreeVisitor LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); if (dsc->lvType == srcRep->AccessType) { - plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, offs, - srcRep->AccessType); - JITDUMP(" V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum, srcRep->Description); + plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, offs, srcRep->AccessType); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum, + srcRep->Description); srcRep++; continue; } diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 46a411a3991dfe..c4b2832994a768 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -265,7 +265,10 @@ class FixedBitVect static FixedBitVect* bitVectInit(UINT size, Compiler* comp); // bitVectGetSize() - Get number of bits in the bit set - UINT bitVectGetSize() { return bitVectSize; } + UINT bitVectGetSize() + { + return bitVectSize; + } // bitVectSet() - Sets the given bit void bitVectSet(UINT bitNum); From 1eef1d91e54f4a443a3652475226bfa9b788d569 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 28 Apr 2023 15:31:21 +0200 Subject: [PATCH 18/37] Fix --- src/coreclr/jit/jitstd/vector.h | 1 + src/coreclr/jit/promotion.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index e73314150ee476..cc0afdc20d27ff 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -479,6 +479,7 @@ typename vector::iterator assert(last.m_pElem >= m_pArray); assert(first.m_pElem <= m_pArray + m_nSize); assert(last.m_pElem <= m_pArray + m_nSize); + assert(last.m_pElem >= first.m_pElem); pointer fptr = first.m_pElem; pointer lptr = last.m_pElem; diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 07a4abb6539e88..ec1c887612b620 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1689,7 +1689,7 @@ class DecompositionPlan GenTree* dst; if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - GenTreeLclVarCommon* dstLcl = dst->AsLclVarCommon(); + GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon(); dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); @@ -1704,7 +1704,7 @@ class DecompositionPlan GenTree* src; if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - GenTreeLclVarCommon* srcLcl = src->AsLclVarCommon(); + GenTreeLclVarCommon* srcLcl = m_src->AsLclVarCommon(); src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType, 
srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); From 9578acd62c845e24ea175a601fd17f944ce7c144 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 29 Apr 2023 01:44:28 +0200 Subject: [PATCH 19/37] Move some logging statements --- src/coreclr/jit/promotion.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index ec1c887612b620..5cefc29575446b 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1160,7 +1160,7 @@ class DecompositionPlan if (dstLayout->IsBlockLayout()) { significantPadding = true; - JITDUMP(" Block op has significant padding due to block layout\n"); + JITDUMP(" Block op has significant padding due to block layout\n"); } else { @@ -1168,17 +1168,17 @@ class DecompositionPlan if ((attribs & CORINFO_FLG_INDEXABLE_FIELDS) != 0) { significantPadding = true; - JITDUMP(" Block op has significant padding due to indexable fields\n"); + JITDUMP(" Block op has significant padding due to indexable fields\n"); } else if ((attribs & CORINFO_FLG_DONT_DIG_FIELDS) != 0) { significantPadding = true; - JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); + JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); } else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) { significantPadding = true; - JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); + JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); } else { @@ -1250,7 +1250,7 @@ class DecompositionPlan if (m_compiler->verbose) { - printf(" Remainder: "); + printf(" Remainder: "); segments.Dump(); printf("\n"); } @@ -1296,7 +1296,7 @@ class DecompositionPlan StructSegments remainder = ComputeRemainder(); if (remainder.IsEmpty()) { - JITDUMP(" => remainder strategy: do nothing\n"); + JITDUMP(" => Remainder strategy: do nothing\n"); return RemainderStrategy(RemainderStrategy::NoRemainder); } From be5b80989f8582217dc5e2b50f651ccdf648ed12 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 29 Apr 2023 01:44:41 +0200 Subject: [PATCH 20/37] Add missing address base offset for primitive remainder --- src/coreclr/jit/promotion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 5cefc29575446b..577dc0c9ce836f 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1697,7 +1697,7 @@ class DecompositionPlan else { dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, - grabAddr(remainderStrategy.PrimitiveOffset)); + grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset)); PropagateIndirFlags(dst, indirFlags); } @@ -1712,7 +1712,7 @@ class DecompositionPlan else { src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, - grabAddr(remainderStrategy.PrimitiveOffset)); + grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset)); PropagateIndirFlags(src, indirFlags); } From d84ecf0ec1344b93c5e5ffe0fdcc7db4fdefd680 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 29 Apr 2023 02:14:08 +0200 Subject: [PATCH 21/37] Switch to decimal offsets --- src/coreclr/jit/promotion.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp 
b/src/coreclr/jit/promotion.cpp index 577dc0c9ce836f..3b97d521e65367 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -968,7 +968,7 @@ class StructSegments const char* sep = ""; for (const Segment& segment : m_segments) { - printf("%s[%03x..%03x)", sep, segment.Start, segment.End); + printf("%s[%03u..%03u)", sep, segment.Start, segment.End); sep = " "; } } @@ -1339,7 +1339,7 @@ class DecompositionPlan { if (!IsInit() || CanInitPrimitive(primitiveType)) { - JITDUMP(" => remainder strategy: %s at %03x\n", varTypeName(primitiveType), segment.Start); + JITDUMP(" => remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); } else @@ -2141,7 +2141,7 @@ class ReplaceVisitor : public GenTreeVisitor // Write it directly to the destination struct local. unsigned offs = srcRep->Offset - srcBaseOffs; plan->CopyFromReplacement(srcRep->LclNum, offs, srcRep->AccessType); - JITDUMP(" dst+%03x <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); + JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); srcRep++; continue; } @@ -2152,7 +2152,7 @@ class ReplaceVisitor : public GenTreeVisitor // Read it directly from the source struct local. unsigned offs = dstRep->Offset - dstBaseOffs; plan->CopyToReplacement(dstRep->LclNum, offs, dstRep->AccessType); - JITDUMP(" V%02u (%s) <- src+%03x\n", dstRep->LclNum, dstRep->Description, offs); + JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; dstRep++; @@ -2215,7 +2215,7 @@ class ReplaceVisitor : public GenTreeVisitor // directly to the destination's struct local and mark the // overlapping fields as needing read back to avoid this DNER. plan->CopyToReplacement(dstRep->LclNum, offs, dstRep->AccessType); - JITDUMP(" V%02u (%s) <- src+%03x\n", dstRep->LclNum, dstRep->Description, offs); + JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; dstRep++; @@ -2244,7 +2244,7 @@ class ReplaceVisitor : public GenTreeVisitor } plan->CopyFromReplacement(srcRep->LclNum, offs, srcRep->AccessType); - JITDUMP(" dst+%03x <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); + JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); srcRep++; } } From d882e09a6a0fcd9f58854f56723e0187dd8c2bea Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 30 Apr 2023 00:21:03 +0200 Subject: [PATCH 22/37] Avoid unnecessary write barriers, some cleanup --- src/coreclr/jit/promotion.cpp | 299 +++++++++++++++++++++++----------- 1 file changed, 201 insertions(+), 98 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 3b97d521e65367..e127f554affa57 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -179,6 +179,52 @@ struct Replacement } }; +//------------------------------------------------------------------------ +// CreateWriteBack: +// Create IR that writes a replacement local's value back to its struct local: +// +// ASG +// LCL_FLD int V00 [+4] +// LCL_VAR int V01 +// +// Parameters: +// structLclNum - Struct local +// replacement - Information about the replacement +// +// Returns: +// IR nodes. 
+// +static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) +{ + GenTree* dst = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); + GenTree* src = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); + GenTree* asg = compiler->gtNewAssignNode(dst, src); + return asg; +} + +//------------------------------------------------------------------------ +// CreateReadBack: +// Create IR that reads a replacement local's value back from its struct local: +// +// ASG +// LCL_VAR int V01 +// LCL_FLD int V00 [+4] +// +// Parameters: +// structLclNum - Struct local +// replacement - Information about the replacement +// +// Returns: +// IR nodes. +// +static GenTree* CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) +{ + GenTree* dst = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); + GenTree* src = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); + GenTree* asg = compiler->gtNewAssignNode(dst, src); + return asg; +} + enum class AccessKindFlags : uint32_t { None = 0, @@ -982,10 +1028,12 @@ class DecompositionPlan { struct Entry { - unsigned ToLclNum; - unsigned FromLclNum; - unsigned Offset; - var_types Type; + unsigned ToLclNum; + Replacement* ToReplacement; + unsigned FromLclNum; + Replacement* FromReplacement; + unsigned Offset; + var_types Type; }; Compiler* m_compiler; @@ -1006,20 +1054,53 @@ class DecompositionPlan //------------------------------------------------------------------------ // CopyBetweenReplacements: - // Add an entry specifying to copy from a local into another local. + // Add an entry specifying to copy from a replacement into another replacement. // // Parameters: - // dstLcl - The destination local to write. - // srcLcl - The source local. + // dstRep - The destination replacement. + // srcRep - The source replacement. + // offset - The offset this covers in the struct copy. + // + void CopyBetweenReplacements(Replacement* dstRep, Replacement* srcRep, unsigned offset) + { + m_entries.Push(Entry{dstRep->LclNum, dstRep, srcRep->LclNum, srcRep, offset, dstRep->AccessType}); + } + + //------------------------------------------------------------------------ + // CopyBetweenReplacements: + // Add an entry specifying to copy from a promoted field into a replacement. + // + // Parameters: + // dstRep - The destination replacement. + // srcLcl - Local number of regularly promoted source field. + // offset - The offset this covers in the struct copy. + // + // Remarks: + // Used when the source local is a regular promoted field. + // - void CopyBetweenReplacements(unsigned dstLcl, unsigned srcLcl, unsigned offset, var_types type) + void CopyBetweenReplacements(Replacement* dstRep, unsigned srcLcl, unsigned offset) { - m_entries.Push(Entry{dstLcl, srcLcl, offset, type}); + m_entries.Push(Entry{dstRep->LclNum, dstRep, srcLcl, nullptr, offset, dstRep->AccessType}); + } + + //------------------------------------------------------------------------ + // CopyBetweenReplacements: + // Add an entry specifying to copy from a replacement into a promoted field. + // + // Parameters: + // dstLcl - Local number of regularly promoted destination field. + // srcRep - The source replacement. 
+ // offset - The offset this covers in the struct copy. + // + // Remarks: + // Used when the destination local is a regular promoted field. + // + void CopyBetweenReplacements(unsigned dstLcl, Replacement* srcRep, unsigned offset) + { + m_entries.Push(Entry{dstLcl, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); } //------------------------------------------------------------------------ @@ -1031,9 +1112,23 @@ class DecompositionPlan // offset - The relative offset into the source. // type - The type of copy. // - void CopyToReplacement(unsigned dstLcl, unsigned offset, var_types type) + void CopyToReplacement(Replacement* dstRep, unsigned offset) + { + m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); + } + + //------------------------------------------------------------------------ + // CopyFromReplacement: + // Add an entry specifying to copy from a replacement local into the destination. + // + // Parameters: + // srcRep - The source replacement to copy from. + // offset - The relative offset into the destination to write. + // + void CopyFromReplacement(Replacement* srcRep, unsigned offset) { - m_entries.Push(Entry{dstLcl, BAD_VAR_NUM, offset, type}); + m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); } //------------------------------------------------------------------------ @@ -1047,7 +1142,7 @@ class DecompositionPlan // void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type) { - m_entries.Push(Entry{BAD_VAR_NUM, srcLcl, offset, type}); + m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcLcl, nullptr, offset, type}); } //------------------------------------------------------------------------ @@ -1060,9 +1155,9 @@ class DecompositionPlan // offset - The offset covered by this initialization. // type - The type to initialize. // - void InitReplacement(unsigned dstLcl, unsigned offset, var_types type) + void InitReplacement(Replacement* dstRep, unsigned offset) { - m_entries.Push(Entry{dstLcl, BAD_VAR_NUM, offset, type}); + m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); } //------------------------------------------------------------------------ @@ -1301,12 +1396,10 @@ class DecompositionPlan } StructSegments::Segment segment; - // See if we can "plug the hole" with a single primitive. For LCL_VAR - // destinations do not do this as it will essentially add a use of the - // local due to the partial def -- so it is better to prefer the full - // def that DCE might be able to get rid of. + // See if we can "plug the hole" with a single primitive. + // TODO-CQ: Why does doing this for LCL_VAR result in so many regressions? // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. 
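// Worked example for the single-segment case handled below (sizes are
// illustrative): a 12-byte struct with promoted fields at [000..004) and
// [008..012) leaves the single remainder segment [004..008), which can be
// plugged with one TYP_INT copy instead of retaining a full block op. A
// condensed sketch of the size-to-primitive mapping the switch below
// implements (a simplification; the real code also refines pointer-sized
// holes to the layout's GC pointer type when suitably aligned):
//
//     var_types RemainderPrimitiveType(unsigned size)
//     {
//         switch (size)
//         {
//             case 1:
//                 return TYP_UBYTE;
//             case 2:
//                 return TYP_USHORT;
//     #ifdef TARGET_64BIT
//             case 4: // on 32-bit targets this would collide with TARGET_POINTER_SIZE
//                 return TYP_INT;
//     #endif
//             case TARGET_POINTER_SIZE:
//                 return TYP_I_IMPL;
//             default:
//                 return TYP_UNDEF; // no single primitive covers the hole
//         }
//     }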
- if (remainder.IsSingleSegment(&segment) && !m_dst->OperIs(GT_LCL_VAR)) + if (remainder.IsSingleSegment(&segment)) { var_types primitiveType = TYP_UNDEF; unsigned size = segment.End - segment.Start; @@ -1339,17 +1432,17 @@ class DecompositionPlan { if (!IsInit() || CanInitPrimitive(primitiveType)) { - JITDUMP(" => remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); + JITDUMP(" => Remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); } else { - JITDUMP("Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); + JITDUMP(" Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); } } } - JITDUMP(" => remainder strategy: retain a full block op\n"); + JITDUMP(" => Remainder strategy: retain a full block op\n"); return RemainderStrategy(RemainderStrategy::FullBlock); } @@ -1466,6 +1559,33 @@ class DecompositionPlan RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); + // If the remainder is a full block and is going to incur write barrier + // then avoid incurring multiple write barriers for each source + // replacement that is a GC pointer -- write them back to the struct + // first instead. + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK, GT_FIELD) && + m_dst->GetLayout(m_compiler)->HasGCPtr()) + { + for (int i = 0; i < m_entries.Height(); i++) + { + const Entry& entry = m_entries.BottomRef(i); + // TODO: Double check that TYP_BYREF do not incur any write barriers. + if ((entry.FromReplacement != nullptr) && (entry.Type == TYP_REF)) + { + Replacement* rep = entry.FromReplacement; + if (rep->NeedsWriteBack) + { + statements->AddStatement( + CreateWriteBack(m_compiler, m_src->AsLclVarCommon()->GetLclNum(), *rep)); + JITDUMP(" Will write back V%02u (%s) to avoid an additional write barrier\n", rep->LclNum, + rep->Description); + + rep->NeedsWriteBack = false; + } + } + } + } + GenTree* addr = nullptr; unsigned addrBaseOffs = 0; GenTreeFlags indirFlags = GTF_EMPTY; @@ -1497,10 +1617,18 @@ class DecompositionPlan if (addr != nullptr) { - numAddrUses += m_entries.Height(); + for (int i = 0; i < m_entries.Height(); i++) + { + if (!IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) + { + numAddrUses++; + } + } if (remainderStrategy.Type != RemainderStrategy::NoRemainder) + { numAddrUses++; + } } bool needsNullCheck = false; @@ -1510,12 +1638,20 @@ class DecompositionPlan { case RemainderStrategy::NoRemainder: case RemainderStrategy::Primitive: + needsNullCheck = true; // See if our first indirection will subsume the null check (usual case). 
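// A condensed restatement of the GC write-back decision made earlier in this
// hunk (a sketch with an assumed simplified signature; the real code walks
// m_entries and consults the remainder strategy):
//
//     bool ShouldWriteBackBeforeBlockCopy(bool remainderIsFullBlock, bool dstIsIndir, bool dstHasGCPtrs, var_types repType)
//     {
//         // Storing each TYP_REF replacement straight into a GC-tracked
//         // destination would emit one checked write barrier per field.
//         // Writing such replacements back to the source struct local first
//         // (stores to locals need no barrier) lets the retained full block
//         // copy cover all GC slots with its single bulk-barrier helper.
//         return remainderIsFullBlock && dstIsIndir && dstHasGCPtrs && (repType == TYP_REF);
//     }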
- assert(m_entries.Height() > 0); - const Entry& entry = m_entries.BottomRef(0); + for (int i = 0; i < m_entries.Height(); i++) + { + if (IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) + { + continue; + } + + const Entry& entry = m_entries.BottomRef(0); - assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); - needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); + needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + } break; } } @@ -1618,6 +1754,14 @@ class DecompositionPlan { const Entry& entry = m_entries.BottomRef(i); + if (IsHandledByRemainder(entry, remainderStrategy)) + { + JITDUMP(" Skipping dst+%03u <- V%02u (%s); it is up-to-date in its struct local and will be handled " + "as part of the remainder\n", + entry.Offset, entry.FromReplacement->LclNum, entry.FromReplacement->Description); + continue; + } + GenTree* dst; if (entry.ToLclNum != BAD_VAR_NUM) { @@ -1722,6 +1866,14 @@ class DecompositionPlan assert(numAddrUses == 0); } + bool IsHandledByRemainder(const Entry& entry, const RemainderStrategy& remainderStrategy) + { + // If the remainder is being handled as a full block copy and this + // replacement is up-to-date in its struct local then we can skip + // copying the replacement explicitly. + return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && + !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); + } //------------------------------------------------------------------------ // GetPropagatedIndirFlags: // Convert GT_BLK or GT_FIELD indir flags into flags that should be @@ -1945,7 +2097,7 @@ class ReplaceVisitor : public GenTreeVisitor // operation. // We accomplish this by an initial write back, the struct copy, followed by a later read back. // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. - result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstFirstRep)); + result.AddStatement(CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstFirstRep)); dstFirstRep->NeedsWriteBack = false; } @@ -1965,7 +2117,7 @@ class ReplaceVisitor : public GenTreeVisitor "read-backs are " "necessary.\n", dstLastRep->LclNum, dstLastRep->Description); - result.AddStatement(CreateWriteBack(dstLcl->GetLclNum(), *dstLastRep)); + result.AddStatement(CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstLastRep)); dstLastRep->NeedsWriteBack = false; } @@ -1989,7 +2141,7 @@ class ReplaceVisitor : public GenTreeVisitor "*** Block operation partially overlaps with source V%02u (%s). 
Write back is necessary.\n", srcFirstRep->LclNum, srcFirstRep->Description); - result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcFirstRep)); + result.AddStatement(CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcFirstRep)); srcFirstRep->NeedsWriteBack = false; } @@ -2008,7 +2160,7 @@ class ReplaceVisitor : public GenTreeVisitor "necessary.\n", srcLastRep->LclNum, srcLastRep->Description); - result.AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcLastRep)); + result.AddStatement(CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcLastRep)); srcLastRep->NeedsWriteBack = false; } @@ -2078,7 +2230,7 @@ class ReplaceVisitor : public GenTreeVisitor } JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); - plan->InitReplacement(rep->LclNum, rep->Offset - dst->GetLclOffs(), rep->AccessType); + plan->InitReplacement(rep, rep->Offset - dst->GetLclOffs()); rep->NeedsWriteBack = true; rep->NeedsReadBack = false; } @@ -2128,7 +2280,7 @@ class ReplaceVisitor : public GenTreeVisitor srcRep->Description); assert(srcLcl != nullptr); - statements->AddStatement(CreateReadBack(srcLcl->GetLclNum(), *srcRep)); + statements->AddStatement(CreateReadBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); srcRep->NeedsReadBack = false; assert(!srcRep->NeedsWriteBack); } @@ -2140,7 +2292,7 @@ class ReplaceVisitor : public GenTreeVisitor // This source replacement ends before the next destination replacement starts. // Write it directly to the destination struct local. unsigned offs = srcRep->Offset - srcBaseOffs; - plan->CopyFromReplacement(srcRep->LclNum, offs, srcRep->AccessType); + plan->CopyFromReplacement(srcRep, offs); JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); srcRep++; continue; @@ -2151,7 +2303,7 @@ class ReplaceVisitor : public GenTreeVisitor // Destination replacement ends before the next source replacement starts. // Read it directly from the source struct local. unsigned offs = dstRep->Offset - dstBaseOffs; - plan->CopyToReplacement(dstRep->LclNum, offs, dstRep->AccessType); + plan->CopyToReplacement(dstRep, offs); JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; @@ -2164,8 +2316,7 @@ class ReplaceVisitor : public GenTreeVisitor if (((dstRep->Offset - dstBaseOffs) == (srcRep->Offset - srcBaseOffs)) && (dstRep->AccessType == srcRep->AccessType)) { - plan->CopyBetweenReplacements(dstRep->LclNum, srcRep->LclNum, dstRep->Offset - dstBaseOffs, - dstRep->AccessType); + plan->CopyBetweenReplacements(dstRep, srcRep, dstRep->Offset - dstBaseOffs); JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); @@ -2179,7 +2330,7 @@ class ReplaceVisitor : public GenTreeVisitor // Partial overlap. Write source back to the struct local. We // will handle the destination replacement in a future // iteration of the loop. - statements->AddStatement(CreateWriteBack(srcLcl->GetLclNum(), *srcRep)); + statements->AddStatement(CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); JITDUMP(" Partial overlap of V%02u (%s) <- V%02u (%s). 
Will read source back before copy\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); srcRep++; @@ -2200,7 +2351,7 @@ class ReplaceVisitor : public GenTreeVisitor LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); if (dsc->lvType == dstRep->AccessType) { - plan->CopyBetweenReplacements(dstRep->LclNum, fieldLcl, offs, dstRep->AccessType); + plan->CopyBetweenReplacements(dstRep, fieldLcl, offs); JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; @@ -2214,7 +2365,7 @@ class ReplaceVisitor : public GenTreeVisitor // DNER'ing it. Alternatively we could copy the promoted field // directly to the destination's struct local and mark the // overlapping fields as needing read back to avoid this DNER. - plan->CopyToReplacement(dstRep->LclNum, offs, dstRep->AccessType); + plan->CopyToReplacement(dstRep, offs); JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); dstRep->NeedsWriteBack = true; dstRep->NeedsReadBack = false; @@ -2234,7 +2385,7 @@ class ReplaceVisitor : public GenTreeVisitor LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); if (dsc->lvType == srcRep->AccessType) { - plan->CopyBetweenReplacements(fieldLcl, srcRep->LclNum, offs, srcRep->AccessType); + plan->CopyBetweenReplacements(fieldLcl, srcRep, offs); JITDUMP(" V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum, srcRep->Description); srcRep++; @@ -2243,7 +2394,7 @@ class ReplaceVisitor : public GenTreeVisitor } } - plan->CopyFromReplacement(srcRep->LclNum, offs, srcRep->AccessType); + plan->CopyFromReplacement(srcRep, offs); JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); srcRep++; } @@ -2511,7 +2662,8 @@ class ReplaceVisitor : public GenTreeVisitor } else if (rep.NeedsReadBack) { - *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), CreateReadBack(lclNum, rep), *use); + *use = + m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), CreateReadBack(m_compiler, lclNum, rep), *use); rep.NeedsReadBack = false; // TODO-CQ: Local copy prop does not take into account that the @@ -2602,7 +2754,7 @@ class ReplaceVisitor : public GenTreeVisitor if (rep.NeedsWriteBack) { GenTreeOp* comma = - m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), CreateWriteBack(lcl, rep), *use); + m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), CreateWriteBack(m_compiler, lcl, rep), *use); *use = comma; use = &comma->gtOp2; @@ -2614,52 +2766,6 @@ class ReplaceVisitor : public GenTreeVisitor } } - //------------------------------------------------------------------------ - // CreateWriteBack: - // Create IR that writes a replacement local's value back to its struct local: - // - // ASG - // LCL_FLD int V00 [+4] - // LCL_VAR int V01 - // - // Parameters: - // structLclNum - Struct local - // replacement - Information about the replacement - // - // Returns: - // IR nodes. 
- // - GenTree* CreateWriteBack(unsigned structLclNum, const Replacement& replacement) - { - GenTree* dst = m_compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); - GenTree* src = m_compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); - return asg; - } - - //------------------------------------------------------------------------ - // CreateReadBack: - // Create IR that reads a replacement local's value back from its struct local: - // - // ASG - // LCL_VAR int V01 - // LCL_FLD int V00 [+4] - // - // Parameters: - // structLclNum - Struct local - // replacement - Information about the replacement - // - // Returns: - // IR nodes. - // - GenTree* CreateReadBack(unsigned structLclNum, const Replacement& replacement) - { - GenTree* dst = m_compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); - GenTree* src = m_compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); - return asg; - } - //------------------------------------------------------------------------ // MarkForReadBack: // Mark that replacements in the specified struct local need to be read @@ -2813,7 +2919,7 @@ PhaseStatus Promotion::Run() JITDUMP("Reading back replacement V%02u.[%03u..%03u) -> V%02u near the end of " FMT_BB ":\n", i, rep.Offset, rep.Offset + genTypeSize(rep.AccessType), rep.LclNum, bb->bbNum); - GenTree* readBack = replacer.CreateReadBack(i, rep); + GenTree* readBack = CreateReadBack(m_compiler, i, rep); Statement* stmt = m_compiler->fgNewStmtFromTree(readBack); DISPSTMT(stmt); m_compiler->fgInsertStmtNearEnd(bb, stmt); @@ -2869,12 +2975,9 @@ void Promotion::InsertInitialReadBack(unsigned lclNum, { for (unsigned i = 0; i < replacements.size(); i++) { - const Replacement& rep = replacements[i]; - - GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); - InsertInitStatement(prevStmt, asg); + const Replacement& rep = replacements[i]; + GenTree* readBack = CreateReadBack(m_compiler, lclNum, rep); + InsertInitStatement(prevStmt, readBack); } } From 6d46dd2dc84805563f475ad5a156b8e6f798e0c3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 2 May 2023 21:43:05 +0200 Subject: [PATCH 23/37] Enhance some logging --- src/coreclr/jit/promotion.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e127f554affa57..ccb1b16af69deb 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -366,6 +366,8 @@ class LocalUses return; } + JITDUMP("Picking promotions for V%02u\n", lclNum); + assert(*replacements == nullptr); for (size_t i = 0; i < m_accesses.size(); i++) { @@ -401,6 +403,8 @@ class LocalUses (*replacements)->push_back(Replacement(access.Offset, access.AccessType, newLcl DEBUGARG(bufp))); } + + JITDUMP("\n"); } //------------------------------------------------------------------------ @@ -485,28 +489,28 @@ class LocalUses countOverlappedCallsWtd + countOverlappedReturnsWtd + countOverlappedAssignmentSourceWtd; costWith += countWriteBacksWtd * writeBackCost; - JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); - JITDUMP(" Single write-back cost: " FMT_WT "\n", 
writeBackCost); - JITDUMP(" Write backs: " FMT_WT "\n", countWriteBacksWtd); - JITDUMP(" Read backs: " FMT_WT "\n", countReadBacksWtd); - JITDUMP(" Cost with: " FMT_WT "\n", costWith); - JITDUMP(" Cost without: " FMT_WT "\n", costWithout); + JITDUMP(" Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); + JITDUMP(" Single write-back cost: " FMT_WT "\n", writeBackCost); + JITDUMP(" Write backs: " FMT_WT "\n", countWriteBacksWtd); + JITDUMP(" Read backs: " FMT_WT "\n", countReadBacksWtd); + JITDUMP(" Cost with: " FMT_WT "\n", costWith); + JITDUMP(" Cost without: " FMT_WT "\n", costWithout); if (costWith < costWithout) { - JITDUMP(" Promoting replacement\n"); + JITDUMP(" Promoting replacement\n\n"); return true; } #ifdef DEBUG if (comp->compStressCompile(Compiler::STRESS_PHYSICAL_PROMOTION_COST, 25)) { - JITDUMP(" Promoting replacement due to stress\n"); + JITDUMP(" Promoting replacement due to stress\n\n"); return true; } #endif - JITDUMP(" Disqualifying replacement\n"); + JITDUMP(" Disqualifying replacement\n\n"); return false; } @@ -2851,7 +2855,7 @@ PhaseStatus Promotion::Run() } #endif - // Pick promotion based on the use information we just collected. + // Pick promotions based on the use information we just collected. bool anyReplacements = false; jitstd::vector** replacements = new (m_compiler, CMK_Promotion) jitstd::vector*[m_compiler->lvaCount]{}; From d252517984bf8cb2515b6a348450217deed91b54 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 1 May 2023 16:18:13 +0200 Subject: [PATCH 24/37] Allow DCE of partial defs --- src/coreclr/jit/compiler.h | 3 ++- src/coreclr/jit/liveness.cpp | 17 ++++++++++------- src/coreclr/jit/ssabuilder.cpp | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f96901a826ae87..834e7503f3ec40 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4865,7 +4865,7 @@ class Compiler return !opts.MinOpts() || m_pLinearScan->willEnregisterLocalVars(); } - void fgLocalVarLiveness(); + void fgLocalVarLiveness(bool partielDefsAreUses = false); void fgLocalVarLivenessInit(); @@ -9264,6 +9264,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool fgLocalVarLivenessDone; // Note that this one is used outside of debug. bool fgLocalVarLivenessChanged; + bool fgLocalVarLivenessPartialDefsAreUses; bool fgIsDoingEarlyLiveness; bool fgDidEarlyLiveness; bool compPostImportationCleanupDone; diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 0e415743f561db..709267f053c4c4 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -36,7 +36,8 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) } const bool isDef = (tree->gtFlags & GTF_VAR_DEF) != 0; - const bool isUse = !isDef || ((tree->gtFlags & GTF_VAR_USEASG) != 0); + bool isFullDef = isDef && ((tree->gtFlags & GTF_VAR_USEASG) == 0); + const bool isUse = fgLocalVarLivenessPartialDefsAreUses ? !isFullDef : !isDef; if (varDsc->lvTracked) { @@ -60,7 +61,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex); } - if (isDef) + if (fgLocalVarLivenessPartialDefsAreUses ? isDef : isFullDef) { // This is a def, add it to the set of defs. VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex); @@ -106,7 +107,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) } // For pure defs (i.e. 
not an "update" def which is also a use), add to the (all) def set. - if (!isUse) + if (isFullDef) { assert(isDef); VarSetOps::UnionD(this, fgCurDefSet, bitMask); @@ -122,7 +123,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) } /*****************************************************************************/ -void Compiler::fgLocalVarLiveness() +void Compiler::fgLocalVarLiveness(bool partialDefsAreUses) { #ifdef DEBUG if (verbose) @@ -136,6 +137,8 @@ void Compiler::fgLocalVarLiveness() } #endif // DEBUG + fgLocalVarLivenessPartialDefsAreUses = partialDefsAreUses; + // Init liveness data structures. fgLocalVarLivenessInit(); @@ -490,7 +493,7 @@ void Compiler::fgPerBlockLocalVarLiveness() // qmark arms. for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - bool isUse = ((lcl->gtFlags & GTF_VAR_DEF) == 0) || ((lcl->gtFlags & GTF_VAR_USEASG) != 0); + bool isUse = ((lcl->gtFlags & GTF_VAR_DEF) == 0); // || ((lcl->gtFlags & GTF_VAR_USEASG) != 0); // We can still handle the pure def at the top level. bool conditional = lcl != dst; if (isUse || !conditional) @@ -1876,7 +1879,7 @@ void Compiler::fgComputeLife(VARSET_TP& life, break; } - if (isUse && !storeRemoved) + if (fgLocalVarLivenessPartialDefsAreUses && isUse && !storeRemoved) { // SSA and VN treat "partial definitions" as true uses, so for this // front-end liveness pass we must add them to the live set in case @@ -2734,7 +2737,7 @@ void Compiler::fgInterBlockLocalVarLiveness() for (GenTree* cur = stmt->GetTreeListEnd(); cur != nullptr;) { assert(cur->OperIsAnyLocal()); - bool isDef = ((cur->gtFlags & GTF_VAR_DEF) != 0) && ((cur->gtFlags & GTF_VAR_USEASG) == 0); + bool isDef = ((cur->gtFlags & GTF_VAR_DEF) != 0); // && ((cur->gtFlags & GTF_VAR_USEASG) == 0); bool conditional = cur != dst; // Ignore conditional defs that would otherwise // (incorrectly) interfere with liveness in other diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 2eb9dddbd927b2..14faf81920e01e 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1563,7 +1563,7 @@ void SsaBuilder::Build() EndPhase(PHASE_BUILD_SSA_DOMS); // Compute liveness on the graph. - m_pCompiler->fgLocalVarLiveness(); + m_pCompiler->fgLocalVarLiveness(true); EndPhase(PHASE_BUILD_SSA_LIVENESS); m_pCompiler->optRemoveRedundantZeroInits(); From 2afbf82ae13af4264e20294c6be354b79d0e6458 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 2 May 2023 21:43:42 +0200 Subject: [PATCH 25/37] Hack for regex case --- src/coreclr/jit/promotion.cpp | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index ccb1b16af69deb..4eeffad51acdae 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -349,6 +349,47 @@ class LocalUses } } + bool matchGlob(const char* pattern, const char* patternEnd, const char* str) + { + // Invariant: [patternStart..backtrackPattern) matches [stringStart..backtrackStr) + const char* backtrackPattern = nullptr; + const char* backtrackStr = nullptr; + + while (true) + { + if (pattern == patternEnd) + { + if (*str == '\0') + return true; + } + else if (*pattern == '*') + { + backtrackPattern = ++pattern; + backtrackStr = str; + continue; + } + else if (*str == '\0') + { + // No match since pattern needs at least one char in remaining cases. 
+ } + else if ((*pattern == '?') || (*pattern == *str)) + { + pattern++; + str++; + continue; + } + + // In this case there was no match, see if we can backtrack to a wild + // card and consume one more character from the string. + if ((backtrackPattern == nullptr) || (*backtrackStr == '\0')) + return false; + + // Consume one more character for the wildcard. + pattern = backtrackPattern; + str = ++backtrackStr; + } + } + //------------------------------------------------------------------------ // PickPromotions: // Pick specific replacements to make for this struct local after a set @@ -510,6 +551,13 @@ class LocalUses } #endif + const char* glob = "System.Text.RegularExpressions.CompiledRegexRunner:Regex*_TryFindNextPossibleStartingPosition*"; + if ((lclNum == 2) && (access.Offset == 0) && (access.AccessType == TYP_BYREF) && matchGlob(glob, glob + strlen(glob), comp->info.compFullName)) + { + JITDUMP(" Promoting replacement due to weird circumstances\n\n"); + return true; + } + JITDUMP(" Disqualifying replacement\n\n"); return false; } From 846ce0d82a54e72a18598f374bda876a45a9e265 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 2 May 2023 21:44:26 +0200 Subject: [PATCH 26/37] Revert "Hack for regex case" This reverts commit 2afbf82ae13af4264e20294c6be354b79d0e6458. --- src/coreclr/jit/promotion.cpp | 48 ----------------------------------- 1 file changed, 48 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 4eeffad51acdae..ccb1b16af69deb 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -349,47 +349,6 @@ class LocalUses } } - bool matchGlob(const char* pattern, const char* patternEnd, const char* str) - { - // Invariant: [patternStart..backtrackPattern) matches [stringStart..backtrackStr) - const char* backtrackPattern = nullptr; - const char* backtrackStr = nullptr; - - while (true) - { - if (pattern == patternEnd) - { - if (*str == '\0') - return true; - } - else if (*pattern == '*') - { - backtrackPattern = ++pattern; - backtrackStr = str; - continue; - } - else if (*str == '\0') - { - // No match since pattern needs at least one char in remaining cases. - } - else if ((*pattern == '?') || (*pattern == *str)) - { - pattern++; - str++; - continue; - } - - // In this case there was no match, see if we can backtrack to a wild - // card and consume one more character from the string. - if ((backtrackPattern == nullptr) || (*backtrackStr == '\0')) - return false; - - // Consume one more character for the wildcard. - pattern = backtrackPattern; - str = ++backtrackStr; - } - } - //------------------------------------------------------------------------ // PickPromotions: // Pick specific replacements to make for this struct local after a set @@ -551,13 +510,6 @@ class LocalUses } #endif - const char* glob = "System.Text.RegularExpressions.CompiledRegexRunner:Regex*_TryFindNextPossibleStartingPosition*"; - if ((lclNum == 2) && (access.Offset == 0) && (access.AccessType == TYP_BYREF) && matchGlob(glob, glob + strlen(glob), comp->info.compFullName)) - { - JITDUMP(" Promoting replacement due to weird circumstances\n\n"); - return true; - } - JITDUMP(" Disqualifying replacement\n\n"); return false; } From 0a9481ecea4f5ef1fefa7ba9a9149c5bcf29cd74 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 2 May 2023 21:44:28 +0200 Subject: [PATCH 27/37] Revert "Allow DCE of partial defs" This reverts commit d252517984bf8cb2515b6a348450217deed91b54. 
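
For reference, the classification this revert removes can be summarized in a
small standalone sketch (this is not JIT code; VAR_DEF and VAR_USEASG below
are hypothetical stand-ins for the real GTF_VAR_DEF and GTF_VAR_USEASG tree
flags). SSA/VN treat a partial def, which also reads the bits it does not
overwrite, as a use; the early-liveness mode being reverted did not, which is
what allowed dead partial stores to be eliminated:

    #include <cstdio>

    // Stand-ins for the real GTF_VAR_DEF / GTF_VAR_USEASG flags.
    enum : unsigned
    {
        VAR_DEF    = 1u << 0, // the local is defined here
        VAR_USEASG = 1u << 1, // ...but only partially, so untouched bits are read too
    };

    // Mirrors the expressions the reverted patch added to fgMarkUseDef.
    static bool IsUse(unsigned flags, bool partialDefsAreUses)
    {
        bool isDef     = (flags & VAR_DEF) != 0;
        bool isFullDef = isDef && ((flags & VAR_USEASG) == 0);
        return partialDefsAreUses ? !isFullDef : !isDef;
    }

    int main()
    {
        unsigned partialDef = VAR_DEF | VAR_USEASG;
        printf("SSA view:            use=%d\n", IsUse(partialDef, true));  // use=1
        printf("Early-liveness view: use=%d\n", IsUse(partialDef, false)); // use=0
    }

With partialDefsAreUses=false a dead partial store is no longer kept alive by
its own read-modify-write semantics; SsaBuilder passed true to preserve the
conservative behavior for SSA and VN.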
---
 src/coreclr/jit/compiler.h     |  3 +--
 src/coreclr/jit/liveness.cpp   | 17 +++++++----------
 src/coreclr/jit/ssabuilder.cpp |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 834e7503f3ec40..f96901a826ae87 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -4865,7 +4865,7 @@ class Compiler
         return !opts.MinOpts() || m_pLinearScan->willEnregisterLocalVars();
     }

-    void fgLocalVarLiveness(bool partialDefsAreUses = false);
+    void fgLocalVarLiveness();

     void fgLocalVarLivenessInit();

@@ -9264,7 +9264,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

     bool fgLocalVarLivenessDone; // Note that this one is used outside of debug.
     bool fgLocalVarLivenessChanged;
-    bool fgLocalVarLivenessPartialDefsAreUses;
     bool fgIsDoingEarlyLiveness;
     bool fgDidEarlyLiveness;
     bool compPostImportationCleanupDone;

diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp
index 709267f053c4c4..0e415743f561db 100644
--- a/src/coreclr/jit/liveness.cpp
+++ b/src/coreclr/jit/liveness.cpp
@@ -36,8 +36,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
     }

     const bool isDef = (tree->gtFlags & GTF_VAR_DEF) != 0;
-    bool       isFullDef = isDef && ((tree->gtFlags & GTF_VAR_USEASG) == 0);
-    const bool isUse     = fgLocalVarLivenessPartialDefsAreUses ? !isFullDef : !isDef;
+    const bool isUse = !isDef || ((tree->gtFlags & GTF_VAR_USEASG) != 0);

     if (varDsc->lvTracked)
     {
@@ -61,7 +60,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
             VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
         }

-        if (fgLocalVarLivenessPartialDefsAreUses ? isDef : isFullDef)
+        if (isDef)
         {
             // This is a def, add it to the set of defs.
             VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
@@ -107,7 +106,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
         }

         // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
-        if (isFullDef)
+        if (!isUse)
         {
             assert(isDef);
             VarSetOps::UnionD(this, fgCurDefSet, bitMask);
@@ -123,7 +122,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
 }

 /*****************************************************************************/
-void Compiler::fgLocalVarLiveness(bool partialDefsAreUses)
+void Compiler::fgLocalVarLiveness()
 {
 #ifdef DEBUG
     if (verbose)
@@ -137,8 +136,6 @@ void Compiler::fgLocalVarLiveness(bool partialDefsAreUses)
     }
 #endif // DEBUG

-    fgLocalVarLivenessPartialDefsAreUses = partialDefsAreUses;
-
     // Init liveness data structures.
     fgLocalVarLivenessInit();

@@ -493,7 +490,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
                 // qmark arms.
                 for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
                 {
-                    bool isUse = ((lcl->gtFlags & GTF_VAR_DEF) == 0); // || ((lcl->gtFlags & GTF_VAR_USEASG) != 0);
+                    bool isUse = ((lcl->gtFlags & GTF_VAR_DEF) == 0) || ((lcl->gtFlags & GTF_VAR_USEASG) != 0);
                     // We can still handle the pure def at the top level.
bool conditional = lcl != dst; if (isUse || !conditional) @@ -1879,7 +1876,7 @@ void Compiler::fgComputeLife(VARSET_TP& life, break; } - if (fgLocalVarLivenessPartialDefsAreUses && isUse && !storeRemoved) + if (isUse && !storeRemoved) { // SSA and VN treat "partial definitions" as true uses, so for this // front-end liveness pass we must add them to the live set in case @@ -2737,7 +2734,7 @@ void Compiler::fgInterBlockLocalVarLiveness() for (GenTree* cur = stmt->GetTreeListEnd(); cur != nullptr;) { assert(cur->OperIsAnyLocal()); - bool isDef = ((cur->gtFlags & GTF_VAR_DEF) != 0); // && ((cur->gtFlags & GTF_VAR_USEASG) == 0); + bool isDef = ((cur->gtFlags & GTF_VAR_DEF) != 0) && ((cur->gtFlags & GTF_VAR_USEASG) == 0); bool conditional = cur != dst; // Ignore conditional defs that would otherwise // (incorrectly) interfere with liveness in other diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 14faf81920e01e..2eb9dddbd927b2 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1563,7 +1563,7 @@ void SsaBuilder::Build() EndPhase(PHASE_BUILD_SSA_DOMS); // Compute liveness on the graph. - m_pCompiler->fgLocalVarLiveness(true); + m_pCompiler->fgLocalVarLiveness(); EndPhase(PHASE_BUILD_SSA_LIVENESS); m_pCompiler->optRemoveRedundantZeroInits(); From 5ad9291403496d24f59f89032b39d9588e76b9bb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 May 2023 19:09:34 +0200 Subject: [PATCH 28/37] Nest some classes to see if it makes github diff work properly --- src/coreclr/jit/promotion.cpp | 2172 ++++++++++++++++----------------- 1 file changed, 1086 insertions(+), 1086 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index ccb1b16af69deb..a06a818c9bfafa 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -192,7 +192,7 @@ struct Replacement // replacement - Information about the replacement // // Returns: -// IR nodes. +// IR node. // static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) { @@ -215,7 +215,7 @@ static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const // replacement - Information about the replacement // // Returns: -// IR nodes. +// IR node. // static GenTree* CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) { @@ -724,1323 +724,1323 @@ class LocalsUseVisitor : public GenTreeVisitor } }; -// Represents a list of statements; this is the result of assignment decomposition. -class DecompositionStatementList +class ReplaceVisitor : public GenTreeVisitor { - GenTree* m_head = nullptr; + Promotion* m_prom; + jitstd::vector** m_replacements; + bool m_madeChanges = false; public: - void AddStatement(GenTree* stmt) + enum + { + DoPostOrder = true, + UseExecutionOrder = true, + }; + + ReplaceVisitor(Promotion* prom, jitstd::vector** replacements) + : GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements) + { + } + + bool MadeChanges() { - stmt->gtNext = m_head; - m_head = stmt; + return m_madeChanges; + } + + void Reset() + { + m_madeChanges = false; } - GenTree* ToCommaTree(Compiler* comp) + fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - if (m_head == nullptr) + GenTree* tree = *use; + + if (tree->OperIs(GT_ASG)) + { + // If LHS of the ASG was a local then we skipped it as we don't + // want to see it until after the RHS. 
+ if (tree->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + ReplaceLocal(&tree->AsOp()->gtOp1, tree); + } + + // Assignments can be decomposed directly into accesses of the replacements. + DecomposeAssignment(use, user); + return fgWalkResult::WALK_CONTINUE; + } + + if (tree->OperIs(GT_CALL)) { - return comp->gtNewNothingNode(); + // Calls need to store replacements back into the struct local for args + // and need to restore replacements from the result (for + // retbufs/returns). + LoadStoreAroundCall((*use)->AsCall(), user); + return fgWalkResult::WALK_CONTINUE; } - GenTree* tree = m_head; + if (tree->OperIs(GT_RETURN)) + { + // Returns need to store replacements back into the struct local. + StoreBeforeReturn((*use)->AsUnOp()); + return fgWalkResult::WALK_CONTINUE; + } - for (GenTree* cur = m_head->gtNext; cur != nullptr; cur = cur->gtNext) + // Skip the local on the LHS of ASGs when we see it in the normal tree + // visit; we handle it as part of the parent ASG instead. + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && + ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree))) { - tree = comp->gtNewOperNode(GT_COMMA, TYP_VOID, cur, tree); + ReplaceLocal(use, user); + return fgWalkResult::WALK_CONTINUE; } - return tree; + return fgWalkResult::WALK_CONTINUE; } -}; -// Represents significant segments of a struct operation. -// -// Essentially a segment tree (but not stored as a tree) that supports boolean -// Add/Subtract operations of segments. Used to compute the remainder after -// replacements have been handled as part of a decomposed block operation. -class StructSegments -{ -public: - struct Segment + // Represents a list of statements; this is the result of assignment decomposition. + class DecompositionStatementList { - unsigned Start = 0; - unsigned End = 0; + GenTree* m_head = nullptr; - Segment() + public: + void AddStatement(GenTree* stmt) { + stmt->gtNext = m_head; + m_head = stmt; } - Segment(unsigned start, unsigned end) : Start(start), End(end) + GenTree* ToCommaTree(Compiler* comp) { + if (m_head == nullptr) + { + return comp->gtNewNothingNode(); + } + + GenTree* tree = m_head; + + for (GenTree* cur = m_head->gtNext; cur != nullptr; cur = cur->gtNext) + { + tree = comp->gtNewOperNode(GT_COMMA, TYP_VOID, cur, tree); + } + + return tree; } + }; - bool IntersectsInclusive(const Segment& other) const + // Represents significant segments of a struct operation. + // + // Essentially a segment tree (but not stored as a tree) that supports boolean + // Add/Subtract operations of segments. Used to compute the remainder after + // replacements have been handled as part of a decomposed block operation. 
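+    //
+    // For example, Add([0..8)) followed by Add([8..12)) normalizes to the
+    // single segment [0..12); Subtract([4..8)) then splits that back into
+    // [0..4) and [8..12).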
+ class StructSegments + { + public: + struct Segment { - if (End < other.Start) + unsigned Start = 0; + unsigned End = 0; + + Segment() { - return false; } - if (other.End < Start) + Segment(unsigned start, unsigned end) : Start(start), End(end) { - return false; } - return true; - } + bool IntersectsInclusive(const Segment& other) const + { + if (End < other.Start) + { + return false; + } - bool Contains(const Segment& other) const - { - return other.Start >= Start && other.End <= End; - } + if (other.End < Start) + { + return false; + } - void Merge(const Segment& other) - { - Start = min(Start, other.Start); - End = max(End, other.End); - } - }; + return true; + } -private: - jitstd::vector m_segments; + bool Contains(const Segment& other) const + { + return other.Start >= Start && other.End <= End; + } -public: - StructSegments(CompAllocator allocator) : m_segments(allocator) - { - } + void Merge(const Segment& other) + { + Start = min(Start, other.Start); + End = max(End, other.End); + } + }; - //------------------------------------------------------------------------ - // Add: - // Add a segment to the data structure. - // - // Parameters: - // segment - The segment to add. - // - void Add(const Segment& segment) - { - size_t index = BinarySearch(m_segments, segment.Start); + private: + jitstd::vector m_segments; - if ((ssize_t)index < 0) + public: + StructSegments(CompAllocator allocator) : m_segments(allocator) { - index = ~index; } - m_segments.insert(m_segments.begin() + index, segment); - size_t endIndex; - for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++) + //------------------------------------------------------------------------ + // Add: + // Add a segment to the data structure. + // + // Parameters: + // segment - The segment to add. + // + void Add(const Segment& segment) { - if (!m_segments[index].IntersectsInclusive(m_segments[endIndex])) + size_t index = BinarySearch(m_segments, segment.Start); + + if ((ssize_t)index < 0) { - break; + index = ~index; } - m_segments[index].Merge(m_segments[endIndex]); - } + m_segments.insert(m_segments.begin() + index, segment); + size_t endIndex; + for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++) + { + if (!m_segments[index].IntersectsInclusive(m_segments[endIndex])) + { + break; + } - m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex); - } + m_segments[index].Merge(m_segments[endIndex]); + } - //------------------------------------------------------------------------ - // Subtract: - // Subtract a segment from the data structure. - // - // Parameters: - // segment - The segment to subtract. - // - void Subtract(const Segment& segment) - { - size_t index = BinarySearch(m_segments, segment.Start); - if ((ssize_t)index < 0) - { - index = ~index; - } - else - { - // Start == segment[index].End, which makes it non-interesting. - index++; + m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex); } - if (index >= m_segments.size()) + //------------------------------------------------------------------------ + // Subtract: + // Subtract a segment from the data structure. + // + // Parameters: + // segment - The segment to subtract. + // + void Subtract(const Segment& segment) { - return; - } + size_t index = BinarySearch(m_segments, segment.Start); + if ((ssize_t)index < 0) + { + index = ~index; + } + else + { + // Start == segment[index].End, which makes it non-interesting. + index++; + } - // Here we know Start < segment[index].End. 
Do they not intersect at all? - if (m_segments[index].Start >= segment.End) - { - // Does not intersect any segment. - return; - } + if (index >= m_segments.size()) + { + return; + } + + // Here we know Start < segment[index].End. Do they not intersect at all? + if (m_segments[index].Start >= segment.End) + { + // Does not intersect any segment. + return; + } - assert(m_segments[index].IntersectsInclusive(segment)); + assert(m_segments[index].IntersectsInclusive(segment)); - if (m_segments[index].Contains(segment)) - { - if (segment.Start > m_segments[index].Start) + if (m_segments[index].Contains(segment)) { - // New segment (existing.Start, segment.Start) - if (segment.End < m_segments[index].End) + if (segment.Start > m_segments[index].Start) { - m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start)); + // New segment (existing.Start, segment.Start) + if (segment.End < m_segments[index].End) + { + m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start)); + + // And new segment (segment.End, existing.End) + m_segments[index + 1].Start = segment.End; + return; + } - // And new segment (segment.End, existing.End) - m_segments[index + 1].Start = segment.End; + m_segments[index].End = segment.Start; + return; + } + if (segment.End < m_segments[index].End) + { + // New segment (segment.End, existing.End) + m_segments[index].Start = segment.End; return; } + // Full segment is being removed + m_segments.erase(m_segments.begin() + index); + return; + } + + if (segment.Start > m_segments[index].Start) + { m_segments[index].End = segment.Start; + index++; + } + + size_t endIndex = BinarySearch(m_segments, segment.End); + if ((ssize_t)endIndex >= 0) + { + m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1); return; } - if (segment.End < m_segments[index].End) + + endIndex = ~endIndex; + if (endIndex == m_segments.size()) { - // New segment (segment.End, existing.End) - m_segments[index].Start = segment.End; + m_segments.erase(m_segments.begin() + index, m_segments.end()); return; } - // Full segment is being removed - m_segments.erase(m_segments.begin() + index); - return; - } + if (segment.End > m_segments[endIndex].Start) + { + m_segments[endIndex].Start = segment.End; + } - if (segment.Start > m_segments[index].Start) - { - m_segments[index].End = segment.Start; - index++; + m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex); } - size_t endIndex = BinarySearch(m_segments, segment.End); - if ((ssize_t)endIndex >= 0) + //------------------------------------------------------------------------ + // IsEmpty: + // Check if the segment tree is empty. + // + // Returns: + // True if so. + // + bool IsEmpty() { - m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1); - return; + return m_segments.size() == 0; } - endIndex = ~endIndex; - if (endIndex == m_segments.size()) + //------------------------------------------------------------------------ + // IsSingleSegment: + // Check if the segment tree contains only a single segment, and return + // it if so. + // + // Parameters: + // result - [out] The single segment. Only valid if the method returns true. + // + // Returns: + // True if so. 
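+        //
+        // Remarks:
+        //   Used when determining the remainder strategy, to see whether the
+        //   whole remainder can be covered by a single primitive-typed copy.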
+ // + bool IsSingleSegment(Segment* result) { - m_segments.erase(m_segments.begin() + index, m_segments.end()); - return; - } + if (m_segments.size() == 1) + { + *result = m_segments[0]; + return true; + } - if (segment.End > m_segments[endIndex].Start) - { - m_segments[endIndex].Start = segment.End; + return false; } - m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex); - } - - //------------------------------------------------------------------------ - // IsEmpty: - // Check if the segment tree is empty. - // - // Returns: - // True if so. - // - bool IsEmpty() - { - return m_segments.size() == 0; - } + #ifdef DEBUG + //------------------------------------------------------------------------ + // Check: + // Validate that the data structure is normalized and that it equals a + // specific fixed bit vector. + // + // Parameters: + // vect - The bit vector + // + // Remarks: + // This validates that the internal representation is normalized (i.e. + // all adjacent intervals are merged) and that it contains an index iff + // the specified vector contains that index. + // + void Check(FixedBitVect* vect) + { + bool first = true; + unsigned last = 0; + for (const Segment& segment : m_segments) + { + assert(first || (last < segment.Start)); + assert(segment.End <= vect->bitVectGetSize()); - //------------------------------------------------------------------------ - // IsSingleSegment: - // Check if the segment tree contains only a single segment, and return - // it if so. - // - // Parameters: - // result - [out] The single segment. Only valid if the method returns true. - // - // Returns: - // True if so. - // - bool IsSingleSegment(Segment* result) - { - if (m_segments.size() == 1) - { - *result = m_segments[0]; - return true; - } + for (unsigned i = last; i < segment.Start; i++) + assert(!vect->bitVectTest(i)); - return false; - } + for (unsigned i = segment.Start; i < segment.End; i++) + assert(vect->bitVectTest(i)); -#ifdef DEBUG - //------------------------------------------------------------------------ - // Check: - // Validate that the data structure is normalized and that it equals a - // specific fixed bit vector. - // - // Parameters: - // vect - The bit vector - // - // Remarks: - // This validates that the internal representation is normalized (i.e. - // all adjacent intervals are merged) and that it contains an index iff - // the specified vector contains that index. - // - void Check(FixedBitVect* vect) - { - bool first = true; - unsigned last = 0; - for (const Segment& segment : m_segments) - { - assert(first || (last < segment.Start)); - assert(segment.End <= vect->bitVectGetSize()); + first = false; + last = segment.End; + } - for (unsigned i = last; i < segment.Start; i++) + for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++) assert(!vect->bitVectTest(i)); - - for (unsigned i = segment.Start; i < segment.End; i++) - assert(vect->bitVectTest(i)); - - first = false; - last = segment.End; } - for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++) - assert(!vect->bitVectTest(i)); - } - - //------------------------------------------------------------------------ - // Dump: - // Dump a string representation of the segment tree to stdout. - // - void Dump() - { - if (m_segments.size() == 0) - { - printf(""); - } - else + //------------------------------------------------------------------------ + // Dump: + // Dump a string representation of the segment tree to stdout. 
+        //
+        void Dump()
         {
-        if (m_segments.size() == 0)
-        {
-            printf("<empty>");
-        }
-        else
+        {
+            if (m_segments.size() == 0)
+            {
+                printf("<empty>");
+            }
+            else
         {
-            const char* sep = "";
-            for (const Segment& segment : m_segments)
+                const char* sep = "";
+                for (const Segment& segment : m_segments)
         {
-            printf("%s[%03u..%03u)", sep, segment.Start, segment.End);
-            sep = " ";
+                    printf("%s[%03u..%03u)", sep, segment.Start, segment.End);
+                    sep = " ";
         }
         }
         }
-    }
-#endif
-};
+    #endif
+    };

-// Represents a plan for decomposing a block operation into direct treatment of
-// replacement fields and the remainder.
-class DecompositionPlan
-{
-    struct Entry
+    // Represents a plan for decomposing a block operation into direct treatment of
+    // replacement fields and the remainder.
+    class DecompositionPlan
     {
-        unsigned     ToLclNum;
-        Replacement* ToReplacement;
-        unsigned     FromLclNum;
-        Replacement* FromReplacement;
-        unsigned     Offset;
-        var_types    Type;
-    };
+        struct Entry
+        {
+            unsigned     ToLclNum;
+            Replacement* ToReplacement;
+            unsigned     FromLclNum;
+            Replacement* FromReplacement;
+            unsigned     Offset;
+            var_types    Type;
+        };

-    Compiler*         m_compiler;
-    ArrayStack<Entry> m_entries;
-    GenTree*          m_dst;
-    GenTree*          m_src;
-    bool              m_srcInvolvesReplacements;
+        Compiler*         m_compiler;
+        ArrayStack<Entry> m_entries;
+        GenTree*          m_dst;
+        GenTree*          m_src;
+        bool              m_srcInvolvesReplacements;
+
+    public:
+        DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src, bool srcInvolvesReplacements)
+            : m_compiler(comp)
+            , m_entries(comp->getAllocator(CMK_Promotion))
+            , m_dst(dst)
+            , m_src(src)
+            , m_srcInvolvesReplacements(srcInvolvesReplacements)
+        {
+        }
+
+        //------------------------------------------------------------------------
+        // CopyBetweenReplacements:
+        //   Add an entry specifying to copy from a replacement into another replacement.
+        //
+        // Parameters:
+        //   dstRep - The destination replacement.
+        //   srcRep - The source replacement.
+        //   offset - The offset this covers in the struct copy.
+        //   type   - The type of copy.
+        //
+        void CopyBetweenReplacements(Replacement* dstRep, Replacement* srcRep, unsigned offset)
+        {
+            m_entries.Push(Entry{dstRep->LclNum, dstRep, srcRep->LclNum, srcRep, offset, dstRep->AccessType});
+        }
+
+        //------------------------------------------------------------------------
+        // CopyBetweenReplacements:
+        //   Add an entry specifying to copy from a promoted field into a replacement.
+        //
+        // Parameters:
+        //   dstRep - The destination replacement.
+        //   srcLcl - Local number of regularly promoted source field.
+        //   offset - The offset this covers in the struct copy.
+        //   type   - The type of copy.
+        //
+        // Remarks:
+        //   Used when the source local is a regular promoted field.
+ // + void CopyBetweenReplacements(unsigned dstLcl, Replacement* srcRep, unsigned offset) + { + m_entries.Push(Entry{dstLcl, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); + } + + //------------------------------------------------------------------------ + // CopyToReplacement: + // Add an entry specifying to copy from the source into a replacement local. + // + // Parameters: + // dstLcl - The destination local to write. + // offset - The relative offset into the source. + // type - The type of copy. + // + void CopyToReplacement(Replacement* dstRep, unsigned offset) + { + m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); + } + + //------------------------------------------------------------------------ + // CopyFromReplacement: + // Add an entry specifying to copy from a replacement local into the destination. + // + // Parameters: + // srcLcl - The source local to copy from. + // offset - The relative offset into the destination to write. + // type - The type of copy. + // + void CopyFromReplacement(Replacement* srcRep, unsigned offset) + { + m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); + } + + //------------------------------------------------------------------------ + // CopyFromReplacement: + // Add an entry specifying to copy from a replacement local into the destination. + // + // Parameters: + // srcLcl - The source local to copy from. + // offset - The relative offset into the destination to write. + // type - The type of copy. + // + void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type) + { + m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcLcl, nullptr, offset, type}); + } + + //------------------------------------------------------------------------ + // InitReplacement: + // Add an entry specifying that a specified replacement local should be + // constant initialized. + // + // Parameters: + // dstLcl - The destination local. + // offset - The offset covered by this initialization. + // type - The type to initialize. + // + void InitReplacement(Replacement* dstRep, unsigned offset) + { + m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); + } + + //------------------------------------------------------------------------ + // Finalize: + // Create IR to perform the full decomposed struct copy as specified by + // the entries that were added to the decomposition plan. Add the + // statements to the specified list. + // + // Parameters: + // statements - The list of statements to add to. + // + void Finalize(DecompositionStatementList* statements) + { + if (IsInit()) + { + FinalizeInit(statements); + } + else + { + FinalizeCopy(statements); + } + } -public: - DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src, bool srcInvolvesReplacements) - : m_compiler(comp) - , m_entries(comp->getAllocator(CMK_Promotion)) - , m_dst(dst) - , m_src(src) - , m_srcInvolvesReplacements(srcInvolvesReplacements) - { - } + //------------------------------------------------------------------------ + // CanInitPrimitive: + // Check if we can handle initializing a primitive of the specified type. + // For example, we cannot directly initialize SIMD types to non-zero + // constants. + // + // Parameters: + // type - The primitive type + // + // Returns: + // True if so. 
+ // + bool CanInitPrimitive(var_types type) + { + assert(IsInit()); + if (varTypeIsGC(type) || varTypeIsSIMD(type)) + { + return GetInitPattern() == 0; + } - //------------------------------------------------------------------------ - // CopyBetweenReplacements: - // Add an entry specifying to copy from a replacement into another replacement. - // - // Parameters: - // dstRep - The destination replacement. - // srcRep - The source replacement. - // offset - The offset this covers in the struct copy. - // type - The type of copy. - // - void CopyBetweenReplacements(Replacement* dstRep, Replacement* srcRep, unsigned offset) - { - m_entries.Push(Entry{dstRep->LclNum, dstRep, srcRep->LclNum, srcRep, offset, dstRep->AccessType}); - } + return true; + } - //------------------------------------------------------------------------ - // CopyBetweenReplacements: - // Add an entry specifying to copy from a promoted field into a replacement. - // - // Parameters: - // dstRep - The destination replacement. - // srcLcl - Local number of regularly promoted source field. - // offset - The offset this covers in the struct copy. - // type - The type of copy. - // - // Remarks: - // Used when the source local is a regular promoted field. - // - void CopyBetweenReplacements(Replacement* dstRep, unsigned srcLcl, unsigned offset) - { - m_entries.Push(Entry{dstRep->LclNum, dstRep, srcLcl, nullptr, offset, dstRep->AccessType}); - } - - //------------------------------------------------------------------------ - // CopyBetweenReplacements: - // Add an entry specifying to copy from a promoted field into a replacement. - // - // Parameters: - // dstRep - The destination replacement. - // srcLcl - Local number of regularly promoted source field. - // offset - The offset this covers in the struct copy. - // type - The type of copy. - // - // Remarks: - // Used when the source local is a regular promoted field. - // - void CopyBetweenReplacements(unsigned dstLcl, Replacement* srcRep, unsigned offset) - { - m_entries.Push(Entry{dstLcl, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); - } - - //------------------------------------------------------------------------ - // CopyToReplacement: - // Add an entry specifying to copy from the source into a replacement local. - // - // Parameters: - // dstLcl - The destination local to write. - // offset - The relative offset into the source. - // type - The type of copy. - // - void CopyToReplacement(Replacement* dstRep, unsigned offset) - { - m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); - } - - //------------------------------------------------------------------------ - // CopyFromReplacement: - // Add an entry specifying to copy from a replacement local into the destination. - // - // Parameters: - // srcLcl - The source local to copy from. - // offset - The relative offset into the destination to write. - // type - The type of copy. - // - void CopyFromReplacement(Replacement* srcRep, unsigned offset) - { - m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); - } - - //------------------------------------------------------------------------ - // CopyFromReplacement: - // Add an entry specifying to copy from a replacement local into the destination. - // - // Parameters: - // srcLcl - The source local to copy from. - // offset - The relative offset into the destination to write. - // type - The type of copy. 
- // - void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type) - { - m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcLcl, nullptr, offset, type}); - } - - //------------------------------------------------------------------------ - // InitReplacement: - // Add an entry specifying that a specified replacement local should be - // constant initialized. - // - // Parameters: - // dstLcl - The destination local. - // offset - The offset covered by this initialization. - // type - The type to initialize. - // - void InitReplacement(Replacement* dstRep, unsigned offset) - { - m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); - } - - //------------------------------------------------------------------------ - // Finalize: - // Create IR to perform the full decomposed struct copy as specified by - // the entries that were added to the decomposition plan. Add the - // statements to the specified list. - // - // Parameters: - // statements - The list of statements to add to. - // - void Finalize(DecompositionStatementList* statements) - { - if (IsInit()) - { - FinalizeInit(statements); - } - else - { - FinalizeCopy(statements); - } - } - - //------------------------------------------------------------------------ - // CanInitPrimitive: - // Check if we can handle initializing a primitive of the specified type. - // For example, we cannot directly initialize SIMD types to non-zero - // constants. - // - // Parameters: - // type - The primitive type - // - // Returns: - // True if so. - // - bool CanInitPrimitive(var_types type) - { - assert(IsInit()); - if (varTypeIsGC(type) || varTypeIsSIMD(type)) - { - return GetInitPattern() == 0; - } - - return true; - } - -private: - //------------------------------------------------------------------------ - // IsInit: - // Check if this is an init block operation. - // - // Returns: - // True if so. - // - bool IsInit() - { - return m_src->IsConstInitVal(); - } - - //------------------------------------------------------------------------ - // GetInitPattern: - // For an init block operation, get the pattern to init with. - // - // Returns: - // Byte pattern broadcast into every byte of a 64-bit int. - // - int64_t GetInitPattern() - { - assert(IsInit()); - GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; - int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; - return pattern; - } - - //------------------------------------------------------------------------ - // ComputeRemainder: - // Compute the remainder of the block operation that needs to be inited - // or copied after the replacements stored in the plan have been handled. - // - // Returns: - // Segments representing the remainder. - // - // Remarks: - // This function takes into account that insignificant padding does not - // need to be considered part of the remainder. For example, the last 4 - // bytes of Span on 64-bit are not returned as the remainder. 
- // - StructSegments ComputeRemainder() - { - ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - - COMP_HANDLE compHnd = m_compiler->info.compCompHnd; - - bool significantPadding; - if (dstLayout->IsBlockLayout()) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to block layout\n"); - } - else - { - uint32_t attribs = compHnd->getClassAttribs(dstLayout->GetClassHandle()); - if ((attribs & CORINFO_FLG_INDEXABLE_FIELDS) != 0) + private: + //------------------------------------------------------------------------ + // IsInit: + // Check if this is an init block operation. + // + // Returns: + // True if so. + // + bool IsInit() + { + return m_src->IsConstInitVal(); + } + + //------------------------------------------------------------------------ + // GetInitPattern: + // For an init block operation, get the pattern to init with. + // + // Returns: + // Byte pattern broadcast into every byte of a 64-bit int. + // + int64_t GetInitPattern() + { + assert(IsInit()); + GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; + int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; + return pattern; + } + + //------------------------------------------------------------------------ + // ComputeRemainder: + // Compute the remainder of the block operation that needs to be inited + // or copied after the replacements stored in the plan have been handled. + // + // Returns: + // Segments representing the remainder. + // + // Remarks: + // This function takes into account that insignificant padding does not + // need to be considered part of the remainder. For example, the last 4 + // bytes of Span on 64-bit are not returned as the remainder. + // + StructSegments ComputeRemainder() + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + + COMP_HANDLE compHnd = m_compiler->info.compCompHnd; + + bool significantPadding; + if (dstLayout->IsBlockLayout()) { significantPadding = true; - JITDUMP(" Block op has significant padding due to indexable fields\n"); - } - else if ((attribs & CORINFO_FLG_DONT_DIG_FIELDS) != 0) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); - } - else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); + JITDUMP(" Block op has significant padding due to block layout\n"); } else { - significantPadding = false; + uint32_t attribs = compHnd->getClassAttribs(dstLayout->GetClassHandle()); + if ((attribs & CORINFO_FLG_INDEXABLE_FIELDS) != 0) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to indexable fields\n"); + } + else if ((attribs & CORINFO_FLG_DONT_DIG_FIELDS) != 0) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); + } + else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); + } + else + { + significantPadding = false; + } } - } - StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); + StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); - // Validate with "obviously correct" but less scalable fixed bit vector implementation. 
- INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); + // Validate with "obviously correct" but less scalable fixed bit vector implementation. + INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); - if (significantPadding) - { - segments.Add(StructSegments::Segment(0, dstLayout->GetSize())); + if (significantPadding) + { + segments.Add(StructSegments::Segment(0, dstLayout->GetSize())); -#ifdef DEBUG - for (unsigned i = 0; i < dstLayout->GetSize(); i++) - segmentBitVect->bitVectSet(i); -#endif - } - else - { - unsigned numFields = compHnd->getClassNumInstanceFields(dstLayout->GetClassHandle()); - for (unsigned i = 0; i < numFields; i++) + #ifdef DEBUG + for (unsigned i = 0; i < dstLayout->GetSize(); i++) + segmentBitVect->bitVectSet(i); + #endif + } + else { - CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); - unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); - CORINFO_CLASS_HANDLE fieldClassHandle; - CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); - var_types varType = JITtype2varType(corType); - unsigned size = genTypeSize(varType); - if (size == 0) + unsigned numFields = compHnd->getClassNumInstanceFields(dstLayout->GetClassHandle()); + for (unsigned i = 0; i < numFields; i++) { - // TODO-CQ: Recursively handle padding in sub structures - // here. Might be better to introduce a single JIT-EE call - // to query the significant segments -- that would also be - // usable by R2R even outside the version bubble in many - // cases. - size = compHnd->getClassSize(fieldClassHandle); - assert(size != 0); - } + CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); + unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); + CORINFO_CLASS_HANDLE fieldClassHandle; + CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); + var_types varType = JITtype2varType(corType); + unsigned size = genTypeSize(varType); + if (size == 0) + { + // TODO-CQ: Recursively handle padding in sub structures + // here. Might be better to introduce a single JIT-EE call + // to query the significant segments -- that would also be + // usable by R2R even outside the version bubble in many + // cases. + size = compHnd->getClassSize(fieldClassHandle); + assert(size != 0); + } - segments.Add(StructSegments::Segment(fldOffset, fldOffset + size)); -#ifdef DEBUG - for (unsigned i = 0; i < size; i++) - segmentBitVect->bitVectSet(fldOffset + i); -#endif + segments.Add(StructSegments::Segment(fldOffset, fldOffset + size)); + #ifdef DEBUG + for (unsigned i = 0; i < size; i++) + segmentBitVect->bitVectSet(fldOffset + i); + #endif + } } - } - // TODO-TP: Cache above StructSegments per class layout and just clone - // it there before the following subtract operations. + // TODO-TP: Cache above StructSegments per class layout and just clone + // it there before the following subtract operations. 
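+            // Each entry below carves the range it covers out of the remainder.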
- for (int i = 0; i < m_entries.Height(); i++) - { - const Entry& entry = m_entries.BottomRef(i); + for (int i = 0; i < m_entries.Height(); i++) + { + const Entry& entry = m_entries.BottomRef(i); - segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type))); + segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type))); -#ifdef DEBUG - for (unsigned i = 0; i < genTypeSize(entry.Type); i++) - segmentBitVect->bitVectClear(entry.Offset + i); -#endif - } + #ifdef DEBUG + for (unsigned i = 0; i < genTypeSize(entry.Type); i++) + segmentBitVect->bitVectClear(entry.Offset + i); + #endif + } -#ifdef DEBUG - segments.Check(segmentBitVect); + #ifdef DEBUG + segments.Check(segmentBitVect); - if (m_compiler->verbose) - { - printf(" Remainder: "); - segments.Dump(); - printf("\n"); - } -#endif + if (m_compiler->verbose) + { + printf(" Remainder: "); + segments.Dump(); + printf("\n"); + } + #endif - return segments; - } + return segments; + } - // Represents the strategy for handling the remainder part of the block - // operation. - struct RemainderStrategy - { - enum + // Represents the strategy for handling the remainder part of the block + // operation. + struct RemainderStrategy { - NoRemainder, - Primitive, - FullBlock, + enum + { + NoRemainder, + Primitive, + FullBlock, + }; + + int Type; + unsigned PrimitiveOffset; + var_types PrimitiveType; + + RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF) + : Type(type), PrimitiveOffset(primitiveOffset), PrimitiveType(primitiveType) + { + } }; - int Type; - unsigned PrimitiveOffset; - var_types PrimitiveType; + //------------------------------------------------------------------------ + // DetermineRemainderStrategy: + // Determine the strategy to use to handle the remaining parts of the struct + // once replacements have been handled. + // + // Returns: + // Type describing how it should be handled; for example, by a full block + // copy (that may be redundant with some of the replacements, but covers + // the rest of the remainder); or by handling a specific 'hole' as a + // primitive. + // + RemainderStrategy DetermineRemainderStrategy() + { + StructSegments remainder = ComputeRemainder(); + if (remainder.IsEmpty()) + { + JITDUMP(" => Remainder strategy: do nothing\n"); + return RemainderStrategy(RemainderStrategy::NoRemainder); + } + + StructSegments::Segment segment; + // See if we can "plug the hole" with a single primitive. + // TODO-CQ: Why does doing this for LCL_VAR result in so many regressions? + // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. 
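+            // E.g. a single 2-byte hole left between replacements can be
+            // covered by one TYP_USHORT copy instead of a full block op.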
+ if (remainder.IsSingleSegment(&segment)) + { + var_types primitiveType = TYP_UNDEF; + unsigned size = segment.End - segment.Start; + switch (size) + { + case 1: + primitiveType = TYP_UBYTE; + break; + case 2: + primitiveType = TYP_USHORT; + break; + #ifdef TARGET_64BIT + case 4: + primitiveType = TYP_INT; + break; + #endif + case TARGET_POINTER_SIZE: + primitiveType = TYP_I_IMPL; + if ((segment.Start % TARGET_POINTER_SIZE) == 0) + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); + } + break; - RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF) - : Type(type), PrimitiveOffset(primitiveOffset), PrimitiveType(primitiveType) - { - } - }; + // TODO-CQ: SIMD sizes + } - //------------------------------------------------------------------------ - // DetermineRemainderStrategy: - // Determine the strategy to use to handle the remaining parts of the struct - // once replacements have been handled. - // - // Returns: - // Type describing how it should be handled; for example, by a full block - // copy (that may be redundant with some of the replacements, but covers - // the rest of the remainder); or by handling a specific 'hole' as a - // primitive. - // - RemainderStrategy DetermineRemainderStrategy() - { - StructSegments remainder = ComputeRemainder(); - if (remainder.IsEmpty()) - { - JITDUMP(" => Remainder strategy: do nothing\n"); - return RemainderStrategy(RemainderStrategy::NoRemainder); + if (primitiveType != TYP_UNDEF) + { + if (!IsInit() || CanInitPrimitive(primitiveType)) + { + JITDUMP(" => Remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); + return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); + } + else + { + JITDUMP(" Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); + } + } + } + + JITDUMP(" => Remainder strategy: retain a full block op\n"); + return RemainderStrategy(RemainderStrategy::FullBlock); } - StructSegments::Segment segment; - // See if we can "plug the hole" with a single primitive. - // TODO-CQ: Why does doing this for LCL_VAR result in so many regressions? - // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. - if (remainder.IsSingleSegment(&segment)) + //------------------------------------------------------------------------ + // FinalizeInit: + // Create IR to perform the decomposed initialization. + // + // Parameters: + // statements - List to add statements to. + // + void FinalizeInit(DecompositionStatementList* statements) { - var_types primitiveType = TYP_UNDEF; - unsigned size = segment.End - segment.Start; - switch (size) + GenTree* cns = m_src->OperIsInitVal() ? 
m_src->gtGetOp1() : m_src; + int64_t initPattern = GetInitPattern(); + + for (int i = 0; i < m_entries.Height(); i++) { - case 1: - primitiveType = TYP_UBYTE; - break; - case 2: - primitiveType = TYP_USHORT; - break; -#ifdef TARGET_64BIT - case 4: - primitiveType = TYP_INT; - break; -#endif - case TARGET_POINTER_SIZE: - primitiveType = TYP_I_IMPL; - if ((segment.Start % TARGET_POINTER_SIZE) == 0) - { - ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); - } - break; + const Entry& entry = m_entries.BottomRef(i); - // TODO-CQ: SIMD sizes + assert(entry.ToLclNum != BAD_VAR_NUM); + GenTree* src = CreateInitValue(entry.Type, initPattern); + GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); + statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); } - if (primitiveType != TYP_UNDEF) + RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); + if (remainderStrategy.Type == RemainderStrategy::FullBlock) + { + GenTree* asg = m_compiler->gtNewBlkOpNode(m_dst, cns); + statements->AddStatement(asg); + } + else if (remainderStrategy.Type == RemainderStrategy::Primitive) { - if (!IsInit() || CanInitPrimitive(primitiveType)) + GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern); + GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon(); + GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, + dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); + m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); + } + } + + //------------------------------------------------------------------------ + // CreateInitValue: + // Create an IR node representing a constant value with the specified init pattern. + // + // Parameters: + // type - The primitive type + // initPattern - Pattern to init with + // + // Returns: + // A constant. + // + // Remarks: + // Should only be called when that pattern can actually be represented; + // for example, SIMD types and GC pointers only support an init pattern + // of zero. 
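+        //   For example, a byte pattern of 0xAB yields 0xABAB for TYP_USHORT
+        //   and 0xABABABAB for TYP_INT.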
+ // + GenTree* CreateInitValue(var_types type, int64_t initPattern) + { + switch (type) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: { - JITDUMP(" => Remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); - return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); + int64_t mask = (int64_t(1) << (genTypeSize(type) * 8)) - 1; + return m_compiler->gtNewIconNode(static_cast(initPattern & mask)); } - else + case TYP_LONG: + return m_compiler->gtNewLconNode(initPattern); + case TYP_FLOAT: + float floatPattern; + memcpy(&floatPattern, &initPattern, sizeof(floatPattern)); + return m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT); + case TYP_DOUBLE: + double doublePattern; + memcpy(&doublePattern, &initPattern, sizeof(doublePattern)); + return m_compiler->gtNewDconNode(doublePattern); + case TYP_REF: + case TYP_BYREF: + #ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + #if defined(TARGET_XARCH) + case TYP_SIMD32: + case TYP_SIMD64: + #endif // TARGET_XARCH + #endif // FEATURE_SIMD { - JITDUMP(" Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); + assert(initPattern == 0); + return m_compiler->gtNewZeroConNode(type); } + default: + unreached(); } } - JITDUMP(" => Remainder strategy: retain a full block op\n"); - return RemainderStrategy(RemainderStrategy::FullBlock); - } - - //------------------------------------------------------------------------ - // FinalizeInit: - // Create IR to perform the decomposed initialization. - // - // Parameters: - // statements - List to add statements to. - // - void FinalizeInit(DecompositionStatementList* statements) - { - GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src; - int64_t initPattern = GetInitPattern(); - - for (int i = 0; i < m_entries.Height(); i++) + //------------------------------------------------------------------------ + // FinalizeCopy: + // Create IR to perform the decomposed copy. + // + // Parameters: + // statements - List to add statements to. 
+ // + void FinalizeCopy(DecompositionStatementList* statements) { - const Entry& entry = m_entries.BottomRef(i); + assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) && + m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); - assert(entry.ToLclNum != BAD_VAR_NUM); - GenTree* src = CreateInitValue(entry.Type, initPattern); - GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); - statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); - } - - RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); - if (remainderStrategy.Type == RemainderStrategy::FullBlock) - { - GenTree* asg = m_compiler->gtNewBlkOpNode(m_dst, cns); - statements->AddStatement(asg); - } - else if (remainderStrategy.Type == RemainderStrategy::Primitive) - { - GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern); - GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon(); - GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, - dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); - m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); - statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); - } - } + RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); - //------------------------------------------------------------------------ - // CreateInitValue: - // Create an IR node representing a constant value with the specified init pattern. - // - // Parameters: - // type - The primitive type - // initPattern - Pattern to init with - // - // Returns: - // A constant. - // - // Remarks: - // Should only be called when that pattern can actually be represented; - // for example, SIMD types and GC pointers only support an init pattern - // of zero. - // - GenTree* CreateInitValue(var_types type, int64_t initPattern) - { - switch (type) - { - case TYP_BOOL: - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - case TYP_INT: - { - int64_t mask = (int64_t(1) << (genTypeSize(type) * 8)) - 1; - return m_compiler->gtNewIconNode(static_cast(initPattern & mask)); - } - case TYP_LONG: - return m_compiler->gtNewLconNode(initPattern); - case TYP_FLOAT: - float floatPattern; - memcpy(&floatPattern, &initPattern, sizeof(floatPattern)); - return m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT); - case TYP_DOUBLE: - double doublePattern; - memcpy(&doublePattern, &initPattern, sizeof(doublePattern)); - return m_compiler->gtNewDconNode(doublePattern); - case TYP_REF: - case TYP_BYREF: -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: -#endif // TARGET_XARCH -#endif // FEATURE_SIMD - { - assert(initPattern == 0); - return m_compiler->gtNewZeroConNode(type); - } - default: - unreached(); - } - } - - //------------------------------------------------------------------------ - // FinalizeCopy: - // Create IR to perform the decomposed copy. - // - // Parameters: - // statements - List to add statements to. 
- // - void FinalizeCopy(DecompositionStatementList* statements) - { - assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) && - m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); - - RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); - - // If the remainder is a full block and is going to incur write barrier - // then avoid incurring multiple write barriers for each source - // replacement that is a GC pointer -- write them back to the struct - // first instead. - if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK, GT_FIELD) && - m_dst->GetLayout(m_compiler)->HasGCPtr()) - { - for (int i = 0; i < m_entries.Height(); i++) + // If the remainder is a full block and is going to incur write barrier + // then avoid incurring multiple write barriers for each source + // replacement that is a GC pointer -- write them back to the struct + // first instead. + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK, GT_FIELD) && + m_dst->GetLayout(m_compiler)->HasGCPtr()) { - const Entry& entry = m_entries.BottomRef(i); - // TODO: Double check that TYP_BYREF do not incur any write barriers. - if ((entry.FromReplacement != nullptr) && (entry.Type == TYP_REF)) + for (int i = 0; i < m_entries.Height(); i++) { - Replacement* rep = entry.FromReplacement; - if (rep->NeedsWriteBack) + const Entry& entry = m_entries.BottomRef(i); + // TODO: Double check that TYP_BYREF do not incur any write barriers. + if ((entry.FromReplacement != nullptr) && (entry.Type == TYP_REF)) { - statements->AddStatement( - CreateWriteBack(m_compiler, m_src->AsLclVarCommon()->GetLclNum(), *rep)); - JITDUMP(" Will write back V%02u (%s) to avoid an additional write barrier\n", rep->LclNum, - rep->Description); + Replacement* rep = entry.FromReplacement; + if (rep->NeedsWriteBack) + { + statements->AddStatement( + CreateWriteBack(m_compiler, m_src->AsLclVarCommon()->GetLclNum(), *rep)); + JITDUMP(" Will write back V%02u (%s) to avoid an additional write barrier\n", rep->LclNum, + rep->Description); - rep->NeedsWriteBack = false; + rep->NeedsWriteBack = false; + } } } } - } - GenTree* addr = nullptr; - unsigned addrBaseOffs = 0; - GenTreeFlags indirFlags = GTF_EMPTY; - - if (m_dst->OperIs(GT_BLK, GT_FIELD)) - { - addr = m_dst->gtGetOp1(); + GenTree* addr = nullptr; + unsigned addrBaseOffs = 0; + GenTreeFlags indirFlags = GTF_EMPTY; - if (m_dst->OperIs(GT_FIELD)) + if (m_dst->OperIs(GT_BLK, GT_FIELD)) { - addrBaseOffs = m_dst->AsField()->gtFldOffset; - } + addr = m_dst->gtGetOp1(); - indirFlags = GetPropagatedIndirFlags(m_dst); - } - else if (m_src->OperIs(GT_BLK, GT_FIELD)) - { - addr = m_src->gtGetOp1(); + if (m_dst->OperIs(GT_FIELD)) + { + addrBaseOffs = m_dst->AsField()->gtFldOffset; + } - if (m_src->OperIs(GT_FIELD)) - { - addrBaseOffs = m_src->AsField()->gtFldOffset; + indirFlags = GetPropagatedIndirFlags(m_dst); } + else if (m_src->OperIs(GT_BLK, GT_FIELD)) + { + addr = m_src->gtGetOp1(); - indirFlags = GetPropagatedIndirFlags(m_src); - } + if (m_src->OperIs(GT_FIELD)) + { + addrBaseOffs = m_src->AsField()->gtFldOffset; + } + + indirFlags = GetPropagatedIndirFlags(m_src); + } - int numAddrUses = 0; + int numAddrUses = 0; - if (addr != nullptr) - { - for (int i = 0; i < m_entries.Height(); i++) + if (addr != nullptr) { - if (!IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) + for (int i = 0; i < m_entries.Height(); i++) { - numAddrUses++; + if (!IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) + { + 
numAddrUses++; + } } - } - if (remainderStrategy.Type != RemainderStrategy::NoRemainder) - { - numAddrUses++; + if (remainderStrategy.Type != RemainderStrategy::NoRemainder) + { + numAddrUses++; + } } - } - bool needsNullCheck = false; - if ((addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr)) - { - switch (remainderStrategy.Type) + bool needsNullCheck = false; + if ((addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr)) { - case RemainderStrategy::NoRemainder: - case RemainderStrategy::Primitive: - needsNullCheck = true; - // See if our first indirection will subsume the null check (usual case). - for (int i = 0; i < m_entries.Height(); i++) - { - if (IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) + switch (remainderStrategy.Type) + { + case RemainderStrategy::NoRemainder: + case RemainderStrategy::Primitive: + needsNullCheck = true; + // See if our first indirection will subsume the null check (usual case). + for (int i = 0; i < m_entries.Height(); i++) { - continue; - } + if (IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) + { + continue; + } - const Entry& entry = m_entries.BottomRef(0); + const Entry& entry = m_entries.BottomRef(0); - assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); - needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); - } - break; + assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); + needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + } + break; + } } - } - if (needsNullCheck) - { - numAddrUses++; - } - - if ((addr != nullptr) && (numAddrUses > 1)) - { - if (addr->OperIsLocal() && (!m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) || - (addr->AsLclVarCommon()->GetLclNum() != m_dst->AsLclVarCommon()->GetLclNum()))) + if (needsNullCheck) { - // We will introduce more uses of the address local, so it is - // no longer dying here. - addr->gtFlags &= ~GTF_VAR_DEATH; - } - else if (addr->IsInvariant()) - { - // Fall through + numAddrUses++; } - else + + if ((addr != nullptr) && (numAddrUses > 1)) { - unsigned addrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling address for field-by-field copy")); - statements->AddStatement(m_compiler->gtNewTempAssign(addrLcl, addr)); - addr = m_compiler->gtNewLclvNode(addrLcl, addr->TypeGet()); - UpdateEarlyRefCount(m_compiler, addr); + if (addr->OperIsLocal() && (!m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) || + (addr->AsLclVarCommon()->GetLclNum() != m_dst->AsLclVarCommon()->GetLclNum()))) + { + // We will introduce more uses of the address local, so it is + // no longer dying here. + addr->gtFlags &= ~GTF_VAR_DEATH; + } + else if (addr->IsInvariant()) + { + // Fall through + } + else + { + unsigned addrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling address for field-by-field copy")); + statements->AddStatement(m_compiler->gtNewTempAssign(addrLcl, addr)); + addr = m_compiler->gtNewLclvNode(addrLcl, addr->TypeGet()); + UpdateEarlyRefCount(m_compiler, addr); + } } - } - auto grabAddr = [&numAddrUses, addr, this](unsigned offs) { - assert(numAddrUses > 0); - numAddrUses--; + auto grabAddr = [&numAddrUses, addr, this](unsigned offs) { + assert(numAddrUses > 0); + numAddrUses--; - GenTree* addrUse; - if (numAddrUses == 0) - { - // Last use of the address, reuse the node. - addrUse = addr; - } - else + GenTree* addrUse; + if (numAddrUses == 0) + { + // Last use of the address, reuse the node. 
+ addrUse = addr; + } + else + { + addrUse = m_compiler->gtCloneExpr(addr); + UpdateEarlyRefCount(m_compiler, addrUse); + } + + if (offs != 0) + { + var_types addrType = varTypeIsGC(addrUse) ? TYP_BYREF : TYP_I_IMPL; + addrUse = m_compiler->gtNewOperNode(GT_ADD, addrType, addrUse, + m_compiler->gtNewIconNode((ssize_t)offs, TYP_I_IMPL)); + } + + return addrUse; + }; + + if (remainderStrategy.Type == RemainderStrategy::FullBlock) { - addrUse = m_compiler->gtCloneExpr(addr); - UpdateEarlyRefCount(m_compiler, addrUse); + // We will reuse the existing block op's operands. Rebase the + // address off of the new local we created. + if (m_src->OperIs(GT_BLK, GT_FIELD)) + { + // Note that we should use 0 instead of addrBaseOffs here + // since this ends up as the address of the GT_FIELD node + // that already has the field offset. + m_src->AsUnOp()->gtOp1 = grabAddr(0); + } + else if (m_dst->OperIs(GT_BLK, GT_FIELD)) + { + // Like above, use 0 intentionally here. + m_dst->AsUnOp()->gtOp1 = grabAddr(0); + } } - if (offs != 0) + // If the source involves replacements then do the struct op first -- + // otherwise we would overwrite the destination with stale bits. + // If the source does not involve replacements then CQ analysis shows + // that it's best to do it last. + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements) { - var_types addrType = varTypeIsGC(addrUse) ? TYP_BYREF : TYP_I_IMPL; - addrUse = m_compiler->gtNewOperNode(GT_ADD, addrType, addrUse, - m_compiler->gtNewIconNode((ssize_t)offs, TYP_I_IMPL)); - } + statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); - return addrUse; - }; + if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + // We will introduce uses of the source below so this struct + // copy is no longer the last use if it was before. + m_src->gtFlags &= ~GTF_VAR_DEATH; + } + } - if (remainderStrategy.Type == RemainderStrategy::FullBlock) - { - // We will reuse the existing block op's operands. Rebase the - // address off of the new local we created. - if (m_src->OperIs(GT_BLK, GT_FIELD)) + if (needsNullCheck) { - // Note that we should use 0 instead of addrBaseOffs here - // since this ends up as the address of the GT_FIELD node - // that already has the field offset. - m_src->AsUnOp()->gtOp1 = grabAddr(0); + GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)); + PropagateIndirFlags(indir, indirFlags); + statements->AddStatement(indir); } - else if (m_dst->OperIs(GT_BLK, GT_FIELD)) + + for (int i = 0; i < m_entries.Height(); i++) { - // Like above, use 0 intentionally here. - m_dst->AsUnOp()->gtOp1 = grabAddr(0); - } - } + const Entry& entry = m_entries.BottomRef(i); - // If the source involves replacements then do the struct op first -- - // otherwise we would overwrite the destination with stale bits. - // If the source does not involve replacements then CQ analysis shows - // that it's best to do it last. 
- if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements)
- {
- statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src));
+ if (IsHandledByRemainder(entry, remainderStrategy))
+ {
+ JITDUMP(" Skipping dst+%03u <- V%02u (%s); it is up-to-date in its struct local and will be handled "
+ "as part of the remainder\n",
+ entry.Offset, entry.FromReplacement->LclNum, entry.FromReplacement->Description);
+ continue;
+ }
- if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
- {
- // We will introduce uses of the source below so this struct
- // copy is no longer the last use if it was before.
- m_src->gtFlags &= ~GTF_VAR_DEATH;
- }
- }
+ GenTree* dst;
+ if (entry.ToLclNum != BAD_VAR_NUM)
+ {
+ dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type);
- if (needsNullCheck)
- {
- GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs));
- PropagateIndirFlags(indir, indirFlags);
- statements->AddStatement(indir);
- }
+ if (m_compiler->lvaGetDesc(entry.ToLclNum)->lvIsStructField)
+ UpdateEarlyRefCount(m_compiler, dst);
+ }
+ else
+ {
+ assert(entry.FromLclNum != BAD_VAR_NUM);
- for (int i = 0; i < m_entries.Height(); i++)
- {
- const Entry& entry = m_entries.BottomRef(i);
+ if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+ {
+ unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset;
+ // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX.
+ noway_assert(FitsIn<uint16_t>(offs));
+ dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+ m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
+ DEBUGARG(DoNotEnregisterReason::LocalField));
+ UpdateEarlyRefCount(m_compiler, dst);
+ }
+ else
+ {
+ GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
+ dst = m_compiler->gtNewIndir(entry.Type, addr);
+ PropagateIndirFlags(dst, indirFlags);
+ }
+ }
- if (IsHandledByRemainder(entry, remainderStrategy))
- {
- JITDUMP(" Skipping dst+%03u <- V%02u (%s); it is up-to-date in its struct local and will be handled "
- "as part of the remainder\n",
- entry.Offset, entry.FromReplacement->LclNum, entry.FromReplacement->Description);
- continue;
- }
+ GenTree* src;
+ if (entry.FromLclNum != BAD_VAR_NUM)
+ {
+ src = m_compiler->gtNewLclvNode(entry.FromLclNum, entry.Type);
- GenTree* dst;
- if (entry.ToLclNum != BAD_VAR_NUM)
- {
- dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type);
+ if (m_compiler->lvaGetDesc(entry.FromLclNum)->lvIsStructField)
+ UpdateEarlyRefCount(m_compiler, src);
+ }
+ else
+ {
+ assert(entry.ToLclNum != BAD_VAR_NUM);
+ if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+ {
+ unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
+ noway_assert(FitsIn<uint16_t>(offs));
+ src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+ m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
+ DEBUGARG(DoNotEnregisterReason::LocalField));
+ UpdateEarlyRefCount(m_compiler, src);
+ }
+ else
+ {
+ GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
+ src = m_compiler->gtNewIndir(entry.Type, addr);
+ PropagateIndirFlags(src, indirFlags);
+ }
+ }
- if (m_compiler->lvaGetDesc(entry.ToLclNum)->lvIsStructField)
- UpdateEarlyRefCount(m_compiler, dst);
+ statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
}
- else
+
+ if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements)
{
- assert(entry.FromLclNum != BAD_VAR_NUM);
+ statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src));
+ }
+ if (remainderStrategy.Type == RemainderStrategy::Primitive)
+ {
+ GenTree* dst;
if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
{
- unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset;
- // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX.
- noway_assert(FitsIn<uint16_t>(offs));
- dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
- m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
- DEBUGARG(DoNotEnregisterReason::LocalField));
- UpdateEarlyRefCount(m_compiler, dst);
+ GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon();
+ dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType,
+ dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+ m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
}
else
{
- GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
- dst = m_compiler->gtNewIndir(entry.Type, addr);
+ dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType,
+ grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset));
PropagateIndirFlags(dst, indirFlags);
}
- }
- GenTree* src;
- if (entry.FromLclNum != BAD_VAR_NUM)
- {
- src = m_compiler->gtNewLclvNode(entry.FromLclNum, entry.Type);
-
- if (m_compiler->lvaGetDesc(entry.FromLclNum)->lvIsStructField)
- UpdateEarlyRefCount(m_compiler, src);
- }
- else
- {
- assert(entry.ToLclNum != BAD_VAR_NUM);
+ GenTree* src;
if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
{
- unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
- noway_assert(FitsIn<uint16_t>(offs));
- src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
- m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
- DEBUGARG(DoNotEnregisterReason::LocalField));
- UpdateEarlyRefCount(m_compiler, src);
+ GenTreeLclVarCommon* srcLcl = m_src->AsLclVarCommon();
+ src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType,
+ srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+ m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
}
else
{
- GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
- src = m_compiler->gtNewIndir(entry.Type, addr);
+ src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType,
+ grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset));
PropagateIndirFlags(src, indirFlags);
}
+
+ statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
}
- statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+ assert(numAddrUses == 0);
}
- if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements)
+ bool IsHandledByRemainder(const Entry& entry, const RemainderStrategy& remainderStrategy)
{
- statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src));
+ // If the remainder is being handled as a full block copy and this
+ // replacement is up-to-date in its struct local then we can skip
+ // copying the replacement explicitly.
+ return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && + !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); } - - if (remainderStrategy.Type == RemainderStrategy::Primitive) + //------------------------------------------------------------------------ + // GetPropagatedIndirFlags: + // Convert GT_BLK or GT_FIELD indir flags into flags that should be + // propagated to derived GT_IND nodes. + // + // Parameters: + // indir - The indirection + // + // Returns: + // Flags to propagate to created derived GT_IND nodes. + // + GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) { - GenTree* dst; - if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon(); - dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, - dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); - m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); - } - else - { - dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, - grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset)); - PropagateIndirFlags(dst, indirFlags); - } - - GenTree* src; - if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - GenTreeLclVarCommon* srcLcl = m_src->AsLclVarCommon(); - src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType, - srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); - m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); - } - else + assert(indir->OperIs(GT_BLK, GT_FIELD)); + if (indir->OperIs(GT_BLK)) { - src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, - grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset)); - PropagateIndirFlags(src, indirFlags); + return indir->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); } - statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); + static_assert_no_msg(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); + return indir->gtFlags & GTF_IND_VOLATILE; } - assert(numAddrUses == 0); - } - - bool IsHandledByRemainder(const Entry& entry, const RemainderStrategy& remainderStrategy) - { - // If the remainder is being handled as a full block copy and this - // replacement is up-to-date in its struct local then we can skip - // copying the replacement explicitly. - return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && - !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); - } - //------------------------------------------------------------------------ - // GetPropagatedIndirFlags: - // Convert GT_BLK or GT_FIELD indir flags into flags that should be - // propagated to derived GT_IND nodes. - // - // Parameters: - // indir - The indirection - // - // Returns: - // Flags to propagate to created derived GT_IND nodes. - // - GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) - { - assert(indir->OperIs(GT_BLK, GT_FIELD)); - if (indir->OperIs(GT_BLK)) + //------------------------------------------------------------------------ + // PropagateIndirFlags: + // Propagate the specified flags to a GT_IND node. + // + // Parameters: + // indir - The indirection to apply flags to + // flags - The specified indirection flags. 
+ //
+ void PropagateIndirFlags(GenTree* indir, GenTreeFlags flags)
{
- return indir->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS);
- }
-
- static_assert_no_msg(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
- return indir->gtFlags & GTF_IND_VOLATILE;
- }
-
- //------------------------------------------------------------------------
- // PropagateIndirFlags:
- // Propagate the specified flags to a GT_IND node.
- //
- // Parameters:
- // indir - The indirection to apply flags to
- // flags - The specified indirection flags.
- //
- void PropagateIndirFlags(GenTree* indir, GenTreeFlags flags)
- {
- if (genTypeSize(indir) == 1)
- {
- flags &= ~GTF_IND_UNALIGNED;
- }
-
- indir->gtFlags |= flags;
- }
-
- //------------------------------------------------------------------------
- // UpdateEarlyRefCount:
- // Update early ref counts if necessary for the specified IR node.
- //
- // Parameters:
- // comp - compiler instance
- // candidate - the IR node that may be a local that should have its early
- // ref counts updated.
- //
- static void UpdateEarlyRefCount(Compiler* comp, GenTree* candidate)
- {
- if (!candidate->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR))
- {
- return;
- }
-
- IncrementRefCount(comp, candidate->AsLclVarCommon()->GetLclNum());
+ if (genTypeSize(indir) == 1)
+ {
+ flags &= ~GTF_IND_UNALIGNED;
+ }
- LclVarDsc* varDsc = comp->lvaGetDesc(candidate->AsLclVarCommon());
- if (varDsc->lvIsStructField)
- {
- IncrementRefCount(comp, varDsc->lvParentLcl);
+ indir->gtFlags |= flags;
}
- if (varDsc->lvPromoted)
+ //------------------------------------------------------------------------
+ // UpdateEarlyRefCount:
+ // Update early ref counts if necessary for the specified IR node.
+ //
+ // Parameters:
+ // comp - compiler instance
+ // candidate - the IR node that may be a local that should have its early
+ // ref counts updated.
+ //
+ static void UpdateEarlyRefCount(Compiler* comp, GenTree* candidate)
{
- for (unsigned fldLclNum = varDsc->lvFieldLclStart; fldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
- fldLclNum++)
+ if (!candidate->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR))
{
- IncrementRefCount(comp, fldLclNum);
+ return;
}
- }
- }
-
- //------------------------------------------------------------------------
- // IncrementRefCount:
- // Increment the ref count for the specified local.
- //
- // Parameters:
- // comp - compiler instance
- // lclNum - the local
- //
- static void IncrementRefCount(Compiler* comp, unsigned lclNum)
- {
- LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
- varDsc->incLvRefCntSaturating(1, RCS_EARLY);
- }
-};
-
-class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
-{
- Promotion* m_prom;
- jitstd::vector<Replacement>** m_replacements;
- bool m_madeChanges = false;
-
-public:
- enum
- {
- DoPostOrder = true,
- UseExecutionOrder = true,
- };
-
- ReplaceVisitor(Promotion* prom, jitstd::vector<Replacement>** replacements)
- : GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements)
- {
- }
- bool MadeChanges()
- {
- return m_madeChanges;
- }
+ IncrementRefCount(comp, candidate->AsLclVarCommon()->GetLclNum());
- void Reset()
- {
- m_madeChanges = false;
- }
-
- fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
- {
- GenTree* tree = *use;
-
- if (tree->OperIs(GT_ASG))
- {
- // If LHS of the ASG was a local then we skipped it as we don't
- // want to see it until after the RHS.
- if (tree->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+ LclVarDsc* varDsc = comp->lvaGetDesc(candidate->AsLclVarCommon());
+ if (varDsc->lvIsStructField)
{
- ReplaceLocal(&tree->AsOp()->gtOp1, tree);
+ IncrementRefCount(comp, varDsc->lvParentLcl);
}
- // Assignments can be decomposed directly into accesses of the replacements.
- DecomposeAssignment(use, user);
- return fgWalkResult::WALK_CONTINUE;
- }
-
- if (tree->OperIs(GT_CALL))
- {
- // Calls need to store replacements back into the struct local for args
- // and need to restore replacements from the result (for
- // retbufs/returns).
- LoadStoreAroundCall((*use)->AsCall(), user);
- return fgWalkResult::WALK_CONTINUE;
- }
-
- if (tree->OperIs(GT_RETURN))
- {
- // Returns need to store replacements back into the struct local.
- StoreBeforeReturn((*use)->AsUnOp());
- return fgWalkResult::WALK_CONTINUE;
+ if (varDsc->lvPromoted)
+ {
+ for (unsigned fldLclNum = varDsc->lvFieldLclStart; fldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ fldLclNum++)
+ {
+ IncrementRefCount(comp, fldLclNum);
+ }
+ }
}
- // Skip the local on the LHS of ASGs when we see it in the normal tree
- // visit; we handle it as part of the parent ASG instead.
- if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) &&
- ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree)))
+ //------------------------------------------------------------------------
+ // IncrementRefCount:
+ // Increment the ref count for the specified local.
+ //
+ // Parameters:
+ // comp - compiler instance
+ // lclNum - the local
+ //
+ static void IncrementRefCount(Compiler* comp, unsigned lclNum)
{
- ReplaceLocal(use, user);
- return fgWalkResult::WALK_CONTINUE;
+ LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
+ varDsc->incLvRefCntSaturating(1, RCS_EARLY);
}
-
- return fgWalkResult::WALK_CONTINUE;
- }
+ };

//------------------------------------------------------------------------
// DecomposeAssignment:

From c7254bfaf0392db1604a845bc14400ed5a2476cb Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 3 May 2023 19:16:50 +0200
Subject: [PATCH 29/37] What if I remove this

---
 src/coreclr/jit/promotion.cpp | 1244 ---------------------------------
 1 file changed, 1244 deletions(-)

diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp
index a06a818c9bfafa..9071df7f774495 100644
--- a/src/coreclr/jit/promotion.cpp
+++ b/src/coreclr/jit/promotion.cpp
@@ -798,1250 +798,6 @@ class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
 return fgWalkResult::WALK_CONTINUE;
 }

- // Represents a list of statements; this is the result of assignment decomposition.
- class DecompositionStatementList
- {
- GenTree* m_head = nullptr;
-
- public:
- void AddStatement(GenTree* stmt)
- {
- stmt->gtNext = m_head;
- m_head = stmt;
- }
-
- GenTree* ToCommaTree(Compiler* comp)
- {
- if (m_head == nullptr)
- {
- return comp->gtNewNothingNode();
- }
-
- GenTree* tree = m_head;
-
- for (GenTree* cur = m_head->gtNext; cur != nullptr; cur = cur->gtNext)
- {
- tree = comp->gtNewOperNode(GT_COMMA, TYP_VOID, cur, tree);
- }
-
- return tree;
- }
- };
-
- // Represents significant segments of a struct operation.
- //
- // Essentially a segment tree (but not stored as a tree) that supports boolean
- // Add/Subtract operations of segments. Used to compute the remainder after
- // replacements have been handled as part of a decomposed block operation.
- class StructSegments
- {
- public:
- struct Segment
- {
- unsigned Start = 0;
- unsigned End = 0;
-
- Segment()
- {
- }
-
- Segment(unsigned start, unsigned end) : Start(start), End(end)
- {
- }
-
- bool IntersectsInclusive(const Segment& other) const
- {
- if (End < other.Start)
- {
- return false;
- }
-
- if (other.End < Start)
- {
- return false;
- }
-
- return true;
- }
-
- bool Contains(const Segment& other) const
- {
- return other.Start >= Start && other.End <= End;
- }
-
- void Merge(const Segment& other)
- {
- Start = min(Start, other.Start);
- End = max(End, other.End);
- }
- };
-
- private:
- jitstd::vector<Segment> m_segments;
-
- public:
- StructSegments(CompAllocator allocator) : m_segments(allocator)
- {
- }
-
- //------------------------------------------------------------------------
- // Add:
- // Add a segment to the data structure.
- //
- // Parameters:
- // segment - The segment to add.
- //
- void Add(const Segment& segment)
- {
- size_t index = BinarySearch<Segment, &Segment::End>(m_segments, segment.Start);
-
- if ((ssize_t)index < 0)
- {
- index = ~index;
- }
-
- m_segments.insert(m_segments.begin() + index, segment);
- size_t endIndex;
- for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++)
- {
- if (!m_segments[index].IntersectsInclusive(m_segments[endIndex]))
- {
- break;
- }
-
- m_segments[index].Merge(m_segments[endIndex]);
- }
-
- m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex);
- }
-
- //------------------------------------------------------------------------
- // Subtract:
- // Subtract a segment from the data structure.
- //
- // Parameters:
- // segment - The segment to subtract.
- //
- void Subtract(const Segment& segment)
- {
- size_t index = BinarySearch<Segment, &Segment::End>(m_segments, segment.Start);
- if ((ssize_t)index < 0)
- {
- index = ~index;
- }
- else
- {
- // Start == segment[index].End, which makes it non-interesting.
- index++;
- }
-
- if (index >= m_segments.size())
- {
- return;
- }
-
- // Here we know Start < segment[index].End. Do they not intersect at all?
- if (m_segments[index].Start >= segment.End)
- {
- // Does not intersect any segment.
- return;
- }
-
- assert(m_segments[index].IntersectsInclusive(segment));
-
- if (m_segments[index].Contains(segment))
- {
- if (segment.Start > m_segments[index].Start)
- {
- // New segment (existing.Start, segment.Start)
- if (segment.End < m_segments[index].End)
- {
- m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start));
-
- // And new segment (segment.End, existing.End)
- m_segments[index + 1].Start = segment.End;
- return;
- }
-
- m_segments[index].End = segment.Start;
- return;
- }
- if (segment.End < m_segments[index].End)
- {
- // New segment (segment.End, existing.End)
- m_segments[index].Start = segment.End;
- return;
- }
-
- // Full segment is being removed
- m_segments.erase(m_segments.begin() + index);
- return;
- }
-
- if (segment.Start > m_segments[index].Start)
- {
- m_segments[index].End = segment.Start;
- index++;
- }
-
- size_t endIndex = BinarySearch<Segment, &Segment::End>(m_segments, segment.End);
- if ((ssize_t)endIndex >= 0)
- {
- m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1);
- return;
- }
-
- endIndex = ~endIndex;
- if (endIndex == m_segments.size())
- {
- m_segments.erase(m_segments.begin() + index, m_segments.end());
- return;
- }
-
- if (segment.End > m_segments[endIndex].Start)
- {
- m_segments[endIndex].Start = segment.End;
- }
-
- m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex);
- }
-
- //------------------------------------------------------------------------
- // IsEmpty:
- // Check if the segment tree is empty.
- //
- // Returns:
- // True if so.
- //
- bool IsEmpty()
- {
- return m_segments.size() == 0;
- }
-
- //------------------------------------------------------------------------
- // IsSingleSegment:
- // Check if the segment tree contains only a single segment, and return
- // it if so.
- //
- // Parameters:
- // result - [out] The single segment. Only valid if the method returns true.
- //
- // Returns:
- // True if so.
- //
- bool IsSingleSegment(Segment* result)
- {
- if (m_segments.size() == 1)
- {
- *result = m_segments[0];
- return true;
- }
-
- return false;
- }
-
- #ifdef DEBUG
- //------------------------------------------------------------------------
- // Check:
- // Validate that the data structure is normalized and that it equals a
- // specific fixed bit vector.
- //
- // Parameters:
- // vect - The bit vector
- //
- // Remarks:
- // This validates that the internal representation is normalized (i.e.
- // all adjacent intervals are merged) and that it contains an index iff
- // the specified vector contains that index.
- //
- void Check(FixedBitVect* vect)
- {
- bool first = true;
- unsigned last = 0;
- for (const Segment& segment : m_segments)
- {
- assert(first || (last < segment.Start));
- assert(segment.End <= vect->bitVectGetSize());
-
- for (unsigned i = last; i < segment.Start; i++)
- assert(!vect->bitVectTest(i));
-
- for (unsigned i = segment.Start; i < segment.End; i++)
- assert(vect->bitVectTest(i));
-
- first = false;
- last = segment.End;
- }
-
- for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++)
- assert(!vect->bitVectTest(i));
- }
-
- //------------------------------------------------------------------------
- // Dump:
- // Dump a string representation of the segment tree to stdout.
- //
- void Dump()
- {
- if (m_segments.size() == 0)
- {
- printf("<empty>");
- }
- else
- {
- const char* sep = "";
- for (const Segment& segment : m_segments)
- {
- printf("%s[%03u..%03u)", sep, segment.Start, segment.End);
- sep = " ";
- }
- }
- }
- #endif
- };
-
- // Represents a plan for decomposing a block operation into direct treatment of
- // replacement fields and the remainder.
- class DecompositionPlan
- {
- struct Entry
- {
- unsigned ToLclNum;
- Replacement* ToReplacement;
- unsigned FromLclNum;
- Replacement* FromReplacement;
- unsigned Offset;
- var_types Type;
- };
-
- Compiler* m_compiler;
- ArrayStack<Entry> m_entries;
- GenTree* m_dst;
- GenTree* m_src;
- bool m_srcInvolvesReplacements;
-
- public:
- DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src, bool srcInvolvesReplacements)
- : m_compiler(comp)
- , m_entries(comp->getAllocator(CMK_Promotion))
- , m_dst(dst)
- , m_src(src)
- , m_srcInvolvesReplacements(srcInvolvesReplacements)
- {
- }
-
- //------------------------------------------------------------------------
- // CopyBetweenReplacements:
- // Add an entry specifying to copy from a replacement into another replacement.
- //
- // Parameters:
- // dstRep - The destination replacement.
- // srcRep - The source replacement.
- // offset - The offset this covers in the struct copy.
- // type - The type of copy.
- //
- void CopyBetweenReplacements(Replacement* dstRep, Replacement* srcRep, unsigned offset)
- {
- m_entries.Push(Entry{dstRep->LclNum, dstRep, srcRep->LclNum, srcRep, offset, dstRep->AccessType});
- }
-
- //------------------------------------------------------------------------
- // CopyBetweenReplacements:
- // Add an entry specifying to copy from a promoted field into a replacement.
- //
- // Parameters:
- // dstRep - The destination replacement.
- // srcLcl - Local number of regularly promoted source field.
- // offset - The offset this covers in the struct copy.
- // type - The type of copy.
- //
- // Remarks:
- // Used when the source local is a regular promoted field.
- //
- void CopyBetweenReplacements(Replacement* dstRep, unsigned srcLcl, unsigned offset)
- {
- m_entries.Push(Entry{dstRep->LclNum, dstRep, srcLcl, nullptr, offset, dstRep->AccessType});
- }
-
- //------------------------------------------------------------------------
- // CopyBetweenReplacements:
- // Add an entry specifying to copy from a promoted field into a replacement.
- //
- // Parameters:
- // dstRep - The destination replacement.
- // srcLcl - Local number of regularly promoted source field.
- // offset - The offset this covers in the struct copy.
- // type - The type of copy.
- //
- // Remarks:
- // Used when the source local is a regular promoted field.
- //
- void CopyBetweenReplacements(unsigned dstLcl, Replacement* srcRep, unsigned offset)
- {
- m_entries.Push(Entry{dstLcl, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType});
- }
-
- //------------------------------------------------------------------------
- // CopyToReplacement:
- // Add an entry specifying to copy from the source into a replacement local.
- //
- // Parameters:
- // dstLcl - The destination local to write.
- // offset - The relative offset into the source.
- // type - The type of copy.
- // - void CopyToReplacement(Replacement* dstRep, unsigned offset) - { - m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); - } - - //------------------------------------------------------------------------ - // CopyFromReplacement: - // Add an entry specifying to copy from a replacement local into the destination. - // - // Parameters: - // srcLcl - The source local to copy from. - // offset - The relative offset into the destination to write. - // type - The type of copy. - // - void CopyFromReplacement(Replacement* srcRep, unsigned offset) - { - m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); - } - - //------------------------------------------------------------------------ - // CopyFromReplacement: - // Add an entry specifying to copy from a replacement local into the destination. - // - // Parameters: - // srcLcl - The source local to copy from. - // offset - The relative offset into the destination to write. - // type - The type of copy. - // - void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type) - { - m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcLcl, nullptr, offset, type}); - } - - //------------------------------------------------------------------------ - // InitReplacement: - // Add an entry specifying that a specified replacement local should be - // constant initialized. - // - // Parameters: - // dstLcl - The destination local. - // offset - The offset covered by this initialization. - // type - The type to initialize. - // - void InitReplacement(Replacement* dstRep, unsigned offset) - { - m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType}); - } - - //------------------------------------------------------------------------ - // Finalize: - // Create IR to perform the full decomposed struct copy as specified by - // the entries that were added to the decomposition plan. Add the - // statements to the specified list. - // - // Parameters: - // statements - The list of statements to add to. - // - void Finalize(DecompositionStatementList* statements) - { - if (IsInit()) - { - FinalizeInit(statements); - } - else - { - FinalizeCopy(statements); - } - } - - //------------------------------------------------------------------------ - // CanInitPrimitive: - // Check if we can handle initializing a primitive of the specified type. - // For example, we cannot directly initialize SIMD types to non-zero - // constants. - // - // Parameters: - // type - The primitive type - // - // Returns: - // True if so. - // - bool CanInitPrimitive(var_types type) - { - assert(IsInit()); - if (varTypeIsGC(type) || varTypeIsSIMD(type)) - { - return GetInitPattern() == 0; - } - - return true; - } - - private: - //------------------------------------------------------------------------ - // IsInit: - // Check if this is an init block operation. - // - // Returns: - // True if so. - // - bool IsInit() - { - return m_src->IsConstInitVal(); - } - - //------------------------------------------------------------------------ - // GetInitPattern: - // For an init block operation, get the pattern to init with. - // - // Returns: - // Byte pattern broadcast into every byte of a 64-bit int. - // - int64_t GetInitPattern() - { - assert(IsInit()); - GenTree* cns = m_src->OperIsInitVal() ? 
m_src->gtGetOp1() : m_src; - int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; - return pattern; - } - - //------------------------------------------------------------------------ - // ComputeRemainder: - // Compute the remainder of the block operation that needs to be inited - // or copied after the replacements stored in the plan have been handled. - // - // Returns: - // Segments representing the remainder. - // - // Remarks: - // This function takes into account that insignificant padding does not - // need to be considered part of the remainder. For example, the last 4 - // bytes of Span on 64-bit are not returned as the remainder. - // - StructSegments ComputeRemainder() - { - ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - - COMP_HANDLE compHnd = m_compiler->info.compCompHnd; - - bool significantPadding; - if (dstLayout->IsBlockLayout()) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to block layout\n"); - } - else - { - uint32_t attribs = compHnd->getClassAttribs(dstLayout->GetClassHandle()); - if ((attribs & CORINFO_FLG_INDEXABLE_FIELDS) != 0) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to indexable fields\n"); - } - else if ((attribs & CORINFO_FLG_DONT_DIG_FIELDS) != 0) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); - } - else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) - { - significantPadding = true; - JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); - } - else - { - significantPadding = false; - } - } - - StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); - - // Validate with "obviously correct" but less scalable fixed bit vector implementation. - INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); - - if (significantPadding) - { - segments.Add(StructSegments::Segment(0, dstLayout->GetSize())); - - #ifdef DEBUG - for (unsigned i = 0; i < dstLayout->GetSize(); i++) - segmentBitVect->bitVectSet(i); - #endif - } - else - { - unsigned numFields = compHnd->getClassNumInstanceFields(dstLayout->GetClassHandle()); - for (unsigned i = 0; i < numFields; i++) - { - CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); - unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); - CORINFO_CLASS_HANDLE fieldClassHandle; - CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); - var_types varType = JITtype2varType(corType); - unsigned size = genTypeSize(varType); - if (size == 0) - { - // TODO-CQ: Recursively handle padding in sub structures - // here. Might be better to introduce a single JIT-EE call - // to query the significant segments -- that would also be - // usable by R2R even outside the version bubble in many - // cases. - size = compHnd->getClassSize(fieldClassHandle); - assert(size != 0); - } - - segments.Add(StructSegments::Segment(fldOffset, fldOffset + size)); - #ifdef DEBUG - for (unsigned i = 0; i < size; i++) - segmentBitVect->bitVectSet(fldOffset + i); - #endif - } - } - - // TODO-TP: Cache above StructSegments per class layout and just clone - // it there before the following subtract operations. 
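
(Illustrative aside, not part of the patch: ComputeRemainder above cross-checks StructSegments against the "obviously correct" fixed bit vector. The same add/subtract-then-read-the-remainder behavior can be modeled standalone with a std::bitset holding one bit per struct byte; the 64-byte size and the segment values here are arbitrary assumptions.)

    #include <bitset>
    #include <cassert>

    int main()
    {
        // One bit per byte of a hypothetical 64-byte struct.
        std::bitset<64> remainder;

        // Add the significant segment [0, 16), e.g. the struct's fields.
        for (unsigned i = 0; i < 16; i++)
            remainder.set(i);

        // Subtract a replacement field covering [4, 8).
        for (unsigned i = 4; i < 8; i++)
            remainder.reset(i);

        // What is left is the remainder: segments [0, 4) and [8, 16).
        assert(remainder.test(3) && !remainder.test(4) && remainder.test(8));
        return 0;
    }
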
- - for (int i = 0; i < m_entries.Height(); i++) - { - const Entry& entry = m_entries.BottomRef(i); - - segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type))); - - #ifdef DEBUG - for (unsigned i = 0; i < genTypeSize(entry.Type); i++) - segmentBitVect->bitVectClear(entry.Offset + i); - #endif - } - - #ifdef DEBUG - segments.Check(segmentBitVect); - - if (m_compiler->verbose) - { - printf(" Remainder: "); - segments.Dump(); - printf("\n"); - } - #endif - - return segments; - } - - // Represents the strategy for handling the remainder part of the block - // operation. - struct RemainderStrategy - { - enum - { - NoRemainder, - Primitive, - FullBlock, - }; - - int Type; - unsigned PrimitiveOffset; - var_types PrimitiveType; - - RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF) - : Type(type), PrimitiveOffset(primitiveOffset), PrimitiveType(primitiveType) - { - } - }; - - //------------------------------------------------------------------------ - // DetermineRemainderStrategy: - // Determine the strategy to use to handle the remaining parts of the struct - // once replacements have been handled. - // - // Returns: - // Type describing how it should be handled; for example, by a full block - // copy (that may be redundant with some of the replacements, but covers - // the rest of the remainder); or by handling a specific 'hole' as a - // primitive. - // - RemainderStrategy DetermineRemainderStrategy() - { - StructSegments remainder = ComputeRemainder(); - if (remainder.IsEmpty()) - { - JITDUMP(" => Remainder strategy: do nothing\n"); - return RemainderStrategy(RemainderStrategy::NoRemainder); - } - - StructSegments::Segment segment; - // See if we can "plug the hole" with a single primitive. - // TODO-CQ: Why does doing this for LCL_VAR result in so many regressions? - // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. - if (remainder.IsSingleSegment(&segment)) - { - var_types primitiveType = TYP_UNDEF; - unsigned size = segment.End - segment.Start; - switch (size) - { - case 1: - primitiveType = TYP_UBYTE; - break; - case 2: - primitiveType = TYP_USHORT; - break; - #ifdef TARGET_64BIT - case 4: - primitiveType = TYP_INT; - break; - #endif - case TARGET_POINTER_SIZE: - primitiveType = TYP_I_IMPL; - if ((segment.Start % TARGET_POINTER_SIZE) == 0) - { - ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); - primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); - } - break; - - // TODO-CQ: SIMD sizes - } - - if (primitiveType != TYP_UNDEF) - { - if (!IsInit() || CanInitPrimitive(primitiveType)) - { - JITDUMP(" => Remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); - return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); - } - else - { - JITDUMP(" Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); - } - } - } - - JITDUMP(" => Remainder strategy: retain a full block op\n"); - return RemainderStrategy(RemainderStrategy::FullBlock); - } - - //------------------------------------------------------------------------ - // FinalizeInit: - // Create IR to perform the decomposed initialization. - // - // Parameters: - // statements - List to add statements to. - // - void FinalizeInit(DecompositionStatementList* statements) - { - GenTree* cns = m_src->OperIsInitVal() ? 
m_src->gtGetOp1() : m_src;
- int64_t initPattern = GetInitPattern();
-
- for (int i = 0; i < m_entries.Height(); i++)
- {
- const Entry& entry = m_entries.BottomRef(i);
-
- assert(entry.ToLclNum != BAD_VAR_NUM);
- GenTree* src = CreateInitValue(entry.Type, initPattern);
- GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type);
- statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
- }
-
- RemainderStrategy remainderStrategy = DetermineRemainderStrategy();
- if (remainderStrategy.Type == RemainderStrategy::FullBlock)
- {
- GenTree* asg = m_compiler->gtNewBlkOpNode(m_dst, cns);
- statements->AddStatement(asg);
- }
- else if (remainderStrategy.Type == RemainderStrategy::Primitive)
- {
- GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern);
- GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon();
- GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType,
- dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
- m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
- statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
- }
- }
-
- //------------------------------------------------------------------------
- // CreateInitValue:
- // Create an IR node representing a constant value with the specified init pattern.
- //
- // Parameters:
- // type - The primitive type
- // initPattern - Pattern to init with
- //
- // Returns:
- // A constant.
- //
- // Remarks:
- // Should only be called when that pattern can actually be represented;
- // for example, SIMD types and GC pointers only support an init pattern
- // of zero.
- //
- GenTree* CreateInitValue(var_types type, int64_t initPattern)
- {
- switch (type)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_UBYTE:
- case TYP_SHORT:
- case TYP_USHORT:
- case TYP_INT:
- {
- int64_t mask = (int64_t(1) << (genTypeSize(type) * 8)) - 1;
- return m_compiler->gtNewIconNode(static_cast<int32_t>(initPattern & mask));
- }
- case TYP_LONG:
- return m_compiler->gtNewLconNode(initPattern);
- case TYP_FLOAT:
- float floatPattern;
- memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
- return m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT);
- case TYP_DOUBLE:
- double doublePattern;
- memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
- return m_compiler->gtNewDconNode(doublePattern);
- case TYP_REF:
- case TYP_BYREF:
- #ifdef FEATURE_SIMD
- case TYP_SIMD8:
- case TYP_SIMD12:
- case TYP_SIMD16:
- #if defined(TARGET_XARCH)
- case TYP_SIMD32:
- case TYP_SIMD64:
- #endif // TARGET_XARCH
- #endif // FEATURE_SIMD
- {
- assert(initPattern == 0);
- return m_compiler->gtNewZeroConNode(type);
- }
- default:
- unreached();
- }
- }
-
- //------------------------------------------------------------------------
- // FinalizeCopy:
- // Create IR to perform the decomposed copy.
- //
- // Parameters:
- // statements - List to add statements to.
- //
- void FinalizeCopy(DecompositionStatementList* statements)
- {
- assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) &&
- m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD));
-
- RemainderStrategy remainderStrategy = DetermineRemainderStrategy();
-
- // If the remainder is a full block and is going to incur write barrier
- // then avoid incurring multiple write barriers for each source
- // replacement that is a GC pointer -- write them back to the struct
- // first instead.
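
(Illustrative aside, not part of the patch: GetInitPattern above broadcasts the low byte of the init constant into all eight bytes, and CreateInitValue then masks the pattern down to each replacement's width. A self-contained sketch of that arithmetic:)

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint8_t fill    = 0xAB;
        // The multiplier replicates the fill byte into every byte.
        int64_t pattern = int64_t(fill) * 0x0101010101010101LL;
        assert(uint64_t(pattern) == 0xABABABABABABABABull);

        // Truncation for a 2-byte-wide replacement (as for TYP_USHORT):
        int64_t mask = (int64_t(1) << (2 * 8)) - 1;
        assert((pattern & mask) == 0xABAB);
        return 0;
    }
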
- if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK, GT_FIELD) && - m_dst->GetLayout(m_compiler)->HasGCPtr()) - { - for (int i = 0; i < m_entries.Height(); i++) - { - const Entry& entry = m_entries.BottomRef(i); - // TODO: Double check that TYP_BYREF do not incur any write barriers. - if ((entry.FromReplacement != nullptr) && (entry.Type == TYP_REF)) - { - Replacement* rep = entry.FromReplacement; - if (rep->NeedsWriteBack) - { - statements->AddStatement( - CreateWriteBack(m_compiler, m_src->AsLclVarCommon()->GetLclNum(), *rep)); - JITDUMP(" Will write back V%02u (%s) to avoid an additional write barrier\n", rep->LclNum, - rep->Description); - - rep->NeedsWriteBack = false; - } - } - } - } - - GenTree* addr = nullptr; - unsigned addrBaseOffs = 0; - GenTreeFlags indirFlags = GTF_EMPTY; - - if (m_dst->OperIs(GT_BLK, GT_FIELD)) - { - addr = m_dst->gtGetOp1(); - - if (m_dst->OperIs(GT_FIELD)) - { - addrBaseOffs = m_dst->AsField()->gtFldOffset; - } - - indirFlags = GetPropagatedIndirFlags(m_dst); - } - else if (m_src->OperIs(GT_BLK, GT_FIELD)) - { - addr = m_src->gtGetOp1(); - - if (m_src->OperIs(GT_FIELD)) - { - addrBaseOffs = m_src->AsField()->gtFldOffset; - } - - indirFlags = GetPropagatedIndirFlags(m_src); - } - - int numAddrUses = 0; - - if (addr != nullptr) - { - for (int i = 0; i < m_entries.Height(); i++) - { - if (!IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) - { - numAddrUses++; - } - } - - if (remainderStrategy.Type != RemainderStrategy::NoRemainder) - { - numAddrUses++; - } - } - - bool needsNullCheck = false; - if ((addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr)) - { - switch (remainderStrategy.Type) - { - case RemainderStrategy::NoRemainder: - case RemainderStrategy::Primitive: - needsNullCheck = true; - // See if our first indirection will subsume the null check (usual case). - for (int i = 0; i < m_entries.Height(); i++) - { - if (IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy)) - { - continue; - } - - const Entry& entry = m_entries.BottomRef(0); - - assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); - needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); - } - break; - } - } - - if (needsNullCheck) - { - numAddrUses++; - } - - if ((addr != nullptr) && (numAddrUses > 1)) - { - if (addr->OperIsLocal() && (!m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) || - (addr->AsLclVarCommon()->GetLclNum() != m_dst->AsLclVarCommon()->GetLclNum()))) - { - // We will introduce more uses of the address local, so it is - // no longer dying here. - addr->gtFlags &= ~GTF_VAR_DEATH; - } - else if (addr->IsInvariant()) - { - // Fall through - } - else - { - unsigned addrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling address for field-by-field copy")); - statements->AddStatement(m_compiler->gtNewTempAssign(addrLcl, addr)); - addr = m_compiler->gtNewLclvNode(addrLcl, addr->TypeGet()); - UpdateEarlyRefCount(m_compiler, addr); - } - } - - auto grabAddr = [&numAddrUses, addr, this](unsigned offs) { - assert(numAddrUses > 0); - numAddrUses--; - - GenTree* addrUse; - if (numAddrUses == 0) - { - // Last use of the address, reuse the node. - addrUse = addr; - } - else - { - addrUse = m_compiler->gtCloneExpr(addr); - UpdateEarlyRefCount(m_compiler, addrUse); - } - - if (offs != 0) - { - var_types addrType = varTypeIsGC(addrUse) ? 
TYP_BYREF : TYP_I_IMPL; - addrUse = m_compiler->gtNewOperNode(GT_ADD, addrType, addrUse, - m_compiler->gtNewIconNode((ssize_t)offs, TYP_I_IMPL)); - } - - return addrUse; - }; - - if (remainderStrategy.Type == RemainderStrategy::FullBlock) - { - // We will reuse the existing block op's operands. Rebase the - // address off of the new local we created. - if (m_src->OperIs(GT_BLK, GT_FIELD)) - { - // Note that we should use 0 instead of addrBaseOffs here - // since this ends up as the address of the GT_FIELD node - // that already has the field offset. - m_src->AsUnOp()->gtOp1 = grabAddr(0); - } - else if (m_dst->OperIs(GT_BLK, GT_FIELD)) - { - // Like above, use 0 intentionally here. - m_dst->AsUnOp()->gtOp1 = grabAddr(0); - } - } - - // If the source involves replacements then do the struct op first -- - // otherwise we would overwrite the destination with stale bits. - // If the source does not involve replacements then CQ analysis shows - // that it's best to do it last. - if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements) - { - statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); - - if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - // We will introduce uses of the source below so this struct - // copy is no longer the last use if it was before. - m_src->gtFlags &= ~GTF_VAR_DEATH; - } - } - - if (needsNullCheck) - { - GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)); - PropagateIndirFlags(indir, indirFlags); - statements->AddStatement(indir); - } - - for (int i = 0; i < m_entries.Height(); i++) - { - const Entry& entry = m_entries.BottomRef(i); - - if (IsHandledByRemainder(entry, remainderStrategy)) - { - JITDUMP(" Skipping dst+%03u <- V%02u (%s); it is up-to-date in its struct local and will be handled " - "as part of the remainder\n", - entry.Offset, entry.FromReplacement->LclNum, entry.FromReplacement->Description); - continue; - } - - GenTree* dst; - if (entry.ToLclNum != BAD_VAR_NUM) - { - dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); - - if (m_compiler->lvaGetDesc(entry.ToLclNum)->lvIsStructField) - UpdateEarlyRefCount(m_compiler, dst); - } - else - { - assert(entry.FromLclNum != BAD_VAR_NUM); - - if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset; - // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX. 
- noway_assert(FitsIn<uint16_t>(offs));
- dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
- m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
- DEBUGARG(DoNotEnregisterReason::LocalField));
- UpdateEarlyRefCount(m_compiler, dst);
- }
- else
- {
- GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
- dst = m_compiler->gtNewIndir(entry.Type, addr);
- PropagateIndirFlags(dst, indirFlags);
- }
- }
-
- GenTree* src;
- if (entry.FromLclNum != BAD_VAR_NUM)
- {
- src = m_compiler->gtNewLclvNode(entry.FromLclNum, entry.Type);
-
- if (m_compiler->lvaGetDesc(entry.FromLclNum)->lvIsStructField)
- UpdateEarlyRefCount(m_compiler, src);
- }
- else
- {
- assert(entry.ToLclNum != BAD_VAR_NUM);
- if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
- {
- unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
- noway_assert(FitsIn<uint16_t>(offs));
- src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
- m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
- DEBUGARG(DoNotEnregisterReason::LocalField));
- UpdateEarlyRefCount(m_compiler, src);
- }
- else
- {
- GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
- src = m_compiler->gtNewIndir(entry.Type, addr);
- PropagateIndirFlags(src, indirFlags);
- }
- }
-
- statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
- }
-
- if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements)
- {
- statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src));
- }
-
- if (remainderStrategy.Type == RemainderStrategy::Primitive)
- {
- GenTree* dst;
- if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
- {
- GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon();
- dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType,
- dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
- m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
- }
- else
- {
- dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType,
- grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset));
- PropagateIndirFlags(dst, indirFlags);
- }
-
- GenTree* src;
- if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
- {
- GenTreeLclVarCommon* srcLcl = m_src->AsLclVarCommon();
- src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType,
- srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
- m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
- }
- else
- {
- src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType,
- grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset));
- PropagateIndirFlags(src, indirFlags);
- }
-
- statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
- }
-
- assert(numAddrUses == 0);
- }
-
- bool IsHandledByRemainder(const Entry& entry, const RemainderStrategy& remainderStrategy)
- {
- // If the remainder is being handled as a full block copy and this
- // replacement is up-to-date in its struct local then we can skip
- // copying the replacement explicitly.
- return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && - !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); - } - //------------------------------------------------------------------------ - // GetPropagatedIndirFlags: - // Convert GT_BLK or GT_FIELD indir flags into flags that should be - // propagated to derived GT_IND nodes. - // - // Parameters: - // indir - The indirection - // - // Returns: - // Flags to propagate to created derived GT_IND nodes. - // - GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) - { - assert(indir->OperIs(GT_BLK, GT_FIELD)); - if (indir->OperIs(GT_BLK)) - { - return indir->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); - } - - static_assert_no_msg(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); - return indir->gtFlags & GTF_IND_VOLATILE; - } - - //------------------------------------------------------------------------ - // PropagateIndirFlags: - // Propagate the specified flags to a GT_IND node. - // - // Parameters: - // indir - The indirection to apply flags to - // flags - The specified indirection flags. - // - void PropagateIndirFlags(GenTree* indir, GenTreeFlags flags) - { - if (genTypeSize(indir) == 1) - { - flags &= ~GTF_IND_UNALIGNED; - } - - indir->gtFlags |= flags; - } - - //------------------------------------------------------------------------ - // UpdateEarlyRefCount: - // Update early ref counts if necessary for the specified IR node. - // - // Parameters: - // comp - compiler instance - // candidate - the IR node that may be a local that should have its early - // ref counts updated. - // - static void UpdateEarlyRefCount(Compiler* comp, GenTree* candidate) - { - if (!candidate->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR)) - { - return; - } - - IncrementRefCount(comp, candidate->AsLclVarCommon()->GetLclNum()); - - LclVarDsc* varDsc = comp->lvaGetDesc(candidate->AsLclVarCommon()); - if (varDsc->lvIsStructField) - { - IncrementRefCount(comp, varDsc->lvParentLcl); - } - - if (varDsc->lvPromoted) - { - for (unsigned fldLclNum = varDsc->lvFieldLclStart; fldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; - fldLclNum++) - { - IncrementRefCount(comp, fldLclNum); - } - } - } - - //------------------------------------------------------------------------ - // IncrementRefCount: - // Increment the ref count for the specified local. - // - // Parameters: - // comp - compiler instance - // lclNum - the local - // - static void IncrementRefCount(Compiler* comp, unsigned lclNum) - { - LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); - varDsc->incLvRefCntSaturating(1, RCS_EARLY); - } - }; - //------------------------------------------------------------------------ // DecomposeAssignment: // Handle an assignment that may be between struct locals with replacements. 
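
A note on the DecompositionStatementList that the hunk above deletes: AddStatement prepends to an intrusive list, and ToCommaTree then wraps the list back-to-front in GT_COMMA nodes, so statements come out evaluated in the order they were added. A standalone model of that double reversal (illustrative only; Node stands in for GenTree and the labels are hypothetical):

    #include <cassert>
    #include <string>
    #include <vector>

    struct Node
    {
        std::string label;
        Node*       next = nullptr; // models the gtNext link used by the list
    };

    int main()
    {
        // AddStatement("A"), then "B", then "C": the head ends up C -> B -> A.
        Node a{"A"}, b{"B", &a}, c{"C", &b};
        Node* head = &c;

        // ToCommaTree starts with the head and wraps each older statement
        // around it as the first (evaluated-first) COMMA operand, so the
        // oldest statement becomes outermost.
        std::vector<std::string> evalOrder{head->label};
        for (Node* cur = head->next; cur != nullptr; cur = cur->next)
            evalOrder.insert(evalOrder.begin(), cur->label);

        assert((evalOrder == std::vector<std::string>{"A", "B", "C"}));
        return 0;
    }
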
From 6f782dd1384df864124005faaf51a748f47d1b46 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 3 May 2023 20:29:57 +0200
Subject: [PATCH 30/37] Extract to a new file

---
 src/coreclr/jit/CMakeLists.txt             |    1 +
 src/coreclr/jit/promotion.cpp              | 1100 +++---
 src/coreclr/jit/promotion.h                |  134 +-
 src/coreclr/jit/promotiondecomposition.cpp | 1749 ++++++++++++++++++++
 4 files changed, 2138 insertions(+), 846 deletions(-)
 create mode 100644 src/coreclr/jit/promotiondecomposition.cpp

diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt
index db3634b332dc06..ad1a30671fef9b 100644
--- a/src/coreclr/jit/CMakeLists.txt
+++ b/src/coreclr/jit/CMakeLists.txt
@@ -159,6 +159,7 @@ set( JIT_SOURCES
   patchpoint.cpp
   phase.cpp
   promotion.cpp
+  promotiondecomposition.cpp
   rangecheck.cpp
   rationalize.cpp
   redundantbranchopts.cpp
diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp
index 9071df7f774495..31c540cae7b268 100644
--- a/src/coreclr/jit/promotion.cpp
+++ b/src/coreclr/jit/promotion.cpp
@@ -92,92 +92,32 @@ struct Access
 };
 
 //------------------------------------------------------------------------
-// BinarySearch:
-//   Find first entry with an equal offset, or bitwise complement of first
-//   entry with a higher offset.
+// Replacement::Overlaps:
+//   Check if this replacement overlaps the specified range.
 //
 // Parameters:
-//   vec    - The vector to binary search in
-//   offset - The offset to search for
+//   otherStart - Start of the other range.
+//   otherSize  - Size of the other range.
 //
 // Returns:
-//   Index of the first entry with an equal offset, or bitwise complement of
-//   first entry with a higher offset.
+//   True if they overlap.
 //
-template <typename T, unsigned(T::*field)>
-static size_t BinarySearch(const jitstd::vector<T>& vec, unsigned offset)
+bool Replacement::Overlaps(unsigned otherStart, unsigned otherSize) const
 {
-    size_t min = 0;
-    size_t max = vec.size();
-    while (min < max)
+    unsigned end = Offset + genTypeSize(AccessType);
+    if (end <= otherStart)
     {
-        size_t mid = min + (max - min) / 2;
-        if (vec[mid].*field == offset)
-        {
-            while (mid > 0 && vec[mid - 1].*field == offset)
-            {
-                mid--;
-            }
-
-            return mid;
-        }
-        if (vec[mid].*field < offset)
-        {
-            min = mid + 1;
-        }
-        else
-        {
-            max = mid;
-        }
+        return false;
     }
 
-    return ~min;
-}
-
-// Represents a single replacement of a (field) access into a struct local.
-struct Replacement
-{
-    unsigned  Offset;
-    var_types AccessType;
-    unsigned  LclNum;
-    // Is the replacement local (given by LclNum) fresher than the value in the struct local?
-    bool NeedsWriteBack = true;
-    // Is the value in the struct local fresher than the replacement local?
-    // Note that the invariant is that this is always false at the entrance to
-    // a basic block, i.e. all predecessors would have read the replacement
-    // back before transferring control if necessary.
-    bool NeedsReadBack = false;
-#ifdef DEBUG
-    const char* Description;
-#endif
-
-    Replacement(unsigned offset, var_types accessType, unsigned lclNum DEBUGARG(const char* description))
-        : Offset(offset)
-        , AccessType(accessType)
-        , LclNum(lclNum)
-#ifdef DEBUG
-        , Description(description)
-#endif
+    unsigned otherEnd = otherStart + otherSize;
+    if (otherEnd <= Offset)
     {
+        return false;
     }
 
-    bool Overlaps(unsigned otherStart, unsigned otherSize) const
-    {
-        unsigned end = Offset + genTypeSize(AccessType);
-        if (end <= otherStart)
-        {
-            return false;
-        }
-
-        unsigned otherEnd = otherStart + otherSize;
-        if (otherEnd <= Offset)
-        {
-            return false;
-        }
-
-        return true;
-    }
-};
+    return true;
+}
 
 //------------------------------------------------------------------------
 // CreateWriteBack:
@@ -194,7 +134,7 @@ struct Replacement
 // Returns:
 //   IR node.
 //
-static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement)
+GenTree* Promotion::CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement)
 {
     GenTree* dst = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset);
     GenTree* src = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType));
@@ -217,7 +157,7 @@ static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const
 // Returns:
 //   IR node.
 //
-static GenTree* CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement)
+GenTree* Promotion::CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement)
 {
     GenTree* dst = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType));
     GenTree* src = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset);
@@ -289,7 +229,7 @@ class LocalUses
         size_t index = 0;
         if (m_accesses.size() > 0)
         {
-            index = BinarySearch<Access, &Access::Offset>(m_accesses, offs);
+            index = Promotion::BinarySearch<Access, &Access::Offset>(m_accesses, offs);
             if ((ssize_t)index >= 0)
             {
                 do
@@ -724,850 +664,320 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
     }
 };
 
-class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
+Compiler::fgWalkResult ReplaceVisitor::PostOrderVisit(GenTree** use, GenTree* user)
 {
-    Promotion*                    m_prom;
-    jitstd::vector<Replacement>** m_replacements;
-    bool                          m_madeChanges = false;
+    GenTree* tree = *use;
 
-public:
-    enum
+    if (tree->OperIs(GT_ASG))
     {
-        DoPostOrder       = true,
-        UseExecutionOrder = true,
-    };
+        // If LHS of the ASG was a local then we skipped it as we don't
+        // want to see it until after the RHS.
+        if (tree->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+        {
+            ReplaceLocal(&tree->AsOp()->gtOp1, tree);
+        }
 
-    ReplaceVisitor(Promotion* prom, jitstd::vector<Replacement>** replacements)
-        : GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements)
-    {
+        // Assignments can be decomposed directly into accesses of the replacements.
+        HandleAssignment(use, user);
+        return fgWalkResult::WALK_CONTINUE;
     }
 
-    bool MadeChanges()
+    if (tree->OperIs(GT_CALL))
     {
-        return m_madeChanges;
+        // Calls need to store replacements back into the struct local for args
+        // and need to restore replacements from the result (for
+        // retbufs/returns).
+        LoadStoreAroundCall((*use)->AsCall(), user);
+        return fgWalkResult::WALK_CONTINUE;
     }
 
-    void Reset()
+    if (tree->OperIs(GT_RETURN))
     {
-        m_madeChanges = false;
+        // Returns need to store replacements back into the struct local.
+ StoreBeforeReturn((*use)->AsUnOp()); + return fgWalkResult::WALK_CONTINUE; } - fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + // Skip the local on the LHS of ASGs when we see it in the normal tree + // visit; we handle it as part of the parent ASG instead. + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && + ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree))) { - GenTree* tree = *use; - - if (tree->OperIs(GT_ASG)) - { - // If LHS of the ASG was a local then we skipped it as we don't - // want to see it until after the RHS. - if (tree->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - ReplaceLocal(&tree->AsOp()->gtOp1, tree); - } - - // Assignments can be decomposed directly into accesses of the replacements. - DecomposeAssignment(use, user); - return fgWalkResult::WALK_CONTINUE; - } - - if (tree->OperIs(GT_CALL)) - { - // Calls need to store replacements back into the struct local for args - // and need to restore replacements from the result (for - // retbufs/returns). - LoadStoreAroundCall((*use)->AsCall(), user); - return fgWalkResult::WALK_CONTINUE; - } - - if (tree->OperIs(GT_RETURN)) - { - // Returns need to store replacements back into the struct local. - StoreBeforeReturn((*use)->AsUnOp()); - return fgWalkResult::WALK_CONTINUE; - } - - // Skip the local on the LHS of ASGs when we see it in the normal tree - // visit; we handle it as part of the parent ASG instead. - if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && - ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree))) - { - ReplaceLocal(use, user); - return fgWalkResult::WALK_CONTINUE; - } - + ReplaceLocal(use, user); return fgWalkResult::WALK_CONTINUE; } - //------------------------------------------------------------------------ - // DecomposeAssignment: - // Handle an assignment that may be between struct locals with replacements. - // - // Parameters: - // asg - The assignment - // user - The user of the assignment. - // - void DecomposeAssignment(GenTree** use, GenTree* user) - { - GenTreeOp* asg = (*use)->AsOp(); + return fgWalkResult::WALK_CONTINUE; +} - if (!asg->gtGetOp1()->TypeIs(TYP_STRUCT)) +//------------------------------------------------------------------------ +// LoadStoreAroundCall: +// Handle a call that may involve struct local arguments and that may +// pass a struct local with replacements as the retbuf. +// +// Parameters: +// call - The call +// user - The user of the call. +// +void ReplaceVisitor::LoadStoreAroundCall(GenTreeCall* call, GenTree* user) +{ + CallArg* retBufArg = nullptr; + for (CallArg& arg : call->gtArgs.Args()) + { + if (arg.GetWellKnownArg() == WellKnownArg::RetBuffer) { - return; + retBufArg = &arg; + continue; } - GenTree* dst = asg->gtGetOp1(); - assert(!dst->OperIs(GT_COMMA)); - GenTree* src = asg->gtGetOp2()->gtEffectiveVal(); - - GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; - GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? 
src->AsLclVarCommon() : nullptr; - - Replacement* dstFirstRep = nullptr; - Replacement* dstEndRep = nullptr; - bool dstInvolvesReplacements = (dstLcl != nullptr) && OverlappingReplacements(dstLcl, &dstFirstRep, &dstEndRep); - Replacement* srcFirstRep = nullptr; - Replacement* srcEndRep = nullptr; - bool srcInvolvesReplacements = (srcLcl != nullptr) && OverlappingReplacements(srcLcl, &srcFirstRep, &srcEndRep); - - if (!dstInvolvesReplacements && !srcInvolvesReplacements) + if (!arg.GetNode()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - return; + continue; } - JITDUMP("Processing block operation [%06u] that involves replacements\n", Compiler::dspTreeID(asg)); + GenTreeLclVarCommon* argNodeLcl = arg.GetNode()->AsLclVarCommon(); - if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) || src->IsConstInitVal()) + if (argNodeLcl->TypeIs(TYP_STRUCT)) { - DecompositionStatementList result; - EliminateCommasInBlockOp(asg, &result); - - if (dstInvolvesReplacements) - { - unsigned dstLclOffs = dstLcl->GetLclOffs(); - unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); - if (dstFirstRep->Offset < dstLclOffs) - { - if (dstFirstRep->NeedsWriteBack) - { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " - "read-backs are " - "necessary.\n", - dstFirstRep->LclNum, dstFirstRep->Description); - // The value of the replacement will be partially assembled from its old value and this struct - // operation. - // We accomplish this by an initial write back, the struct copy, followed by a later read back. - // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. - result.AddStatement(CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstFirstRep)); - - dstFirstRep->NeedsWriteBack = false; - } - - dstFirstRep->NeedsReadBack = true; - dstFirstRep++; - } - - if (dstEndRep > dstFirstRep) - { - Replacement* dstLastRep = dstEndRep - 1; - if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) - { - if (dstLastRep->NeedsWriteBack) - { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " - "read-backs are " - "necessary.\n", - dstLastRep->LclNum, dstLastRep->Description); - result.AddStatement(CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstLastRep)); - - dstLastRep->NeedsWriteBack = false; - } - - dstLastRep->NeedsReadBack = true; - dstEndRep--; - } - } - } - - if (srcInvolvesReplacements) - { - unsigned srcLclOffs = srcLcl->GetLclOffs(); - unsigned srcLclSize = srcLcl->GetLayout(m_compiler)->GetSize(); - - if (srcFirstRep->Offset < srcLclOffs) - { - if (srcFirstRep->NeedsWriteBack) - { - JITDUMP( - "*** Block operation partially overlaps with source V%02u (%s). Write back is necessary.\n", - srcFirstRep->LclNum, srcFirstRep->Description); - - result.AddStatement(CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcFirstRep)); - - srcFirstRep->NeedsWriteBack = false; - } - - srcFirstRep++; - } - - if (srcEndRep > srcFirstRep) - { - Replacement* srcLastRep = srcEndRep - 1; - if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) - { - if (srcLastRep->NeedsWriteBack) - { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). 
Write back is " - "necessary.\n", - srcLastRep->LclNum, srcLastRep->Description); - - result.AddStatement(CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcLastRep)); - srcLastRep->NeedsWriteBack = false; - } - - srcEndRep--; - } - } - } - - DecompositionPlan plan(m_compiler, dst, src, srcInvolvesReplacements); - - if (src->IsConstInitVal()) - { - InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, &plan); - } - else - { - CopyBetweenFields(dst, dstFirstRep, dstEndRep, src, srcFirstRep, srcEndRep, &result, &plan); - } - - plan.Finalize(&result); - - *use = result.ToCommaTree(m_compiler); - m_madeChanges = true; - } - else - { - if (asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - GenTreeLclVarCommon* rhsLcl = asg->gtGetOp2()->AsLclVarCommon(); - unsigned size = rhsLcl->GetLayout(m_compiler)->GetSize(); - WriteBackBefore(&asg->gtOp2, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); - } - - if (asg->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - GenTreeLclVarCommon* lhsLcl = asg->gtGetOp1()->AsLclVarCommon(); - unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); - MarkForReadBack(lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size); - } + unsigned size = argNodeLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(&arg.EarlyNodeRef(), argNodeLcl->GetLclNum(), argNodeLcl->GetLclOffs(), size); } } - //------------------------------------------------------------------------ - // InitFields: - // Add entries into the plan specifying which replacements can be - // directly inited, and mark the other ones as requiring read back. - // - // Parameters: - // dst - Destination local that involves replacement. - // firstRep - The first replacement. - // endRep - End of the replacements. - // plan - Decomposition plan to add initialization entries into. - // - void InitFields(GenTreeLclVarCommon* dst, Replacement* firstRep, Replacement* endRep, DecompositionPlan* plan) + if (call->IsOptimizingRetBufAsLocal()) { - for (Replacement* rep = firstRep; rep < endRep; rep++) - { - if (!plan->CanInitPrimitive(rep->AccessType)) - { - JITDUMP(" Unsupported init of %s %s. Will init as struct and read back.\n", - varTypeName(rep->AccessType), rep->Description); - - // We will need to read this one back after initing the struct. - rep->NeedsWriteBack = false; - rep->NeedsReadBack = true; - continue; - } + assert(retBufArg != nullptr); + assert(retBufArg->GetNode()->OperIs(GT_LCL_ADDR)); + GenTreeLclVarCommon* retBufLcl = retBufArg->GetNode()->AsLclVarCommon(); + unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize(); - JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); - plan->InitReplacement(rep, rep->Offset - dst->GetLclOffs()); - rep->NeedsWriteBack = true; - rep->NeedsReadBack = false; - } + MarkForReadBack(retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size); } +} - //------------------------------------------------------------------------ - // CopyBetweenFields: - // Copy between two struct locals that may involve replacements. - // - // Parameters: - // dst - Destination node - // dstFirstRep - First replacement of the destination or nullptr if destination is not a promoted local. - // dstEndRep - One past last replacement of the destination. - // src - Source node - // srcFirstRep - First replacement of the source or nullptr if source is not a promoted local. - // srcEndRep - One past last replacement of the source. - // statements - Statement list to add potential "init" statements to. 
- // plan - Data structure that tracks the specific copies to be done. - // - void CopyBetweenFields(GenTree* dst, - Replacement* dstFirstRep, - Replacement* dstEndRep, - GenTree* src, - Replacement* srcFirstRep, - Replacement* srcEndRep, - DecompositionStatementList* statements, - DecompositionPlan* plan) +//------------------------------------------------------------------------ +// ReplaceLocal: +// Handle a local that may need to be replaced. +// +// Parameters: +// use - The use of the local +// user - The user of the local. +// +// Notes: +// This usually amounts to making a replacement like +// +// LCL_FLD int V00 [+8] -> LCL_VAR int V10. +// +// In some cases we may have a pending read back, meaning that the +// replacement local is out-of-date compared to the struct local. +// In that case we also need to insert IR to read it back. +// This happens for example if the struct local was just assigned from a +// call or via a block copy. +// +void ReplaceVisitor::ReplaceLocal(GenTree** use, GenTree* user) +{ + GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); + unsigned lclNum = lcl->GetLclNum(); + if (m_replacements[lclNum] == nullptr) { - assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); - - GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; - GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; - unsigned dstBaseOffs = dstLcl != nullptr ? dstLcl->GetLclOffs() : 0; - unsigned srcBaseOffs = srcLcl != nullptr ? srcLcl->GetLclOffs() : 0; - - LclVarDsc* dstDsc = dstLcl != nullptr ? m_compiler->lvaGetDesc(dstLcl) : nullptr; - LclVarDsc* srcDsc = srcLcl != nullptr ? m_compiler->lvaGetDesc(srcLcl) : nullptr; - - Replacement* dstRep = dstFirstRep; - Replacement* srcRep = srcFirstRep; - - while ((dstRep < dstEndRep) || (srcRep < srcEndRep)) - { - if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) - { - JITDUMP(" Source replacement V%02u (%s) is stale. Will read it back before copy.\n", srcRep->LclNum, - srcRep->Description); - - assert(srcLcl != nullptr); - statements->AddStatement(CreateReadBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); - srcRep->NeedsReadBack = false; - assert(!srcRep->NeedsWriteBack); - } - - if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) - { - if (srcRep->Offset - srcBaseOffs + genTypeSize(srcRep->AccessType) < dstRep->Offset - dstBaseOffs) - { - // This source replacement ends before the next destination replacement starts. - // Write it directly to the destination struct local. - unsigned offs = srcRep->Offset - srcBaseOffs; - plan->CopyFromReplacement(srcRep, offs); - JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); - srcRep++; - continue; - } - - if (dstRep->Offset - dstBaseOffs + genTypeSize(dstRep->AccessType) < srcRep->Offset - srcBaseOffs) - { - // Destination replacement ends before the next source replacement starts. - // Read it directly from the source struct local. - unsigned offs = dstRep->Offset - dstBaseOffs; - plan->CopyToReplacement(dstRep, offs); - JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; - dstRep++; - continue; - } - - // Overlap. Check for exact match of replacements. - // TODO-CQ: Allow copies between small types of different signs, and between TYP_I_IMPL/TYP_BYREF? 
-                if (((dstRep->Offset - dstBaseOffs) == (srcRep->Offset - srcBaseOffs)) &&
-                    (dstRep->AccessType == srcRep->AccessType))
-                {
-                    plan->CopyBetweenReplacements(dstRep, srcRep, dstRep->Offset - dstBaseOffs);
-                    JITDUMP("  V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum,
-                            srcRep->Description);
-
-                    dstRep->NeedsWriteBack = true;
-                    dstRep->NeedsReadBack  = false;
-                    dstRep++;
-                    srcRep++;
-                    continue;
-                }
-
-                // Partial overlap. Write source back to the struct local. We
-                // will handle the destination replacement in a future
-                // iteration of the loop.
-                statements->AddStatement(CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcRep));
-                JITDUMP("  Partial overlap of V%02u (%s) <- V%02u (%s). Will read source back before copy\n",
-                        dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description);
-                srcRep++;
-                continue;
-            }
-
-            if (dstRep < dstEndRep)
-            {
-                unsigned offs = dstRep->Offset - dstBaseOffs;
-
-                if ((srcDsc != nullptr) && srcDsc->lvPromoted)
-                {
-                    unsigned srcOffs  = srcLcl->GetLclOffs() + offs;
-                    unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs);
+        return;
+    }
 
-                    if (fieldLcl != BAD_VAR_NUM)
-                    {
-                        LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl);
-                        if (dsc->lvType == dstRep->AccessType)
-                        {
-                            plan->CopyBetweenReplacements(dstRep, fieldLcl, offs);
-                            JITDUMP("  V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason);
-                            dstRep->NeedsWriteBack = true;
-                            dstRep->NeedsReadBack  = false;
-                            dstRep++;
-                            continue;
-                        }
-                    }
-                }
+    jitstd::vector<Replacement>& replacements = *m_replacements[lclNum];
 
-                // TODO-CQ: If the source is promoted then this will result in
-                // DNER'ing it. Alternatively we could copy the promoted field
-                // directly to the destination's struct local and mark the
-                // overlapping fields as needing read back to avoid this DNER.
-                plan->CopyToReplacement(dstRep, offs);
-                JITDUMP("  V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs);
-                dstRep->NeedsWriteBack = true;
-                dstRep->NeedsReadBack  = false;
-                dstRep++;
-            }
-            else
-            {
-                assert(srcRep < srcEndRep);
-                unsigned offs = srcRep->Offset - srcBaseOffs;
-                if ((dstDsc != nullptr) && dstDsc->lvPromoted)
-                {
-                    unsigned dstOffs  = dstLcl->GetLclOffs() + offs;
-                    unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs);
-
-                    if (fieldLcl != BAD_VAR_NUM)
-                    {
-                        LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl);
-                        if (dsc->lvType == srcRep->AccessType)
-                        {
-                            plan->CopyBetweenReplacements(fieldLcl, srcRep, offs);
-                            JITDUMP("  V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum,
-                                    srcRep->Description);
-                            srcRep++;
-                            continue;
-                        }
-                    }
-                }
+    unsigned  offs       = lcl->GetLclOffs();
+    var_types accessType = lcl->TypeGet();
 
-                plan->CopyFromReplacement(srcRep, offs);
-                JITDUMP("  dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description);
-                srcRep++;
-            }
-        }
+#ifdef DEBUG
+    if (accessType == TYP_STRUCT)
+    {
+        assert((user == nullptr) || user->OperIs(GT_ASG, GT_CALL, GT_RETURN));
     }
-
-    //------------------------------------------------------------------------
-    // EliminateCommasInBlockOp:
-    //   Ensure that the sources of a block op are not commas by extracting side effects.
-    //
-    // Parameters:
-    //   asg    - The block op
-    //   result - Statement list to add resulting statements to.
-    //
-    // Remarks:
-    //   Works similarly to MorphInitBlockHelper::EliminateCommas.
-    //
-    void EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionStatementList* result)
+    else
     {
-        bool     any = false;
-        GenTree* lhs = asg->gtGetOp1();
-        assert(lhs->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_BLK));
-
-        GenTree* rhs = asg->gtGetOp2();
-
-        if (asg->IsReverseOp())
-        {
-            while (rhs->OperIs(GT_COMMA))
-            {
-                result->AddStatement(rhs->gtGetOp1());
-                rhs = rhs->gtGetOp2();
-                any = true;
-            }
-        }
-        else
+        ClassLayout* accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr;
+        unsigned     accessSize   = accessLayout != nullptr ? accessLayout->GetSize() : genTypeSize(accessType);
+        for (const Replacement& rep : replacements)
         {
-            if (lhs->OperIsUnary() && rhs->OperIs(GT_COMMA))
-            {
-                GenTree* addr = lhs->gtGetOp1();
-                // Note that GTF_GLOB_REF is not up to date here, hence we need
-                // a tree walk to find address exposed locals.
-                if (((addr->gtFlags & GTF_ALL_EFFECT) != 0) ||
-                    (((rhs->gtFlags & GTF_ASG) != 0) && !addr->IsInvariant()) ||
-                    m_compiler->gtHasAddressExposedLocals(addr))
-                {
-                    unsigned lhsAddrLclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Block morph LHS addr"));
-
-                    result->AddStatement(m_compiler->gtNewTempAssign(lhsAddrLclNum, addr));
-                    lhs->AsUnOp()->gtOp1 = m_compiler->gtNewLclvNode(lhsAddrLclNum, genActualType(addr));
-                    m_compiler->gtUpdateNodeSideEffects(lhs);
-                    m_madeChanges = true;
-                    any           = true;
-                }
-            }
-
-            while (rhs->OperIs(GT_COMMA))
-            {
-                result->AddStatement(rhs->gtGetOp1());
-                rhs = rhs->gtGetOp2();
-                any = true;
-            }
+            assert(!rep.Overlaps(offs, accessSize) || ((rep.Offset == offs) && (rep.AccessType == accessType)));
         }
 
-        if (any)
-        {
-            asg->gtOp2 = rhs;
-            m_compiler->gtUpdateNodeSideEffects(asg);
-            m_madeChanges = true;
-        }
+        assert((accessType != TYP_STRUCT) || (accessLayout != nullptr));
+        JITDUMP("Processing use [%06u] of V%02u.[%03u..%03u)\n", Compiler::dspTreeID(lcl), lclNum, offs,
+                offs + accessSize);
     }
+#endif
 
-    //------------------------------------------------------------------------
-    // OverlappingReplacements:
-    //   Find replacements that overlap the specified struct local.
-    //
-    // Parameters:
-    //   lcl              - A struct local
-    //   firstReplacement - [out] The first replacement that overlaps
-    //   endReplacement   - [out, optional] One past the last replacement that overlaps
-    //
-    // Returns:
-    //   True if any replacement overlaps; otherwise false.
-    //
-    bool OverlappingReplacements(GenTreeLclVarCommon* lcl,
-                                 Replacement**        firstReplacement,
-                                 Replacement**        endReplacement = nullptr)
+    if (accessType == TYP_STRUCT)
     {
-        if (m_replacements[lcl->GetLclNum()] == nullptr)
-        {
-            return false;
-        }
-
-        jitstd::vector<Replacement>& replacements = *m_replacements[lcl->GetLclNum()];
-
-        unsigned offs       = lcl->GetLclOffs();
-        unsigned size       = lcl->GetLayout(m_compiler)->GetSize();
-        size_t   firstIndex = BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
-        if ((ssize_t)firstIndex < 0)
-        {
-            firstIndex = ~firstIndex;
-            if (firstIndex > 0)
-            {
-                Replacement& lastRepBefore = replacements[firstIndex - 1];
-                if ((lastRepBefore.Offset + genTypeSize(lastRepBefore.AccessType)) > offs)
-                {
-                    // Overlap with last entry starting before offs.
-                    firstIndex--;
-                }
-                else if (firstIndex >= replacements.size())
-                {
-                    // Starts after last replacement ends.
-                    return false;
-                }
-            }
-
-            const Replacement& first = replacements[firstIndex];
-            if (first.Offset >= (offs + size))
-            {
-                // First candidate starts after this ends.
-                return false;
-            }
-        }
-
-        assert((firstIndex < replacements.size()) && replacements[firstIndex].Overlaps(offs, size));
-        *firstReplacement = &replacements[firstIndex];
-
-        if (endReplacement != nullptr)
-        {
-            size_t lastIndex = BinarySearch<Replacement, &Replacement::Offset>(replacements, offs + size);
-            if ((ssize_t)lastIndex < 0)
-            {
-                lastIndex = ~lastIndex;
-            }
-
-            // Since we verified above that there is an overlapping replacement
-            // we know that lastIndex exists and is the next one that does not
-            // overlap.
-            assert(lastIndex > 0);
-            *endReplacement = replacements.data() + lastIndex;
-        }
-
-        return true;
+        // Will be handled once we get to the parent.
+        return;
     }
 
-    //------------------------------------------------------------------------
-    // LoadStoreAroundCall:
-    //   Handle a call that may involve struct local arguments and that may
-    //   pass a struct local with replacements as the retbuf.
-    //
-    // Parameters:
-    //   call - The call
-    //   user - The user of the call.
-    //
-    void LoadStoreAroundCall(GenTreeCall* call, GenTree* user)
+    size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
+    if ((ssize_t)index < 0)
     {
-        CallArg* retBufArg = nullptr;
-        for (CallArg& arg : call->gtArgs.Args())
-        {
-            if (arg.GetWellKnownArg() == WellKnownArg::RetBuffer)
-            {
-                retBufArg = &arg;
-                continue;
-            }
-
-            if (!arg.GetNode()->OperIs(GT_LCL_VAR, GT_LCL_FLD))
-            {
-                continue;
-            }
-
-            GenTreeLclVarCommon* argNodeLcl = arg.GetNode()->AsLclVarCommon();
-
-            if (argNodeLcl->TypeIs(TYP_STRUCT))
-            {
-                unsigned size = argNodeLcl->GetLayout(m_compiler)->GetSize();
-                WriteBackBefore(&arg.EarlyNodeRef(), argNodeLcl->GetLclNum(), argNodeLcl->GetLclOffs(), size);
-            }
-        }
+        // Access that we don't have a replacement for.
+        return;
+    }
 
-        if (call->IsOptimizingRetBufAsLocal())
-        {
-            assert(retBufArg != nullptr);
-            assert(retBufArg->GetNode()->OperIs(GT_LCL_ADDR));
-            GenTreeLclVarCommon* retBufLcl = retBufArg->GetNode()->AsLclVarCommon();
-            unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize();
+    Replacement& rep = replacements[index];
+    assert(accessType == rep.AccessType);
+    JITDUMP("  ..replaced with promoted lcl V%02u\n", rep.LclNum);
+    *use = m_compiler->gtNewLclvNode(rep.LclNum, accessType);
 
-            MarkForReadBack(retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size);
-        }
+    if ((lcl->gtFlags & GTF_VAR_DEF) != 0)
+    {
+        rep.NeedsWriteBack = true;
+        rep.NeedsReadBack  = false;
     }
 
-    //------------------------------------------------------------------------
-    // ReplaceLocal:
-    //   Handle a local that may need to be replaced.
-    //
-    // Parameters:
-    //   use - The use of the local
-    //   user - The user of the local.
-    //
-    // Notes:
-    //   This usually amounts to making a replacement like
-    //
-    //       LCL_FLD int V00 [+8] -> LCL_VAR int V10.
-    //
-    //   In some cases we may have a pending read back, meaning that the
-    //   replacement local is out-of-date compared to the struct local.
-    //   In that case we also need to insert IR to read it back.
-    //   This happens for example if the struct local was just assigned from a
-    //   call or via a block copy.
-    //
-    void ReplaceLocal(GenTree** use, GenTree* user)
+    else if (rep.NeedsReadBack)
     {
-        GenTreeLclVarCommon* lcl    = (*use)->AsLclVarCommon();
-        unsigned             lclNum = lcl->GetLclNum();
-        if (m_replacements[lclNum] == nullptr)
-        {
-            return;
-        }
-
-        jitstd::vector<Replacement>& replacements = *m_replacements[lclNum];
-
-        unsigned  offs       = lcl->GetLclOffs();
-        var_types accessType = lcl->TypeGet();
-
-#ifdef DEBUG
-        if (accessType == TYP_STRUCT)
-        {
-            assert((user == nullptr) || user->OperIs(GT_ASG, GT_CALL, GT_RETURN));
-        }
-        else
-        {
-            ClassLayout* accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr;
-            unsigned accessSize = accessLayout != nullptr ? accessLayout->GetSize() : genTypeSize(accessType);
-            for (const Replacement& rep : replacements)
-            {
-                assert(!rep.Overlaps(offs, accessSize) || ((rep.Offset == offs) && (rep.AccessType == accessType)));
-            }
-
-            assert((accessType != TYP_STRUCT) || (accessLayout != nullptr));
-            JITDUMP("Processing use [%06u] of V%02u.[%03u..%03u)\n", Compiler::dspTreeID(lcl), lclNum, offs,
-                    offs + accessSize);
-        }
-#endif
-
-        if (accessType == TYP_STRUCT)
-        {
-            // Will be handled once we get to the parent.
-            return;
-        }
-
-        size_t index = BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
-        if ((ssize_t)index < 0)
-        {
-            // Access that we don't have a replacement for.
-            return;
-        }
-
-        Replacement& rep = replacements[index];
-        assert(accessType == rep.AccessType);
-        JITDUMP("  ..replaced with promoted lcl V%02u\n", rep.LclNum);
-        *use = m_compiler->gtNewLclvNode(rep.LclNum, accessType);
+        *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(),
+                                         Promotion::CreateReadBack(m_compiler, lclNum, rep), *use);
+        rep.NeedsReadBack = false;
+
+        // TODO-CQ: Local copy prop does not take into account that the
+        // uses of LCL_VAR occur at the user, which means it may introduce
+        // illegally overlapping lifetimes, such as:
+        //
+        // └──▌ ADD       int
+        //    ├──▌ LCL_VAR   int    V10 tmp6        -> copy propagated to [V35 tmp31]
+        //    └──▌ COMMA     int
+        //       ├──▌ ASG       int
+        //       │  ├──▌ LCL_VAR   int    V35 tmp31
+        //       │  └──▌ LCL_FLD   int    V03 loc1         [+4]
+        // This really ought to be handled by local copy prop, but the way it works during
+        // morph makes it hard to fix there.
+        //
+        // This is the short term fix. Long term fixes may be:
+        // 1. Fix local copy prop
+        // 2. Teach LSRA to allow the above cases, simplifying IR concepts (e.g.
+        //    introduce something like GT_COPY on top of LCL_VAR when they
+        //    need to be "defs")
+        // 3. Change the pass here to avoid creating any embedded assignments by making use
+        //    of gtSplitTree. We will only need to split in very edge cases since the point
+        //    at which the replacement was marked as needing read back is practically always
+        //    going to be in a previous statement, so this shouldn't be too bad for CQ.
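Schematically (an illustration with invented local numbers, mirroring the dump style used above), the read-back wrapping performed here turns a stale use

    ▌  LCL_VAR   int    V10

into

    ▌  COMMA     int
    ├──▌  ASG       int
    │  ├──▌  LCL_VAR   int    V10
    │  └──▌  LCL_FLD   int    V03          [+4]
    └──▌  LCL_VAR   int    V10

so the replacement local is refreshed from the struct local immediately before the use, which is exactly the embedded assignment the TODO above is about.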
+ + m_compiler->lvaGetDesc(rep.LclNum)->lvRedefinedInEmbeddedStatement = true; + } - if ((lcl->gtFlags & GTF_VAR_DEF) != 0) - { - rep.NeedsWriteBack = true; - rep.NeedsReadBack = false; - } - else if (rep.NeedsReadBack) - { - *use = - m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), CreateReadBack(m_compiler, lclNum, rep), *use); - rep.NeedsReadBack = false; - - // TODO-CQ: Local copy prop does not take into account that the - // uses of LCL_VAR occur at the user, which means it may introduce - // illegally overlapping lifetimes, such as: - // - // └──▌ ADD int - // ├──▌ LCL_VAR int V10 tmp6 -> copy propagated to [V35 tmp31] - // └──▌ COMMA int - // ├──▌ ASG int - // │ ├──▌ LCL_VAR int V35 tmp31 - // │ └──▌ LCL_FLD int V03 loc1 [+4] - // This really ought to be handled by local copy prop, but the way it works during - // morph makes it hard to fix there. - // - // This is the short term fix. Long term fixes may be: - // 1. Fix local copy prop - // 2. Teach LSRA to allow the above cases, simplifying IR concepts (e.g. - // introduce something like GT_COPY on top of LCL_VAR when they - // need to be "defs") - // 3. Change the pass here to avoid creating any embedded assignments by making use - // of gtSplitTree. We will only need to split in very edge cases since the point - // at which the replacement was marked as needing read back is practically always - // going to be in a previous statement, so this shouldn't be too bad for CQ. - - m_compiler->lvaGetDesc(rep.LclNum)->lvRedefinedInEmbeddedStatement = true; - } + m_madeChanges = true; +} - m_madeChanges = true; +//------------------------------------------------------------------------ +// StoreBeforeReturn: +// Handle a return of a potential struct local. +// +// Parameters: +// ret - The GT_RETURN node +// +void ReplaceVisitor::StoreBeforeReturn(GenTreeUnOp* ret) +{ + if (ret->TypeIs(TYP_VOID) || !ret->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + return; } - //------------------------------------------------------------------------ - // StoreBeforeReturn: - // Handle a return of a potential struct local. - // - // Parameters: - // ret - The GT_RETURN node - // - void StoreBeforeReturn(GenTreeUnOp* ret) + GenTreeLclVarCommon* retLcl = ret->gtGetOp1()->AsLclVarCommon(); + if (retLcl->TypeIs(TYP_STRUCT)) { - if (ret->TypeIs(TYP_VOID) || !ret->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - return; - } - - GenTreeLclVarCommon* retLcl = ret->gtGetOp1()->AsLclVarCommon(); - if (retLcl->TypeIs(TYP_STRUCT)) - { - unsigned size = retLcl->GetLayout(m_compiler)->GetSize(); - WriteBackBefore(&ret->gtOp1, retLcl->GetLclNum(), retLcl->GetLclOffs(), size); - } + unsigned size = retLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(&ret->gtOp1, retLcl->GetLclNum(), retLcl->GetLclOffs(), size); } +} - //------------------------------------------------------------------------ - // WriteBackBefore: - // Update the use with IR that writes back all necessary overlapping - // replacements into a struct local. 
-    //
-    // Parameters:
-    //   use  - The use, which will be updated with a cascading comma trees of assignments
-    //   lcl  - The struct local
-    //   offs - The starting offset into the struct local of the overlapping range to write back to
-    //   size - The size of the overlapping range
-    //
-    void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size)
+//
+// Parameters:
+//   use  - The use, which will be updated with cascading comma trees of assignments
+//   lcl  - The struct local
+//   offs - The starting offset into the struct local of the overlapping range to write back to
+//   size - The size of the overlapping range
+//
+void ReplaceVisitor::WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size)
+{
+    if (m_replacements[lcl] == nullptr)
     {
-        if (m_replacements[lcl] == nullptr)
-        {
-            return;
-        }
+        return;
+    }
 
-        jitstd::vector<Replacement>& replacements = *m_replacements[lcl];
-        size_t index = BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
+    jitstd::vector<Replacement>& replacements = *m_replacements[lcl];
+    size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
 
-        if ((ssize_t)index < 0)
+    if ((ssize_t)index < 0)
+    {
+        index = ~index;
+        if ((index > 0) && replacements[index - 1].Overlaps(offs, size))
         {
-            index = ~index;
-            if ((index > 0) && replacements[index - 1].Overlaps(offs, size))
-            {
-                index--;
-            }
+            index--;
         }
+    }
 
-        unsigned end = offs + size;
-        while ((index < replacements.size()) && (replacements[index].Offset < end))
+    unsigned end = offs + size;
+    while ((index < replacements.size()) && (replacements[index].Offset < end))
+    {
+        Replacement& rep = replacements[index];
+        if (rep.NeedsWriteBack)
         {
-            Replacement& rep = replacements[index];
-            if (rep.NeedsWriteBack)
-            {
-                GenTreeOp* comma =
-                    m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), CreateWriteBack(m_compiler, lcl, rep), *use);
-                *use = comma;
-                use  = &comma->gtOp2;
+            GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(),
+                                                         Promotion::CreateWriteBack(m_compiler, lcl, rep), *use);
+            *use = comma;
+            use  = &comma->gtOp2;
 
-                rep.NeedsWriteBack = false;
-                m_madeChanges = true;
-            }
+            rep.NeedsWriteBack = false;
+            m_madeChanges      = true;
+        }
 
-            index++;
-        }
+        index++;
     }
+}
 
-    //------------------------------------------------------------------------
-    // MarkForReadBack:
-    //   Mark that replacements in the specified struct local need to be read
-    //   back before their next use.
-    //
-    // Parameters:
-    //   lcl  - The struct local
-    //   offs - The starting offset of the range in the struct local that needs to be read back from.
-    //   size - The size of the range
-    //
-    void MarkForReadBack(unsigned lcl, unsigned offs, unsigned size)
+//------------------------------------------------------------------------
+// MarkForReadBack:
+//   Mark that replacements in the specified struct local need to be read
+//   back before their next use.
+//
+// Parameters:
+//   lcl  - The struct local
+//   offs - The starting offset of the range in the struct local that needs to be read back from.
+//   size - The size of the range
+//
+void ReplaceVisitor::MarkForReadBack(unsigned lcl, unsigned offs, unsigned size)
+{
+    if (m_replacements[lcl] == nullptr)
     {
-        if (m_replacements[lcl] == nullptr)
-        {
-            return;
-        }
+        return;
+    }
 
-        jitstd::vector<Replacement>& replacements = *m_replacements[lcl];
-        size_t index = BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
+    jitstd::vector<Replacement>& replacements = *m_replacements[lcl];
+    size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
 
-        if ((ssize_t)index < 0)
+    if ((ssize_t)index < 0)
+    {
+        index = ~index;
+        if ((index > 0) && replacements[index - 1].Overlaps(offs, size))
         {
-            index = ~index;
-            if ((index > 0) && replacements[index - 1].Overlaps(offs, size))
-            {
-                index--;
-            }
+            index--;
         }
+    }
 
-        bool result = false;
-        unsigned end = offs + size;
-        while ((index < replacements.size()) && (replacements[index].Offset < end))
-        {
-            result = true;
-            Replacement& rep = replacements[index];
-            assert(rep.Overlaps(offs, size));
-            rep.NeedsReadBack = true;
-            rep.NeedsWriteBack = false;
-            index++;
-        }
+    bool     result = false;
+    unsigned end    = offs + size;
+    while ((index < replacements.size()) && (replacements[index].Offset < end))
+    {
+        result = true;
+        Replacement& rep = replacements[index];
+        assert(rep.Overlaps(offs, size));
+        rep.NeedsReadBack  = true;
+        rep.NeedsWriteBack = false;
+        index++;
     }
-};
+}
 
 //------------------------------------------------------------------------
 // Promotion::Run:
diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h
index 2ae42e3312c0f8..6c8f71a077e727 100644
--- a/src/coreclr/jit/promotion.h
+++ b/src/coreclr/jit/promotion.h
@@ -7,20 +7,96 @@
 #include "compiler.h"
 #include "vector.h"
 
-struct Replacement;
+// Represents a single replacement of a (field) access into a struct local.
+struct Replacement
+{
+    unsigned  Offset;
+    var_types AccessType;
+    unsigned  LclNum;
+    // Is the replacement local (given by LclNum) fresher than the value in the struct local?
+    bool NeedsWriteBack = true;
+    // Is the value in the struct local fresher than the replacement local?
+    // Note that the invariant is that this is always false at the entrance to
+    // a basic block, i.e. all predecessors would have read the replacement
+    // back before transferring control if necessary.
+    bool NeedsReadBack = false;
+#ifdef DEBUG
+    const char* Description;
+#endif
+
+    Replacement(unsigned offset, var_types accessType, unsigned lclNum DEBUGARG(const char* description))
+        : Offset(offset)
+        , AccessType(accessType)
+        , LclNum(lclNum)
+#ifdef DEBUG
+        , Description(description)
+#endif
+    {
+    }
+
+    bool Overlaps(unsigned otherStart, unsigned otherSize) const;
+};
 
 class Promotion
 {
     Compiler* m_compiler;
 
+    friend class LocalUses;
     friend class LocalsUseVisitor;
     friend class ReplaceVisitor;
+    friend class DecompositionPlan;
+    friend class StructSegments;
 
     void InsertInitialReadBack(unsigned lclNum, const jitstd::vector<Replacement>& replacements, Statement** prevStmt);
     void ExplicitlyZeroInitReplacementLocals(unsigned lclNum,
                                              const jitstd::vector<Replacement>& replacements,
                                              Statement** prevStmt);
     void InsertInitStatement(Statement** prevStmt, GenTree* tree);
+    static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement);
+    static GenTree* CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement);
+
+    //------------------------------------------------------------------------
+    // BinarySearch:
+    //   Find first entry with an equal offset, or bitwise complement of first
+    //   entry with a higher offset.
+    //
+    // Parameters:
+    //   vec    - The vector to binary search in
+    //   offset - The offset to search for
+    //
+    // Returns:
+    //   Index of the first entry with an equal offset, or bitwise complement of
+    //   first entry with a higher offset.
+    //
+    template <typename T, unsigned(T::*field)>
+    static size_t BinarySearch(const jitstd::vector<T>& vec, unsigned offset)
+    {
+        size_t min = 0;
+        size_t max = vec.size();
+        while (min < max)
+        {
+            size_t mid = min + (max - min) / 2;
+            if (vec[mid].*field == offset)
+            {
+                while (mid > 0 && vec[mid - 1].*field == offset)
+                {
+                    mid--;
+                }
+
+                return mid;
+            }
+            if (vec[mid].*field < offset)
+            {
+                min = mid + 1;
+            }
+            else
+            {
+                max = mid;
+            }
+        }
+
+        return ~min;
+    }
 
 public:
     explicit Promotion(Compiler* compiler) : m_compiler(compiler)
@@ -30,4 +106,60 @@ class Promotion
     PhaseStatus Run();
 };
 
+class DecompositionStatementList;
+class DecompositionPlan;
+
+class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
+{
+    Promotion*                    m_prom;
+    jitstd::vector<Replacement>** m_replacements;
+    bool                          m_madeChanges = false;
+
+public:
+    enum
+    {
+        DoPostOrder       = true,
+        UseExecutionOrder = true,
+    };
+
+    ReplaceVisitor(Promotion* prom, jitstd::vector<Replacement>** replacements)
+        : GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements)
+    {
+    }
+
+    bool MadeChanges()
+    {
+        return m_madeChanges;
+    }
+
+    void Reset()
+    {
+        m_madeChanges = false;
+    }
+
+    fgWalkResult PostOrderVisit(GenTree** use, GenTree* user);
+
+private:
+    void LoadStoreAroundCall(GenTreeCall* call, GenTree* user);
+    void ReplaceLocal(GenTree** use, GenTree* user);
+    void StoreBeforeReturn(GenTreeUnOp* ret);
+    void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size);
+    void MarkForReadBack(unsigned lcl, unsigned offs, unsigned size);
+
+    void HandleAssignment(GenTree** use, GenTree* user);
+    bool OverlappingReplacements(GenTreeLclVarCommon* lcl,
+                                 Replacement**        firstReplacement,
+                                 Replacement**        endReplacement = nullptr);
+    void EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionStatementList* result);
+    void InitFields(GenTreeLclVarCommon* dst, Replacement* firstRep, Replacement* endRep, DecompositionPlan* plan);
+    void CopyBetweenFields(GenTree*                    dst,
+                           Replacement*                dstFirstRep,
+                           Replacement*                dstEndRep,
+                           GenTree*                    src,
+                           Replacement*                srcFirstRep,
+                           Replacement*                srcEndRep,
+                           DecompositionStatementList* statements,
+                           DecompositionPlan*          plan);
+};
+
 #endif
diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp
new file mode 100644
index 00000000000000..295801e0c9d1aa
--- /dev/null
+++ b/src/coreclr/jit/promotiondecomposition.cpp
@@ -0,0 +1,1749 @@
+#include "jitpch.h"
+#include "promotion.h"
+#include "jitstd/algorithm.h"
+
+// Represents a list of statements; this is the result of assignment decomposition.
+class DecompositionStatementList
+{
+    GenTree* m_head = nullptr;
+
+public:
+    void AddStatement(GenTree* stmt)
+    {
+        stmt->gtNext = m_head;
+        m_head       = stmt;
+    }
+
+    GenTree* ToCommaTree(Compiler* comp)
+    {
+        if (m_head == nullptr)
+        {
+            return comp->gtNewNothingNode();
+        }
+
+        GenTree* tree = m_head;
+
+        for (GenTree* cur = m_head->gtNext; cur != nullptr; cur = cur->gtNext)
+        {
+            tree = comp->gtNewOperNode(GT_COMMA, TYP_VOID, cur, tree);
+        }
+
+        return tree;
+    }
+};
+
+// Represents significant segments of a struct operation.
+//
+// Essentially a segment tree (but not stored as a tree) that supports boolean
+// Add/Subtract operations of segments. Used to compute the remainder after
+// replacements have been handled as part of a decomposed block operation.
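As an illustration of the intended semantics (offsets invented; `comp` stands for a `Compiler*`), starting from an empty instance:

    StructSegments segs(comp->getAllocator(CMK_Promotion));
    segs.Add(StructSegments::Segment(0, 12));     // { [000..012) }
    segs.Add(StructSegments::Segment(12, 16));    // { [000..016) }; adjacent segments merge
    segs.Subtract(StructSegments::Segment(4, 8)); // { [000..004) [008..016) }

ComputeRemainder further below seeds such an instance with the significant parts of the destination layout and then subtracts the range covered by each planned entry; whatever survives is the remainder that still needs an explicit init or copy.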
+class StructSegments
+{
+public:
+    struct Segment
+    {
+        unsigned Start = 0;
+        unsigned End   = 0;
+
+        Segment()
+        {
+        }
+
+        Segment(unsigned start, unsigned end) : Start(start), End(end)
+        {
+        }
+
+        bool IntersectsInclusive(const Segment& other) const
+        {
+            if (End < other.Start)
+            {
+                return false;
+            }
+
+            if (other.End < Start)
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        bool Contains(const Segment& other) const
+        {
+            return other.Start >= Start && other.End <= End;
+        }
+
+        void Merge(const Segment& other)
+        {
+            Start = min(Start, other.Start);
+            End   = max(End, other.End);
+        }
+    };
+
+private:
+    jitstd::vector<Segment> m_segments;
+
+public:
+    StructSegments(CompAllocator allocator) : m_segments(allocator)
+    {
+    }
+
+    //------------------------------------------------------------------------
+    // Add:
+    //   Add a segment to the data structure.
+    //
+    // Parameters:
+    //   segment - The segment to add.
+    //
+    void Add(const Segment& segment)
+    {
+        size_t index = Promotion::BinarySearch<Segment, &Segment::End>(m_segments, segment.Start);
+
+        if ((ssize_t)index < 0)
+        {
+            index = ~index;
+        }
+
+        m_segments.insert(m_segments.begin() + index, segment);
+        size_t endIndex;
+        for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++)
+        {
+            if (!m_segments[index].IntersectsInclusive(m_segments[endIndex]))
+            {
+                break;
+            }
+
+            m_segments[index].Merge(m_segments[endIndex]);
+        }
+
+        m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex);
+    }
+
+    //------------------------------------------------------------------------
+    // Subtract:
+    //   Subtract a segment from the data structure.
+    //
+    // Parameters:
+    //   segment - The segment to subtract.
+    //
+    void Subtract(const Segment& segment)
+    {
+        size_t index = Promotion::BinarySearch<Segment, &Segment::End>(m_segments, segment.Start);
+        if ((ssize_t)index < 0)
+        {
+            index = ~index;
+        }
+        else
+        {
+            // Start == segment[index].End, which makes it non-interesting.
+            index++;
+        }
+
+        if (index >= m_segments.size())
+        {
+            return;
+        }
+
+        // Here we know Start < segment[index].End. Do they not intersect at all?
+        if (m_segments[index].Start >= segment.End)
+        {
+            // Does not intersect any segment.
+            return;
+        }
+
+        assert(m_segments[index].IntersectsInclusive(segment));
+
+        if (m_segments[index].Contains(segment))
+        {
+            if (segment.Start > m_segments[index].Start)
+            {
+                // New segment (existing.Start, segment.Start)
+                if (segment.End < m_segments[index].End)
+                {
+                    m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start));
+
+                    // And new segment (segment.End, existing.End)
+                    m_segments[index + 1].Start = segment.End;
+                    return;
+                }
+
+                m_segments[index].End = segment.Start;
+                return;
+            }
+            if (segment.End < m_segments[index].End)
+            {
+                // New segment (segment.End, existing.End)
+                m_segments[index].Start = segment.End;
+                return;
+            }
+
+            // Full segment is being removed
+            m_segments.erase(m_segments.begin() + index);
+            return;
+        }
+
+        if (segment.Start > m_segments[index].Start)
+        {
+            m_segments[index].End = segment.Start;
+            index++;
+        }
+
+        size_t endIndex = Promotion::BinarySearch<Segment, &Segment::End>(m_segments, segment.End);
+        if ((ssize_t)endIndex >= 0)
+        {
+            m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1);
+            return;
+        }
+
+        endIndex = ~endIndex;
+        if (endIndex == m_segments.size())
+        {
+            m_segments.erase(m_segments.begin() + index, m_segments.end());
+            return;
+        }
+
+        if (segment.End > m_segments[endIndex].Start)
+        {
+            m_segments[endIndex].Start = segment.End;
+        }
+
+        m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex);
+    }
+
+    //------------------------------------------------------------------------
+    // IsEmpty:
+    //   Check if the segment tree is empty.
+    //
+    // Returns:
+    //   True if so.
+    //
+    bool IsEmpty()
+    {
+        return m_segments.size() == 0;
+    }
+
+    //------------------------------------------------------------------------
+    // IsSingleSegment:
+    //   Check if the segment tree contains only a single segment, and return
+    //   it if so.
+    //
+    // Parameters:
+    //   result - [out] The single segment. Only valid if the method returns true.
+    //
+    // Returns:
+    //   True if so.
+    //
+    bool IsSingleSegment(Segment* result)
+    {
+        if (m_segments.size() == 1)
+        {
+            *result = m_segments[0];
+            return true;
+        }
+
+        return false;
+    }
+
+#ifdef DEBUG
+    //------------------------------------------------------------------------
+    // Check:
+    //   Validate that the data structure is normalized and that it equals a
+    //   specific fixed bit vector.
+    //
+    // Parameters:
+    //   vect - The bit vector
+    //
+    // Remarks:
+    //   This validates that the internal representation is normalized (i.e.
+    //   all adjacent intervals are merged) and that it contains an index iff
+    //   the specified vector contains that index.
+    //
+    void Check(FixedBitVect* vect)
+    {
+        bool     first = true;
+        unsigned last  = 0;
+        for (const Segment& segment : m_segments)
+        {
+            assert(first || (last < segment.Start));
+            assert(segment.End <= vect->bitVectGetSize());
+
+            for (unsigned i = last; i < segment.Start; i++)
+                assert(!vect->bitVectTest(i));
+
+            for (unsigned i = segment.Start; i < segment.End; i++)
+                assert(vect->bitVectTest(i));
+
+            first = false;
+            last  = segment.End;
+        }
+
+        for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++)
+            assert(!vect->bitVectTest(i));
+    }
+
+    //------------------------------------------------------------------------
+    // Dump:
+    //   Dump a string representation of the segment tree to stdout.
+    //
+    void Dump()
+    {
+        if (m_segments.size() == 0)
+        {
+            printf("<empty>");
+        }
+        else
+        {
+            const char* sep = "";
+            for (const Segment& segment : m_segments)
+            {
+                printf("%s[%03u..%03u)", sep, segment.Start, segment.End);
+                sep = " ";
+            }
+        }
+    }
+#endif
+};
+
+// Represents a plan for decomposing a block operation into direct treatment of
+// replacement fields and the remainder.
+class DecompositionPlan
+{
+    struct Entry
+    {
+        unsigned     ToLclNum;
+        Replacement* ToReplacement;
+        unsigned     FromLclNum;
+        Replacement* FromReplacement;
+        unsigned     Offset;
+        var_types    Type;
+    };
+
+    Compiler*         m_compiler;
+    ArrayStack<Entry> m_entries;
+    GenTree*          m_dst;
+    GenTree*          m_src;
+    bool              m_srcInvolvesReplacements;
+
+public:
+    DecompositionPlan(Compiler* comp, GenTree* dst, GenTree* src, bool srcInvolvesReplacements)
+        : m_compiler(comp)
+        , m_entries(comp->getAllocator(CMK_Promotion))
+        , m_dst(dst)
+        , m_src(src)
+        , m_srcInvolvesReplacements(srcInvolvesReplacements)
+    {
+    }
+
+    //------------------------------------------------------------------------
+    // CopyBetweenReplacements:
+    //   Add an entry specifying to copy from a replacement into another replacement.
+    //
+    // Parameters:
+    //   dstRep - The destination replacement.
+    //   srcRep - The source replacement.
+    //   offset - The offset this covers in the struct copy.
+    //
+    void CopyBetweenReplacements(Replacement* dstRep, Replacement* srcRep, unsigned offset)
+    {
+        m_entries.Push(Entry{dstRep->LclNum, dstRep, srcRep->LclNum, srcRep, offset, dstRep->AccessType});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyBetweenReplacements:
+    //   Add an entry specifying to copy from a promoted field into a replacement.
+    //
+    // Parameters:
+    //   dstRep - The destination replacement.
+    //   srcLcl - Local number of regularly promoted source field.
+    //   offset - The offset this covers in the struct copy.
+    //
+    // Remarks:
+    //   Used when the source local is a regular promoted field.
+    //
+    void CopyBetweenReplacements(Replacement* dstRep, unsigned srcLcl, unsigned offset)
+    {
+        m_entries.Push(Entry{dstRep->LclNum, dstRep, srcLcl, nullptr, offset, dstRep->AccessType});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyBetweenReplacements:
+    //   Add an entry specifying to copy from a replacement into a promoted field.
+    //
+    // Parameters:
+    //   dstLcl - Local number of regularly promoted destination field.
+    //   srcRep - The source replacement.
+    //   offset - The offset this covers in the struct copy.
+    //
+    // Remarks:
+    //   Used when the destination local is a regular promoted field.
+    //
+    void CopyBetweenReplacements(unsigned dstLcl, Replacement* srcRep, unsigned offset)
+    {
+        m_entries.Push(Entry{dstLcl, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyToReplacement:
+    //   Add an entry specifying to copy from the source into a replacement local.
+    //
+    // Parameters:
+    //   dstRep - The destination replacement.
+    //   offset - The relative offset into the source.
+    //
+    void CopyToReplacement(Replacement* dstRep, unsigned offset)
+    {
+        m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyFromReplacement:
+    //   Add an entry specifying to copy from a replacement local into the destination.
+    //
+    // Parameters:
+    //   srcRep - The source replacement.
+    //   offset - The relative offset into the destination to write.
+    //
+    void CopyFromReplacement(Replacement* srcRep, unsigned offset)
+    {
+        m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType});
+    }
+
+    //------------------------------------------------------------------------
+    // CopyFromReplacement:
+    //   Add an entry specifying to copy from a replacement local into the destination.
+    //
+    // Parameters:
+    //   srcLcl - The source local to copy from.
+    //   offset - The relative offset into the destination to write.
+    //   type   - The type of copy.
+    //
+    void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type)
+    {
+        m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcLcl, nullptr, offset, type});
+    }
+
+    //------------------------------------------------------------------------
+    // InitReplacement:
+    //   Add an entry specifying that a specified replacement local should be
+    //   constant initialized.
+    //
+    // Parameters:
+    //   dstRep - The destination replacement.
+    //   offset - The offset covered by this initialization.
+    //
+    void InitReplacement(Replacement* dstRep, unsigned offset)
+    {
+        m_entries.Push(Entry{dstRep->LclNum, dstRep, BAD_VAR_NUM, nullptr, offset, dstRep->AccessType});
+    }
+
+    //------------------------------------------------------------------------
+    // Finalize:
+    //   Create IR to perform the full decomposed struct copy as specified by
+    //   the entries that were added to the decomposition plan. Add the
+    //   statements to the specified list.
+    //
+    // Parameters:
+    //   statements - The list of statements to add to.
+    //
+    void Finalize(DecompositionStatementList* statements)
+    {
+        if (IsInit())
+        {
+            FinalizeInit(statements);
+        }
+        else
+        {
+            FinalizeCopy(statements);
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // CanInitPrimitive:
+    //   Check if we can handle initializing a primitive of the specified type.
+    //   For example, we cannot directly initialize SIMD types to non-zero
+    //   constants.
+    //
+    // Parameters:
+    //   type - The primitive type
+    //
+    // Returns:
+    //   True if so.
+    //
+    bool CanInitPrimitive(var_types type)
+    {
+        assert(IsInit());
+        if (varTypeIsGC(type) || varTypeIsSIMD(type))
+        {
+            return GetInitPattern() == 0;
+        }
+
+        return true;
+    }
+
+private:
+    //------------------------------------------------------------------------
+    // IsInit:
+    //   Check if this is an init block operation.
+    //
+    // Returns:
+    //   True if so.
+    //
+    bool IsInit()
+    {
+        return m_src->IsConstInitVal();
+    }
+
+    //------------------------------------------------------------------------
+    // GetInitPattern:
+    //   For an init block operation, get the pattern to init with.
+    //
+    // Returns:
+    //   Byte pattern broadcast into every byte of a 64-bit int.
+    //
+    int64_t GetInitPattern()
+    {
+        assert(IsInit());
+        GenTree* cns = m_src->OperIsInitVal() ?
m_src->gtGetOp1() : m_src; + int64_t pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; + return pattern; + } + + //------------------------------------------------------------------------ + // ComputeRemainder: + // Compute the remainder of the block operation that needs to be inited + // or copied after the replacements stored in the plan have been handled. + // + // Returns: + // Segments representing the remainder. + // + // Remarks: + // This function takes into account that insignificant padding does not + // need to be considered part of the remainder. For example, the last 4 + // bytes of Span on 64-bit are not returned as the remainder. + // + StructSegments ComputeRemainder() + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + + COMP_HANDLE compHnd = m_compiler->info.compCompHnd; + + bool significantPadding; + if (dstLayout->IsBlockLayout()) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to block layout\n"); + } + else + { + uint32_t attribs = compHnd->getClassAttribs(dstLayout->GetClassHandle()); + if ((attribs & CORINFO_FLG_INDEXABLE_FIELDS) != 0) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to indexable fields\n"); + } + else if ((attribs & CORINFO_FLG_DONT_DIG_FIELDS) != 0) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to CORINFO_FLG_DONT_DIG_FIELDS\n"); + } + else if (((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0) && ((attribs & CORINFO_FLG_CONTAINS_GC_PTR) == 0)) + { + significantPadding = true; + JITDUMP(" Block op has significant padding due to CUSTOMLAYOUT without GC pointers\n"); + } + else + { + significantPadding = false; + } + } + + StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); + + // Validate with "obviously correct" but less scalable fixed bit vector implementation. + INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(dstLayout->GetSize(), m_compiler)); + + if (significantPadding) + { + segments.Add(StructSegments::Segment(0, dstLayout->GetSize())); + +#ifdef DEBUG + for (unsigned i = 0; i < dstLayout->GetSize(); i++) + segmentBitVect->bitVectSet(i); +#endif + } + else + { + unsigned numFields = compHnd->getClassNumInstanceFields(dstLayout->GetClassHandle()); + for (unsigned i = 0; i < numFields; i++) + { + CORINFO_FIELD_HANDLE fieldHnd = compHnd->getFieldInClass(dstLayout->GetClassHandle(), (int)i); + unsigned fldOffset = compHnd->getFieldOffset(fieldHnd); + CORINFO_CLASS_HANDLE fieldClassHandle; + CorInfoType corType = compHnd->getFieldType(fieldHnd, &fieldClassHandle); + var_types varType = JITtype2varType(corType); + unsigned size = genTypeSize(varType); + if (size == 0) + { + // TODO-CQ: Recursively handle padding in sub structures + // here. Might be better to introduce a single JIT-EE call + // to query the significant segments -- that would also be + // usable by R2R even outside the version bubble in many + // cases. + size = compHnd->getClassSize(fieldClassHandle); + assert(size != 0); + } + + segments.Add(StructSegments::Segment(fldOffset, fldOffset + size)); +#ifdef DEBUG + for (unsigned i = 0; i < size; i++) + segmentBitVect->bitVectSet(fldOffset + i); +#endif + } + } + + // TODO-TP: Cache above StructSegments per class layout and just clone + // it there before the following subtract operations. 
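
+        //
+        // For example, on 64-bit: for a struct with fields
+        //   [000..008) object o, [008..012) int a, [012..016) int b,
+        // a plan that only covers 'a' leaves the remainder
+        // [000..008) [012..016) after the subtraction below.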
+ + for (int i = 0; i < m_entries.Height(); i++) + { + const Entry& entry = m_entries.BottomRef(i); + + segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type))); + +#ifdef DEBUG + for (unsigned i = 0; i < genTypeSize(entry.Type); i++) + segmentBitVect->bitVectClear(entry.Offset + i); +#endif + } + +#ifdef DEBUG + segments.Check(segmentBitVect); + + if (m_compiler->verbose) + { + printf(" Remainder: "); + segments.Dump(); + printf("\n"); + } +#endif + + return segments; + } + + // Represents the strategy for handling the remainder part of the block + // operation. + struct RemainderStrategy + { + enum + { + NoRemainder, + Primitive, + FullBlock, + }; + + int Type; + unsigned PrimitiveOffset; + var_types PrimitiveType; + + RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF) + : Type(type), PrimitiveOffset(primitiveOffset), PrimitiveType(primitiveType) + { + } + }; + + //------------------------------------------------------------------------ + // DetermineRemainderStrategy: + // Determine the strategy to use to handle the remaining parts of the struct + // once replacements have been handled. + // + // Returns: + // Type describing how it should be handled; for example, by a full block + // copy (that may be redundant with some of the replacements, but covers + // the rest of the remainder); or by handling a specific 'hole' as a + // primitive. + // + RemainderStrategy DetermineRemainderStrategy() + { + StructSegments remainder = ComputeRemainder(); + if (remainder.IsEmpty()) + { + JITDUMP(" => Remainder strategy: do nothing\n"); + return RemainderStrategy(RemainderStrategy::NoRemainder); + } + + StructSegments::Segment segment; + // See if we can "plug the hole" with a single primitive. + // TODO-CQ: Why does doing this for LCL_VAR result in so many regressions? + // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. + if (remainder.IsSingleSegment(&segment)) + { + var_types primitiveType = TYP_UNDEF; + unsigned size = segment.End - segment.Start; + switch (size) + { + case 1: + primitiveType = TYP_UBYTE; + break; + case 2: + primitiveType = TYP_USHORT; + break; +#ifdef TARGET_64BIT + case 4: + primitiveType = TYP_INT; + break; +#endif + case TARGET_POINTER_SIZE: + primitiveType = TYP_I_IMPL; + if ((segment.Start % TARGET_POINTER_SIZE) == 0) + { + ClassLayout* dstLayout = m_dst->GetLayout(m_compiler); + primitiveType = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE); + } + break; + + // TODO-CQ: SIMD sizes + } + + if (primitiveType != TYP_UNDEF) + { + if (!IsInit() || CanInitPrimitive(primitiveType)) + { + JITDUMP(" => Remainder strategy: %s at %03u\n", varTypeName(primitiveType), segment.Start); + return RemainderStrategy(RemainderStrategy::Primitive, segment.Start, primitiveType); + } + else + { + JITDUMP(" Cannot handle initing remainder as primitive of type %s\n", varTypeName(primitiveType)); + } + } + } + + JITDUMP(" => Remainder strategy: retain a full block op\n"); + return RemainderStrategy(RemainderStrategy::FullBlock); + } + + //------------------------------------------------------------------------ + // FinalizeInit: + // Create IR to perform the decomposed initialization. + // + // Parameters: + // statements - List to add statements to. + // + void FinalizeInit(DecompositionStatementList* statements) + { + GenTree* cns = m_src->OperIsInitVal() ? 
m_src->gtGetOp1() : m_src;
+        int64_t initPattern = GetInitPattern();
+
+        for (int i = 0; i < m_entries.Height(); i++)
+        {
+            const Entry& entry = m_entries.BottomRef(i);
+
+            assert(entry.ToLclNum != BAD_VAR_NUM);
+            GenTree* src = CreateInitValue(entry.Type, initPattern);
+            GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type);
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+        }
+
+        RemainderStrategy remainderStrategy = DetermineRemainderStrategy();
+        if (remainderStrategy.Type == RemainderStrategy::FullBlock)
+        {
+            GenTree* asg = m_compiler->gtNewBlkOpNode(m_dst, cns);
+            statements->AddStatement(asg);
+        }
+        else if (remainderStrategy.Type == RemainderStrategy::Primitive)
+        {
+            GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern);
+            GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon();
+            GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType,
+                                                       dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+            m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // CreateInitValue:
+    //   Create an IR node representing a constant value with the specified init pattern.
+    //
+    // Parameters:
+    //   type - The primitive type
+    //   initPattern - Pattern to init with
+    //
+    // Returns:
+    //   A constant.
+    //
+    // Remarks:
+    //   Should only be called when that pattern can actually be represented;
+    //   for example, SIMD types and GC pointers only support an init pattern
+    //   of zero.
+    //
+    GenTree* CreateInitValue(var_types type, int64_t initPattern)
+    {
+        switch (type)
+        {
+            case TYP_BOOL:
+            case TYP_BYTE:
+            case TYP_UBYTE:
+            case TYP_SHORT:
+            case TYP_USHORT:
+            case TYP_INT:
+            {
+                int64_t mask = (int64_t(1) << (genTypeSize(type) * 8)) - 1;
+                return m_compiler->gtNewIconNode(static_cast<int32_t>(initPattern & mask));
+            }
+            case TYP_LONG:
+                return m_compiler->gtNewLconNode(initPattern);
+            case TYP_FLOAT:
+                float floatPattern;
+                memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
+                return m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT);
+            case TYP_DOUBLE:
+                double doublePattern;
+                memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
+                return m_compiler->gtNewDconNode(doublePattern);
+            case TYP_REF:
+            case TYP_BYREF:
+#ifdef FEATURE_SIMD
+            case TYP_SIMD8:
+            case TYP_SIMD12:
+            case TYP_SIMD16:
+#if defined(TARGET_XARCH)
+            case TYP_SIMD32:
+            case TYP_SIMD64:
+#endif // TARGET_XARCH
+#endif // FEATURE_SIMD
+            {
+                assert(initPattern == 0);
+                return m_compiler->gtNewZeroConNode(type);
+            }
+            default:
+                unreached();
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // FinalizeCopy:
+    //   Create IR to perform the decomposed copy.
+    //
+    // Parameters:
+    //   statements - List to add statements to.
+    //
+    void FinalizeCopy(DecompositionStatementList* statements)
+    {
+        assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) &&
+               m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD));
+
+        RemainderStrategy remainderStrategy = DetermineRemainderStrategy();
+
+        // If the remainder is a full block and is going to incur write barrier
+        // then avoid incurring multiple write barriers for each source
+        // replacement that is a GC pointer -- write them back to the struct
+        // first instead.
+        if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK, GT_FIELD) &&
+            m_dst->GetLayout(m_compiler)->HasGCPtr())
+        {
+            for (int i = 0; i < m_entries.Height(); i++)
+            {
+                const Entry& entry = m_entries.BottomRef(i);
+                // TODO: Double check that TYP_BYREF does not incur any write barriers.
+                if ((entry.FromReplacement != nullptr) && (entry.Type == TYP_REF))
+                {
+                    Replacement* rep = entry.FromReplacement;
+                    if (rep->NeedsWriteBack)
+                    {
+                        statements->AddStatement(
+                            Promotion::CreateWriteBack(m_compiler, m_src->AsLclVarCommon()->GetLclNum(), *rep));
+                        JITDUMP("  Will write back V%02u (%s) to avoid an additional write barrier\n", rep->LclNum,
+                                rep->Description);
+
+                        rep->NeedsWriteBack = false;
+                    }
+                }
+            }
+        }
+
+        GenTree* addr = nullptr;
+        unsigned addrBaseOffs = 0;
+        GenTreeFlags indirFlags = GTF_EMPTY;
+
+        if (m_dst->OperIs(GT_BLK, GT_FIELD))
+        {
+            addr = m_dst->gtGetOp1();
+
+            if (m_dst->OperIs(GT_FIELD))
+            {
+                addrBaseOffs = m_dst->AsField()->gtFldOffset;
+            }
+
+            indirFlags = GetPropagatedIndirFlags(m_dst);
+        }
+        else if (m_src->OperIs(GT_BLK, GT_FIELD))
+        {
+            addr = m_src->gtGetOp1();
+
+            if (m_src->OperIs(GT_FIELD))
+            {
+                addrBaseOffs = m_src->AsField()->gtFldOffset;
+            }
+
+            indirFlags = GetPropagatedIndirFlags(m_src);
+        }
+
+        int numAddrUses = 0;
+
+        if (addr != nullptr)
+        {
+            for (int i = 0; i < m_entries.Height(); i++)
+            {
+                if (!IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy))
+                {
+                    numAddrUses++;
+                }
+            }
+
+            if (remainderStrategy.Type != RemainderStrategy::NoRemainder)
+            {
+                numAddrUses++;
+            }
+        }
+
+        bool needsNullCheck = false;
+        if ((addr != nullptr) && m_compiler->fgAddrCouldBeNull(addr))
+        {
+            switch (remainderStrategy.Type)
+            {
+                case RemainderStrategy::NoRemainder:
+                case RemainderStrategy::Primitive:
+                    needsNullCheck = true;
+                    // See if our first indirection will subsume the null check (usual case).
+                    for (int i = 0; i < m_entries.Height(); i++)
+                    {
+                        if (IsHandledByRemainder(m_entries.BottomRef(i), remainderStrategy))
+                        {
+                            continue;
+                        }
+
+                        const Entry& entry = m_entries.BottomRef(0);
+
+                        assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM));
+                        needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset);
+                    }
+                    break;
+            }
+        }
+
+        if (needsNullCheck)
+        {
+            numAddrUses++;
+        }
+
+        if ((addr != nullptr) && (numAddrUses > 1))
+        {
+            if (addr->OperIsLocal() && (!m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ||
+                                        (addr->AsLclVarCommon()->GetLclNum() != m_dst->AsLclVarCommon()->GetLclNum())))
+            {
+                // We will introduce more uses of the address local, so it is
+                // no longer dying here.
+                addr->gtFlags &= ~GTF_VAR_DEATH;
+            }
+            else if (addr->IsInvariant())
+            {
+                // Fall through
+            }
+            else
+            {
+                unsigned addrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling address for field-by-field copy"));
+                statements->AddStatement(m_compiler->gtNewTempAssign(addrLcl, addr));
+                addr = m_compiler->gtNewLclvNode(addrLcl, addr->TypeGet());
+                UpdateEarlyRefCount(m_compiler, addr);
+            }
+        }
+
+        auto grabAddr = [&numAddrUses, addr, this](unsigned offs) {
+            assert(numAddrUses > 0);
+            numAddrUses--;
+
+            GenTree* addrUse;
+            if (numAddrUses == 0)
+            {
+                // Last use of the address, reuse the node.
+                addrUse = addr;
+            }
+            else
+            {
+                addrUse = m_compiler->gtCloneExpr(addr);
+                UpdateEarlyRefCount(m_compiler, addrUse);
+            }
+
+            if (offs != 0)
+            {
+                var_types addrType = varTypeIsGC(addrUse) ? 
TYP_BYREF : TYP_I_IMPL; + addrUse = m_compiler->gtNewOperNode(GT_ADD, addrType, addrUse, + m_compiler->gtNewIconNode((ssize_t)offs, TYP_I_IMPL)); + } + + return addrUse; + }; + + if (remainderStrategy.Type == RemainderStrategy::FullBlock) + { + // We will reuse the existing block op's operands. Rebase the + // address off of the new local we created. + if (m_src->OperIs(GT_BLK, GT_FIELD)) + { + // Note that we should use 0 instead of addrBaseOffs here + // since this ends up as the address of the GT_FIELD node + // that already has the field offset. + m_src->AsUnOp()->gtOp1 = grabAddr(0); + } + else if (m_dst->OperIs(GT_BLK, GT_FIELD)) + { + // Like above, use 0 intentionally here. + m_dst->AsUnOp()->gtOp1 = grabAddr(0); + } + } + + // If the source involves replacements then do the struct op first -- + // otherwise we would overwrite the destination with stale bits. + // If the source does not involve replacements then CQ analysis shows + // that it's best to do it last. + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements) + { + statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src)); + + if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + // We will introduce uses of the source below so this struct + // copy is no longer the last use if it was before. + m_src->gtFlags &= ~GTF_VAR_DEATH; + } + } + + if (needsNullCheck) + { + GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)); + PropagateIndirFlags(indir, indirFlags); + statements->AddStatement(indir); + } + + for (int i = 0; i < m_entries.Height(); i++) + { + const Entry& entry = m_entries.BottomRef(i); + + if (IsHandledByRemainder(entry, remainderStrategy)) + { + JITDUMP(" Skipping dst+%03u <- V%02u (%s); it is up-to-date in its struct local and will be handled " + "as part of the remainder\n", + entry.Offset, entry.FromReplacement->LclNum, entry.FromReplacement->Description); + continue; + } + + GenTree* dst; + if (entry.ToLclNum != BAD_VAR_NUM) + { + dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); + + if (m_compiler->lvaGetDesc(entry.ToLclNum)->lvIsStructField) + UpdateEarlyRefCount(m_compiler, dst); + } + else + { + assert(entry.FromLclNum != BAD_VAR_NUM); + + if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset; + // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX. 
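
+                    // (GT_LCL_FLD stores its offset in a 16-bit field, hence
+                    // the uint16_t bound checked below.)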

+                    noway_assert(FitsIn<uint16_t>(offs));
+                    dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+                    m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
+                                                             DEBUGARG(DoNotEnregisterReason::LocalField));
+                    UpdateEarlyRefCount(m_compiler, dst);
+                }
+                else
+                {
+                    GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
+                    dst = m_compiler->gtNewIndir(entry.Type, addr);
+                    PropagateIndirFlags(dst, indirFlags);
+                }
+            }
+
+            GenTree* src;
+            if (entry.FromLclNum != BAD_VAR_NUM)
+            {
+                src = m_compiler->gtNewLclvNode(entry.FromLclNum, entry.Type);
+
+                if (m_compiler->lvaGetDesc(entry.FromLclNum)->lvIsStructField)
+                    UpdateEarlyRefCount(m_compiler, src);
+            }
+            else
+            {
+                assert(entry.ToLclNum != BAD_VAR_NUM);
+                if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+                {
+                    unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
+                    noway_assert(FitsIn<uint16_t>(offs));
+                    src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+                    m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
+                                                             DEBUGARG(DoNotEnregisterReason::LocalField));
+                    UpdateEarlyRefCount(m_compiler, src);
+                }
+                else
+                {
+                    GenTree* addr = grabAddr(addrBaseOffs + entry.Offset);
+                    src = m_compiler->gtNewIndir(entry.Type, addr);
+                    PropagateIndirFlags(src, indirFlags);
+                }
+            }
+
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+        }
+
+        if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements)
+        {
+            statements->AddStatement(m_compiler->gtNewBlkOpNode(m_dst, m_src));
+        }
+
+        if (remainderStrategy.Type == RemainderStrategy::Primitive)
+        {
+            GenTree* dst;
+            if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+            {
+                GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon();
+                dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType,
+                                                  dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+                m_compiler->lvaSetVarDoNotEnregister(dstLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+            }
+            else
+            {
+                dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType,
+                                             grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset));
+                PropagateIndirFlags(dst, indirFlags);
+            }
+
+            GenTree* src;
+            if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+            {
+                GenTreeLclVarCommon* srcLcl = m_src->AsLclVarCommon();
+                src = m_compiler->gtNewLclFldNode(srcLcl->GetLclNum(), remainderStrategy.PrimitiveType,
+                                                  srcLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset);
+                m_compiler->lvaSetVarDoNotEnregister(srcLcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+            }
+            else
+            {
+                src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType,
+                                             grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset));
+                PropagateIndirFlags(src, indirFlags);
+            }
+
+            statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+        }
+
+        assert(numAddrUses == 0);
+    }
+
+    bool IsHandledByRemainder(const Entry& entry, const RemainderStrategy& remainderStrategy)
+    {
+        // If the remainder is being handled as a full block copy and this
+        // replacement is up-to-date in its struct local then we can skip
+        // copying the replacement explicitly. 
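
+        // For example, with the FullBlock strategy the retained "dst = src"
+        // struct copy already transfers a source field whose struct local is
+        // up to date, so no explicit per-field copy is needed for it.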
+ return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && + !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); + } + //------------------------------------------------------------------------ + // GetPropagatedIndirFlags: + // Convert GT_BLK or GT_FIELD indir flags into flags that should be + // propagated to derived GT_IND nodes. + // + // Parameters: + // indir - The indirection + // + // Returns: + // Flags to propagate to created derived GT_IND nodes. + // + GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) + { + assert(indir->OperIs(GT_BLK, GT_FIELD)); + if (indir->OperIs(GT_BLK)) + { + return indir->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); + } + + static_assert_no_msg(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); + return indir->gtFlags & GTF_IND_VOLATILE; + } + + //------------------------------------------------------------------------ + // PropagateIndirFlags: + // Propagate the specified flags to a GT_IND node. + // + // Parameters: + // indir - The indirection to apply flags to + // flags - The specified indirection flags. + // + void PropagateIndirFlags(GenTree* indir, GenTreeFlags flags) + { + if (genTypeSize(indir) == 1) + { + flags &= ~GTF_IND_UNALIGNED; + } + + indir->gtFlags |= flags; + } + + //------------------------------------------------------------------------ + // UpdateEarlyRefCount: + // Update early ref counts if necessary for the specified IR node. + // + // Parameters: + // comp - compiler instance + // candidate - the IR node that may be a local that should have its early + // ref counts updated. + // + static void UpdateEarlyRefCount(Compiler* comp, GenTree* candidate) + { + if (!candidate->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR)) + { + return; + } + + IncrementRefCount(comp, candidate->AsLclVarCommon()->GetLclNum()); + + LclVarDsc* varDsc = comp->lvaGetDesc(candidate->AsLclVarCommon()); + if (varDsc->lvIsStructField) + { + IncrementRefCount(comp, varDsc->lvParentLcl); + } + + if (varDsc->lvPromoted) + { + for (unsigned fldLclNum = varDsc->lvFieldLclStart; fldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; + fldLclNum++) + { + IncrementRefCount(comp, fldLclNum); + } + } + } + + //------------------------------------------------------------------------ + // IncrementRefCount: + // Increment the ref count for the specified local. + // + // Parameters: + // comp - compiler instance + // lclNum - the local + // + static void IncrementRefCount(Compiler* comp, unsigned lclNum) + { + LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); + varDsc->incLvRefCntSaturating(1, RCS_EARLY); + } +}; + +//------------------------------------------------------------------------ +// InitFields: +// Add entries into the plan specifying which replacements can be +// directly inited, and mark the other ones as requiring read back. +// +// Parameters: +// dst - Destination local that involves replacement. +// firstRep - The first replacement. +// endRep - End of the replacements. +// plan - Decomposition plan to add initialization entries into. +// +void ReplaceVisitor::InitFields(GenTreeLclVarCommon* dst, + Replacement* firstRep, + Replacement* endRep, + DecompositionPlan* plan) +{ + for (Replacement* rep = firstRep; rep < endRep; rep++) + { + if (!plan->CanInitPrimitive(rep->AccessType)) + { + JITDUMP(" Unsupported init of %s %s. 
Will init as struct and read back.\n", varTypeName(rep->AccessType), + rep->Description); + + // We will need to read this one back after initing the struct. + rep->NeedsWriteBack = false; + rep->NeedsReadBack = true; + continue; + } + + JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); + plan->InitReplacement(rep, rep->Offset - dst->GetLclOffs()); + rep->NeedsWriteBack = true; + rep->NeedsReadBack = false; + } +} + +//------------------------------------------------------------------------ +// CopyBetweenFields: +// Copy between two struct locals that may involve replacements. +// +// Parameters: +// dst - Destination node +// dstFirstRep - First replacement of the destination or nullptr if destination is not a promoted local. +// dstEndRep - One past last replacement of the destination. +// src - Source node +// srcFirstRep - First replacement of the source or nullptr if source is not a promoted local. +// srcEndRep - One past last replacement of the source. +// statements - Statement list to add potential "init" statements to. +// plan - Data structure that tracks the specific copies to be done. +// +void ReplaceVisitor::CopyBetweenFields(GenTree* dst, + Replacement* dstFirstRep, + Replacement* dstEndRep, + GenTree* src, + Replacement* srcFirstRep, + Replacement* srcEndRep, + DecompositionStatementList* statements, + DecompositionPlan* plan) +{ + assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); + + GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; + GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; + unsigned dstBaseOffs = dstLcl != nullptr ? dstLcl->GetLclOffs() : 0; + unsigned srcBaseOffs = srcLcl != nullptr ? srcLcl->GetLclOffs() : 0; + + LclVarDsc* dstDsc = dstLcl != nullptr ? m_compiler->lvaGetDesc(dstLcl) : nullptr; + LclVarDsc* srcDsc = srcLcl != nullptr ? m_compiler->lvaGetDesc(srcLcl) : nullptr; + + Replacement* dstRep = dstFirstRep; + Replacement* srcRep = srcFirstRep; + + while ((dstRep < dstEndRep) || (srcRep < srcEndRep)) + { + if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) + { + JITDUMP(" Source replacement V%02u (%s) is stale. Will read it back before copy.\n", srcRep->LclNum, + srcRep->Description); + + assert(srcLcl != nullptr); + statements->AddStatement(Promotion::CreateReadBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); + srcRep->NeedsReadBack = false; + assert(!srcRep->NeedsWriteBack); + } + + if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) + { + if (srcRep->Offset - srcBaseOffs + genTypeSize(srcRep->AccessType) < dstRep->Offset - dstBaseOffs) + { + // This source replacement ends before the next destination replacement starts. + // Write it directly to the destination struct local. + unsigned offs = srcRep->Offset - srcBaseOffs; + plan->CopyFromReplacement(srcRep, offs); + JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); + srcRep++; + continue; + } + + if (dstRep->Offset - dstBaseOffs + genTypeSize(dstRep->AccessType) < srcRep->Offset - srcBaseOffs) + { + // Destination replacement ends before the next source replacement starts. + // Read it directly from the source struct local. + unsigned offs = dstRep->Offset - dstBaseOffs; + plan->CopyToReplacement(dstRep, offs); + JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + continue; + } + + // Overlap. Check for exact match of replacements. 
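
+            // (For example, both locals have an int replacement at the same
+            // relative offset; in that case we can queue a direct copy
+            // between the two replacement locals.)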

+            // TODO-CQ: Allow copies between small types of different signs, and between TYP_I_IMPL/TYP_BYREF?
+            if (((dstRep->Offset - dstBaseOffs) == (srcRep->Offset - srcBaseOffs)) &&
+                (dstRep->AccessType == srcRep->AccessType))
+            {
+                plan->CopyBetweenReplacements(dstRep, srcRep, dstRep->Offset - dstBaseOffs);
+                JITDUMP("  V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum,
+                        srcRep->Description);
+
+                dstRep->NeedsWriteBack = true;
+                dstRep->NeedsReadBack = false;
+                dstRep++;
+                srcRep++;
+                continue;
+            }
+
+            // Partial overlap. Write source back to the struct local. We
+            // will handle the destination replacement in a future
+            // iteration of the loop.
+            statements->AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcRep));
+            JITDUMP("  Partial overlap of V%02u (%s) <- V%02u (%s). Will read source back before copy\n",
+                    dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description);
+            srcRep++;
+            continue;
+        }
+
+        if (dstRep < dstEndRep)
+        {
+            unsigned offs = dstRep->Offset - dstBaseOffs;
+
+            if ((srcDsc != nullptr) && srcDsc->lvPromoted)
+            {
+                unsigned srcOffs = srcLcl->GetLclOffs() + offs;
+                unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs);
+
+                if (fieldLcl != BAD_VAR_NUM)
+                {
+                    LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl);
+                    if (dsc->lvType == dstRep->AccessType)
+                    {
+                        plan->CopyBetweenReplacements(dstRep, fieldLcl, offs);
+                        JITDUMP("  V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, fieldLcl,
+                                dsc->lvReason);
+                        dstRep->NeedsWriteBack = true;
+                        dstRep->NeedsReadBack = false;
+                        dstRep++;
+                        continue;
+                    }
+                }
+            }
+
+            // TODO-CQ: If the source is promoted then this will result in
+            // DNER'ing it. Alternatively we could copy the promoted field
+            // directly to the destination's struct local and mark the
+            // overlapping fields as needing read back to avoid this DNER.
+            plan->CopyToReplacement(dstRep, offs);
+            JITDUMP("  V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs);
+            dstRep->NeedsWriteBack = true;
+            dstRep->NeedsReadBack = false;
+            dstRep++;
+        }
+        else
+        {
+            assert(srcRep < srcEndRep);
+            unsigned offs = srcRep->Offset - srcBaseOffs;
+            if ((dstDsc != nullptr) && dstDsc->lvPromoted)
+            {
+                unsigned dstOffs = dstLcl->GetLclOffs() + offs;
+                unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs);
+
+                if (fieldLcl != BAD_VAR_NUM)
+                {
+                    LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl);
+                    if (dsc->lvType == srcRep->AccessType)
+                    {
+                        plan->CopyBetweenReplacements(fieldLcl, srcRep, offs);
+                        JITDUMP("  V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum,
+                                srcRep->Description);
+                        srcRep++;
+                        continue;
+                    }
+                }
+            }
+
+            plan->CopyFromReplacement(srcRep, offs);
+            JITDUMP("  dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description);
+            srcRep++;
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// EliminateCommasInBlockOp:
+//   Ensure that the sources of a block op are not commas by extracting side effects.
+//
+// Parameters:
+//   asg - The block op
+//   result - Statement list to add resulting statements to.
+//
+// Remarks:
+//   Works similarly to MorphInitBlockHelper::EliminateCommas. 
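
+//
+//   For example, "IND(addr) = COMMA(sideEffect, value)" becomes the statement
+//   "sideEffect" followed by "IND(addr) = value".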

+//
+void ReplaceVisitor::EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionStatementList* result)
+{
+    bool any = false;
+    GenTree* lhs = asg->gtGetOp1();
+    assert(lhs->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_BLK));
+
+    GenTree* rhs = asg->gtGetOp2();
+
+    if (asg->IsReverseOp())
+    {
+        while (rhs->OperIs(GT_COMMA))
+        {
+            result->AddStatement(rhs->gtGetOp1());
+            rhs = rhs->gtGetOp2();
+            any = true;
+        }
+    }
+    else
+    {
+        if (lhs->OperIsUnary() && rhs->OperIs(GT_COMMA))
+        {
+            GenTree* addr = lhs->gtGetOp1();
+            // Note that GTF_GLOB_REF is not up to date here, hence we need
+            // a tree walk to find address exposed locals.
+            if (((addr->gtFlags & GTF_ALL_EFFECT) != 0) || (((rhs->gtFlags & GTF_ASG) != 0) && !addr->IsInvariant()) ||
+                m_compiler->gtHasAddressExposedLocals(addr))
+            {
+                unsigned lhsAddrLclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Block morph LHS addr"));
+
+                result->AddStatement(m_compiler->gtNewTempAssign(lhsAddrLclNum, addr));
+                lhs->AsUnOp()->gtOp1 = m_compiler->gtNewLclvNode(lhsAddrLclNum, genActualType(addr));
+                m_compiler->gtUpdateNodeSideEffects(lhs);
+                m_madeChanges = true;
+                any = true;
+            }
+        }
+
+        while (rhs->OperIs(GT_COMMA))
+        {
+            result->AddStatement(rhs->gtGetOp1());
+            rhs = rhs->gtGetOp2();
+            any = true;
+        }
+    }
+
+    if (any)
+    {
+        asg->gtOp2 = rhs;
+        m_compiler->gtUpdateNodeSideEffects(asg);
+        m_madeChanges = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// OverlappingReplacements:
+//   Find replacements that overlap the specified struct local.
+//
+// Parameters:
+//   lcl - A struct local
+//   firstReplacement - [out] The first replacement that overlaps
+//   endReplacement - [out, optional] One past the last replacement that overlaps
+//
+// Returns:
+//   True if any replacement overlaps; otherwise false.
+//
+bool ReplaceVisitor::OverlappingReplacements(GenTreeLclVarCommon* lcl,
+                                             Replacement** firstReplacement,
+                                             Replacement** endReplacement)
+{
+    if (m_replacements[lcl->GetLclNum()] == nullptr)
+    {
+        return false;
+    }
+
+    jitstd::vector<Replacement>& replacements = *m_replacements[lcl->GetLclNum()];
+
+    unsigned offs = lcl->GetLclOffs();
+    unsigned size = lcl->GetLayout(m_compiler)->GetSize();
+    size_t firstIndex = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
+    if ((ssize_t)firstIndex < 0)
+    {
+        firstIndex = ~firstIndex;
+        if (firstIndex > 0)
+        {
+            Replacement& lastRepBefore = replacements[firstIndex - 1];
+            if ((lastRepBefore.Offset + genTypeSize(lastRepBefore.AccessType)) > offs)
+            {
+                // Overlap with last entry starting before offs.
+                firstIndex--;
+            }
+            else if (firstIndex >= replacements.size())
+            {
+                // Starts after last replacement ends.
+                return false;
+            }
+        }
+
+        const Replacement& first = replacements[firstIndex];
+        if (first.Offset >= (offs + size))
+        {
+            // First candidate starts after this ends.
+            return false;
+        }
+    }
+
+    assert((firstIndex < replacements.size()) && replacements[firstIndex].Overlaps(offs, size));
+    *firstReplacement = &replacements[firstIndex];
+
+    if (endReplacement != nullptr)
+    {
+        size_t lastIndex = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs + size);
+        if ((ssize_t)lastIndex < 0)
+        {
+            lastIndex = ~lastIndex;
+        }
+
+        // Since we verified above that there is an overlapping replacement
+        // we know that lastIndex exists and is the next one that does not
+        // overlap. 
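
+        // (BinarySearch returns the bitwise complement of the index of the
+        // first replacement starting at or after offs + size when there is
+        // no exact match at that offset.)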
+ assert(lastIndex > 0); + *endReplacement = replacements.data() + lastIndex; + } + + return true; +} + +//------------------------------------------------------------------------ +// HandleAssignment: +// Handle an assignment that may be between struct locals with replacements. +// +// Parameters: +// asg - The assignment +// user - The user of the assignment. +// +void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) +{ + GenTreeOp* asg = (*use)->AsOp(); + + if (!asg->gtGetOp1()->TypeIs(TYP_STRUCT)) + { + return; + } + + GenTree* dst = asg->gtGetOp1(); + assert(!dst->OperIs(GT_COMMA)); + GenTree* src = asg->gtGetOp2()->gtEffectiveVal(); + + GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; + GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; + + Replacement* dstFirstRep = nullptr; + Replacement* dstEndRep = nullptr; + bool dstInvolvesReplacements = (dstLcl != nullptr) && OverlappingReplacements(dstLcl, &dstFirstRep, &dstEndRep); + Replacement* srcFirstRep = nullptr; + Replacement* srcEndRep = nullptr; + bool srcInvolvesReplacements = (srcLcl != nullptr) && OverlappingReplacements(srcLcl, &srcFirstRep, &srcEndRep); + + if (!dstInvolvesReplacements && !srcInvolvesReplacements) + { + return; + } + + JITDUMP("Processing block operation [%06u] that involves replacements\n", Compiler::dspTreeID(asg)); + + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) || src->IsConstInitVal()) + { + DecompositionStatementList result; + EliminateCommasInBlockOp(asg, &result); + + if (dstInvolvesReplacements) + { + unsigned dstLclOffs = dstLcl->GetLclOffs(); + unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); + if (dstFirstRep->Offset < dstLclOffs) + { + if (dstFirstRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " + "read-backs are " + "necessary.\n", + dstFirstRep->LclNum, dstFirstRep->Description); + // The value of the replacement will be partially assembled from its old value and this struct + // operation. + // We accomplish this by an initial write back, the struct copy, followed by a later read back. + // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. + result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstFirstRep)); + + dstFirstRep->NeedsWriteBack = false; + } + + dstFirstRep->NeedsReadBack = true; + dstFirstRep++; + } + + if (dstEndRep > dstFirstRep) + { + Replacement* dstLastRep = dstEndRep - 1; + if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) + { + if (dstLastRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " + "read-backs are " + "necessary.\n", + dstLastRep->LclNum, dstLastRep->Description); + result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstLastRep)); + + dstLastRep->NeedsWriteBack = false; + } + + dstLastRep->NeedsReadBack = true; + dstEndRep--; + } + } + } + + if (srcInvolvesReplacements) + { + unsigned srcLclOffs = srcLcl->GetLclOffs(); + unsigned srcLclSize = srcLcl->GetLayout(m_compiler)->GetSize(); + + if (srcFirstRep->Offset < srcLclOffs) + { + if (srcFirstRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). 
Write back is necessary.\n", + srcFirstRep->LclNum, srcFirstRep->Description); + + result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcFirstRep)); + + srcFirstRep->NeedsWriteBack = false; + } + + srcFirstRep++; + } + + if (srcEndRep > srcFirstRep) + { + Replacement* srcLastRep = srcEndRep - 1; + if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) + { + if (srcLastRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is " + "necessary.\n", + srcLastRep->LclNum, srcLastRep->Description); + + result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcLastRep)); + srcLastRep->NeedsWriteBack = false; + } + + srcEndRep--; + } + } + } + + DecompositionPlan plan(m_compiler, dst, src, srcInvolvesReplacements); + + if (src->IsConstInitVal()) + { + InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, &plan); + } + else + { + CopyBetweenFields(dst, dstFirstRep, dstEndRep, src, srcFirstRep, srcEndRep, &result, &plan); + } + + plan.Finalize(&result); + + *use = result.ToCommaTree(m_compiler); + m_madeChanges = true; + } + else + { + if (asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + GenTreeLclVarCommon* rhsLcl = asg->gtGetOp2()->AsLclVarCommon(); + unsigned size = rhsLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(&asg->gtOp2, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); + } + + if (asg->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + GenTreeLclVarCommon* lhsLcl = asg->gtGetOp1()->AsLclVarCommon(); + unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); + MarkForReadBack(lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size); + } + } +} From 0888806b90be94b4aedc8b0d9e3a9b95266d2d6f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 May 2023 20:37:21 +0200 Subject: [PATCH 31/37] A few fixes --- src/coreclr/jit/promotiondecomposition.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 295801e0c9d1aa..261acc54e087ba 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -369,7 +369,7 @@ class DecompositionPlan //------------------------------------------------------------------------ // CopyBetweenReplacements: - // Add an entry specifying to copy from a promoted field into a replacement. + // Add an entry specifying to copy from a replacement into a promoted field. // // Parameters: // dstRep - The destination replacement. @@ -378,7 +378,7 @@ class DecompositionPlan // type - The type of copy. // // Remarks: - // Used when the source local is a regular promoted field. + // Used when the destination local is a regular promoted field. // void CopyBetweenReplacements(unsigned dstLcl, Replacement* srcRep, unsigned offset) { @@ -413,20 +413,6 @@ class DecompositionPlan m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcRep->LclNum, srcRep, offset, srcRep->AccessType}); } - //------------------------------------------------------------------------ - // CopyFromReplacement: - // Add an entry specifying to copy from a replacement local into the destination. - // - // Parameters: - // srcLcl - The source local to copy from. - // offset - The relative offset into the destination to write. - // type - The type of copy. 
- // - void CopyFromReplacement(unsigned srcLcl, unsigned offset, var_types type) - { - m_entries.Push(Entry{BAD_VAR_NUM, nullptr, srcLcl, nullptr, offset, type}); - } - //------------------------------------------------------------------------ // InitReplacement: // Add an entry specifying that a specified replacement local should be From 9641dbe0940e5731ad2f4c81eb503566f807b656 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 May 2023 20:37:52 +0200 Subject: [PATCH 32/37] Remove unnecessary change with GT_FIELD removal --- src/coreclr/jit/compiler.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f96901a826ae87..4420a7662a36cc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6150,11 +6150,8 @@ class Compiler bool gtTreeContainsOper(GenTree* tree, genTreeOps op); ExceptionSetFlags gtCollectExceptions(GenTree* tree); -public: bool fgIsBigOffset(size_t offset); -private: - bool fgNeedReturnSpillTemp(); /* From bd2021d1bfe0a875f165f389312c73e5f07a5914 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 May 2023 20:45:08 +0200 Subject: [PATCH 33/37] Fix after merge --- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/promotiondecomposition.cpp | 83 ++++++---------------- 2 files changed, 25 insertions(+), 60 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5fc4b50e1f66c0..052cceda5f8da5 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6164,8 +6164,10 @@ class Compiler bool gtTreeContainsOper(GenTree* tree, genTreeOps op); ExceptionSetFlags gtCollectExceptions(GenTree* tree); +public: bool fgIsBigOffset(size_t offset); +private: bool fgNeedReturnSpillTemp(); /* diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 261acc54e087ba..d5856237826934 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -822,8 +822,7 @@ class DecompositionPlan // void FinalizeCopy(DecompositionStatementList* statements) { - assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) && - m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); + assert(m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK) && m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK)); RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); @@ -831,7 +830,7 @@ class DecompositionPlan // then avoid incurring multiple write barriers for each source // replacement that is a GC pointer -- write them back to the struct // first instead. 
- if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK, GT_FIELD) && + if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK) && m_dst->GetLayout(m_compiler)->HasGCPtr()) { for (int i = 0; i < m_entries.Height(); i++) @@ -854,31 +853,20 @@ class DecompositionPlan } } - GenTree* addr = nullptr; - unsigned addrBaseOffs = 0; - GenTreeFlags indirFlags = GTF_EMPTY; + GenTree* addr = nullptr; + GenTreeFlags indirFlags = GTF_EMPTY; - if (m_dst->OperIs(GT_BLK, GT_FIELD)) + if (m_dst->OperIs(GT_BLK)) { addr = m_dst->gtGetOp1(); - - if (m_dst->OperIs(GT_FIELD)) - { - addrBaseOffs = m_dst->AsField()->gtFldOffset; - } - - indirFlags = GetPropagatedIndirFlags(m_dst); + indirFlags = + m_dst->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); } - else if (m_src->OperIs(GT_BLK, GT_FIELD)) + else if (m_src->OperIs(GT_BLK)) { addr = m_src->gtGetOp1(); - - if (m_src->OperIs(GT_FIELD)) - { - addrBaseOffs = m_src->AsField()->gtFldOffset; - } - - indirFlags = GetPropagatedIndirFlags(m_src); + indirFlags = + m_src->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); } int numAddrUses = 0; @@ -918,7 +906,8 @@ class DecompositionPlan const Entry& entry = m_entries.BottomRef(0); assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); - needsNullCheck = m_compiler->fgIsBigOffset(addrBaseOffs + entry.Offset); + needsNullCheck = m_compiler->fgIsBigOffset(entry.Offset); + break; } break; } @@ -981,16 +970,12 @@ class DecompositionPlan { // We will reuse the existing block op's operands. Rebase the // address off of the new local we created. - if (m_src->OperIs(GT_BLK, GT_FIELD)) + if (m_src->OperIs(GT_BLK)) { - // Note that we should use 0 instead of addrBaseOffs here - // since this ends up as the address of the GT_FIELD node - // that already has the field offset. m_src->AsUnOp()->gtOp1 = grabAddr(0); } - else if (m_dst->OperIs(GT_BLK, GT_FIELD)) + else if (m_dst->OperIs(GT_BLK)) { - // Like above, use 0 intentionally here. 
m_dst->AsUnOp()->gtOp1 = grabAddr(0); } } @@ -1013,7 +998,7 @@ class DecompositionPlan if (needsNullCheck) { - GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(addrBaseOffs)); + GenTreeIndir* indir = m_compiler->gtNewIndir(TYP_BYTE, grabAddr(0)); PropagateIndirFlags(indir, indirFlags); statements->AddStatement(indir); } @@ -1054,7 +1039,7 @@ class DecompositionPlan } else { - GenTree* addr = grabAddr(addrBaseOffs + entry.Offset); + GenTree* addr = grabAddr(entry.Offset); dst = m_compiler->gtNewIndir(entry.Type, addr); PropagateIndirFlags(dst, indirFlags); } @@ -1082,7 +1067,7 @@ class DecompositionPlan } else { - GenTree* addr = grabAddr(addrBaseOffs + entry.Offset); + GenTree* addr = grabAddr(entry.Offset); src = m_compiler->gtNewIndir(entry.Type, addr); PropagateIndirFlags(src, indirFlags); } @@ -1109,7 +1094,7 @@ class DecompositionPlan else { dst = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, - grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset)); + grabAddr(remainderStrategy.PrimitiveOffset)); PropagateIndirFlags(dst, indirFlags); } @@ -1124,7 +1109,7 @@ class DecompositionPlan else { src = m_compiler->gtNewIndir(remainderStrategy.PrimitiveType, - grabAddr(addrBaseOffs + remainderStrategy.PrimitiveOffset)); + grabAddr(remainderStrategy.PrimitiveOffset)); PropagateIndirFlags(src, indirFlags); } @@ -1142,28 +1127,6 @@ class DecompositionPlan return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); } - //------------------------------------------------------------------------ - // GetPropagatedIndirFlags: - // Convert GT_BLK or GT_FIELD indir flags into flags that should be - // propagated to derived GT_IND nodes. - // - // Parameters: - // indir - The indirection - // - // Returns: - // Flags to propagate to created derived GT_IND nodes. - // - GenTreeFlags GetPropagatedIndirFlags(GenTree* indir) - { - assert(indir->OperIs(GT_BLK, GT_FIELD)); - if (indir->OperIs(GT_BLK)) - { - return indir->gtFlags & (GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_UNALIGNED | GTF_IND_INITCLASS); - } - - static_assert_no_msg(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); - return indir->gtFlags & GTF_IND_VOLATILE; - } //------------------------------------------------------------------------ // PropagateIndirFlags: @@ -1291,7 +1254,7 @@ void ReplaceVisitor::CopyBetweenFields(GenTree* dst, DecompositionStatementList* statements, DecompositionPlan* plan) { - assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD)); + assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK)); GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? 
src->AsLclVarCommon() : nullptr; @@ -1448,7 +1411,7 @@ void ReplaceVisitor::EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionState { bool any = false; GenTree* lhs = asg->gtGetOp1(); - assert(lhs->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_BLK)); + assert(lhs->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_IND, GT_BLK)); GenTree* rhs = asg->gtGetOp2(); @@ -1463,7 +1426,7 @@ void ReplaceVisitor::EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionState } else { - if (lhs->OperIsUnary() && rhs->OperIs(GT_COMMA)) + if (lhs->OperIsIndir() && rhs->OperIs(GT_COMMA)) { GenTree* addr = lhs->gtGetOp1(); // Note that GTF_GLOB_REF is not up to date here, hence we need @@ -1608,7 +1571,7 @@ void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) JITDUMP("Processing block operation [%06u] that involves replacements\n", Compiler::dspTreeID(asg)); - if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK, GT_FIELD) || src->IsConstInitVal()) + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK) || src->IsConstInitVal()) { DecompositionStatementList result; EliminateCommasInBlockOp(asg, &result); From 39428e4c9898db0f9f235e52500733f3b2228f8f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 May 2023 12:17:25 +0200 Subject: [PATCH 34/37] Fix after merge --- src/coreclr/jit/promotion.cpp | 242 +++----- src/coreclr/jit/promotiondecomposition.cpp | 672 ++++++++++----------- 2 files changed, 410 insertions(+), 504 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index eb9793be19a0bf..8e7f197ec9e4b9 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -91,174 +91,6 @@ struct Access } }; -<<<<<<< HEAD -//------------------------------------------------------------------------ -// Replacement::Overlaps: -// Check if this replacement overlaps the specified range. -// -// Parameters: -// otherStart - Start of the other range. -// otherSize - Size of the other range. -// -// Returns: -// True if they overlap. -// -bool Replacement::Overlaps(unsigned otherStart, unsigned otherSize) const -{ - unsigned end = Offset + genTypeSize(AccessType); - if (end <= otherStart) - { - return false; - } - - unsigned otherEnd = otherStart + otherSize; - if (otherEnd <= Offset) - { - return false; - } - - return true; -} - -//------------------------------------------------------------------------ -// CreateWriteBack: -// Create IR that writes a replacement local's value back to its struct local: -// -// ASG -// LCL_FLD int V00 [+4] -// LCL_VAR int V01 -// -// Parameters: -// structLclNum - Struct local -// replacement - Information about the replacement -// -// Returns: -// IR node. -// -GenTree* Promotion::CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) -{ - GenTree* dst = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); - GenTree* src = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); - GenTree* asg = compiler->gtNewAssignNode(dst, src); - return asg; -} - -//------------------------------------------------------------------------ -// CreateReadBack: -// Create IR that reads a replacement local's value back from its struct local: -// -// ASG -// LCL_VAR int V01 -// LCL_FLD int V00 [+4] -// -// Parameters: -// structLclNum - Struct local -// replacement - Information about the replacement -// -// Returns: -// IR node. 

-//
-GenTree* Promotion::CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement)
-{
-    GenTree* dst = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType));
-    GenTree* src = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset);
-    GenTree* asg = compiler->gtNewAssignNode(dst, src);
-    return asg;
-}
-
-||||||| 458f3de2828
-//------------------------------------------------------------------------
-// BinarySearch:
-//   Find first entry with an equal offset, or bitwise complement of first
-//   entry with a higher offset.
-//
-// Parameters:
-//   vec - The vector to binary search in
-//   offset - The offset to search for
-//
-// Returns:
-//   Index of the first entry with an equal offset, or bitwise complement of
-//   first entry with a higher offset.
-//
-template <typename T, unsigned (T::*field)>
-static size_t BinarySearch(const jitstd::vector<T>& vec, unsigned offset)
-{
-    size_t min = 0;
-    size_t max = vec.size();
-    while (min < max)
-    {
-        size_t mid = min + (max - min) / 2;
-        if (vec[mid].*field == offset)
-        {
-            while (mid > 0 && vec[mid - 1].*field == offset)
-            {
-                mid--;
-            }
-
-            return mid;
-        }
-        if (vec[mid].*field < offset)
-        {
-            min = mid + 1;
-        }
-        else
-        {
-            max = mid;
-        }
-    }
-
-    return ~min;
-}
-
-// Represents a single replacement of a (field) access into a struct local.
-struct Replacement
-{
-    unsigned Offset;
-    var_types AccessType;
-    unsigned LclNum;
-    // Is the replacement local (given by LclNum) fresher than the value in the struct local?
-    bool NeedsWriteBack = true;
-    // Is the value in the struct local fresher than the replacement local?
-    // Note that the invariant is that this is always false at the entrance to
-    // a basic block, i.e. all predecessors would have read the replacement
-    // back before transferring control if necessary.
-    bool NeedsReadBack = false;
-    // Arbitrary flag bit used e.g. by decomposition. Assumed to be false.
-    bool Handled = false;
-#ifdef DEBUG
-    const char* Name;
-#endif
-
-    Replacement(unsigned offset, var_types accessType, unsigned lclNum DEBUGARG(const char* name))
-        : Offset(offset)
-        , AccessType(accessType)
-        , LclNum(lclNum)
-#ifdef DEBUG
-        , Name(name)
-#endif
-    {
-    }
-
-    bool Overlaps(unsigned otherStart, unsigned otherSize) const
-    {
-        unsigned end = Offset + genTypeSize(AccessType);
-        if (end <= otherStart)
-        {
-            return false;
-        }
-
-        unsigned otherEnd = otherStart + otherSize;
-        if (otherEnd <= Offset)
-        {
-            return false;
-        }
-
-        return true;
-    }
-};
-
-=======
->>>>>>> 2e17200fc6782beac0b63c290628dbf79ff13650
 enum class AccessKindFlags : uint32_t
 {
     None = 0,
@@ -758,6 +590,80 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
     }
 };
 
+//------------------------------------------------------------------------
+// Replacement::Overlaps:
+//   Check if this replacement overlaps the specified range.
+//
+// Parameters:
+//   otherStart - Start of the other range.
+//   otherSize - Size of the other range.
+//
+// Returns:
+//   True if they overlap.
+//
+bool Replacement::Overlaps(unsigned otherStart, unsigned otherSize) const
+{
+    unsigned end = Offset + genTypeSize(AccessType);
+    if (end <= otherStart)
+    {
+        return false;
+    }
+
+    unsigned otherEnd = otherStart + otherSize;
+    if (otherEnd <= Offset)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+//------------------------------------------------------------------------
+// CreateWriteBack:
+//   Create IR that writes a replacement local's value back to its struct local:
+//
+//   ASG
+//     LCL_FLD int V00 [+4]
+//     LCL_VAR int V01
+//
+// Parameters:
+//   structLclNum - Struct local
+//   replacement - Information about the replacement
+//
+// Returns:
+//   IR node.
+//
+GenTree* Promotion::CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement)
+{
+    GenTree* dst = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset);
+    GenTree* src = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType));
+    GenTree* asg = compiler->gtNewAssignNode(dst, src);
+    return asg;
+}
+
+//------------------------------------------------------------------------
+// CreateReadBack:
+//   Create IR that reads a replacement local's value back from its struct local:
+//
+//   ASG
+//     LCL_VAR int V01
+//     LCL_FLD int V00 [+4]
+//
+// Parameters:
+//   structLclNum - Struct local
+//   replacement - Information about the replacement
+//
+// Returns:
+//   IR node. 
+// +bool Replacement::Overlaps(unsigned otherStart, unsigned otherSize) const +{ + unsigned end = Offset + genTypeSize(AccessType); + if (end <= otherStart) + { + return false; + } + + unsigned otherEnd = otherStart + otherSize; + if (otherEnd <= Offset) + { + return false; + } + + return true; +} + +//------------------------------------------------------------------------ +// CreateWriteBack: +// Create IR that writes a replacement local's value back to its struct local: +// +// ASG +// LCL_FLD int V00 [+4] +// LCL_VAR int V01 +// +// Parameters: +// structLclNum - Struct local +// replacement - Information about the replacement +// +// Returns: +// IR node. +// +GenTree* Promotion::CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) +{ + GenTree* dst = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); + GenTree* src = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); + GenTree* asg = compiler->gtNewAssignNode(dst, src); + return asg; +} + +//------------------------------------------------------------------------ +// CreateReadBack: +// Create IR that reads a replacement local's value back from its struct local: +// +// ASG +// LCL_VAR int V01 +// LCL_FLD int V00 [+4] +// +// Parameters: +// structLclNum - Struct local +// replacement - Information about the replacement +// +// Returns: +// IR node. +// +GenTree* Promotion::CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement) +{ + GenTree* dst = compiler->gtNewLclvNode(replacement.LclNum, genActualType(replacement.AccessType)); + GenTree* src = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); + GenTree* asg = compiler->gtNewAssignNode(dst, src); + return asg; +} + Compiler::fgWalkResult ReplaceVisitor::PostOrderVisit(GenTree** use, GenTree* user) { GenTree* tree = *use; diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index d5856237826934..75003a5a637952 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1196,268 +1196,167 @@ class DecompositionPlan }; //------------------------------------------------------------------------ -// InitFields: -// Add entries into the plan specifying which replacements can be -// directly inited, and mark the other ones as requiring read back. +// HandleAssignment: +// Handle an assignment that may be between struct locals with replacements. // // Parameters: -// dst - Destination local that involves replacement. -// firstRep - The first replacement. -// endRep - End of the replacements. -// plan - Decomposition plan to add initialization entries into. +// asg - The assignment +// user - The user of the assignment. // -void ReplaceVisitor::InitFields(GenTreeLclVarCommon* dst, - Replacement* firstRep, - Replacement* endRep, - DecompositionPlan* plan) +void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) { - for (Replacement* rep = firstRep; rep < endRep; rep++) - { - if (!plan->CanInitPrimitive(rep->AccessType)) - { - JITDUMP(" Unsupported init of %s %s. Will init as struct and read back.\n", varTypeName(rep->AccessType), - rep->Description); - - // We will need to read this one back after initing the struct. 
- rep->NeedsWriteBack = false; - rep->NeedsReadBack = true; - continue; - } + GenTreeOp* asg = (*use)->AsOp(); - JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); - plan->InitReplacement(rep, rep->Offset - dst->GetLclOffs()); - rep->NeedsWriteBack = true; - rep->NeedsReadBack = false; + if (!asg->gtGetOp1()->TypeIs(TYP_STRUCT)) + { + return; } -} - -//------------------------------------------------------------------------ -// CopyBetweenFields: -// Copy between two struct locals that may involve replacements. -// -// Parameters: -// dst - Destination node -// dstFirstRep - First replacement of the destination or nullptr if destination is not a promoted local. -// dstEndRep - One past last replacement of the destination. -// src - Source node -// srcFirstRep - First replacement of the source or nullptr if source is not a promoted local. -// srcEndRep - One past last replacement of the source. -// statements - Statement list to add potential "init" statements to. -// plan - Data structure that tracks the specific copies to be done. -// -void ReplaceVisitor::CopyBetweenFields(GenTree* dst, - Replacement* dstFirstRep, - Replacement* dstEndRep, - GenTree* src, - Replacement* srcFirstRep, - Replacement* srcEndRep, - DecompositionStatementList* statements, - DecompositionPlan* plan) -{ - assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK)); - GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; - GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; - unsigned dstBaseOffs = dstLcl != nullptr ? dstLcl->GetLclOffs() : 0; - unsigned srcBaseOffs = srcLcl != nullptr ? srcLcl->GetLclOffs() : 0; + GenTree* dst = asg->gtGetOp1(); + assert(!dst->OperIs(GT_COMMA)); + GenTree* src = asg->gtGetOp2()->gtEffectiveVal(); - LclVarDsc* dstDsc = dstLcl != nullptr ? m_compiler->lvaGetDesc(dstLcl) : nullptr; - LclVarDsc* srcDsc = srcLcl != nullptr ? m_compiler->lvaGetDesc(srcLcl) : nullptr; + GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; + GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; - Replacement* dstRep = dstFirstRep; - Replacement* srcRep = srcFirstRep; + Replacement* dstFirstRep = nullptr; + Replacement* dstEndRep = nullptr; + bool dstInvolvesReplacements = (dstLcl != nullptr) && OverlappingReplacements(dstLcl, &dstFirstRep, &dstEndRep); + Replacement* srcFirstRep = nullptr; + Replacement* srcEndRep = nullptr; + bool srcInvolvesReplacements = (srcLcl != nullptr) && OverlappingReplacements(srcLcl, &srcFirstRep, &srcEndRep); - while ((dstRep < dstEndRep) || (srcRep < srcEndRep)) + if (!dstInvolvesReplacements && !srcInvolvesReplacements) { - if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) - { - JITDUMP(" Source replacement V%02u (%s) is stale. 
Will read it back before copy.\n", srcRep->LclNum, - srcRep->Description); + return; + } - assert(srcLcl != nullptr); - statements->AddStatement(Promotion::CreateReadBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); - srcRep->NeedsReadBack = false; - assert(!srcRep->NeedsWriteBack); - } + JITDUMP("Processing block operation [%06u] that involves replacements\n", Compiler::dspTreeID(asg)); - if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK) || src->IsConstInitVal()) + { + DecompositionStatementList result; + EliminateCommasInBlockOp(asg, &result); + + if (dstInvolvesReplacements) { - if (srcRep->Offset - srcBaseOffs + genTypeSize(srcRep->AccessType) < dstRep->Offset - dstBaseOffs) + unsigned dstLclOffs = dstLcl->GetLclOffs(); + unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); + if (dstFirstRep->Offset < dstLclOffs) { - // This source replacement ends before the next destination replacement starts. - // Write it directly to the destination struct local. - unsigned offs = srcRep->Offset - srcBaseOffs; - plan->CopyFromReplacement(srcRep, offs); - JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); - srcRep++; - continue; - } + if (dstFirstRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " + "read-backs are " + "necessary.\n", + dstFirstRep->LclNum, dstFirstRep->Description); + // The value of the replacement will be partially assembled from its old value and this struct + // operation. + // We accomplish this by an initial write back, the struct copy, followed by a later read back. + // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. + result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstFirstRep)); - if (dstRep->Offset - dstBaseOffs + genTypeSize(dstRep->AccessType) < srcRep->Offset - srcBaseOffs) - { - // Destination replacement ends before the next source replacement starts. - // Read it directly from the source struct local. - unsigned offs = dstRep->Offset - dstBaseOffs; - plan->CopyToReplacement(dstRep, offs); - JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; - dstRep++; - continue; + dstFirstRep->NeedsWriteBack = false; + } + + dstFirstRep->NeedsReadBack = true; + dstFirstRep++; } - // Overlap. Check for exact match of replacements. - // TODO-CQ: Allow copies between small types of different signs, and between TYP_I_IMPL/TYP_BYREF? - if (((dstRep->Offset - dstBaseOffs) == (srcRep->Offset - srcBaseOffs)) && - (dstRep->AccessType == srcRep->AccessType)) + if (dstEndRep > dstFirstRep) { - plan->CopyBetweenReplacements(dstRep, srcRep, dstRep->Offset - dstBaseOffs); - JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, - srcRep->Description); + Replacement* dstLastRep = dstEndRep - 1; + if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) + { + if (dstLastRep->NeedsWriteBack) + { + JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). 
Write and " + "read-backs are " + "necessary.\n", + dstLastRep->LclNum, dstLastRep->Description); + result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstLastRep)); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; - dstRep++; - srcRep++; - continue; - } + dstLastRep->NeedsWriteBack = false; + } - // Partial overlap. Write source back to the struct local. We - // will handle the destination replacement in a future - // iteration of the loop. - statements->AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); - JITDUMP(" Partial overlap of V%02u (%s) <- V%02u (%s). Will read source back before copy\n", - dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); - srcRep++; - continue; + dstLastRep->NeedsReadBack = true; + dstEndRep--; + } + } } - if (dstRep < dstEndRep) + if (srcInvolvesReplacements) { - unsigned offs = dstRep->Offset - dstBaseOffs; + unsigned srcLclOffs = srcLcl->GetLclOffs(); + unsigned srcLclSize = srcLcl->GetLayout(m_compiler)->GetSize(); - if ((srcDsc != nullptr) && srcDsc->lvPromoted) + if (srcFirstRep->Offset < srcLclOffs) { - unsigned srcOffs = srcLcl->GetLclOffs() + offs; - unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); - - if (fieldLcl != BAD_VAR_NUM) + if (srcFirstRep->NeedsWriteBack) { - LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); - if (dsc->lvType == dstRep->AccessType) - { - plan->CopyBetweenReplacements(dstRep, fieldLcl, offs); - JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; - dstRep++; - continue; - } + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is necessary.\n", + srcFirstRep->LclNum, srcFirstRep->Description); + + result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcFirstRep)); + + srcFirstRep->NeedsWriteBack = false; } + + srcFirstRep++; } - // TODO-CQ: If the source is promoted then this will result in - // DNER'ing it. Alternatively we could copy the promoted field - // directly to the destination's struct local and mark the - // overlapping fields as needing read back to avoid this DNER. - plan->CopyToReplacement(dstRep, offs); - JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; - dstRep++; - } - else - { - assert(srcRep < srcEndRep); - unsigned offs = srcRep->Offset - srcBaseOffs; - if ((dstDsc != nullptr) && dstDsc->lvPromoted) + if (srcEndRep > srcFirstRep) { - unsigned dstOffs = dstLcl->GetLclOffs() + offs; - unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); - - if (fieldLcl != BAD_VAR_NUM) + Replacement* srcLastRep = srcEndRep - 1; + if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) { - LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); - if (dsc->lvType == srcRep->AccessType) + if (srcLastRep->NeedsWriteBack) { - plan->CopyBetweenReplacements(fieldLcl, srcRep, offs); - JITDUMP(" V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum, - srcRep->Description); - srcRep++; - continue; + JITDUMP("*** Block operation partially overlaps with source V%02u (%s). 
Write back is " + "necessary.\n", + srcLastRep->LclNum, srcLastRep->Description); + + result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcLastRep)); + srcLastRep->NeedsWriteBack = false; } + + srcEndRep--; } } - - plan->CopyFromReplacement(srcRep, offs); - JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); - srcRep++; } - } -} - -//------------------------------------------------------------------------ -// EliminateCommasInBlockOp: -// Ensure that the sources of a block op are not commas by extracting side effects. -// -// Parameters: -// asg - The block op -// result - Statement list to add resulting statements to. -// -// Remarks: -// Works similarly to MorphInitBlockHelper::EliminateCommas. -// -void ReplaceVisitor::EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionStatementList* result) -{ - bool any = false; - GenTree* lhs = asg->gtGetOp1(); - assert(lhs->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_IND, GT_BLK)); - GenTree* rhs = asg->gtGetOp2(); + DecompositionPlan plan(m_compiler, dst, src, srcInvolvesReplacements); - if (asg->IsReverseOp()) - { - while (rhs->OperIs(GT_COMMA)) + if (src->IsConstInitVal()) { - result->AddStatement(rhs->gtGetOp1()); - rhs = rhs->gtGetOp2(); - any = true; + InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, &plan); } + else + { + CopyBetweenFields(dst, dstFirstRep, dstEndRep, src, srcFirstRep, srcEndRep, &result, &plan); + } + + plan.Finalize(&result); + + *use = result.ToCommaTree(m_compiler); + m_madeChanges = true; } else { - if (lhs->OperIsIndir() && rhs->OperIs(GT_COMMA)) + if (asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - GenTree* addr = lhs->gtGetOp1(); - // Note that GTF_GLOB_REF is not up to date here, hence we need - // a tree walk to find address exposed locals. - if (((addr->gtFlags & GTF_ALL_EFFECT) != 0) || (((rhs->gtFlags & GTF_ASG) != 0) && !addr->IsInvariant()) || - m_compiler->gtHasAddressExposedLocals(addr)) - { - unsigned lhsAddrLclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Block morph LHS addr")); - - result->AddStatement(m_compiler->gtNewTempAssign(lhsAddrLclNum, addr)); - lhs->AsUnOp()->gtOp1 = m_compiler->gtNewLclvNode(lhsAddrLclNum, genActualType(addr)); - m_compiler->gtUpdateNodeSideEffects(lhs); - m_madeChanges = true; - any = true; - } + GenTreeLclVarCommon* rhsLcl = asg->gtGetOp2()->AsLclVarCommon(); + unsigned size = rhsLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(&asg->gtOp2, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); } - while (rhs->OperIs(GT_COMMA)) + if (asg->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - result->AddStatement(rhs->gtGetOp1()); - rhs = rhs->gtGetOp2(); - any = true; + GenTreeLclVarCommon* lhsLcl = asg->gtGetOp1()->AsLclVarCommon(); + unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); + MarkForReadBack(lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size); } } - - if (any) - { - asg->gtOp2 = rhs; - m_compiler->gtUpdateNodeSideEffects(asg); - m_madeChanges = true; - } } //------------------------------------------------------------------------ @@ -1530,169 +1429,270 @@ bool ReplaceVisitor::OverlappingReplacements(GenTreeLclVarCommon* lcl, *endReplacement = replacements.data() + lastIndex; } - return true; + return true; +} + +//------------------------------------------------------------------------ +// EliminateCommasInBlockOp: +// Ensure that the sources of a block op are not commas by extracting side effects. 
+// +// Parameters: +// asg - The block op +// result - Statement list to add resulting statements to. +// +// Remarks: +// Works similarly to MorphInitBlockHelper::EliminateCommas. +// +void ReplaceVisitor::EliminateCommasInBlockOp(GenTreeOp* asg, DecompositionStatementList* result) +{ + bool any = false; + GenTree* lhs = asg->gtGetOp1(); + assert(lhs->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_IND, GT_BLK)); + + GenTree* rhs = asg->gtGetOp2(); + + if (asg->IsReverseOp()) + { + while (rhs->OperIs(GT_COMMA)) + { + result->AddStatement(rhs->gtGetOp1()); + rhs = rhs->gtGetOp2(); + any = true; + } + } + else + { + if (lhs->OperIsIndir() && rhs->OperIs(GT_COMMA)) + { + GenTree* addr = lhs->gtGetOp1(); + // Note that GTF_GLOB_REF is not up to date here, hence we need + // a tree walk to find address exposed locals. + if (((addr->gtFlags & GTF_ALL_EFFECT) != 0) || (((rhs->gtFlags & GTF_ASG) != 0) && !addr->IsInvariant()) || + m_compiler->gtHasAddressExposedLocals(addr)) + { + unsigned lhsAddrLclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Block morph LHS addr")); + + result->AddStatement(m_compiler->gtNewTempAssign(lhsAddrLclNum, addr)); + lhs->AsUnOp()->gtOp1 = m_compiler->gtNewLclvNode(lhsAddrLclNum, genActualType(addr)); + m_compiler->gtUpdateNodeSideEffects(lhs); + m_madeChanges = true; + any = true; + } + } + + while (rhs->OperIs(GT_COMMA)) + { + result->AddStatement(rhs->gtGetOp1()); + rhs = rhs->gtGetOp2(); + any = true; + } + } + + if (any) + { + asg->gtOp2 = rhs; + m_compiler->gtUpdateNodeSideEffects(asg); + m_madeChanges = true; + } +} + +//------------------------------------------------------------------------ +// InitFields: +// Add entries into the plan specifying which replacements can be +// directly inited, and mark the other ones as requiring read back. +// +// Parameters: +// dst - Destination local that involves replacement. +// firstRep - The first replacement. +// endRep - End of the replacements. +// plan - Decomposition plan to add initialization entries into. +// +void ReplaceVisitor::InitFields(GenTreeLclVarCommon* dst, + Replacement* firstRep, + Replacement* endRep, + DecompositionPlan* plan) +{ + for (Replacement* rep = firstRep; rep < endRep; rep++) + { + if (!plan->CanInitPrimitive(rep->AccessType)) + { + JITDUMP(" Unsupported init of %s %s. Will init as struct and read back.\n", varTypeName(rep->AccessType), + rep->Description); + + // We will need to read this one back after initing the struct. + rep->NeedsWriteBack = false; + rep->NeedsReadBack = true; + continue; + } + + JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); + plan->InitReplacement(rep, rep->Offset - dst->GetLclOffs()); + rep->NeedsWriteBack = true; + rep->NeedsReadBack = false; + } } //------------------------------------------------------------------------ -// HandleAssignment: -// Handle an assignment that may be between struct locals with replacements. +// CopyBetweenFields: +// Copy between two struct locals that may involve replacements. // // Parameters: -// asg - The assignment -// user - The user of the assignment. +// dst - Destination node +// dstFirstRep - First replacement of the destination or nullptr if destination is not a promoted local. +// dstEndRep - One past last replacement of the destination. +// src - Source node +// srcFirstRep - First replacement of the source or nullptr if source is not a promoted local. +// srcEndRep - One past last replacement of the source. +// statements - Statement list to add potential "init" statements to. 
+// plan - Data structure that tracks the specific copies to be done. // -void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) +void ReplaceVisitor::CopyBetweenFields(GenTree* dst, + Replacement* dstFirstRep, + Replacement* dstEndRep, + GenTree* src, + Replacement* srcFirstRep, + Replacement* srcEndRep, + DecompositionStatementList* statements, + DecompositionPlan* plan) { - GenTreeOp* asg = (*use)->AsOp(); - - if (!asg->gtGetOp1()->TypeIs(TYP_STRUCT)) - { - return; - } + assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK)); - GenTree* dst = asg->gtGetOp1(); - assert(!dst->OperIs(GT_COMMA)); - GenTree* src = asg->gtGetOp2()->gtEffectiveVal(); + GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; + GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; + unsigned dstBaseOffs = dstLcl != nullptr ? dstLcl->GetLclOffs() : 0; + unsigned srcBaseOffs = srcLcl != nullptr ? srcLcl->GetLclOffs() : 0; - GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; - GenTreeLclVarCommon* srcLcl = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? src->AsLclVarCommon() : nullptr; + LclVarDsc* dstDsc = dstLcl != nullptr ? m_compiler->lvaGetDesc(dstLcl) : nullptr; + LclVarDsc* srcDsc = srcLcl != nullptr ? m_compiler->lvaGetDesc(srcLcl) : nullptr; - Replacement* dstFirstRep = nullptr; - Replacement* dstEndRep = nullptr; - bool dstInvolvesReplacements = (dstLcl != nullptr) && OverlappingReplacements(dstLcl, &dstFirstRep, &dstEndRep); - Replacement* srcFirstRep = nullptr; - Replacement* srcEndRep = nullptr; - bool srcInvolvesReplacements = (srcLcl != nullptr) && OverlappingReplacements(srcLcl, &srcFirstRep, &srcEndRep); + Replacement* dstRep = dstFirstRep; + Replacement* srcRep = srcFirstRep; - if (!dstInvolvesReplacements && !srcInvolvesReplacements) + while ((dstRep < dstEndRep) || (srcRep < srcEndRep)) { - return; - } - - JITDUMP("Processing block operation [%06u] that involves replacements\n", Compiler::dspTreeID(asg)); + if ((srcRep < srcEndRep) && srcRep->NeedsReadBack) + { + JITDUMP(" Source replacement V%02u (%s) is stale. Will read it back before copy.\n", srcRep->LclNum, + srcRep->Description); - if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_BLK) || src->IsConstInitVal()) - { - DecompositionStatementList result; - EliminateCommasInBlockOp(asg, &result); + assert(srcLcl != nullptr); + statements->AddStatement(Promotion::CreateReadBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); + srcRep->NeedsReadBack = false; + assert(!srcRep->NeedsWriteBack); + } - if (dstInvolvesReplacements) + if ((dstRep < dstEndRep) && (srcRep < srcEndRep)) { - unsigned dstLclOffs = dstLcl->GetLclOffs(); - unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); - if (dstFirstRep->Offset < dstLclOffs) + if (srcRep->Offset - srcBaseOffs + genTypeSize(srcRep->AccessType) < dstRep->Offset - dstBaseOffs) { - if (dstFirstRep->NeedsWriteBack) - { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " - "read-backs are " - "necessary.\n", - dstFirstRep->LclNum, dstFirstRep->Description); - // The value of the replacement will be partially assembled from its old value and this struct - // operation. - // We accomplish this by an initial write back, the struct copy, followed by a later read back. - // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. 
- result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstFirstRep)); - - dstFirstRep->NeedsWriteBack = false; - } - - dstFirstRep->NeedsReadBack = true; - dstFirstRep++; + // This source replacement ends before the next destination replacement starts. + // Write it directly to the destination struct local. + unsigned offs = srcRep->Offset - srcBaseOffs; + plan->CopyFromReplacement(srcRep, offs); + JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); + srcRep++; + continue; } - if (dstEndRep > dstFirstRep) + if (dstRep->Offset - dstBaseOffs + genTypeSize(dstRep->AccessType) < srcRep->Offset - srcBaseOffs) { - Replacement* dstLastRep = dstEndRep - 1; - if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) - { - if (dstLastRep->NeedsWriteBack) - { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " - "read-backs are " - "necessary.\n", - dstLastRep->LclNum, dstLastRep->Description); - result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstLastRep)); + // Destination replacement ends before the next source replacement starts. + // Read it directly from the source struct local. + unsigned offs = dstRep->Offset - dstBaseOffs; + plan->CopyToReplacement(dstRep, offs); + JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + continue; + } - dstLastRep->NeedsWriteBack = false; - } + // Overlap. Check for exact match of replacements. + // TODO-CQ: Allow copies between small types of different signs, and between TYP_I_IMPL/TYP_BYREF? + if (((dstRep->Offset - dstBaseOffs) == (srcRep->Offset - srcBaseOffs)) && + (dstRep->AccessType == srcRep->AccessType)) + { + plan->CopyBetweenReplacements(dstRep, srcRep, dstRep->Offset - dstBaseOffs); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, + srcRep->Description); - dstLastRep->NeedsReadBack = true; - dstEndRep--; - } + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + srcRep++; + continue; } + + // Partial overlap. Write source back to the struct local. We + // will handle the destination replacement in a future + // iteration of the loop. + statements->AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcRep)); + JITDUMP(" Partial overlap of V%02u (%s) <- V%02u (%s). Will read source back before copy\n", + dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); + srcRep++; + continue; } - if (srcInvolvesReplacements) + if (dstRep < dstEndRep) { - unsigned srcLclOffs = srcLcl->GetLclOffs(); - unsigned srcLclSize = srcLcl->GetLayout(m_compiler)->GetSize(); + unsigned offs = dstRep->Offset - dstBaseOffs; - if (srcFirstRep->Offset < srcLclOffs) + if ((srcDsc != nullptr) && srcDsc->lvPromoted) { - if (srcFirstRep->NeedsWriteBack) - { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). 
Write back is necessary.\n", - srcFirstRep->LclNum, srcFirstRep->Description); - - result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcFirstRep)); - - srcFirstRep->NeedsWriteBack = false; - } - - srcFirstRep++; - } + unsigned srcOffs = srcLcl->GetLclOffs() + offs; + unsigned fieldLcl = m_compiler->lvaGetFieldLocal(srcDsc, srcOffs); - if (srcEndRep > srcFirstRep) - { - Replacement* srcLastRep = srcEndRep - 1; - if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) + if (fieldLcl != BAD_VAR_NUM) { - if (srcLastRep->NeedsWriteBack) + LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); + if (dsc->lvType == dstRep->AccessType) { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is " - "necessary.\n", - srcLastRep->LclNum, srcLastRep->Description); - - result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcLastRep)); - srcLastRep->NeedsWriteBack = false; + plan->CopyBetweenReplacements(dstRep, fieldLcl, offs); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; + continue; } - - srcEndRep--; } } - } - DecompositionPlan plan(m_compiler, dst, src, srcInvolvesReplacements); - - if (src->IsConstInitVal()) - { - InitFields(dst->AsLclVarCommon(), dstFirstRep, dstEndRep, &plan); + // TODO-CQ: If the source is promoted then this will result in + // DNER'ing it. Alternatively we could copy the promoted field + // directly to the destination's struct local and mark the + // overlapping fields as needing read back to avoid this DNER. + plan->CopyToReplacement(dstRep, offs); + JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); + dstRep->NeedsWriteBack = true; + dstRep->NeedsReadBack = false; + dstRep++; } else { - CopyBetweenFields(dst, dstFirstRep, dstEndRep, src, srcFirstRep, srcEndRep, &result, &plan); - } - - plan.Finalize(&result); + assert(srcRep < srcEndRep); + unsigned offs = srcRep->Offset - srcBaseOffs; + if ((dstDsc != nullptr) && dstDsc->lvPromoted) + { + unsigned dstOffs = dstLcl->GetLclOffs() + offs; + unsigned fieldLcl = m_compiler->lvaGetFieldLocal(dstDsc, dstOffs); - *use = result.ToCommaTree(m_compiler); - m_madeChanges = true; - } - else - { - if (asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - GenTreeLclVarCommon* rhsLcl = asg->gtGetOp2()->AsLclVarCommon(); - unsigned size = rhsLcl->GetLayout(m_compiler)->GetSize(); - WriteBackBefore(&asg->gtOp2, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); - } + if (fieldLcl != BAD_VAR_NUM) + { + LclVarDsc* dsc = m_compiler->lvaGetDesc(fieldLcl); + if (dsc->lvType == srcRep->AccessType) + { + plan->CopyBetweenReplacements(fieldLcl, srcRep, offs); + JITDUMP(" V%02u (%s) <- V%02u (%s)\n", fieldLcl, dsc->lvReason, srcRep->LclNum, + srcRep->Description); + srcRep++; + continue; + } + } + } - if (asg->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - GenTreeLclVarCommon* lhsLcl = asg->gtGetOp1()->AsLclVarCommon(); - unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); - MarkForReadBack(lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size); + plan->CopyFromReplacement(srcRep, offs); + JITDUMP(" dst+%03u <- V%02u (%s)\n", offs, srcRep->LclNum, srcRep->Description); + srcRep++; } } } From c7a9f8d47c9e0a9f3f25c3cf426afd1417ded0a4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 May 2023 13:14:09 +0200 Subject: [PATCH 35/37] Clean up a bit --- 
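Note: the substantive part of this cleanup is that InitFields and CopyBetweenFields no
longer flip NeedsWriteBack/NeedsReadBack themselves; DecompositionPlan's finalization
now does it once per materialized store, via the entry's ToReplacement pointer. A
minimal standalone sketch of the invariant being centralized follows; the type and
member names are illustrative stand-ins, not the JIT's real declarations:

struct ReplacementFlagsSketch
{
    // Exactly one of these freshness flags is meaningful at a time.
    bool NeedsWriteBack = true;  // the replacement local holds the fresh value
    bool NeedsReadBack  = false; // the struct local holds the fresh value

    // Finalization materialized a store to the replacement local (an
    // entry with ToReplacement != nullptr): the replacement is now fresh.
    void OnReplacementStored()
    {
        NeedsWriteBack = true;
        NeedsReadBack  = false;
    }

    // A retained block operation overwrote the struct local instead: the
    // replacement is stale and must be read back before its next use.
    void OnStructOverwritten()
    {
        NeedsWriteBack = false;
        NeedsReadBack  = true;
    }
};
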
src/coreclr/jit/promotion.cpp | 2 + src/coreclr/jit/promotiondecomposition.cpp | 130 ++++++++++----------- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 8e7f197ec9e4b9..1ece87d7c3b50d 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -627,6 +627,7 @@ bool Replacement::Overlaps(unsigned otherStart, unsigned otherSize) const // LCL_VAR int V01 // // Parameters: +// compiler - Compiler instance // structLclNum - Struct local // replacement - Information about the replacement // @@ -650,6 +651,7 @@ GenTree* Promotion::CreateWriteBack(Compiler* compiler, unsigned structLclNum, c // LCL_FLD int V00 [+4] // // Parameters: +// compiler - Compiler instance // structLclNum - Struct local // replacement - Information about the replacement // diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 75003a5a637952..a71a2fc4b6d4f2 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -665,8 +665,6 @@ class DecompositionPlan StructSegments::Segment segment; // See if we can "plug the hole" with a single primitive. - // TODO-CQ: Why does doing this for LCL_VAR result in so many regressions? - // TODO-CQ: Once we have liveness we can unlock this for LCL_VARs. if (remainder.IsSingleSegment(&segment)) { var_types primitiveType = TYP_UNDEF; @@ -730,10 +728,12 @@ class DecompositionPlan { const Entry& entry = m_entries.BottomRef(i); - assert(entry.ToLclNum != BAD_VAR_NUM); + assert((entry.ToLclNum != BAD_VAR_NUM) && (entry.ToReplacement != nullptr)); GenTree* src = CreateInitValue(entry.Type, initPattern); GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); + entry.ToReplacement->NeedsWriteBack = true; + entry.ToReplacement->NeedsReadBack = false; } RemainderStrategy remainderStrategy = DetermineRemainderStrategy(); @@ -981,7 +981,7 @@ class DecompositionPlan } // If the source involves replacements then do the struct op first -- - // otherwise we would overwrite the destination with stale bits. + // we would overwrite the destination with stale bits if we did it last. // If the source does not involve replacements then CQ analysis shows // that it's best to do it last. if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements) @@ -1009,6 +1009,7 @@ class DecompositionPlan if (IsHandledByRemainder(entry, remainderStrategy)) { + assert(entry.FromReplacement != nullptr); JITDUMP(" Skipping dst+%03u <- V%02u (%s); it is up-to-date in its struct local and will be handled " "as part of the remainder\n", entry.Offset, entry.FromReplacement->LclNum, entry.FromReplacement->Description); @@ -1023,26 +1024,21 @@ class DecompositionPlan if (m_compiler->lvaGetDesc(entry.ToLclNum)->lvIsStructField) UpdateEarlyRefCount(m_compiler, dst); } + else if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset; + // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX. 
+                noway_assert(FitsIn<uint16_t>(offs));
+                dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+                m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
+                                                         DEBUGARG(DoNotEnregisterReason::LocalField));
+                UpdateEarlyRefCount(m_compiler, dst);
+            }
             else
             {
-                assert(entry.FromLclNum != BAD_VAR_NUM);
-
-                if (m_dst->OperIs(GT_LCL_VAR, GT_LCL_FLD))
-                {
-                    unsigned offs = m_dst->AsLclVarCommon()->GetLclOffs() + entry.Offset;
-                    // Local morph ensures we do not see local indirs here that dereference beyond UINT16_MAX.
-                    noway_assert(FitsIn<uint16_t>(offs));
-                    dst = m_compiler->gtNewLclFldNode(m_dst->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
-                    m_compiler->lvaSetVarDoNotEnregister(m_dst->AsLclVarCommon()->GetLclNum()
-                                                             DEBUGARG(DoNotEnregisterReason::LocalField));
-                    UpdateEarlyRefCount(m_compiler, dst);
-                }
-                else
-                {
-                    GenTree* addr = grabAddr(entry.Offset);
-                    dst = m_compiler->gtNewIndir(entry.Type, addr);
-                    PropagateIndirFlags(dst, indirFlags);
-                }
+                GenTree* addr = grabAddr(entry.Offset);
+                dst = m_compiler->gtNewIndir(entry.Type, addr);
+                PropagateIndirFlags(dst, indirFlags);
             }
 
             GenTree* src;
@@ -1053,27 +1049,28 @@
                 if (m_compiler->lvaGetDesc(entry.FromLclNum)->lvIsStructField)
                     UpdateEarlyRefCount(m_compiler, src);
             }
+            else if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+            {
+                unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
+                noway_assert(FitsIn<uint16_t>(offs));
+                src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
+                m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
+                                                         DEBUGARG(DoNotEnregisterReason::LocalField));
+                UpdateEarlyRefCount(m_compiler, src);
+            }
             else
             {
-                assert(entry.ToLclNum != BAD_VAR_NUM);
-                if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
-                {
-                    unsigned offs = m_src->AsLclVarCommon()->GetLclOffs() + entry.Offset;
-                    noway_assert(FitsIn<uint16_t>(offs));
-                    src = m_compiler->gtNewLclFldNode(m_src->AsLclVarCommon()->GetLclNum(), entry.Type, offs);
-                    m_compiler->lvaSetVarDoNotEnregister(m_src->AsLclVarCommon()->GetLclNum()
-                                                             DEBUGARG(DoNotEnregisterReason::LocalField));
-                    UpdateEarlyRefCount(m_compiler, src);
-                }
-                else
-                {
-                    GenTree* addr = grabAddr(entry.Offset);
-                    src = m_compiler->gtNewIndir(entry.Type, addr);
-                    PropagateIndirFlags(src, indirFlags);
-                }
+                GenTree* addr = grabAddr(entry.Offset);
+                src = m_compiler->gtNewIndir(entry.Type, addr);
+                PropagateIndirFlags(src, indirFlags);
             }
 
             statements->AddStatement(m_compiler->gtNewAssignNode(dst, src));
+            if (entry.ToReplacement != nullptr)
+            {
+                entry.ToReplacement->NeedsWriteBack = true;
+                entry.ToReplacement->NeedsReadBack  = false;
+            }
         }
 
         if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements)
@@ -1119,11 +1116,19 @@
         assert(numAddrUses == 0);
     }
 
+    //------------------------------------------------------------------------
+    // IsHandledByRemainder:
+    //   Check if the specified entry is redundant because the remainder would
+    //   handle it anyway. This occurs when we have a source replacement that
+    //   is up-to-date in its struct local and we are going to retain a full
+    //   block operation anyway.
+ // + // Parameters: + // entry - The init/copy entry + // remainderStrategy - The strategy we are using for the remainder + // bool IsHandledByRemainder(const Entry& entry, const RemainderStrategy& remainderStrategy) { - // If the remainder is being handled as a full block copy and this - // replacement is up-to-date in its struct local then we can skip - // copying the replacement explicitly. return (remainderStrategy.Type == RemainderStrategy::FullBlock) && (entry.FromReplacement != nullptr) && !entry.FromReplacement->NeedsWriteBack && (entry.ToLclNum == BAD_VAR_NUM); } @@ -1214,6 +1219,7 @@ void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) GenTree* dst = asg->gtGetOp1(); assert(!dst->OperIs(GT_COMMA)); + GenTree* src = asg->gtGetOp2()->gtEffectiveVal(); GenTreeLclVarCommon* dstLcl = dst->OperIs(GT_LCL_VAR, GT_LCL_FLD) ? dst->AsLclVarCommon() : nullptr; @@ -1244,18 +1250,16 @@ void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) unsigned dstLclSize = dstLcl->GetLayout(m_compiler)->GetSize(); if (dstFirstRep->Offset < dstLclOffs) { + JITDUMP("*** Block operation partially overlaps with start replacement of destination V%02u (%s)\n", + dstFirstRep->LclNum, dstFirstRep->Description); + if (dstFirstRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " - "read-backs are " - "necessary.\n", - dstFirstRep->LclNum, dstFirstRep->Description); // The value of the replacement will be partially assembled from its old value and this struct // operation. // We accomplish this by an initial write back, the struct copy, followed by a later read back. - // TODO-CQ: This is very expensive and unreflected in heuristics, but it is also very rare. + // TODO-CQ: This is expensive and unreflected in heuristics, but it is also very rare. result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstFirstRep)); - dstFirstRep->NeedsWriteBack = false; } @@ -1268,14 +1272,12 @@ void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) Replacement* dstLastRep = dstEndRep - 1; if (dstLastRep->Offset + genTypeSize(dstLastRep->AccessType) > dstLclOffs + dstLclSize) { + JITDUMP("*** Block operation partially overlaps with end replacement of destination V%02u (%s)\n", + dstLastRep->LclNum, dstLastRep->Description); + if (dstLastRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with destination V%02u (%s). Write and " - "read-backs are " - "necessary.\n", - dstLastRep->LclNum, dstLastRep->Description); result.AddStatement(Promotion::CreateWriteBack(m_compiler, dstLcl->GetLclNum(), *dstLastRep)); - dstLastRep->NeedsWriteBack = false; } @@ -1292,13 +1294,12 @@ void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) if (srcFirstRep->Offset < srcLclOffs) { + JITDUMP("*** Block operation partially overlaps with start replacement of source V%02u (%s)\n", + srcFirstRep->LclNum, srcFirstRep->Description); + if (srcFirstRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). 
Write back is necessary.\n", - srcFirstRep->LclNum, srcFirstRep->Description); - result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcFirstRep)); - srcFirstRep->NeedsWriteBack = false; } @@ -1310,12 +1311,11 @@ void ReplaceVisitor::HandleAssignment(GenTree** use, GenTree* user) Replacement* srcLastRep = srcEndRep - 1; if (srcLastRep->Offset + genTypeSize(srcLastRep->AccessType) > srcLclOffs + srcLclSize) { + JITDUMP("*** Block operation partially overlaps with end replacement of source V%02u (%s)\n", + srcLastRep->LclNum, srcLastRep->Description); + if (srcLastRep->NeedsWriteBack) { - JITDUMP("*** Block operation partially overlaps with source V%02u (%s). Write back is " - "necessary.\n", - srcLastRep->LclNum, srcLastRep->Description); - result.AddStatement(Promotion::CreateWriteBack(m_compiler, srcLcl->GetLclNum(), *srcLastRep)); srcLastRep->NeedsWriteBack = false; } @@ -1527,8 +1527,6 @@ void ReplaceVisitor::InitFields(GenTreeLclVarCommon* dst, JITDUMP(" Init V%02u (%s)\n", rep->LclNum, rep->Description); plan->InitReplacement(rep, rep->Offset - dst->GetLclOffs()); - rep->NeedsWriteBack = true; - rep->NeedsReadBack = false; } } @@ -1601,8 +1599,6 @@ void ReplaceVisitor::CopyBetweenFields(GenTree* dst, unsigned offs = dstRep->Offset - dstBaseOffs; plan->CopyToReplacement(dstRep, offs); JITDUMP(" V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; dstRep++; continue; } @@ -1616,8 +1612,6 @@ void ReplaceVisitor::CopyBetweenFields(GenTree* dst, JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, srcRep->LclNum, srcRep->Description); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; dstRep++; srcRep++; continue; @@ -1649,8 +1643,6 @@ void ReplaceVisitor::CopyBetweenFields(GenTree* dst, { plan->CopyBetweenReplacements(dstRep, fieldLcl, offs); JITDUMP(" V%02u (%s) <- V%02u (%s)\n", dstRep->LclNum, dstRep->Description, dsc->lvReason); - dstRep->NeedsWriteBack = true; - dstRep->NeedsReadBack = false; dstRep++; continue; } @@ -1663,8 +1655,6 @@ void ReplaceVisitor::CopyBetweenFields(GenTree* dst, // overlapping fields as needing read back to avoid this DNER. 
             plan->CopyToReplacement(dstRep, offs);
             JITDUMP("    V%02u (%s) <- src+%03u\n", dstRep->LclNum, dstRep->Description, offs);
-            dstRep->NeedsWriteBack = true;
-            dstRep->NeedsReadBack  = false;
             dstRep++;
         }
         else

From b34659801e0d6b8b600b5d18810c5a489f22c2c6 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Fri, 5 May 2023 21:46:54 +0200
Subject: [PATCH 36/37] Address feedback; factor some code

---
 src/coreclr/jit/compiler.h                 |   2 +
 src/coreclr/jit/gentree.cpp                |  65 +++++++++
 src/coreclr/jit/morphblock.cpp             |  66 +--------
 src/coreclr/jit/promotiondecomposition.cpp | 159 +++++++++------------
 4 files changed, 143 insertions(+), 149 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 052cceda5f8da5..35c28b561fcf99 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -2510,6 +2510,8 @@ class Compiler
 
     GenTree* gtNewOneConNode(var_types type, var_types simdBaseType = TYP_UNDEF);
 
+    GenTree* gtNewConWithPattern(var_types type, uint8_t pattern);
+
     GenTreeLclVar* gtNewStoreLclVarNode(unsigned lclNum, GenTree* data);
 
     GenTreeLclFld* gtNewStoreLclFldNode(unsigned lclNum, var_types type, unsigned offset, GenTree* data);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 3f5d9d5eee7f93..0f7fd302761dd5 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -7684,6 +7684,71 @@ GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = T
     }
 }
 
+//------------------------------------------------------------------------
+// gtNewConWithPattern:
+//   Create an IR node representing a constant value with the specified byte
+//   pattern broadcast into all of its bytes.
+//
+// Parameters:
+//   type    - The primitive type. For small types the constant will be
+//             zero/sign-extended and a TYP_INT node will be returned.
+//   pattern - A byte pattern.
+//
+// Returns:
+//   An IR node representing the constant.
+//
+// Remarks:
+//   Should only be called when that pattern can actually be represented; for
+//   example, GC pointers only support an init pattern of zero.
+//
+GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern)
+{
+    switch (type)
+    {
+        case TYP_BOOL:
+        case TYP_UBYTE:
+            return gtNewIconNode(pattern);
+        case TYP_BYTE:
+            return gtNewIconNode((int8_t)pattern);
+        case TYP_SHORT:
+            return gtNewIconNode((int16_t)(pattern * 0x0101));
+        case TYP_USHORT:
+            return gtNewIconNode((uint16_t)(pattern * 0x0101));
+        case TYP_INT:
+            return gtNewIconNode((int32_t)(pattern * 0x01010101));
+        case TYP_LONG:
+            return gtNewIconNode((int64_t)(pattern * 0x0101010101010101LL), TYP_LONG);
+        case TYP_FLOAT:
+            float floatPattern;
+            memset(&floatPattern, pattern, sizeof(floatPattern));
+            return gtNewDconNode(floatPattern, TYP_FLOAT);
+        case TYP_DOUBLE:
+            double doublePattern;
+            memset(&doublePattern, pattern, sizeof(doublePattern));
+            return gtNewDconNode(doublePattern);
+        case TYP_REF:
+        case TYP_BYREF:
+            assert(pattern == 0);
+            return gtNewZeroConNode(type);
+#ifdef FEATURE_SIMD
+        case TYP_SIMD8:
+        case TYP_SIMD12:
+        case TYP_SIMD16:
+#if defined(TARGET_XARCH)
+        case TYP_SIMD32:
+        case TYP_SIMD64:
+#endif // TARGET_XARCH
+#endif // FEATURE_SIMD
+        {
+            GenTreeVecCon* node = gtNewVconNode(type);
+            memset(&node->gtSimdVal, pattern, sizeof(node->gtSimdVal));
+            return node;
+        }
+        default:
+            unreached();
+    }
+}
+
 GenTreeLclVar* Compiler::gtNewStoreLclVarNode(unsigned lclNum, GenTree* data)
 {
     LclVarDsc* varDsc = lvaGetDesc(lclNum);
diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp
index 31e6d49fd8bc49..67c2029c03dcaf 100644
--- a/src/coreclr/jit/morphblock.cpp
+++ b/src/coreclr/jit/morphblock.cpp
@@ -410,7 +410,7 @@ void MorphInitBlockHelper::TryInitFieldByField()
         return;
     }
 
-    const int64_t initPattern = (initVal->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL;
+    const uint8_t initPattern = (uint8_t)(initVal->AsIntCon()->IconValue() & 0xFF);
 
     if (initPattern != 0)
     {
@@ -418,14 +418,11 @@ void MorphInitBlockHelper::TryInitFieldByField()
         {
             LclVarDsc* fieldDesc = m_comp->lvaGetDesc(destLclVar->lvFieldLclStart + i);
 
-            if (varTypeIsSIMD(fieldDesc) || varTypeIsGC(fieldDesc))
+            if (varTypeIsGC(fieldDesc))
             {
-                // Cannot initialize GC or SIMD types with a non-zero constant.
-                // The former is completely bogus. The later restriction could be
-                // lifted by supporting non-zero SIMD constants or by generating
-                // field initialization code that converts an integer constant to
-                // the appropriate SIMD value. Unlikely to be very useful, though.
-                JITDUMP("  dest contains GC and/or SIMD fields and source constant is not 0.\n");
+                // Cannot initialize GC types with a non-zero constant; such
+                // an init value is completely bogus.
+                JITDUMP("  dest contains GC fields and source constant is not 0.\n");
                 return;
             }
         }
@@ -448,58 +445,7 @@ void MorphInitBlockHelper::TryInitFieldByField()
         LclVarDsc* fieldDesc = m_comp->lvaGetDesc(fieldLclNum);
         var_types  fieldType = fieldDesc->TypeGet();
 
-        GenTree* src;
-        switch (fieldType)
-        {
-            case TYP_BOOL:
-            case TYP_BYTE:
-            case TYP_UBYTE:
-            case TYP_SHORT:
-            case TYP_USHORT:
-                // Promoted fields are expected to be "normalize on load". If that changes then
-                // we may need to adjust this code to widen the constant correctly.
-                assert(fieldDesc->lvNormalizeOnLoad());
-                FALLTHROUGH;
-            case TYP_INT:
-            {
-                int64_t mask = (int64_t(1) << (genTypeSize(fieldType) * 8)) - 1;
-                src          = m_comp->gtNewIconNode(static_cast<int32_t>(initPattern & mask));
-                break;
-            }
-            case TYP_LONG:
-                src = m_comp->gtNewLconNode(initPattern);
-                break;
-            case TYP_FLOAT:
-                float floatPattern;
-                memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
-                src = m_comp->gtNewDconNode(floatPattern, TYP_FLOAT);
-                break;
-            case TYP_DOUBLE:
-                double doublePattern;
-                memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
-                src = m_comp->gtNewDconNode(doublePattern);
-                break;
-            case TYP_REF:
-            case TYP_BYREF:
-#ifdef FEATURE_SIMD
-            case TYP_SIMD8:
-            case TYP_SIMD12:
-            case TYP_SIMD16:
-#if defined(TARGET_XARCH)
-            case TYP_SIMD32:
-            case TYP_SIMD64:
-#endif // TARGET_XARCH
-#endif // FEATURE_SIMD
-            {
-                assert(initPattern == 0);
-                src = m_comp->gtNewZeroConNode(fieldType);
-                break;
-            }
-
-            default:
-                unreached();
-        }
-
+        GenTree* src   = m_comp->gtNewConWithPattern(fieldType, initPattern);
         GenTree* store = m_comp->gtNewTempAssign(fieldLclNum, src);
 
         if (m_comp->optLocalAssertionProp)
diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp
index a71a2fc4b6d4f2..076e3f720d67a9 100644
--- a/src/coreclr/jit/promotiondecomposition.cpp
+++ b/src/coreclr/jit/promotiondecomposition.cpp
@@ -490,14 +490,13 @@ class DecompositionPlan
     //   For an init block operation, get the pattern to init with.
     //
     // Returns:
-    //   Byte pattern broadcast into every byte of a 64-bit int.
+    //   Byte pattern.
     //
-    int64_t GetInitPattern()
+    uint8_t GetInitPattern()
     {
         assert(IsInit());
-        GenTree* cns     = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src;
-        int64_t  pattern = int64_t(cns->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL;
-        return pattern;
+        GenTree* cns = m_src->OperIsInitVal() ? m_src->gtGetOp1() : m_src;
+        return uint8_t(cns->AsIntCon()->IconValue() & 0xFF);
     }
 
     //------------------------------------------------------------------------
@@ -669,29 +668,33 @@
         {
             var_types primitiveType = TYP_UNDEF;
             unsigned  size          = segment.End - segment.Start;
-            switch (size)
+            // For a pointer-sized remainder at a pointer-aligned offset, take
+            // the type (including GC-ness) from the destination layout.
+            if ((size == TARGET_POINTER_SIZE) && ((segment.Start % TARGET_POINTER_SIZE) == 0))
             {
-                case 1:
-                    primitiveType = TYP_UBYTE;
-                    break;
-                case 2:
-                    primitiveType = TYP_USHORT;
-                    break;
+                ClassLayout* dstLayout = m_dst->GetLayout(m_compiler);
+                primitiveType          = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE);
+            }
+            else
+            {
+                switch (size)
+                {
+                    case 1:
+                        primitiveType = TYP_UBYTE;
+                        break;
+                    case 2:
+                        primitiveType = TYP_USHORT;
+                        break;
+                    case 4:
+                        primitiveType = TYP_INT;
+                        break;
#ifdef TARGET_64BIT
-                case 4:
-                    primitiveType = TYP_INT;
-                    break;
+                    case 8:
+                        primitiveType = TYP_LONG;
+                        break;
#endif
-                case TARGET_POINTER_SIZE:
-                    primitiveType = TYP_I_IMPL;
-                    if ((segment.Start % TARGET_POINTER_SIZE) == 0)
-                    {
-                        ClassLayout* dstLayout = m_dst->GetLayout(m_compiler);
-                        primitiveType          = dstLayout->GetGCPtrType(segment.Start / TARGET_POINTER_SIZE);
-                    }
-                    break;
-                    // TODO-CQ: SIMD sizes
+                        // TODO-CQ: SIMD sizes
+                }
             }
 
             if (primitiveType != TYP_UNDEF)
@@ -722,14 +725,14 @@ class DecompositionPlan
     void FinalizeInit(DecompositionStatementList* statements)
     {
         GenTree* cns = m_src->OperIsInitVal() ?
m_src->gtGetOp1() : m_src; - int64_t initPattern = GetInitPattern(); + uint8_t initPattern = GetInitPattern(); for (int i = 0; i < m_entries.Height(); i++) { const Entry& entry = m_entries.BottomRef(i); assert((entry.ToLclNum != BAD_VAR_NUM) && (entry.ToReplacement != nullptr)); - GenTree* src = CreateInitValue(entry.Type, initPattern); + GenTree* src = m_compiler->gtNewConWithPattern(entry.Type, initPattern); GenTree* dst = m_compiler->gtNewLclvNode(entry.ToLclNum, entry.Type); statements->AddStatement(m_compiler->gtNewAssignNode(dst, src)); entry.ToReplacement->NeedsWriteBack = true; @@ -744,7 +747,7 @@ class DecompositionPlan } else if (remainderStrategy.Type == RemainderStrategy::Primitive) { - GenTree* src = CreateInitValue(remainderStrategy.PrimitiveType, initPattern); + GenTree* src = m_compiler->gtNewConWithPattern(remainderStrategy.PrimitiveType, initPattern); GenTreeLclVarCommon* dstLcl = m_dst->AsLclVarCommon(); GenTree* dst = m_compiler->gtNewLclFldNode(dstLcl->GetLclNum(), remainderStrategy.PrimitiveType, dstLcl->GetLclOffs() + remainderStrategy.PrimitiveOffset); @@ -753,66 +756,6 @@ class DecompositionPlan } } - //------------------------------------------------------------------------ - // CreateInitValue: - // Create an IR node representing a constant value with the specified init pattern. - // - // Parameters: - // type - The primitive type - // initPattern - Pattern to init with - // - // Returns: - // A constant. - // - // Remarks: - // Should only be called when that pattern can actually be represented; - // for example, SIMD types and GC pointers only support an init pattern - // of zero. - // - GenTree* CreateInitValue(var_types type, int64_t initPattern) - { - switch (type) - { - case TYP_BOOL: - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - case TYP_INT: - { - int64_t mask = (int64_t(1) << (genTypeSize(type) * 8)) - 1; - return m_compiler->gtNewIconNode(static_cast(initPattern & mask)); - } - case TYP_LONG: - return m_compiler->gtNewLconNode(initPattern); - case TYP_FLOAT: - float floatPattern; - memcpy(&floatPattern, &initPattern, sizeof(floatPattern)); - return m_compiler->gtNewDconNode(floatPattern, TYP_FLOAT); - case TYP_DOUBLE: - double doublePattern; - memcpy(&doublePattern, &initPattern, sizeof(doublePattern)); - return m_compiler->gtNewDconNode(doublePattern); - case TYP_REF: - case TYP_BYREF: -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: -#endif // TARGET_XARCH -#endif // FEATURE_SIMD - { - assert(initPattern == 0); - return m_compiler->gtNewZeroConNode(type); - } - default: - unreached(); - } - } - //------------------------------------------------------------------------ // FinalizeCopy: // Create IR to perform the decomposed copy. @@ -829,7 +772,42 @@ class DecompositionPlan // If the remainder is a full block and is going to incur write barrier // then avoid incurring multiple write barriers for each source // replacement that is a GC pointer -- write them back to the struct - // first instead. + // first instead. 
That is, instead of: + // + // ▌ COMMA void + // ├──▌ ASG struct (copy) <- write barrier + // │ ├──▌ BLK struct + // │ │ └──▌ LCL_VAR byref V01 arg1 + // │ └──▌ LCL_VAR struct V00 arg0 + // └──▌ COMMA void + // ├──▌ ASG ref <- write barrier + // │ ├──▌ IND ref + // │ │ └──▌ ADD byref + // │ │ ├──▌ LCL_VAR byref V01 arg1 + // │ │ └──▌ CNS_INT long 8 + // │ └──▌ LCL_VAR ref V05 tmp3 + // └──▌ ASG ref <- write barrier + // ├──▌ IND ref + // │ └──▌ ADD byref + // │ ├──▌ LCL_VAR byref V01 arg1 + // │ └──▌ CNS_INT long 24 + // └──▌ LCL_VAR ref V06 tmp4 + // + // Produce: + // + // ▌ COMMA void + // ├──▌ ASG ref <- no write barrier + // │ ├──▌ LCL_FLD ref V00 arg0 [+8] + // │ └──▌ LCL_VAR ref V05 tmp3 + // └──▌ COMMA void + // ├──▌ ASG ref <- no write barrier + // │ ├──▌ LCL_FLD ref V00 arg0 [+24] + // │ └──▌ LCL_VAR ref V06 tmp4 + // └──▌ ASG struct (copy) <- write barrier + // ├──▌ BLK struct + // │ └──▌ LCL_VAR byref V01 arg1 (last use) + // └──▌ LCL_VAR struct V00 arg0 + // if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_dst->OperIs(GT_BLK) && m_dst->GetLayout(m_compiler)->HasGCPtr()) { @@ -847,6 +825,9 @@ class DecompositionPlan JITDUMP(" Will write back V%02u (%s) to avoid an additional write barrier\n", rep->LclNum, rep->Description); + // The loop below will skip these replacements as an + // optimization if it is going to copy the struct + // anyway. rep->NeedsWriteBack = false; } } @@ -903,7 +884,7 @@ class DecompositionPlan continue; } - const Entry& entry = m_entries.BottomRef(0); + const Entry& entry = m_entries.BottomRef(i); assert((entry.FromLclNum == BAD_VAR_NUM) || (entry.ToLclNum == BAD_VAR_NUM)); needsNullCheck = m_compiler->fgIsBigOffset(entry.Offset); From d431901aab585501146664feb1f1f79f44c07b54 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 5 May 2023 22:16:31 +0200 Subject: [PATCH 37/37] Fix bad factoring --- src/coreclr/jit/gentree.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0f7fd302761dd5..b5144681dfb0e0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7715,9 +7715,9 @@ GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern) case TYP_USHORT: return gtNewIconNode((uint16_t)(pattern * 0x0101)); case TYP_INT: - return gtNewIconNode((int32_t)(pattern * 0x01010101)); + return gtNewIconNode(pattern * 0x01010101); case TYP_LONG: - return gtNewIconNode((int64_t)(pattern * 0x0101010101010101LL), TYP_LONG); + return gtNewLconNode(pattern * 0x0101010101010101LL); case TYP_FLOAT: float floatPattern; memset(&floatPattern, pattern, sizeof(floatPattern));
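
Note: the bad factoring was that patch 36's helper used gtNewIconNode for the TYP_LONG
case, while the morphblock code it replaced had used gtNewLconNode (see the removed
lines in patch 36 above); this patch restores that. The byte-broadcast arithmetic
itself is easy to sanity-check outside the JIT; a standalone C++ snippet follows
(illustrative only, not JIT code):

#include <cstdint>
#include <cstdio>

int main()
{
    uint8_t pattern = 0xAB;
    // Multiplying a byte by a 0x01...01 repunit replicates it into every byte.
    uint32_t broadcast32 = pattern * 0x01010101u;           // 0xABABABAB
    uint64_t broadcast64 = pattern * 0x0101010101010101ULL; // 0xABABABABABABABAB
    printf("%08x\n%016llx\n", broadcast32, (unsigned long long)broadcast64);
    return 0;
}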