-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Try to hoist Previous (and operands), if sinking fails for FORs. #108945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2535516
89b10f2
63424ba
49dc5d5
ef52b83
cf20f7c
aee92ab
a67d252
833e5ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -772,6 +772,105 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, | |
return true; | ||
} | ||
|
||
/// Try to hoist \p Previous and its operands before all users of \p FOR. | ||
/// Returns true if \p Previous needs no hoisting, or if every collected | ||
/// candidate was moved before the hoist point. Returns false, before moving | ||
/// anything, if \p Previous may have side effects or may read from memory, if | ||
/// any collected candidate may have side effects, or if an operand of a | ||
/// candidate is \p FOR itself. | ||
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, | ||
VPRecipeBase *Previous, | ||
VPDominatorTree &VPDT) { | ||
if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory()) | ||
return false; | ||
|
||
// Collect recipes that need hoisting. | ||
SmallVector<VPRecipeBase *> HoistCandidates; | ||
SmallPtrSet<VPRecipeBase *, 8> Visited; | ||
VPRecipeBase *HoistPoint = nullptr; | ||
// Find the closest hoist point by looking at all users of FOR and selecting | ||
// the recipe dominating all other users. | ||
for (VPUser *U : FOR->users()) { | ||
auto *R = dyn_cast<VPRecipeBase>(U); | ||
if (!R) | ||
continue; | ||
if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint)) | ||
HoistPoint = R; | ||
} | ||
assert(all_of(FOR->users(), | ||
[&VPDT, HoistPoint](VPUser *U) { | ||
auto *R = dyn_cast<VPRecipeBase>(U); | ||
return !R || HoistPoint == R || | ||
VPDT.properlyDominates(HoistPoint, R); | ||
}) && | ||
"HoistPoint must dominate all users of FOR"); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: may be worth asserting HoistPoint (is non-null and) dominates all FOR->users(). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
auto NeedsHoisting = [HoistPoint, &VPDT, | ||
&Visited](VPValue *HoistCandidateV) -> VPRecipeBase * { | ||
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe(); | ||
if (!HoistCandidate) | ||
return nullptr; | ||
VPRegionBlock *EnclosingLoopRegion = | ||
HoistCandidate->getParent()->getEnclosingLoopRegion(); | ||
assert((!HoistCandidate->getParent()->getParent() || | ||
HoistCandidate->getParent()->getParent() == EnclosingLoopRegion) && | ||
"CFG in VPlan should still be flat, without replicate regions"); | ||
// Hoist candidate was already visited, no need to hoist. | ||
if (!Visited.insert(HoistCandidate).second) | ||
return nullptr; | ||
|
||
// Candidate is outside loop region or a header phi, dominates FOR users w/o | ||
// hoisting. | ||
if (!EnclosingLoopRegion || isa<VPHeaderPHIRecipe>(HoistCandidate)) | ||
return nullptr; | ||
|
||
// If we reached a recipe that dominates HoistPoint, we don't need to | ||
// hoist the recipe. | ||
if (VPDT.properlyDominates(HoistCandidate, HoistPoint)) | ||
return nullptr; | ||
return HoistCandidate; | ||
}; | ||
auto CanHoist = [&](VPRecipeBase *HoistCandidate) { | ||
// Avoid hoisting candidates with side-effects, as we do not yet analyze | ||
// associated dependencies. | ||
return !HoistCandidate->mayHaveSideEffects(); | ||
}; | ||
|
||
// This call also records Previous in Visited. A null result means Previous | ||
// does not have to move, so there is nothing to hoist. | ||
if (!NeedsHoisting(Previous->getVPSingleValue())) | ||
return true; | ||
|
||
// Recursively try to hoist Previous and its operands before all users of FOR. | ||
HoistCandidates.push_back(Previous); | ||
|
||
// HoistCandidates doubles as a worklist: it is appended to while being | ||
// iterated, hence the index-based loop that re-reads size() each iteration. | ||
for (unsigned I = 0; I != HoistCandidates.size(); ++I) { | ||
VPRecipeBase *Current = HoistCandidates[I]; | ||
assert(Current->getNumDefinedValues() == 1 && | ||
"only recipes with a single defined value expected"); | ||
if (!CanHoist(Current)) | ||
return false; | ||
|
||
for (VPValue *Op : Current->operands()) { | ||
// If we reach FOR, it means the original Previous depends on some other | ||
// recurrence that in turn depends on FOR. If that is the case, we would | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (continuing outdated discussion): having the Previous of one FOR depend on the header phi of another FOR is clear (including the case where Previous is simply an or-with-zero copy leading to a second order recurrence), implying that FORs better be handled in some order if not (topologically sorting) altogether. But this check circles back to the original FOR phi, i.e., a cyclic dependence rather than a FOR one. |
||
// also need to hoist recipes involving the other FOR, which may break | ||
// dependencies. | ||
if (Op == FOR) | ||
return false; | ||
|
||
if (auto *R = NeedsHoisting(Op)) | ||
HoistCandidates.push_back(R); | ||
} | ||
} | ||
|
||
// Order recipes to hoist by dominance so earlier instructions are processed | ||
// first. | ||
sort(HoistCandidates, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) { | ||
return VPDT.properlyDominates(A, B); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it provide strict weak ordering? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes it should, as the CFG in the loop is flattened. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that the CFG in the loop is assumed to be flattened,
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
}); | ||
|
||
for (VPRecipeBase *HoistCandidate : HoistCandidates) { | ||
HoistCandidate->moveBefore(*HoistPoint->getParent(), | ||
HoistPoint->getIterator()); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, | ||
VPBuilder &LoopBuilder) { | ||
VPDominatorTree VPDT; | ||
|
@@ -795,7 +894,8 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, | |
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe(); | ||
} | ||
|
||
if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT)) | ||
if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT) && | ||
!hoistPreviousBeforeFORUsers(FOR, Previous, VPDT)) | ||
return false; | ||
|
||
// Introduce a recipe to combine the incoming and previous values of a | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -278,3 +278,79 @@ exit: | |
store double %.lcssa, ptr %C | ||
ret i64 %.in.lcssa | ||
} | ||
|
||
; Test for https://github.com/llvm/llvm-project/issues/106523. | ||
; %for.2 requires no code motion, as its previous (%or) precedes its (first) | ||
; user (store). Furthermore, its user cannot sink, being a store. | ||
; | ||
; %for.1 requires code motion, as its previous (%trunc) follows its (first) | ||
; user (%or). Sinking %or past %trunc seems possible, as %or has no uses | ||
; (except for feeding %for.2; worth strengthening VPlan's dce?). However, %or | ||
; is both the user of %for.1 and the previous of %for.2, and we refrain from | ||
; sinking instructions that act as previous because they (may) serve as points | ||
; to sink after. | ||
|
||
; Instead, %for.1 can be reconciled by hoisting its previous above its user | ||
; %or, as this previous %trunc depends only on %iv. | ||
define void @for_iv_trunc_optimized(ptr %dst) { | ||
; CHECK-LABEL: @for_iv_trunc_optimized( | ||
; CHECK-NEXT: bb: | ||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
; CHECK: vector.ph: | ||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] | ||
; CHECK: vector.body: | ||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 1>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The loop seems to be vectorized by VF=4 and unrolled by UF=2 as VEC_IND and INDEX are bumped by 8's, but there's only a single copy of <4 x i32> vectors, presumably due to dce. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, due to only storing to an invariant pointer I think |
||
; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> | ||
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> | ||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TMP0 is the first splice for %for.1, fed by (last lane of) a <4 x i32> vector IV of last iteration, rather than truncating an i64 one, along with first 3 lanes of current vector IV. |
||
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], <i32 3, i32 3, i32 3, i32 3> | ||
; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3> | ||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The first splice of %for.2 combines the last lane of VECTOR_RECUR1 with first 3 lanes of TMP2, but being dead is eliminated. |
||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 | ||
; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST:%.*]], align 4 | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 | ||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4> | ||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336 | ||
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] | ||
; CHECK: middle.block: | ||
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 | ||
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 | ||
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] | ||
; CHECK: scalar.ph: | ||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 337, [[MIDDLE_BLOCK]] ], [ 1, [[BB:%.*]] ] | ||
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[BB]] ] | ||
; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] | ||
; CHECK-NEXT: br label [[LOOP:%.*]] | ||
; CHECK: loop: | ||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] | ||
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC:%.*]], [[LOOP]] ] | ||
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ] | ||
; CHECK-NEXT: [[OR]] = or i32 [[FOR_1]], 3 | ||
; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1 | ||
; CHECK-NEXT: store i32 [[FOR_2]], ptr [[DST]], align 4 | ||
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337 | ||
; CHECK-NEXT: [[TRUNC]] = trunc i64 [[IV]] to i32 | ||
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] | ||
; CHECK: exit: | ||
; CHECK-NEXT: ret void | ||
; | ||
bb: | ||
br label %loop | ||
|
||
loop: | ||
%iv = phi i64 [ 1, %bb ], [ %add, %loop ] | ||
%for.1 = phi i32 [ 1, %bb ], [ %trunc, %loop ] | ||
%for.2 = phi i32 [ 0, %bb ], [ %or, %loop ] | ||
%or = or i32 %for.1, 3 | ||
%add = add i64 %iv, 1 | ||
store i32 %for.2, ptr %dst, align 4 | ||
%icmp = icmp ult i64 %iv, 337 | ||
%trunc = trunc i64 %iv to i32 | ||
br i1 %icmp, label %loop, label %exit | ||
|
||
exit: | ||
ret void | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,14 +147,57 @@ exit: | |
} | ||
|
||
; This test has two FORs (for.x and for.y) where incoming value from the previous | ||
; iteration (for.x.prev) of one FOR (for.y) depends on another FOR (for.x). Due to | ||
; this dependency all uses of the former FOR (for.y) should be sunk after | ||
; incoming value from the previous iteration (for.x.prev) of the latter FOR (for.y). | ||
; That means side-effecting user (store i64 %for.y.i64, ptr %gep) of the latter | ||
; FOR (for.y) should be moved which is not currently supported. | ||
; iteration (for.x.prev) of one FOR (for.y) depends on another FOR (for.x). | ||
; Sinking would require moving a recipe with side effects (store). Instead, | ||
; for.x.next can be hoisted. | ||
define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { | ||
; CHECK-LABEL: 'test_chained_first_order_recurrences_4' | ||
; CHECK: No VPlans built. | ||
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { | ||
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF | ||
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count | ||
; CHECK-NEXT: Live-in ir<4098> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: vector.ph: | ||
; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%x>, ir<2> | ||
; CHECK-NEXT: Successor(s): vector loop | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: <x1> vector loop: { | ||
; CHECK-NEXT: vector.body: | ||
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> | ||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> | ||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> | ||
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> | ||
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]> | ||
; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next> | ||
; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32 | ||
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev> | ||
; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64 | ||
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> | ||
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64> | ||
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> | ||
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> | ||
; CHECK-NEXT: No successors | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: Successor(s): middle.block | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: middle.block: | ||
; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-from-end ir<%for.x.next>, ir<1> | ||
; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-from-end ir<%for.x.prev>, ir<1> | ||
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]> | ||
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]> | ||
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<ret>: | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: scalar.ph: | ||
; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0> | ||
; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]>.1 = resume-phi vp<[[EXT_Y]]>.1, ir<0> | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Live-out i64 %for.x = vp<[[RESUME_X]]> | ||
; CHECK-NEXT: Live-out i32 %for.y = vp<[[RESUME_Y]]>.1 | ||
; CHECK-NEXT: } | ||
; | ||
entry: | ||
br label %loop | ||
|
@@ -178,7 +221,54 @@ ret: | |
|
||
define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { | ||
; CHECK-LABEL: 'test_chained_first_order_recurrences_5_hoist_to_load' | ||
; CHECK: No VPlans built. | ||
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { | ||
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF | ||
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count | ||
; CHECK-NEXT: Live-in ir<4098> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: vector.ph: | ||
; CHECK-NEXT: Successor(s): vector loop | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: <x1> vector loop: { | ||
; CHECK-NEXT: vector.body: | ||
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> | ||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> | ||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> | ||
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> | ||
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]> | ||
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> | ||
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> | ||
; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%l>, ir<2> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar case to the one above, except the end - we hoist its previous for.x.next above its user for.x.prev (until reaching a dependence on %l, rather than all the way out of the loop). |
||
; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next> | ||
; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32 | ||
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev> | ||
; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64 | ||
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> | ||
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64> | ||
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> | ||
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> | ||
; CHECK-NEXT: No successors | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: Successor(s): middle.block | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: middle.block: | ||
; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-from-end ir<%for.x.next>, ir<1> | ||
; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-from-end ir<%for.x.prev>, ir<1> | ||
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]> | ||
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]> | ||
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<ret>: | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: scalar.ph: | ||
; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0> | ||
; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]>.1 = resume-phi vp<[[EXT_Y]]>.1, ir<0> | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Live-out i64 %for.x = vp<[[RESUME_X]]> | ||
; CHECK-NEXT: Live-out i32 %for.y = vp<[[RESUME_Y]]>.1 | ||
; CHECK-NEXT: } | ||
; | ||
entry: | ||
br label %loop | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: else can assert HoistPoint dominates R, if not assert so collectively below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added an assert after the loop, thanks!