Skip to content

Commit 44dc1e0

Browse files
committed
[SLP]Improve findReusedOrderedScalars processing, NFCI.
Tries to reduce the structural complexity of the findReusedOrderedScalars function.
1 parent decf027 commit 44dc1e0

File tree

1 file changed

+68
-42
lines changed

1 file changed

+68
-42
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 68 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3758,61 +3758,87 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
37583758
OrdersType CurrentOrder(NumScalars, NumScalars);
37593759
SmallVector<int> Positions;
37603760
SmallBitVector UsedPositions(NumScalars);
3761-
const TreeEntry *STE = nullptr;
3761+
DenseMap<const TreeEntry *, unsigned> UsedEntries;
3762+
DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos;
3763+
for (Value *V : TE.Scalars) {
3764+
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
3765+
continue;
3766+
const auto *LocalSTE = getTreeEntry(V);
3767+
if (!LocalSTE)
3768+
continue;
3769+
unsigned Lane =
3770+
std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V));
3771+
if (Lane >= NumScalars)
3772+
continue;
3773+
++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond();
3774+
ValueToEntryPos.try_emplace(V, LocalSTE, Lane);
3775+
}
3776+
if (UsedEntries.empty())
3777+
return std::nullopt;
3778+
const TreeEntry &BestSTE =
3779+
*std::max_element(UsedEntries.begin(), UsedEntries.end(),
3780+
[](const std::pair<const TreeEntry *, unsigned> &P1,
3781+
const std::pair<const TreeEntry *, unsigned> &P2) {
3782+
return P1.second < P2.second;
3783+
})
3784+
->first;
3785+
UsedEntries.erase(&BestSTE);
3786+
const TreeEntry *SecondBestSTE = nullptr;
3787+
if (!UsedEntries.empty())
3788+
SecondBestSTE =
3789+
std::max_element(UsedEntries.begin(), UsedEntries.end(),
3790+
[](const std::pair<const TreeEntry *, unsigned> &P1,
3791+
const std::pair<const TreeEntry *, unsigned> &P2) {
3792+
return P1.second < P2.second;
3793+
})
3794+
->first;
37623795
// Try to find all gathered scalars that get vectorized in another
37633796
// vectorized node. Here we can have only a single vector tree node to
37643797
// correctly identify the order of the gathered scalars.
37653798
for (unsigned I = 0; I < NumScalars; ++I) {
37663799
Value *V = TE.Scalars[I];
37673800
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
37683801
continue;
3769-
if (const auto *LocalSTE = getTreeEntry(V)) {
3770-
if (!STE)
3771-
STE = LocalSTE;
3772-
else if (STE != LocalSTE)
3773-
// Take the order only from the single vector node.
3774-
return std::nullopt;
3775-
unsigned Lane =
3776-
std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
3777-
if (Lane >= NumScalars)
3778-
return std::nullopt;
3779-
if (CurrentOrder[Lane] != NumScalars) {
3780-
if (Lane != I)
3781-
continue;
3782-
UsedPositions.reset(CurrentOrder[Lane]);
3783-
}
3784-
// The partial identity (where only some elements of the gather node are
3785-
// in the identity order) is good.
3786-
CurrentOrder[Lane] = I;
3787-
UsedPositions.set(I);
3802+
const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V);
3803+
if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE))
3804+
continue;
3805+
if (CurrentOrder[Lane] != NumScalars) {
3806+
if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() ||
3807+
BestSTE.Scalars[CurrentOrder[Lane]] == V) &&
3808+
(Lane != I || LocalSTE == SecondBestSTE))
3809+
continue;
3810+
UsedPositions.reset(CurrentOrder[Lane]);
37883811
}
3812+
// The partial identity (where only some elements of the gather node are
3813+
// in the identity order) is good.
3814+
CurrentOrder[Lane] = I;
3815+
UsedPositions.set(I);
37893816
}
37903817
// Need to keep the order if we have a vector entry and at least 2 scalars or
37913818
// the vectorized entry has just 2 scalars.
3792-
if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
3793-
auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
3794-
for (unsigned I = 0; I < NumScalars; ++I)
3795-
if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
3796-
return false;
3797-
return true;
3798-
};
3799-
if (IsIdentityOrder(CurrentOrder))
3800-
return OrdersType();
3801-
auto *It = CurrentOrder.begin();
3802-
for (unsigned I = 0; I < NumScalars;) {
3803-
if (UsedPositions.test(I)) {
3804-
++I;
3805-
continue;
3806-
}
3807-
if (*It == NumScalars) {
3808-
*It = I;
3809-
++I;
3810-
}
3811-
++It;
3819+
if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1)
3820+
return std::nullopt;
3821+
auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) {
3822+
for (unsigned I = 0; I < NumScalars; ++I)
3823+
if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
3824+
return false;
3825+
return true;
3826+
};
3827+
if (IsIdentityOrder(CurrentOrder))
3828+
return OrdersType();
3829+
auto *It = CurrentOrder.begin();
3830+
for (unsigned I = 0; I < NumScalars;) {
3831+
if (UsedPositions.test(I)) {
3832+
++I;
3833+
continue;
3834+
}
3835+
if (*It == NumScalars) {
3836+
*It = I;
3837+
++I;
38123838
}
3813-
return std::move(CurrentOrder);
3839+
++It;
38143840
}
3815-
return std::nullopt;
3841+
return std::move(CurrentOrder);
38163842
}
38173843

38183844
namespace {

0 commit comments

Comments
 (0)