@@ -3758,61 +3758,87 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
3758
3758
OrdersType CurrentOrder(NumScalars, NumScalars);
3759
3759
SmallVector<int> Positions;
3760
3760
SmallBitVector UsedPositions(NumScalars);
3761
- const TreeEntry *STE = nullptr;
3761
+ DenseMap<const TreeEntry *, unsigned> UsedEntries;
3762
+ DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos;
3763
+ for (Value *V : TE.Scalars) {
3764
+ if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
3765
+ continue;
3766
+ const auto *LocalSTE = getTreeEntry(V);
3767
+ if (!LocalSTE)
3768
+ continue;
3769
+ unsigned Lane =
3770
+ std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V));
3771
+ if (Lane >= NumScalars)
3772
+ continue;
3773
+ ++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond();
3774
+ ValueToEntryPos.try_emplace(V, LocalSTE, Lane);
3775
+ }
3776
+ if (UsedEntries.empty())
3777
+ return std::nullopt;
3778
+ const TreeEntry &BestSTE =
3779
+ *std::max_element(UsedEntries.begin(), UsedEntries.end(),
3780
+ [](const std::pair<const TreeEntry *, unsigned> &P1,
3781
+ const std::pair<const TreeEntry *, unsigned> &P2) {
3782
+ return P1.second < P2.second;
3783
+ })
3784
+ ->first;
3785
+ UsedEntries.erase(&BestSTE);
3786
+ const TreeEntry *SecondBestSTE = nullptr;
3787
+ if (!UsedEntries.empty())
3788
+ SecondBestSTE =
3789
+ std::max_element(UsedEntries.begin(), UsedEntries.end(),
3790
+ [](const std::pair<const TreeEntry *, unsigned> &P1,
3791
+ const std::pair<const TreeEntry *, unsigned> &P2) {
3792
+ return P1.second < P2.second;
3793
+ })
3794
+ ->first;
3762
3795
// Try to find all gathered scalars that get vectorized in another
3763
3796
// vectorized node. Here we can have only one single tree vector node to
3764
3797
// correctly identify order of the gathered scalars.
3765
3798
for (unsigned I = 0; I < NumScalars; ++I) {
3766
3799
Value *V = TE.Scalars[I];
3767
3800
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
3768
3801
continue;
3769
- if (const auto *LocalSTE = getTreeEntry(V)) {
3770
- if (!STE)
3771
- STE = LocalSTE;
3772
- else if (STE != LocalSTE)
3773
- // Take the order only from the single vector node.
3774
- return std::nullopt;
3775
- unsigned Lane =
3776
- std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
3777
- if (Lane >= NumScalars)
3778
- return std::nullopt;
3779
- if (CurrentOrder[Lane] != NumScalars) {
3780
- if (Lane != I)
3781
- continue;
3782
- UsedPositions.reset(CurrentOrder[Lane]);
3783
- }
3784
- // The partial identity (where only some elements of the gather node are
3785
- // in the identity order) is good.
3786
- CurrentOrder[Lane] = I;
3787
- UsedPositions.set(I);
3802
+ const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V);
3803
+ if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE))
3804
+ continue;
3805
+ if (CurrentOrder[Lane] != NumScalars) {
3806
+ if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() ||
3807
+ BestSTE.Scalars[CurrentOrder[Lane]] == V) &&
3808
+ (Lane != I || LocalSTE == SecondBestSTE))
3809
+ continue;
3810
+ UsedPositions.reset(CurrentOrder[Lane]);
3788
3811
}
3812
+ // The partial identity (where only some elements of the gather node are
3813
+ // in the identity order) is good.
3814
+ CurrentOrder[Lane] = I;
3815
+ UsedPositions.set(I);
3789
3816
}
3790
3817
// Need to keep the order if we have a vector entry and at least 2 scalars or
3791
3818
// the vectorized entry has just 2 scalars.
3792
- if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
3793
- auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
3794
- for (unsigned I = 0; I < NumScalars; ++I)
3795
- if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
3796
- return false;
3797
- return true;
3798
- };
3799
- if (IsIdentityOrder(CurrentOrder))
3800
- return OrdersType();
3801
- auto *It = CurrentOrder.begin();
3802
- for (unsigned I = 0; I < NumScalars;) {
3803
- if (UsedPositions.test(I)) {
3804
- ++I;
3805
- continue;
3806
- }
3807
- if (*It == NumScalars) {
3808
- *It = I;
3809
- ++I;
3810
- }
3811
- ++It;
3819
+ if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1)
3820
+ return std::nullopt;
3821
+ auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) {
3822
+ for (unsigned I = 0; I < NumScalars; ++I)
3823
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
3824
+ return false;
3825
+ return true;
3826
+ };
3827
+ if (IsIdentityOrder(CurrentOrder))
3828
+ return OrdersType();
3829
+ auto *It = CurrentOrder.begin();
3830
+ for (unsigned I = 0; I < NumScalars;) {
3831
+ if (UsedPositions.test(I)) {
3832
+ ++I;
3833
+ continue;
3834
+ }
3835
+ if (*It == NumScalars) {
3836
+ *It = I;
3837
+ ++I;
3812
3838
}
3813
- return std::move(CurrentOrder) ;
3839
+ ++It ;
3814
3840
}
3815
- return std::nullopt ;
3841
+ return std::move(CurrentOrder) ;
3816
3842
}
3817
3843
3818
3844
namespace {
0 commit comments