Skip to content

Commit 876266a

Browse files
committed
Squashed commit of the following:
commit 9c2faf15231ac5ebc168161d1731feed55eb177c Merge: 0a0ac8da5df6 baecc9e Author: Rin <[email protected]> Date: Thu Oct 5 11:19:13 2023 +0100 Merge branch 'main' into maxTC_tailBase commit 0a0ac8da5df684b865d0fb16f7a806832f37e05b Author: Rin Dobrescu <[email protected]> Date: Thu Sep 28 15:48:49 2023 +0000 [AArch64][LoopVectorize] Use upper bound trip count instead of the constant TC when choosing max VF commit 26e009c Author: Rin Dobrescu <[email protected]> Date: Thu Sep 28 10:30:39 2023 +0000 Remove 'assertions automatically generated' line from test commit e056129 Author: Rin Dobrescu <[email protected]> Date: Wed Sep 27 14:47:42 2023 +0000 Address comments and fix tests commit 1bf78c8 Author: Rin Dobrescu <[email protected]> Date: Mon Sep 25 11:34:15 2023 +0000 [AArch64][LoopVectorize] Use either fixed-width or scalable VF when tail-folding
1 parent baecc9e commit 876266a

File tree

1 file changed

+34
-31
lines changed

1 file changed

+34
-31
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,17 +1663,17 @@ class LoopVectorizationCostModel {
16631663
/// disabled or unsupported, then the scalable part will be equal to
16641664
/// ElementCount::getScalable(0).
16651665
FixedScalableVFPair computeFeasibleMaxVF(unsigned ConstTripCount,
1666+
unsigned MaxTripCount,
16661667
ElementCount UserVF,
16671668
bool FoldTailByMasking);
16681669

16691670
/// \return the maximized element count based on the target's vector
16701671
/// registers and the loop trip-count, but limited to a maximum safe VF.
16711672
/// This is a helper function of computeFeasibleMaxVF.
1672-
ElementCount getMaximizedVFForTarget(unsigned ConstTripCount,
1673-
unsigned SmallestType,
1674-
unsigned WidestType,
1675-
ElementCount MaxSafeVF,
1676-
bool FoldTailByMasking);
1673+
ElementCount
1674+
getMaximizedVFForTarget(unsigned ConstTripCount, unsigned MaxTripCount,
1675+
unsigned SmallestType, unsigned WidestType,
1676+
ElementCount MaxSafeVF, bool FoldTailByMasking);
16771677

16781678
/// \return the maximum legal scalable VF, based on the safe max number
16791679
/// of elements.
@@ -4811,7 +4811,8 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
48114811
}
48124812

48134813
FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
4814-
unsigned ConstTripCount, ElementCount UserVF, bool FoldTailByMasking) {
4814+
unsigned ConstTripCount, unsigned MaxTripCount, ElementCount UserVF,
4815+
bool FoldTailByMasking) {
48154816
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
48164817
unsigned SmallestType, WidestType;
48174818
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -4898,14 +4899,14 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
48984899

48994900
FixedScalableVFPair Result(ElementCount::getFixed(1),
49004901
ElementCount::getScalable(0));
4901-
if (auto MaxVF =
4902-
getMaximizedVFForTarget(ConstTripCount, SmallestType, WidestType,
4903-
MaxSafeFixedVF, FoldTailByMasking))
4902+
if (auto MaxVF = getMaximizedVFForTarget(ConstTripCount, MaxTripCount,
4903+
SmallestType, WidestType,
4904+
MaxSafeFixedVF, FoldTailByMasking))
49044905
Result.FixedVF = MaxVF;
49054906

4906-
if (auto MaxVF =
4907-
getMaximizedVFForTarget(ConstTripCount, SmallestType, WidestType,
4908-
MaxSafeScalableVF, FoldTailByMasking))
4907+
if (auto MaxVF = getMaximizedVFForTarget(
4908+
ConstTripCount, MaxTripCount, SmallestType, WidestType,
4909+
MaxSafeScalableVF, FoldTailByMasking))
49094910
if (MaxVF.isScalable()) {
49104911
Result.ScalableVF = MaxVF;
49114912
LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = " << MaxVF
@@ -4928,6 +4929,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
49284929
}
49294930

49304931
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
4932+
unsigned MaxTC = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
49314933
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
49324934
if (TC == 1) {
49334935
reportVectorizationFailure("Single iteration (non) loop",
@@ -4938,7 +4940,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
49384940

49394941
switch (ScalarEpilogueStatus) {
49404942
case CM_ScalarEpilogueAllowed:
4941-
return computeFeasibleMaxVF(TC, UserVF, false);
4943+
return computeFeasibleMaxVF(TC, MaxTC, UserVF, false);
49424944
case CM_ScalarEpilogueNotAllowedUsePredicate:
49434945
[[fallthrough]];
49444946
case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -4976,7 +4978,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
49764978
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
49774979
"scalar epilogue instead.\n");
49784980
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
4979-
return computeFeasibleMaxVF(TC, UserVF, false);
4981+
return computeFeasibleMaxVF(TC, MaxTC, UserVF, false);
49804982
}
49814983
return FixedScalableVFPair::getNone();
49824984
}
@@ -4993,7 +4995,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
49934995
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
49944996
}
49954997

4996-
FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(TC, UserVF, true);
4998+
FixedScalableVFPair MaxFactors =
4999+
computeFeasibleMaxVF(TC, MaxTC, UserVF, true);
49975000

49985001
// Avoid tail folding if the trip count is known to be a multiple of any VF
49995002
// we choose.
@@ -5069,8 +5072,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
50695072
}
50705073

50715074
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
5072-
unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType,
5073-
ElementCount MaxSafeVF, bool FoldTailByMasking) {
5075+
unsigned ConstTripCount, unsigned MaxTripCount, unsigned SmallestType,
5076+
unsigned WidestType, ElementCount MaxSafeVF, bool FoldTailByMasking) {
50745077
bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
50755078
const TypeSize WidestRegister = TTI.getRegisterBitWidth(
50765079
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
@@ -5108,24 +5111,24 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
51085111
}
51095112

51105113
// When a scalar epilogue is required, at least one iteration of the scalar
5111-
// loop has to execute. Adjust ConstTripCount accordingly to avoid picking a
5114+
// loop has to execute. Adjust MaxTripCount accordingly to avoid picking a
51125115
// max VF that results in a dead vector loop.
5113-
if (ConstTripCount > 0 && requiresScalarEpilogue(true))
5114-
ConstTripCount -= 1;
5115-
5116-
if (ConstTripCount && ConstTripCount <= WidestRegisterMinEC &&
5117-
(!FoldTailByMasking || isPowerOf2_32(ConstTripCount))) {
5118-
// If loop trip count (TC) is known at compile time there is no point in
5119-
// choosing VF greater than TC (as done in the loop below). Select maximum
5120-
// power of two which doesn't exceed TC.
5121-
// If MaxVectorElementCount is scalable, we only fall back on a fixed VF
5122-
// when the TC is less than or equal to the known number of lanes.
5123-
auto ClampedConstTripCount = llvm::bit_floor(ConstTripCount);
5116+
if (MaxTripCount > 0 && requiresScalarEpilogue(true))
5117+
MaxTripCount -= 1;
5118+
5119+
if (MaxTripCount && MaxTripCount <= WidestRegisterMinEC &&
5120+
(!FoldTailByMasking || isPowerOf2_32(MaxTripCount))) {
5121+
// If the upper bound of the loop trip count (TC) is known at compile time,
5122+
// there is no point in choosing a VF greater than TC (as done in the loop
5123+
// below). Select the maximum power of two which doesn't exceed TC. If
5124+
// MaxVectorElementCount is scalable, we only fall back on a fixed VF when
5125+
// the TC is less than or equal to the known number of lanes.
5126+
auto ClampedUpperTripCount = llvm::bit_floor(MaxTripCount);
51245127
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
51255128
"exceeding the constant trip count: "
5126-
<< ClampedConstTripCount << "\n");
5129+
<< ClampedUpperTripCount << "\n");
51275130
return ElementCount::get(
5128-
ClampedConstTripCount,
5131+
ClampedUpperTripCount,
51295132
FoldTailByMasking ? MaxVectorElementCount.isScalable() : false);
51305133
}
51315134

0 commit comments

Comments
 (0)