Skip to content

Commit 4f86aa6

Browse files
committed
[LV] Add -scalable-vectorization=<option> flag.
This patch adds a new option to the LoopVectorizer to control how scalable vectors can be used. Initially, it provides three levels of control over scalable vectorization, although other, more aggressive options can be added in the future. The possible options are:
- Disabled (`-scalable-vectorization=off`): Disables vectorization with scalable vectors.
- Enabled (`-scalable-vectorization=on`): Vectorizes loops using scalable vectors or fixed-width vectors, but favors fixed-width vectors when the cost is a tie.
- Preferred (`-scalable-vectorization=preferred`): Like 'Enabled', but favors scalable vectors when the cost-model is inconclusive.

Reviewed By: paulwalker-arm, vkmr
Differential Revision: https://reviews.llvm.org/D101945
1 parent 57d20cb commit 4f86aa6

30 files changed

+135
-39
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,20 @@ class LoopVectorizeHints {
9696
FK_Enabled = 1, ///< Forcing enabled.
9797
};
9898

99+
enum ScalableForceKind {
100+
/// Not selected.
101+
SK_Unspecified = -1,
102+
/// Disables vectorization with scalable vectors.
103+
SK_FixedWidthOnly = 0,
104+
/// Vectorize loops using scalable vectors or fixed-width vectors, but favor
105+
/// scalable vectors when the cost-model is inconclusive. This is the
106+
/// default when the scalable.enable hint is enabled through a pragma.
107+
SK_PreferScalable = 1,
108+
/// Vectorize loops using scalable vectors or fixed-width vectors, but
109+
/// favor fixed-width vectors when the cost is inconclusive.
110+
SK_PreferFixedWidth = 2,
111+
};
112+
99113
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
100114
OptimizationRemarkEmitter &ORE);
101115

@@ -109,7 +123,8 @@ class LoopVectorizeHints {
109123
void emitRemarkWithHints() const;
110124

111125
ElementCount getWidth() const {
112-
return ElementCount::get(Width.Value, isScalable());
126+
return ElementCount::get(Width.Value,
127+
isScalableVectorizationExplicitlyEnabled());
113128
}
114129
unsigned getInterleave() const {
115130
if (Interleave.Value)
@@ -129,7 +144,16 @@ class LoopVectorizeHints {
129144
return (ForceKind)Force.Value;
130145
}
131146

132-
bool isScalable() const { return Scalable.Value; }
147+
/// \return true if scalable vectorization has been explicitly enabled.
148+
bool isScalableVectorizationExplicitlyEnabled() const {
149+
return Scalable.Value == SK_PreferFixedWidth ||
150+
Scalable.Value == SK_PreferScalable;
151+
}
152+
153+
/// \return true if scalable vectorization has been explicitly disabled.
154+
bool isScalableVectorizationDisabled() const {
155+
return Scalable.Value == SK_FixedWidthOnly;
156+
}
133157

134158
/// If hints are provided that force vectorization, use the AlwaysPrint
135159
/// pass name to force the frontend to print the diagnostic.

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,23 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
4848
cl::desc("The maximum number of SCEV checks allowed with a "
4949
"vectorize(enable) pragma"));
5050

51+
// FIXME: When scalable vectorization is stable enough, change the default
52+
// to SK_PreferFixedWidth.
53+
static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
54+
"scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
55+
cl::Hidden,
56+
cl::desc("Control whether the compiler can use scalable vectors to "
57+
"vectorize a loop"),
58+
cl::values(
59+
clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
60+
"Scalable vectorization is disabled."),
61+
clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
62+
"Scalable vectorization is available, but favor fixed-width "
63+
"vectorization when the cost is inconclusive."),
64+
clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
65+
"Scalable vectorization is available and favored when the "
66+
"cost is inconclusive.")));
67+
5168
/// Maximum vectorization interleave count.
5269
static const unsigned MaxInterleaveFactor = 16;
5370

@@ -77,15 +94,25 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
7794
Force("vectorize.enable", FK_Undefined, HK_FORCE),
7895
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
7996
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
80-
Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
81-
ORE(ORE) {
97+
Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
98+
TheLoop(L), ORE(ORE) {
8299
// Populate values with existing loop metadata.
83100
getHintsFromMetadata();
84101

85102
// force-vector-interleave overrides DisableInterleaving.
86103
if (VectorizerParams::isInterleaveForced())
87104
Interleave.Value = VectorizerParams::VectorizationInterleave;
88105

106+
if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
107+
// If the width is set, but the metadata says nothing about the scalable
108+
// property, then assume it concerns only a fixed-width UserVF.
109+
// If width is not set, the flag takes precedence.
110+
Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
111+
else if (ScalableVectorization == SK_FixedWidthOnly)
112+
// If the flag is set to disable any use of scalable vectors, override the
113+
// loop hint.
114+
Scalable.Value = SK_FixedWidthOnly;
115+
89116
if (IsVectorized.Value != 1)
90117
// If the vectorization width and interleaving count are both 1 then
91118
// consider the loop to have been already vectorized because there's

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5648,6 +5648,12 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
56485648
return ElementCount::getScalable(0);
56495649
}
56505650

5651+
if (Hints->isScalableVectorizationDisabled()) {
5652+
reportVectorizationInfo("Scalable vectorization is explicitly disabled",
5653+
"ScalableVectorizationDisabled", ORE, TheLoop);
5654+
return ElementCount::getScalable(0);
5655+
}
5656+
56515657
auto MaxScalableVF = ElementCount::getScalable(
56525658
std::numeric_limits<ElementCount::ScalarTy>::max());
56535659

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
1+
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
22

33
; This test currently fails when the LV calculates a maximum safe
44
; distance for scalable vectors, because the code to eliminate the tail is

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
2-
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
1+
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
2+
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
33

44
; We vectorize this first order recurrence, with a set of insertelements for
55
; each unrolled part. Make sure these insertelements are generated in-order,

llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: opt -loop-vectorize -force-vector-interleave=1 -S -debug < %s 2>%t | FileCheck %s
2+
; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
33
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
44

55
target triple = "aarch64-unknown-linux-gnu"

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s
1+
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
22

33
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
44
; CHECK-LABEL: @vec_load

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S 2>%t | FileCheck %s -check-prefix=CHECK
1+
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
22
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
33

44
; Reduction can be vectorized

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
1+
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
22

33
define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
44
; CHECK-LABEL: @fadd_strict

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
; REQUIRES: asserts
2-
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
3-
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
2+
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
3+
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
4+
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
5+
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
46

57
; Test that the MaxVF for the following loop, that has no dependence distances,
68
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
79
; (maximized bandwidth for i8 in the loop).
810
define void @test0(i32* %a, i8* %b, i32* %c) {
911
; CHECK: LV: Checking a loop in "test0"
1012
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
13+
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
14+
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
1115
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
1216
entry:
1317
br label %loop
@@ -35,6 +39,8 @@ exit:
3539
define void @test1(i32* %a, i8* %b) {
3640
; CHECK: LV: Checking a loop in "test1"
3741
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
42+
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
43+
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
3844
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
3945
entry:
4046
br label %loop
@@ -63,6 +69,8 @@ exit:
6369
define void @test2(i32* %a, i8* %b) {
6470
; CHECK: LV: Checking a loop in "test2"
6571
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
72+
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
73+
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
6674
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
6775
entry:
6876
br label %loop
@@ -91,6 +99,8 @@ exit:
9199
define void @test3(i32* %a, i8* %b) {
92100
; CHECK: LV: Checking a loop in "test3"
93101
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
102+
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
103+
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
94104
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
95105
entry:
96106
br label %loop
@@ -120,6 +130,8 @@ exit:
120130
define void @test4(i32* %a, i32* %b) {
121131
; CHECK: LV: Checking a loop in "test4"
122132
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
133+
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
134+
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
123135
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
124136
entry:
125137
br label %loop

0 commit comments

Comments
 (0)