Skip to content

Commit 03874ee

Browse files
committed
[AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations.
The code-generator is currently not able to handle scalable vectors of <vscale x 1 x eltty>. The usual "fix" for this until it is supported is to mark the costs of loads/stores with an invalid cost, preventing the vectorizer from vectorizing at those factors. But on rare occasions loops don't contain load/stores, only reductions. So whilst this is still unsupported return an invalid cost to avoid selecting vscale x 1 VFs. The cost of a reduction is not currently used by the vectorizer so this adds the cost to the add/mul/and/or/xor or min/max that should feed the reduction. This change will be removed when code-generation for these types is sufficiently reliable. Fixes #99760
1 parent 6e4c580 commit 03874ee

File tree

5 files changed

+90
-0
lines changed

5 files changed

+90
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
541541
InstructionCost
542542
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
543543
TTI::TargetCostKind CostKind) {
544+
// The code-generator is currently not able to handle scalable vectors
545+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
546+
// it. This change will be removed when code-generation for these types is
547+
// sufficiently reliable.
544548
auto *RetTy = ICA.getReturnType();
549+
if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))
550+
if (VTy->getElementCount() == ElementCount::getScalable(1))
551+
return InstructionCost::getInvalid();
552+
545553
switch (ICA.getID()) {
546554
case Intrinsic::experimental_vector_histogram_add:
547555
if (!ST->hasSVE2())
@@ -3070,6 +3078,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
30703078
ArrayRef<const Value *> Args,
30713079
const Instruction *CxtI) {
30723080

3081+
// The code-generator is currently not able to handle scalable vectors
3082+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
3083+
// it. This change will be removed when code-generation for these types is
3084+
// sufficiently reliable.
3085+
if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3086+
if (VTy->getElementCount() == ElementCount::getScalable(1))
3087+
return InstructionCost::getInvalid();
3088+
30733089
// TODO: Handle more cost kinds.
30743090
if (CostKind != TTI::TCK_RecipThroughput)
30753091
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -3844,6 +3860,14 @@ InstructionCost
38443860
AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
38453861
FastMathFlags FMF,
38463862
TTI::TargetCostKind CostKind) {
3863+
// The code-generator is currently not able to handle scalable vectors
3864+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
3865+
// it. This change will be removed when code-generation for these types is
3866+
// sufficiently reliable.
3867+
if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3868+
if (VTy->getElementCount() == ElementCount::getScalable(1))
3869+
return InstructionCost::getInvalid();
3870+
38473871
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
38483872

38493873
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
@@ -3888,6 +3912,14 @@ InstructionCost
38883912
AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
38893913
std::optional<FastMathFlags> FMF,
38903914
TTI::TargetCostKind CostKind) {
3915+
// The code-generator is currently not able to handle scalable vectors
3916+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
3917+
// it. This change will be removed when code-generation for these types is
3918+
// sufficiently reliable.
3919+
if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
3920+
if (VTy->getElementCount() == ElementCount::getScalable(1))
3921+
return InstructionCost::getInvalid();
3922+
38913923
if (TTI::requiresOrderedReduction(FMF)) {
38923924
if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
38933925
InstructionCost BaseCost =

llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ define void @fadd() {
88
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef
11+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fadd <vscale x 1 x float> undef, undef
1112
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef
1213
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef
1314
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef
@@ -19,6 +20,7 @@ define void @fadd() {
1920
%V8F16 = fadd <vscale x 8 x half> undef, undef
2021
%V16F16 = fadd <vscale x 16 x half> undef, undef
2122

23+
%V1F32 = fadd <vscale x 1 x float> undef, undef
2224
%V2F32 = fadd <vscale x 2 x float> undef, undef
2325
%V4F32 = fadd <vscale x 4 x float> undef, undef
2426
%V8F32 = fadd <vscale x 8 x float> undef, undef
@@ -34,6 +36,7 @@ define void @fsub() {
3436
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef
3537
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef
3638
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef
39+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fsub <vscale x 1 x float> undef, undef
3740
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef
3841
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef
3942
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef
@@ -45,6 +48,7 @@ define void @fsub() {
4548
%V8F16 = fsub <vscale x 8 x half> undef, undef
4649
%V16F16 = fsub <vscale x 16 x half> undef, undef
4750

51+
%V1F32 = fsub <vscale x 1 x float> undef, undef
4852
%V2F32 = fsub <vscale x 2 x float> undef, undef
4953
%V4F32 = fsub <vscale x 4 x float> undef, undef
5054
%V8F32 = fsub <vscale x 8 x float> undef, undef

llvm/test/Analysis/CostModel/AArch64/sve-arith.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,34 @@ define void @scalable_mul() #0 {
4343
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
4444
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
4545
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
46+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef
4647
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
4748
;
4849
entry:
4950
%mul_nxv16i8 = mul <vscale x 16 x i8> undef, undef
5051
%mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
5152
%mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
5253
%mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
54+
%mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef
55+
56+
ret void
57+
}
58+
59+
define void @scalable_add() #0 {
60+
; CHECK-LABEL: 'scalable_add'
61+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv16i8 = add <vscale x 16 x i8> undef, undef
62+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv8i16 = add <vscale x 8 x i16> undef, undef
63+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv4i32 = add <vscale x 4 x i32> undef, undef
64+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv2i64 = add <vscale x 2 x i64> undef, undef
65+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i64 = add <vscale x 1 x i64> undef, undef
66+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
67+
;
68+
entry:
69+
%add_nxv16i8 = add <vscale x 16 x i8> undef, undef
70+
%add_nxv8i16 = add <vscale x 8 x i16> undef, undef
71+
%add_nxv4i32 = add <vscale x 4 x i32> undef, undef
72+
%add_nxv2i64 = add <vscale x 2 x i64> undef, undef
73+
%add_nxv1i64 = add <vscale x 1 x i64> undef, undef
5374

5475
ret void
5576
}

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,20 @@ declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float>, i64
116116

117117
define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) {
118118
; CHECK-LABEL: 'reductions'
119+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
119120
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
120121
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
122+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
121123
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
122124
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
125+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
123126
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
124127
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
125128
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
126129
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
127130
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
128131
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
132+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
129133
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
130134
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
131135
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
@@ -134,25 +138,32 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
134138
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
135139
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
136140
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
141+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
137142
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
138143
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
144+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
139145
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
140146
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
147+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
141148
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
142149
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
143150
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
144151
;
145152
; TYPE_BASED_ONLY-LABEL: 'reductions'
153+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
146154
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
147155
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
156+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
148157
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
149158
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
159+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
150160
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
151161
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
152162
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
153163
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
154164
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
155165
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
166+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
156167
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
157168
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
158169
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
@@ -161,24 +172,31 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
161172
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
162173
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
163174
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
175+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
164176
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
165177
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
178+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
166179
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
167180
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
181+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
168182
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
169183
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
170184
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
171185
;
186+
%add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
172187
%add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
173188
%add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
189+
%mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
174190
%mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
175191
%mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
192+
%and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
176193
%and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
177194
%and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
178195
%or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
179196
%or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
180197
%xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
181198
%xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
199+
%umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
182200
%umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
183201
%umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
184202
%smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
@@ -188,10 +206,13 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
188206
%smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
189207
%smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
190208

209+
%fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
191210
%fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2)
192211
%fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3)
212+
%fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
193213
%fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
194214
%fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
215+
%fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
195216
%fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
196217
%fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
197218

0 commit comments

Comments
 (0)