diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 49b462668b630..e85fd73996dd1 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -541,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast(RetTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -3070,6 +3078,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3844,6 +3860,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3888,6 +3912,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(ValTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll index 18a1c31c03f74..770d3087b0752 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll @@ -8,6 +8,7 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd undef, undef @@ -19,6 +20,7 @@ define void @fadd() { %V8F16 = fadd undef, undef %V16F16 = fadd undef, undef + %V1F32 = fadd undef, undef %V2F32 = fadd undef, undef %V4F32 = fadd undef, undef %V8F32 = fadd undef, undef @@ -34,6 +36,7 @@ define void @fsub() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub undef, undef @@ -45,6 +48,7 @@ define void @fsub() { %V8F16 = fsub undef, undef %V16F16 = fsub undef, undef + %V1F32 = fsub undef, undef %V2F32 = fsub undef, undef %V4F32 = fsub undef, undef %V8F32 = fsub undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll index e1a9ee114d261..98d5bd5bd13f4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll @@ -3,6 +3,7 @@ define void @foo_no_vscale_range() { ; CHECK-LABEL: 'foo_no_vscale_range' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1( undef, i1 true) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) @@ -45,6 +46,7 @@ define void @foo_no_vscale_range() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1( undef, i1 true) %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll index f4dfea4cce349..46450e68f40e2 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll @@ -43,6 +43,7 @@ define void @scalable_mul() #0 { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv8i16 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv4i32 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv2i64 = mul undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i64 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; entry: @@ -50,6 +51,26 @@ entry: %mul_nxv8i16 = mul undef, undef %mul_nxv4i32 = mul undef, undef %mul_nxv2i64 = mul undef, undef + %mul_nxv1i64 = mul undef, undef + + ret void +} + +define void @scalable_add() #0 { +; CHECK-LABEL: 'scalable_add' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv16i8 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv8i16 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv4i32 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv2i64 = add undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i64 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %add_nxv16i8 = add undef, undef + %add_nxv8i16 = add undef, undef + %add_nxv4i32 = add undef, undef + %add_nxv2i64 = add undef, undef + %add_nxv1i64 = add undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 004b696bb9a98..aede9c8984312 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -116,82 +116,118 @@ declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(, i64 define void @reductions( %v0, %v1, %v2, %v3) { ; CHECK-LABEL: 'reductions' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v3) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64( %v3) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32( %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64( %v3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'reductions' +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32( %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v3) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64( %v3) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32( undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32( %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64( %v3) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32( undef) %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) + %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32( undef) %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) + %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32( undef) %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32( %v0) %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64( %v1) + %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32( undef) %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32( %v0) %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64( %v1) + %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32( undef) %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32( %v0) %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64( %v1) + %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64( undef) %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32( %v0) %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64( %v1) + %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64( undef) %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32( %v0) %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64( %v1) + %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64( undef) %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32( %v0) %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64( %v1) + %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64( undef) %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32( %v0) %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) + %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, %v2) %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, %v3) + %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32( undef) %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64( %v3) + %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32( undef) %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32( %v2) %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64( %v3) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll index 7a801db1f35fa..777f4a2c4a64e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll @@ -14,6 +14,7 @@ define void @umin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call @llvm.umin.nxv4i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call @llvm.umin.nxv8i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call @llvm.umin.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call @llvm.umin.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call @llvm.umin.nxv2i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call @llvm.umin.nxv4i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call @llvm.umin.nxv8i32( undef, undef) @@ -30,6 +31,7 @@ define void @umin() { %V4i16 = call @llvm.umin.nxv4i16( undef, undef) %V8i16 = call @llvm.umin.nxv8i16( undef, undef) %V16i16 = call @llvm.umin.nxv16i16( undef, undef) + %V1i32 = call @llvm.umin.nxv1i32( undef, undef) %V2i32 = call @llvm.umin.nxv2i32( undef, undef) %V4i32 = call @llvm.umin.nxv4i32( undef, undef) %V8i32 = call @llvm.umin.nxv8i32( undef, undef) @@ -49,6 +51,7 @@ define void @umax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call @llvm.umax.nxv4i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call @llvm.umax.nxv8i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call @llvm.umax.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call @llvm.umax.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call @llvm.umax.nxv2i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call @llvm.umax.nxv4i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call @llvm.umax.nxv8i32( undef, undef) @@ -65,6 +68,7 @@ define void @umax() { %V4i16 = call @llvm.umax.nxv4i16( undef, undef) %V8i16 = call @llvm.umax.nxv8i16( undef, undef) %V16i16 = call @llvm.umax.nxv16i16( undef, undef) + %V1i32 = call @llvm.umax.nxv1i32( undef, undef) %V2i32 = call @llvm.umax.nxv2i32( undef, undef) %V4i32 = call @llvm.umax.nxv4i32( undef, undef) %V8i32 = call @llvm.umax.nxv8i32( undef, undef) @@ -84,6 +88,7 @@ define void @smin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call @llvm.smin.nxv4i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call @llvm.smin.nxv8i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call @llvm.smin.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call @llvm.smin.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call @llvm.smin.nxv2i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call @llvm.smin.nxv4i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call @llvm.smin.nxv8i32( undef, undef) @@ -100,6 +105,7 @@ define void @smin() { %V4i16 = call @llvm.smin.nxv4i16( undef, undef) %V8i16 = call @llvm.smin.nxv8i16( undef, undef) %V16i16 = call @llvm.smin.nxv16i16( undef, undef) + %V1i32 = call @llvm.smin.nxv1i32( undef, undef) %V2i32 = call @llvm.smin.nxv2i32( undef, undef) %V4i32 = call @llvm.smin.nxv4i32( undef, undef) %V8i32 = call @llvm.smin.nxv8i32( undef, undef) @@ -119,6 +125,7 @@ define void @smax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call @llvm.smax.nxv4i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call @llvm.smax.nxv8i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call @llvm.smax.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call @llvm.smax.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call @llvm.smax.nxv2i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call @llvm.smax.nxv4i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call @llvm.smax.nxv8i32( undef, undef) @@ -135,6 +142,7 @@ define void @smax() { %V4i16 = call @llvm.smax.nxv4i16( undef, undef) %V8i16 = call @llvm.smax.nxv8i16( undef, undef) %V16i16 = call @llvm.smax.nxv16i16( undef, undef) + %V1i32 = call @llvm.smax.nxv1i32( undef, undef) %V2i32 = call @llvm.smax.nxv2i32( undef, undef) %V4i32 = call @llvm.smax.nxv4i32( undef, undef) %V8i32 = call @llvm.smax.nxv8i32( undef, undef) @@ -145,6 +153,7 @@ define void @smax() { define void @minnum() { ; CHECK-LABEL: 'minnum' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1f32 = call @llvm.minnum.nxv1f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call @llvm.minnum.nxv2f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call @llvm.minnum.nxv4f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call @llvm.minnum.nxv8f32( undef, undef) @@ -156,6 +165,7 @@ define void @minnum() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call @llvm.minnum.nxv16f16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %V1f32 = call @llvm.minnum.nxv1f32( undef, undef) %V2f32 = call @llvm.minnum.nxv2f32( undef, undef) %V4f32 = call @llvm.minnum.nxv4f32( undef, undef) %V8f32 = call @llvm.minnum.nxv8f32( undef, undef) @@ -170,6 +180,7 @@ define void @minnum() { define void @maxnum() { ; CHECK-LABEL: 'maxnum' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1f32 = call @llvm.maxnum.nxv1f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call @llvm.maxnum.nxv2f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call @llvm.maxnum.nxv4f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call @llvm.maxnum.nxv8f32( undef, undef) @@ -181,6 +192,7 @@ define void @maxnum() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call @llvm.maxnum.nxv16f16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %V1f32 = call @llvm.maxnum.nxv1f32( undef, undef) %V2f32 = call @llvm.maxnum.nxv2f32( undef, undef) %V4f32 = call @llvm.maxnum.nxv4f32( undef, undef) %V8f32 = call @llvm.maxnum.nxv8f32( undef, undef)