diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 36df9ee2e7d94..c2e48284c68ac 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1574,6 +1574,67 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { if (Intrinsic::isTargetIntrinsic(IID)) return TargetTransformInfo::TCC_Basic; + // VP Intrinsics should have the same cost as their non-vp counterpart. + // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp + // counterpart when the vector length argument is smaller than the maximum + // vector length. + // TODO: Support other kinds of VPIntrinsics + if (VPIntrinsic::isVPIntrinsic(ICA.getID())) { + std::optional<unsigned> FOp = + VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID()); + if (FOp) { + if (ICA.getID() == Intrinsic::vp_load) { + Align Alignment; + if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst())) + Alignment = VPI->getPointerAlignment().valueOrOne(); + unsigned AS = 0; + if (ICA.getArgTypes().size() > 1) + if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0])) + AS = PtrTy->getAddressSpace(); + return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment, + AS, CostKind); + } + if (ICA.getID() == Intrinsic::vp_store) { + Align Alignment; + if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst())) + Alignment = VPI->getPointerAlignment().valueOrOne(); + unsigned AS = 0; + if (ICA.getArgTypes().size() >= 2) + if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1])) + AS = PtrTy->getAddressSpace(); + return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment, + AS, CostKind); + } + if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) { + return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(), + CostKind); + } + } + + std::optional<Intrinsic::ID> FID = + VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()); + if (FID) { + // Non-vp version will have same arg types except mask and vector + // length. + assert(ICA.getArgTypes().size() >= 2 && + "Expected VPIntrinsic to have Mask and Vector Length args and " + "types"); + ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2); + + // VPReduction intrinsics have a start value argument that their non-vp + // counterparts do not have, except for the fadd and fmul non-vp + // counterpart. + if (VPReductionIntrinsic::isVPReduction(ICA.getID()) && + *FID != Intrinsic::vector_reduce_fadd && + *FID != Intrinsic::vector_reduce_fmul) + NewTys = NewTys.drop_front(); + + IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys, + ICA.getFlags()); + return thisT()->getIntrinsicInstrCost(NewICA, CostKind); + } + } + if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); @@ -1834,68 +1895,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { } } - // VP Intrinsics should have the same cost as their non-vp counterpart. - // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp - // counterpart when the vector length argument is smaller than the maximum - // vector length. - // TODO: Support other kinds of VPIntrinsics - if (VPIntrinsic::isVPIntrinsic(ICA.getID())) { - std::optional<unsigned> FOp = - VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID()); - if (FOp) { - if (ICA.getID() == Intrinsic::vp_load) { - Align Alignment; - if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst())) - Alignment = VPI->getPointerAlignment().valueOrOne(); - unsigned AS = 0; - if (ICA.getArgs().size() > 1) - if (auto *PtrTy = - dyn_cast<PointerType>(ICA.getArgs()[0]->getType())) - AS = PtrTy->getAddressSpace(); - return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment, - AS, CostKind); - } - if (ICA.getID() == Intrinsic::vp_store) { - Align Alignment; - if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst())) - Alignment = VPI->getPointerAlignment().valueOrOne(); - unsigned AS = 0; - if (ICA.getArgs().size() >= 2) - if (auto *PtrTy = - dyn_cast<PointerType>(ICA.getArgs()[1]->getType())) - AS = PtrTy->getAddressSpace(); - return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment, - AS, CostKind); - } - if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) { - return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(), - CostKind); - } - } - - std::optional<Intrinsic::ID> FID = - VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()); - if (FID) { - // Non-vp version will have same Args/Tys except mask and vector length. - assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 && - "Expected VPIntrinsic to have Mask and Vector Length args and " - "types"); - ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2); - - // VPReduction intrinsics have a start value argument that their non-vp - // counterparts do not have, except for the fadd and fmul non-vp - // counterpart. - if (VPReductionIntrinsic::isVPReduction(ICA.getID()) && - *FID != Intrinsic::vector_reduce_fadd && - *FID != Intrinsic::vector_reduce_fmul) - NewTys = NewTys.drop_front(); - - IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys, - ICA.getFlags()); - return thisT()->getIntrinsicInstrCost(NewICA, CostKind); - } - } - // Assume that we need to scalarize this intrinsic.) // Compute the scalarization overhead based on Args for a vector // intrinsic. diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 4edaeef354a59..2b16dcbcd8695 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1104,26 +1104,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return Cost * LT.first; break; } - // vp integer arithmetic ops. - case Intrinsic::vp_add: - case Intrinsic::vp_and: - case Intrinsic::vp_ashr: - case Intrinsic::vp_lshr: - case Intrinsic::vp_mul: - case Intrinsic::vp_or: - case Intrinsic::vp_sdiv: - case Intrinsic::vp_shl: - case Intrinsic::vp_srem: - case Intrinsic::vp_sub: - case Intrinsic::vp_udiv: - case Intrinsic::vp_urem: - case Intrinsic::vp_xor: - // vp float arithmetic ops. - case Intrinsic::vp_fadd: - case Intrinsic::vp_fsub: - case Intrinsic::vp_fmul: - case Intrinsic::vp_fdiv: - case Intrinsic::vp_frem: case Intrinsic::vp_fneg: { std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID()); @@ -1164,23 +1144,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0], ICA.getReturnType(), UI->getPredicate(), CostKind); } - // vp load/store - case Intrinsic::vp_load: - case Intrinsic::vp_store: { - if (!ICA.getInst()) - break; - Intrinsic::ID IID = ICA.getID(); - std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID); - assert(FOp.has_value()); - auto *UI = cast<VPIntrinsic>(ICA.getInst()); - if (ICA.getID() == Intrinsic::vp_load) - return getMemoryOpCost( - *FOp, ICA.getReturnType(), UI->getPointerAlignment(), - UI->getOperand(0)->getType()->getPointerAddressSpace(), CostKind); - return getMemoryOpCost( - *FOp, ICA.getArgTypes()[0], UI->getPointerAlignment(), - UI->getOperand(1)->getType()->getPointerAddressSpace(), CostKind); - } case Intrinsic::vp_select: { Intrinsic::ID IID = ICA.getID(); std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID); diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index 574b7338f63fb..800ea223850d3 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -747,38 +747,38 @@ define void @smax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'smax' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t2 = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t4 = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %t6 = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t8 = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t10 = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t12 = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t14 = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t16 = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t18 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t20 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t22 = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t24 = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t26 = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t28 = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t30 = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) @@ -854,38 +854,38 @@ define void @smin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'smin' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t2 = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t4 = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %t6 = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t8 = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t10 = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t12 = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t14 = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t16 = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t18 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t20 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t22 = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t24 = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t26 = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t28 = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t30 = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) @@ -960,38 +960,38 @@ define void @umax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'umax' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t2 = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t4 = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %t6 = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t8 = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t10 = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t12 = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t14 = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t16 = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t18 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t20 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t22 = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t24 = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t26 = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t28 = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t30 = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) @@ -1066,38 +1066,38 @@ define void @umin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'umin' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t2 = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t4 = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %t6 = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t8 = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t10 = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %t12 = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t14 = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t16 = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %t18 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t20 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t22 = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %t24 = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t26 = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %t28 = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %t30 = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) @@ -1172,37 +1172,37 @@ define void @abs() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'abs' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %19 = call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %21 = call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %23 = call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %25 = call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %27 = call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %29 = call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %31 = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index 28d6b59e2b7f0..058aaea46fd09 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -973,23 +973,14 @@ define <vscale x 1 x i64> @xor_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v } define <vscale x 1 x i64> @smin_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) { -; VEC-COMBINE-LABEL: @smin_nxv1i64_allonesmask( -; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = call i64 @llvm.smin.i64(i64 [[Y:%.*]], i64 42) -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]] -; -; NO-VEC-COMBINE-LABEL: @smin_nxv1i64_allonesmask( -; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 -; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) -; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] +; ALL-LABEL: @smin_nxv1i64_allonesmask( +; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 +; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 +; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) +; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0 %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer @@ -1016,23 +1007,14 @@ define <vscale x 1 x i64> @smin_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < } define <vscale x 1 x i64> @smax_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) { -; VEC-COMBINE-LABEL: @smax_nxv1i64_allonesmask( -; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 42) -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]] -; -; NO-VEC-COMBINE-LABEL: @smax_nxv1i64_allonesmask( -; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 -; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) -; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] +; ALL-LABEL: @smax_nxv1i64_allonesmask( +; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 +; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 +; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) +; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0 %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer @@ -1059,23 +1041,14 @@ define <vscale x 1 x i64> @smax_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < } define <vscale x 1 x i64> @umin_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) { -; VEC-COMBINE-LABEL: @umin_nxv1i64_allonesmask( -; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = call i64 @llvm.umin.i64(i64 [[Y:%.*]], i64 42) -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]] -; -; NO-VEC-COMBINE-LABEL: @umin_nxv1i64_allonesmask( -; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 -; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) -; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] +; ALL-LABEL: @umin_nxv1i64_allonesmask( +; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 +; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 +; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) +; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0 %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer @@ -1087,23 +1060,14 @@ define <vscale x 1 x i64> @umin_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % } define <vscale x 1 x i64> @umax_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) { -; VEC-COMBINE-LABEL: @umax_nxv1i64_allonesmask( -; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[Y:%.*]], i64 42) -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]] -; -; NO-VEC-COMBINE-LABEL: @umax_nxv1i64_allonesmask( -; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 -; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 -; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) -; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) -; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] +; ALL-LABEL: @umax_nxv1i64_allonesmask( +; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 +; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 +; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) +; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0 %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer