diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 9048481b49189..9cd1031143151 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -126,12 +126,13 @@ class IntrinsicCostAttributes { // If ScalarizationCost is UINT_MAX, the cost of scalarizing the // arguments and the return value will be computed based on types. InstructionCost ScalarizationCost = InstructionCost::getInvalid(); + TargetLibraryInfo const *LibInfo = nullptr; public: IntrinsicCostAttributes( Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost = InstructionCost::getInvalid(), - bool TypeBasedOnly = false); + bool TypeBasedOnly = false, TargetLibraryInfo const *LibInfo = nullptr); IntrinsicCostAttributes( Intrinsic::ID Id, Type *RTy, ArrayRef Tys, @@ -145,7 +146,8 @@ class IntrinsicCostAttributes { Intrinsic::ID Id, Type *RTy, ArrayRef Args, ArrayRef Tys, FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, - InstructionCost ScalarCost = InstructionCost::getInvalid()); + InstructionCost ScalarCost = InstructionCost::getInvalid(), + TargetLibraryInfo const *LibInfo = nullptr); Intrinsic::ID getID() const { return IID; } const IntrinsicInst *getInst() const { return II; } @@ -154,6 +156,7 @@ class IntrinsicCostAttributes { InstructionCost getScalarizationCost() const { return ScalarizationCost; } const SmallVectorImpl &getArgs() const { return Arguments; } const SmallVectorImpl &getArgTypes() const { return ParamTys; } + const TargetLibraryInfo *getLibInfo() const { return LibInfo; } bool isTypeBasedOnly() const { return Arguments.empty(); diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 032c7d7b5159e..d70a1623b87f4 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/Analysis/ValueTracking.h" @@ -285,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return false; } + /// Several intrinsics that return structs (including llvm.sincos[pi] and + /// llvm.modf) can be lowered to a vector library call (for certain VFs). The + /// vector library functions correspond to the scalar calls (e.g. sincos or + /// modf), which unlike the intrinsic return values via output pointers. This + /// helper checks if a vector call exists for the given intrinsic, and returns + /// the cost, which includes the cost of the mask (if required), and the loads + /// for values returned via output pointers. \p LC is the scalar libcall and + /// \p CallRetElementIndex (optional) is the struct element which is mapped to + /// the call return value. If std::nullopt is returned, then no vector library + /// call is available, so the intrinsic should be assigned the default cost + /// (e.g. scalarization). + std::optional getMultipleResultIntrinsicVectorLibCallCost( + const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind, + RTLIB::Libcall LC, std::optional CallRetElementIndex = {}) { + Type *RetTy = ICA.getReturnType(); + // Vector variants of the intrinsic can be mapped to a vector library call. + auto const *LibInfo = ICA.getLibInfo(); + if (!LibInfo || !isa(RetTy) || + !isVectorizedStructTy(cast(RetTy))) + return std::nullopt; + + // Find associated libcall. + const char *LCName = getTLI()->getLibcallName(LC); + if (!LCName) + return std::nullopt; + + // Search for a corresponding vector variant. + LLVMContext &Ctx = RetTy->getContext(); + ElementCount VF = getVectorizedTypeVF(RetTy); + VecDesc const *VD = nullptr; + for (bool Masked : {false, true}) { + if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked))) + break; + } + if (!VD) + return std::nullopt; + + // Cost the call + mask. + auto Cost = + thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind); + if (VD->isMasked()) + Cost += thisT()->getShuffleCost( + TargetTransformInfo::SK_Broadcast, + VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0, + nullptr, {}); + + // Lowering to a library call (with output pointers) may require us to emit + // reloads for the results. + for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) { + if (Idx == CallRetElementIndex) + continue; + Cost += thisT()->getMemoryOpCost( + Instruction::Load, VectorTy, + thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind); + } + return Cost; + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -1726,9 +1785,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Type *RetTy = ICA.getReturnType(); - ElementCount RetVF = - (RetTy->isVectorTy() ? cast(RetTy)->getElementCount() - : ElementCount::getFixed(1)); + ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy) + : ElementCount::getFixed(1); + const IntrinsicInst *I = ICA.getInst(); const SmallVectorImpl &Args = ICA.getArgs(); FastMathFlags FMF = ICA.getFlags(); @@ -1997,6 +2056,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } case Intrinsic::experimental_vector_match: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); + case Intrinsic::sincos: { + Type *Ty = getContainedTypes(RetTy).front(); + EVT VT = getTLI()->getValueType(DL, Ty); + RTLIB::Libcall LC = RTLIB::getSINCOS(VT.getScalarType()); + if (auto Cost = + getMultipleResultIntrinsicVectorLibCallCost(ICA, CostKind, LC)) + return *Cost; + // Otherwise, fallback to default scalarization cost. + break; + } } // Assume that we need to scalarize this intrinsic.) @@ -2005,10 +2074,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { InstructionCost ScalarizationCost = InstructionCost::getInvalid(); if (RetVF.isVector() && !RetVF.isScalable()) { ScalarizationCost = 0; - if (!RetTy->isVoidTy()) - ScalarizationCost += getScalarizationOverhead( - cast(RetTy), - /*Insert*/ true, /*Extract*/ false, CostKind); + if (!RetTy->isVoidTy()) { + for (Type *VectorTy : getContainedTypes(RetTy)) { + ScalarizationCost += getScalarizationOverhead( + cast(VectorTy), + /*Insert=*/true, /*Extract=*/false, CostKind); + } + } ScalarizationCost += getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind); } @@ -2689,27 +2761,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. - if (auto *RetVTy = dyn_cast(RetTy)) { + if (isVectorizedTy(RetTy)) { + ArrayRef RetVTys = getContainedTypes(RetTy); + // Scalable vectors cannot be scalarized, so return Invalid. - if (isa(RetTy) || any_of(Tys, [](const Type *Ty) { - return isa(Ty); - })) + if (any_of(concat(RetVTys, Tys), + [](Type *Ty) { return isa(Ty); })) return InstructionCost::getInvalid(); - InstructionCost ScalarizationCost = - SkipScalarizationCost - ? ScalarizationCostPassed - : getScalarizationOverhead(RetVTy, /*Insert*/ true, - /*Extract*/ false, CostKind); + InstructionCost ScalarizationCost = ScalarizationCostPassed; + if (!SkipScalarizationCost) { + ScalarizationCost = 0; + for (Type *RetVTy : RetVTys) { + ScalarizationCost += getScalarizationOverhead( + cast(RetVTy), /*Insert=*/true, + /*Extract=*/false, CostKind); + } + } - unsigned ScalarCalls = cast(RetVTy)->getNumElements(); + unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue(); SmallVector ScalarTys; for (Type *Ty : Tys) { if (Ty->isVectorTy()) Ty = Ty->getScalarType(); ScalarTys.push_back(Ty); } - IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF); + IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF); InstructionCost ScalarCost = thisT()->getIntrinsicInstrCost(Attrs, CostKind); for (Type *Ty : Tys) { diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index ee6622516a5ac..68cb536bf7891 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CostModel.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -24,6 +25,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; static cl::opt CostKind( @@ -42,12 +44,18 @@ static cl::opt TypeBasedIntrinsicCost("type-based-intrinsic-cost", cl::desc("Calculate intrinsics cost based only on argument types"), cl::init(false)); +static cl::opt PreferIntrinsicCost( + "prefer-intrinsic-cost", + cl::desc("Prefer using getIntrinsicInstrCost over getInstructionCost"), + cl::init(false)); + #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME PreservedAnalyses CostModelPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); + auto &TLI = AM.getResult(F); OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n"; for (BasicBlock &B : F) { for (Instruction &Inst : B) { @@ -55,12 +63,12 @@ PreservedAnalyses CostModelPrinterPass::run(Function &F, // which cost kind to print. InstructionCost Cost; auto *II = dyn_cast(&Inst); - if (II && TypeBasedIntrinsicCost) { - IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, - InstructionCost::getInvalid(), true); + if (II && (PreferIntrinsicCost || TypeBasedIntrinsicCost)) { + IntrinsicCostAttributes ICA( + II->getIntrinsicID(), *II, InstructionCost::getInvalid(), + /*TypeBasedOnly=*/TypeBasedIntrinsicCost, &TLI); Cost = TTI.getIntrinsicInstrCost(ICA, CostKind); - } - else { + } else { Cost = TTI.getInstructionCost(&Inst, CostKind); } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 1ca9a16b18112..7edb4e4f07e98 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -69,9 +69,9 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { IntrinsicCostAttributes::IntrinsicCostAttributes( Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost, - bool TypeBasedOnly) + bool TypeBasedOnly, const TargetLibraryInfo *LibInfo) : II(dyn_cast(&CI)), RetTy(CI.getType()), IID(Id), - ScalarizationCost(ScalarizationCost) { + ScalarizationCost(ScalarizationCost), LibInfo(LibInfo) { if (const auto *FPMO = dyn_cast(&CI)) FMF = FPMO->getFastMathFlags(); @@ -101,13 +101,12 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty, ParamTys.push_back(Argument->getType()); } -IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef Args, - ArrayRef Tys, - FastMathFlags Flags, - const IntrinsicInst *I, - InstructionCost ScalarCost) - : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { +IntrinsicCostAttributes::IntrinsicCostAttributes( + Intrinsic::ID Id, Type *RTy, ArrayRef Args, + ArrayRef Tys, FastMathFlags Flags, const IntrinsicInst *I, + InstructionCost ScalarCost, TargetLibraryInfo const *LibInfo) + : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost), + LibInfo(LibInfo) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); Arguments.insert(Arguments.begin(), Args.begin(), Args.end()); } diff --git a/llvm/test/Analysis/CostModel/AArch64/sincos.ll b/llvm/test/Analysis/CostModel/AArch64/sincos.ll new file mode 100644 index 0000000000000..e32c51667e0ad --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sincos.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos" +; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s +; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print" -prefer-intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB + +define void @sincos() { +; CHECK-LABEL: 'sincos' +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison) +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison) +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison) +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison) +; +; CHECK: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison) +; CHECK: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison) +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison) +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison) +; CHECK: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison) +; +; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { , } @llvm.sincos.nxv8f16( poison) +; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { , } @llvm.sincos.nxv4f32( poison) +; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { , } @llvm.sincos.nxv2f64( poison) +; CHECK: Cost Model: Invalid cost for instruction: %nxv1f128 = call { , } @llvm.sincos.nxv1f128( poison) +; CHECK: Cost Model: Invalid cost for instruction: %nxv8f32 = call { , } @llvm.sincos.nxv8f32( poison) +; +; CHECK-VECLIB-LABEL: 'sincos' +; CHECK-VECLIB: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison) +; +; CHECK-VECLIB: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison) +; +; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { , } @llvm.sincos.nxv8f16( poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { , } @llvm.sincos.nxv4f32( poison) +; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { , } @llvm.sincos.nxv2f64( poison) +; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { , } @llvm.sincos.nxv1f128( poison) +; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f32 = call { , } @llvm.sincos.nxv8f32( poison) +; + %f16 = call { half, half } @llvm.sincos.f16(half poison) + %f32 = call { float, float } @llvm.sincos.f32(float poison) + %f64 = call { double, double } @llvm.sincos.f64(double poison) + %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison) + + %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison) + %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison) + %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison) + %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison) + %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison) + + %nxv8f16 = call { , } @llvm.sincos.v8f16( poison) + %nxv4f32 = call { , } @llvm.sincos.v4f32( poison) + %nxv2f64 = call { , } @llvm.sincos.v2f64( poison) + %nxv1f128 = call { , } @llvm.sincos.v1f128( poison) + %nxv8f32 = call { , } @llvm.sincos.v8f32( poison) + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/frexp.ll b/llvm/test/Analysis/CostModel/AMDGPU/frexp.ll index 22134d042fabb..f5f4445b34b02 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/frexp.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/frexp.ll @@ -68,46 +68,46 @@ define void @frexp_f16_i32() { define void @frexp_f16_i16() { ; GFX7-LABEL: 'frexp_f16_i16' ; GFX7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX8PLUS-LABEL: 'frexp_f16_i16' ; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) ; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'frexp_f16_i16' ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GFX8PLUS-SIZE-LABEL: 'frexp_f16_i16' ; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef) ; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef)