From 0b99b56779e8ee8f95578927829ee0fc8680ab79 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 4 Aug 2025 14:26:07 +0100 Subject: [PATCH 1/2] [AArch64] Generalize costing for FP16 instructions This extracts the code for modelling a fp16 operation as fptrunc(fpop(fpext,fpext) into a new function named getFP16BF16PromoteCost so that it can be reused by the arithmetic instructions. The function takes a lambda to calculate the cost of the operation with the promoted type. --- .../AArch64/AArch64TargetTransformInfo.cpp | 71 ++++++--- .../AArch64/AArch64TargetTransformInfo.h | 8 + .../Analysis/CostModel/AArch64/arith-fp.ll | 147 ++++++++++-------- .../CostModel/AArch64/extract_float.ll | 15 +- .../Analysis/CostModel/AArch64/reduce-fadd.ll | 68 ++++---- .../Analysis/CostModel/AArch64/vec3-ops.ll | 4 +- 6 files changed, 185 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 9f05add8bc1c1..35d05e4ebea80 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3975,6 +3975,26 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead( return DemandedElts.popcount() * (Insert + Extract) * VecInstCost; } +std::optional AArch64TTIImpl::getFP16BF16PromoteCost( + Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, bool IncludeTrunc, + std::function InstCost) const { + if ((ST->hasFullFP16() || !Ty->getScalarType()->isHalfTy()) && + !Ty->getScalarType()->isBFloatTy()) + return std::nullopt; + + Type *PromotedTy = Ty->getWithNewType(Type::getFloatTy(Ty->getContext())); + InstructionCost Cost = getCastInstrCost(Instruction::FPExt, PromotedTy, Ty, + TTI::CastContextHint::None, CostKind); + if (!Op1Info.isConstant() && !Op2Info.isConstant()) + Cost *= 2; + Cost += InstCost(PromotedTy); + if (IncludeTrunc) + Cost += getCastInstrCost(Instruction::FPTrunc, Ty, PromotedTy, + TTI::CastContextHint::None, CostKind); + return Cost; +} + InstructionCost AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, @@ -3997,6 +4017,18 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( std::pair LT = getTypeLegalizationCost(Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); + // Increase the cost for half and bfloat types if not architecturally + // supported. + if (ISD == ISD::FADD || ISD == ISD::FSUB || ISD == ISD::FMUL || + ISD == ISD::FDIV || ISD == ISD::FREM) + if (auto PromotedCost = getFP16BF16PromoteCost( + Ty, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/true, + [&](Type *PromotedTy) { + return getArithmeticInstrCost(Opcode, PromotedTy, CostKind, + Op1Info, Op2Info); + })) + return *PromotedCost; + switch (ISD) { default: return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -4265,11 +4297,6 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( [[fallthrough]]; case ISD::FADD: case ISD::FSUB: - // Increase the cost for half and bfloat types if not architecturally - // supported. - if ((Ty->getScalarType()->isHalfTy() && !ST->hasFullFP16()) || - (Ty->getScalarType()->isBFloatTy() && !ST->hasBF16())) - return 2 * LT.first; if (!Ty->getScalarType()->isFP128Ty()) return LT.first; [[fallthrough]]; @@ -4371,25 +4398,21 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost( } if (Opcode == Instruction::FCmp) { - // Without dedicated instructions we promote f16 + bf16 compares to f32. - if ((!ST->hasFullFP16() && ValTy->getScalarType()->isHalfTy()) || - ValTy->getScalarType()->isBFloatTy()) { - Type *PromotedTy = - ValTy->getWithNewType(Type::getFloatTy(ValTy->getContext())); - InstructionCost Cost = - getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy, - TTI::CastContextHint::None, CostKind); - if (!Op1Info.isConstant() && !Op2Info.isConstant()) - Cost *= 2; - Cost += getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, CostKind, - Op1Info, Op2Info); - if (ValTy->isVectorTy()) - Cost += getCastInstrCost( - Instruction::Trunc, VectorType::getInteger(cast(ValTy)), - VectorType::getInteger(cast(PromotedTy)), - TTI::CastContextHint::None, CostKind); - return Cost; - } + if (auto Cost = getFP16BF16PromoteCost( + ValTy, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/false, + [&](Type *PromotedTy) { + InstructionCost Cost = + getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, + CostKind, Op1Info, Op2Info); + if (isa(PromotedTy)) + Cost += getCastInstrCost( + Instruction::Trunc, + VectorType::getInteger(cast(ValTy)), + VectorType::getInteger(cast(PromotedTy)), + TTI::CastContextHint::None, CostKind); + return Cost; + })) + return *Cost; auto LT = getTypeLegalizationCost(ValTy); // Model unknown fp compares as a libcall. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 7f45177437237..fa9b25af44232 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -435,6 +435,14 @@ class AArch64TTIImpl final : public BasicTTIImplBase { bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); } + /// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the + /// architecture features are not present. + std::optional + getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, bool IncludeTrunc, + std::function InstCost) const; + InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional FMF, diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index de1b39db1539c..0a154d09c36ba 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -34,10 +34,10 @@ define void @fadd() { define void @fadd_fp16() { ; CHECK-BASE-LABEL: 'fadd_fp16' -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fadd half undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fadd half undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <4 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fadd_fp16' @@ -84,10 +84,10 @@ define void @fsub() { define void @fsub_fp16() { ; CHECK-BASE-LABEL: 'fsub_fp16' -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fsub_fp16' @@ -134,9 +134,9 @@ define void @fneg_idiom() { define void @fneg_idiom_fp16() { ; CHECK-BASE-LABEL: 'fneg_idiom_fp16' -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half 0xH8000, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half 0xH8000, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fneg_idiom_fp16' @@ -180,21 +180,13 @@ define void @fneg() { } define void @fneg_fp16() { -; CHECK-BASE-LABEL: 'fneg_fp16' -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; -; CHECK-FP16-LABEL: 'fneg_fp16' -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-LABEL: 'fneg_fp16' +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %F16 = fneg half undef %V2F16 = fneg <2 x half> undef @@ -252,16 +244,16 @@ define void @fmulfneg() { define void @fmulneg_fp16() { ; CHECK-BASE-LABEL: 'fmulneg_fp16' -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half %F16, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> %V2F16, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> %V4F16, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> %V8F16, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> %V16F16, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half %F16, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> %V2F16, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> %V4F16, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> %V8F16, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> %V16F16, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fmulneg_fp16' @@ -338,16 +330,16 @@ define void @fnegfmul() { define void @fnegfmul_fp16() { ; CHECK-BASE-LABEL: 'fnegfmul_fp16' -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half %F16M -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> %V2F16M -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> %V4F16M -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> %V8F16M -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> %V16F16M +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half %F16M +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> %V2F16M +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> %V4F16M +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> %V8F16M +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> %V16F16M ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fnegfmul_fp16' @@ -405,12 +397,19 @@ define void @fmul() { } define void @fmul_fp16() { -; CHECK-LABEL: 'fmul_fp16' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-BASE-LABEL: 'fmul_fp16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-FP16-LABEL: 'fmul_fp16' +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %F16 = fmul half undef, undef %V4F16 = fmul <4 x half> undef, undef @@ -448,12 +447,19 @@ define void @fdiv() { } define void @fdiv_fp16() { -; CHECK-LABEL: 'fdiv_fp16' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16F16 = fdiv <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-BASE-LABEL: 'fdiv_fp16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = fdiv <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-FP16-LABEL: 'fdiv_fp16' +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 4 for: %V16F16 = fdiv <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %F16 = fdiv half undef, undef %V4F16 = fdiv <4 x half> undef, undef @@ -491,12 +497,19 @@ define void @frem() { } define void @frem_fp16() { -; CHECK-LABEL: 'frem_fp16' -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:52 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:216 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-BASE-LABEL: 'frem_fp16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:55 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:110 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:220 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-FP16-LABEL: 'frem_fp16' +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:52 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:108 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:216 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %F16 = frem half undef, undef %V4F16 = frem <4 x half> undef, undef @@ -729,9 +742,9 @@ define void @fmuladd() { define void @fmuladd_fp16() { ; CHECK-BASE-LABEL: 'fmuladd_fp16' ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.fmuladd.f16(half undef, half undef, half undef) -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef) -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:9 CodeSize:2 Lat:6 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:2 Lat:6 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef) +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:36 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fmuladd_fp16' diff --git a/llvm/test/Analysis/CostModel/AArch64/extract_float.ll b/llvm/test/Analysis/CostModel/AArch64/extract_float.ll index d2b75faa014d6..c2140218b0c54 100644 --- a/llvm/test/Analysis/CostModel/AArch64/extract_float.ll +++ b/llvm/test/Analysis/CostModel/AArch64/extract_float.ll @@ -11,6 +11,7 @@ define double @extract_case1(<2 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <2 x double> %a, i32 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <2 x double> %a, i32 0 %2 = extractelement <2 x double> %a, i32 1 @@ -24,6 +25,7 @@ define double @extract_case2(<2 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %0 = extractelement <2 x double> %a, i32 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <2 x double> %a, i32 1 %res = fmul double %1, %1 @@ -36,6 +38,7 @@ define double @extract_case3(<2 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <2 x double> %a, i32 0 %res = fmul double %1, %1 @@ -48,6 +51,7 @@ define double @extract_case4(<2 x double> %a, double %b) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <2 x double> %a, i32 0 %res = fmul double %1, %b @@ -60,6 +64,7 @@ define double @extract_case5(<2 x double> %a, double %b) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <2 x double> %a, i32 1 %res = fmul double %1, %b @@ -74,6 +79,7 @@ define double @extract_case6(<3 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <3 x double> %a, i32 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <3 x double> %a, i32 0 %2 = extractelement <3 x double> %a, i32 1 @@ -90,6 +96,7 @@ define double @extract_case7(<4 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <4 x double> %a, i32 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <4 x double> %a, i32 1 %2 = extractelement <4 x double> %a, i32 2 @@ -108,6 +115,7 @@ define double @extract_case8(<2 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = fmul double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = fmul double %3, %4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %5 +; entry: %1 = extractelement <2 x double> %a, i32 0 %2 = extractelement <2 x double> %a, i32 1 @@ -129,6 +137,7 @@ define double @extract_case9(<2 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = fmul double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = fmul double %3, %4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %5 +; entry: %1 = extractelement <2 x double> %a, i32 0 %2 = extractelement <2 x double> %a, i32 1 @@ -148,6 +157,7 @@ define double @extract_case10(<4 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @foo(double %1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = fmul double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %2 +; entry: %1 = extractelement <4 x double> %a, i32 0 %2 = extractelement <4 x double> %a, i32 1 @@ -161,7 +171,7 @@ define half @extract_case11(<2 x half> %a) { ; NOFP16-LABEL: 'extract_case11' ; NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x half> %a, i32 0 ; NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x half> %a, i32 1 -; NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul half %0, %1 +; NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = fmul half %0, %1 ; NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %res ; ; FULLFP16-LABEL: 'extract_case11' @@ -169,6 +179,7 @@ define half @extract_case11(<2 x half> %a) { ; FULLFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <2 x half> %a, i32 1 ; FULLFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul half %0, %1 ; FULLFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %res +; entry: %1 = extractelement <2 x half> %a, i32 0 %2 = extractelement <2 x half> %a, i32 1 @@ -183,6 +194,7 @@ define float @extract_case12(<2 x float> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <2 x float> %a, i32 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul float %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %res +; entry: %1 = extractelement <2 x float> %a, i32 0 %2 = extractelement <2 x float> %a, i32 1 @@ -198,6 +210,7 @@ define double @extract_case13(<2 x double> %a) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x double> %a, i32 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = fadd double %0, %1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res +; entry: %1 = extractelement <2 x double> %a, i32 0 %2 = extractelement <2 x double> %a, i32 1 diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll index f565924a325a3..c4236d2cf41b8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll @@ -26,10 +26,10 @@ define void @strict_fp_reductions() { define void @strict_fp_reductions_fp16() { ; CHECK-NOFP16-LABEL: 'strict_fp_reductions_fp16' -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:76 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-F16-LABEL: 'strict_fp_reductions_fp16' @@ -40,10 +40,10 @@ define void @strict_fp_reductions_fp16() { ; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-BF16-LABEL: 'strict_fp_reductions_fp16' -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:76 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) @@ -55,15 +55,15 @@ define void @strict_fp_reductions_fp16() { define void @strict_fp_reductions_bf16() { ; CHECK-NOFP16-LABEL: 'strict_fp_reductions_bf16' -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) ; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-F16-LABEL: 'strict_fp_reductions_bf16' -; CHECK-F16-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; CHECK-F16-NEXT: Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) ; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-BF16-LABEL: 'strict_fp_reductions_bf16' -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) ; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef) @@ -117,16 +117,16 @@ define void @fast_fp_reductions() { define void @fast_fp_reductions_fp16() { ; CHECK-NOFP16-LABEL: 'fast_fp_reductions_fp16' -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) ; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-F16-LABEL: 'fast_fp_reductions_fp16' @@ -143,16 +143,16 @@ define void @fast_fp_reductions_fp16() { ; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-BF16-LABEL: 'fast_fp_reductions_fp16' -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) ; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) @@ -175,15 +175,15 @@ define void @fast_fp_reductions_fp16() { define void @fast_fp_reductions_bf16() { ; CHECK-NOFP16-LABEL: 'fast_fp_reductions_bf16' -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) ; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-F16-LABEL: 'fast_fp_reductions_bf16' -; CHECK-F16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) +; CHECK-F16-NEXT: Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) ; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-BF16-LABEL: 'fast_fp_reductions_bf16' -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) ; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll b/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll index 6bcf3c705ef6f..f234341b0fd86 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll @@ -206,8 +206,8 @@ define void @vec3_float(<3 x float> %a, <3 x float> %b, ptr %src, ptr %dst) { define void @vec3_half(<3 x half> %a, <3 x half> %b, ptr %src, ptr %dst) { ; CHECK-LABEL: 'vec3_half' ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %l = load <3 x half>, ptr %src, align 1 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %add = fadd <3 x half> %l, %b -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %sub = fsub <3 x half> %add, %a +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %add = fadd <3 x half> %l, %b +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %sub = fsub <3 x half> %add, %a ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <3 x half> %sub, ptr %dst, align 1 ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; From 7c9694e7ec55f404ed977424d9855d828668e6be Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 7 Aug 2025 18:00:56 +0100 Subject: [PATCH 2/2] Simplify condition --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 35d05e4ebea80..8d8a8d0ba33dc 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3979,8 +3979,9 @@ std::optional AArch64TTIImpl::getFP16BF16PromoteCost( Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, std::function InstCost) const { - if ((ST->hasFullFP16() || !Ty->getScalarType()->isHalfTy()) && - !Ty->getScalarType()->isBFloatTy()) + if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy()) + return std::nullopt; + if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16()) return std::nullopt; Type *PromotedTy = Ty->getWithNewType(Type::getFloatTy(Ty->getContext()));