Skip to content

Commit b66abda

Browse files
committed
[TTI][RISCV] Deduplicate type-based VP costing
We have a lot of code in RISCVTTIImpl::getIntrinsicInstrCost for VP intrinsics that just forwards the cost to the underlying non-VP cost function. However, I also noticed that there is generic code in BasicTTIImpl's getIntrinsicInstrCost that does the same thing, added in llvm#67178. The only difference is that BasicTTIImpl doesn't yet handle it for type-based costing. There doesn't seem to be any reason it can't, since it only inspects the argument types. This patch moves the VP costing up so that it handles both regular and type-based costing, and begins to deduplicate the VP-specific costing in RISCVTTIImpl by moving it into BasicTTIImpl.h. It's not NFC (no functional change), since it picks up a couple of VP nodes that had slipped through the cracks. Future PRs can begin to move more of the code from RISCVTTIImpl to BasicTTIImpl.
1 parent 44a41b0 commit b66abda

File tree

3 files changed

+77
-115
lines changed

3 files changed

+77
-115
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 61 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,6 +1574,67 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
15741574
if (Intrinsic::isTargetIntrinsic(IID))
15751575
return TargetTransformInfo::TCC_Basic;
15761576

1577+
// VP Intrinsics should have the same cost as their non-vp counterpart.
1578+
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1579+
// counterpart when the vector length argument is smaller than the maximum
1580+
// vector length.
1581+
// TODO: Support other kinds of VPIntrinsics
1582+
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
1583+
std::optional<unsigned> FOp =
1584+
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
1585+
if (FOp) {
1586+
if (ICA.getID() == Intrinsic::vp_load) {
1587+
Align Alignment;
1588+
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1589+
Alignment = VPI->getPointerAlignment().valueOrOne();
1590+
unsigned AS = 0;
1591+
if (ICA.getArgTypes().size() > 1)
1592+
if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0]))
1593+
AS = PtrTy->getAddressSpace();
1594+
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
1595+
AS, CostKind);
1596+
}
1597+
if (ICA.getID() == Intrinsic::vp_store) {
1598+
Align Alignment;
1599+
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1600+
Alignment = VPI->getPointerAlignment().valueOrOne();
1601+
unsigned AS = 0;
1602+
if (ICA.getArgTypes().size() >= 2)
1603+
if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1]))
1604+
AS = PtrTy->getAddressSpace();
1605+
return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment,
1606+
AS, CostKind);
1607+
}
1608+
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
1609+
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
1610+
CostKind);
1611+
}
1612+
}
1613+
1614+
std::optional<Intrinsic::ID> FID =
1615+
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
1616+
if (FID) {
1617+
// Non-vp version will have same arg types except mask and vector
1618+
// length.
1619+
assert(ICA.getArgTypes().size() >= 2 &&
1620+
"Expected VPIntrinsic to have Mask and Vector Length args and "
1621+
"types");
1622+
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);
1623+
1624+
// VPReduction intrinsics have a start value argument that their non-vp
1625+
// counterparts do not have, except for the fadd and fmul non-vp
1626+
// counterpart.
1627+
if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
1628+
*FID != Intrinsic::vector_reduce_fadd &&
1629+
*FID != Intrinsic::vector_reduce_fmul)
1630+
NewTys = NewTys.drop_front();
1631+
1632+
IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
1633+
ICA.getFlags());
1634+
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
1635+
}
1636+
}
1637+
15771638
if (ICA.isTypeBasedOnly())
15781639
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
15791640

@@ -1834,68 +1895,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
18341895
}
18351896
}
18361897

1837-
// VP Intrinsics should have the same cost as their non-vp counterpart.
1838-
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1839-
// counterpart when the vector length argument is smaller than the maximum
1840-
// vector length.
1841-
// TODO: Support other kinds of VPIntrinsics
1842-
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
1843-
std::optional<unsigned> FOp =
1844-
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
1845-
if (FOp) {
1846-
if (ICA.getID() == Intrinsic::vp_load) {
1847-
Align Alignment;
1848-
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1849-
Alignment = VPI->getPointerAlignment().valueOrOne();
1850-
unsigned AS = 0;
1851-
if (ICA.getArgs().size() > 1)
1852-
if (auto *PtrTy =
1853-
dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
1854-
AS = PtrTy->getAddressSpace();
1855-
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
1856-
AS, CostKind);
1857-
}
1858-
if (ICA.getID() == Intrinsic::vp_store) {
1859-
Align Alignment;
1860-
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1861-
Alignment = VPI->getPointerAlignment().valueOrOne();
1862-
unsigned AS = 0;
1863-
if (ICA.getArgs().size() >= 2)
1864-
if (auto *PtrTy =
1865-
dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
1866-
AS = PtrTy->getAddressSpace();
1867-
return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
1868-
AS, CostKind);
1869-
}
1870-
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
1871-
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
1872-
CostKind);
1873-
}
1874-
}
1875-
1876-
std::optional<Intrinsic::ID> FID =
1877-
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
1878-
if (FID) {
1879-
// Non-vp version will have same Args/Tys except mask and vector length.
1880-
assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 &&
1881-
"Expected VPIntrinsic to have Mask and Vector Length args and "
1882-
"types");
1883-
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);
1884-
1885-
// VPReduction intrinsics have a start value argument that their non-vp
1886-
// counterparts do not have, except for the fadd and fmul non-vp
1887-
// counterpart.
1888-
if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
1889-
*FID != Intrinsic::vector_reduce_fadd &&
1890-
*FID != Intrinsic::vector_reduce_fmul)
1891-
NewTys = NewTys.drop_front();
1892-
1893-
IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
1894-
ICA.getFlags());
1895-
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
1896-
}
1897-
}
1898-
18991898
// Assume that we need to scalarize this intrinsic.)
19001899
// Compute the scalarization overhead based on Args for a vector
19011900
// intrinsic.

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,26 +1104,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
11041104
return Cost * LT.first;
11051105
break;
11061106
}
1107-
// vp integer arithmetic ops.
1108-
case Intrinsic::vp_add:
1109-
case Intrinsic::vp_and:
1110-
case Intrinsic::vp_ashr:
1111-
case Intrinsic::vp_lshr:
1112-
case Intrinsic::vp_mul:
1113-
case Intrinsic::vp_or:
1114-
case Intrinsic::vp_sdiv:
1115-
case Intrinsic::vp_shl:
1116-
case Intrinsic::vp_srem:
1117-
case Intrinsic::vp_sub:
1118-
case Intrinsic::vp_udiv:
1119-
case Intrinsic::vp_urem:
1120-
case Intrinsic::vp_xor:
1121-
// vp float arithmetic ops.
1122-
case Intrinsic::vp_fadd:
1123-
case Intrinsic::vp_fsub:
1124-
case Intrinsic::vp_fmul:
1125-
case Intrinsic::vp_fdiv:
1126-
case Intrinsic::vp_frem:
11271107
case Intrinsic::vp_fneg: {
11281108
std::optional<unsigned> FOp =
11291109
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
@@ -1164,23 +1144,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
11641144
return getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0], ICA.getReturnType(),
11651145
UI->getPredicate(), CostKind);
11661146
}
1167-
// vp load/store
1168-
case Intrinsic::vp_load:
1169-
case Intrinsic::vp_store: {
1170-
if (!ICA.getInst())
1171-
break;
1172-
Intrinsic::ID IID = ICA.getID();
1173-
std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);
1174-
assert(FOp.has_value());
1175-
auto *UI = cast<VPIntrinsic>(ICA.getInst());
1176-
if (ICA.getID() == Intrinsic::vp_load)
1177-
return getMemoryOpCost(
1178-
*FOp, ICA.getReturnType(), UI->getPointerAlignment(),
1179-
UI->getOperand(0)->getType()->getPointerAddressSpace(), CostKind);
1180-
return getMemoryOpCost(
1181-
*FOp, ICA.getArgTypes()[0], UI->getPointerAlignment(),
1182-
UI->getOperand(1)->getType()->getPointerAddressSpace(), CostKind);
1183-
}
11841147
case Intrinsic::vp_select: {
11851148
Intrinsic::ID IID = ICA.getID();
11861149
std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);

llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,37 +1172,37 @@ define void @abs() {
11721172
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
11731173
;
11741174
; TYPEBASED-LABEL: 'abs'
1175-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
1175+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
11761176
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
1177-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
1177+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
11781178
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
1179-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
1179+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
11801180
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
1181-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
1181+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
11821182
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
1183-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
1183+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
11841184
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
1185-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
1185+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
11861186
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
1187-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
1187+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
11881188
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
1189-
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
1189+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
11901190
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false)
1191-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
1191+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
11921192
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false)
1193-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %19 = call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
1193+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
11941194
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false)
1195-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %21 = call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
1195+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
11961196
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false)
1197-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %23 = call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
1197+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
11981198
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false)
1199-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %25 = call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
1199+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
12001200
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false)
1201-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %27 = call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
1201+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
12021202
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false)
1203-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %29 = call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
1203+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
12041204
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false)
1205-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %31 = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
1205+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
12061206
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false)
12071207
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12081208
;

0 commit comments

Comments
 (0)