@@ -1574,6 +1574,67 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1574
1574
if (Intrinsic::isTargetIntrinsic (IID))
1575
1575
return TargetTransformInfo::TCC_Basic;
1576
1576
1577
+ // VP Intrinsics should have the same cost as their non-vp counterpart.
1578
+ // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1579
+ // counterpart when the vector length argument is smaller than the maximum
1580
+ // vector length.
1581
+ // TODO: Support other kinds of VPIntrinsics
1582
+ if (VPIntrinsic::isVPIntrinsic (ICA.getID ())) {
1583
+ std::optional<unsigned > FOp =
1584
+ VPIntrinsic::getFunctionalOpcodeForVP (ICA.getID ());
1585
+ if (FOp) {
1586
+ if (ICA.getID () == Intrinsic::vp_load) {
1587
+ Align Alignment;
1588
+ if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1589
+ Alignment = VPI->getPointerAlignment ().valueOrOne ();
1590
+ unsigned AS = 0 ;
1591
+ if (ICA.getArgTypes ().size () > 1 )
1592
+ if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes ()[0 ]))
1593
+ AS = PtrTy->getAddressSpace ();
1594
+ return thisT ()->getMemoryOpCost (*FOp, ICA.getReturnType (), Alignment,
1595
+ AS, CostKind);
1596
+ }
1597
+ if (ICA.getID () == Intrinsic::vp_store) {
1598
+ Align Alignment;
1599
+ if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1600
+ Alignment = VPI->getPointerAlignment ().valueOrOne ();
1601
+ unsigned AS = 0 ;
1602
+ if (ICA.getArgTypes ().size () >= 2 )
1603
+ if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes ()[1 ]))
1604
+ AS = PtrTy->getAddressSpace ();
1605
+ return thisT ()->getMemoryOpCost (*FOp, ICA.getArgTypes ()[0 ], Alignment,
1606
+ AS, CostKind);
1607
+ }
1608
+ if (VPBinOpIntrinsic::isVPBinOp (ICA.getID ())) {
1609
+ return thisT ()->getArithmeticInstrCost (*FOp, ICA.getReturnType (),
1610
+ CostKind);
1611
+ }
1612
+ }
1613
+
1614
+ std::optional<Intrinsic::ID> FID =
1615
+ VPIntrinsic::getFunctionalIntrinsicIDForVP (ICA.getID ());
1616
+ if (FID) {
1617
+ // Non-vp version will have same arg types except mask and vector
1618
+ // length.
1619
+ assert (ICA.getArgTypes ().size () >= 2 &&
1620
+ " Expected VPIntrinsic to have Mask and Vector Length args and "
1621
+ " types" );
1622
+ ArrayRef<Type *> NewTys = ArrayRef (ICA.getArgTypes ()).drop_back (2 );
1623
+
1624
+ // VPReduction intrinsics have a start value argument that their non-vp
1625
+ // counterparts do not have, except for the fadd and fmul non-vp
1626
+ // counterpart.
1627
+ if (VPReductionIntrinsic::isVPReduction (ICA.getID ()) &&
1628
+ *FID != Intrinsic::vector_reduce_fadd &&
1629
+ *FID != Intrinsic::vector_reduce_fmul)
1630
+ NewTys = NewTys.drop_front ();
1631
+
1632
+ IntrinsicCostAttributes NewICA (*FID, ICA.getReturnType (), NewTys,
1633
+ ICA.getFlags ());
1634
+ return thisT ()->getIntrinsicInstrCost (NewICA, CostKind);
1635
+ }
1636
+ }
1637
+
1577
1638
if (ICA.isTypeBasedOnly ())
1578
1639
return getTypeBasedIntrinsicInstrCost (ICA, CostKind);
1579
1640
@@ -1834,68 +1895,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1834
1895
}
1835
1896
}
1836
1897
1837
- // VP Intrinsics should have the same cost as their non-vp counterpart.
1838
- // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1839
- // counterpart when the vector length argument is smaller than the maximum
1840
- // vector length.
1841
- // TODO: Support other kinds of VPIntrinsics
1842
- if (VPIntrinsic::isVPIntrinsic (ICA.getID ())) {
1843
- std::optional<unsigned > FOp =
1844
- VPIntrinsic::getFunctionalOpcodeForVP (ICA.getID ());
1845
- if (FOp) {
1846
- if (ICA.getID () == Intrinsic::vp_load) {
1847
- Align Alignment;
1848
- if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1849
- Alignment = VPI->getPointerAlignment ().valueOrOne ();
1850
- unsigned AS = 0 ;
1851
- if (ICA.getArgs ().size () > 1 )
1852
- if (auto *PtrTy =
1853
- dyn_cast<PointerType>(ICA.getArgs ()[0 ]->getType ()))
1854
- AS = PtrTy->getAddressSpace ();
1855
- return thisT ()->getMemoryOpCost (*FOp, ICA.getReturnType (), Alignment,
1856
- AS, CostKind);
1857
- }
1858
- if (ICA.getID () == Intrinsic::vp_store) {
1859
- Align Alignment;
1860
- if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1861
- Alignment = VPI->getPointerAlignment ().valueOrOne ();
1862
- unsigned AS = 0 ;
1863
- if (ICA.getArgs ().size () >= 2 )
1864
- if (auto *PtrTy =
1865
- dyn_cast<PointerType>(ICA.getArgs ()[1 ]->getType ()))
1866
- AS = PtrTy->getAddressSpace ();
1867
- return thisT ()->getMemoryOpCost (*FOp, Args[0 ]->getType (), Alignment,
1868
- AS, CostKind);
1869
- }
1870
- if (VPBinOpIntrinsic::isVPBinOp (ICA.getID ())) {
1871
- return thisT ()->getArithmeticInstrCost (*FOp, ICA.getReturnType (),
1872
- CostKind);
1873
- }
1874
- }
1875
-
1876
- std::optional<Intrinsic::ID> FID =
1877
- VPIntrinsic::getFunctionalIntrinsicIDForVP (ICA.getID ());
1878
- if (FID) {
1879
- // Non-vp version will have same Args/Tys except mask and vector length.
1880
- assert (ICA.getArgs ().size () >= 2 && ICA.getArgTypes ().size () >= 2 &&
1881
- " Expected VPIntrinsic to have Mask and Vector Length args and "
1882
- " types" );
1883
- ArrayRef<Type *> NewTys = ArrayRef (ICA.getArgTypes ()).drop_back (2 );
1884
-
1885
- // VPReduction intrinsics have a start value argument that their non-vp
1886
- // counterparts do not have, except for the fadd and fmul non-vp
1887
- // counterpart.
1888
- if (VPReductionIntrinsic::isVPReduction (ICA.getID ()) &&
1889
- *FID != Intrinsic::vector_reduce_fadd &&
1890
- *FID != Intrinsic::vector_reduce_fmul)
1891
- NewTys = NewTys.drop_front ();
1892
-
1893
- IntrinsicCostAttributes NewICA (*FID, ICA.getReturnType (), NewTys,
1894
- ICA.getFlags ());
1895
- return thisT ()->getIntrinsicInstrCost (NewICA, CostKind);
1896
- }
1897
- }
1898
-
1899
1898
// Assume that we need to scalarize this intrinsic.
1900
1899
// Compute the scalarization overhead based on Args for a vector
1901
1900
// intrinsic.
0 commit comments