-
Notifications
You must be signed in to change notification settings - Fork 13.7k
TTI: Check legalization cost of abs nodes #100523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) ChangesFull diff: https://github.com/llvm/llvm-project/pull/100523.diff 2 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::vector_reduce_fminimum:
return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
VecOpTy, ICA.getFlags(), CostKind);
- case Intrinsic::abs: {
- // abs(X) = select(icmp(X,0),X,sub(0,X))
- Type *CondTy = RetTy->getWithNewBitWidth(1);
- CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
- InstructionCost Cost = 0;
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- Pred, CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- Pred, CostKind);
- // TODO: Should we add an OperandValueProperties::OP_Zero property?
- Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
- return Cost;
- }
+ case Intrinsic::abs:
+ ISD = ISD::ABS;
+ break;
case Intrinsic::smax:
ISD = ISD::SMAX;
break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
return Cost;
}
+ case Intrinsic::abs: {
+ // abs(X) = select(icmp(X,0),X,sub(0,X))
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+ InstructionCost Cost = 0;
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ Pred, CostKind);
+ // TODO: Should we add an OperandValueProperties::OP_Zero property?
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Sub, RetTy, CostKind,
+ {TTI::OK_UniformConstantValue, TTI::OP_None});
+ return Cost;
+ }
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index 133b95609bc15..623e02eb8239d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
@@ -112,11 +112,11 @@ define i32 @abs_nonpoison(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
@@ -204,11 +204,11 @@ define i32 @abs_poison(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 true)
@@ -262,11 +262,11 @@ define i32 @abs_poison(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 true)
|
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesFull diff: https://github.com/llvm/llvm-project/pull/100523.diff 2 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::vector_reduce_fminimum:
return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
VecOpTy, ICA.getFlags(), CostKind);
- case Intrinsic::abs: {
- // abs(X) = select(icmp(X,0),X,sub(0,X))
- Type *CondTy = RetTy->getWithNewBitWidth(1);
- CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
- InstructionCost Cost = 0;
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- Pred, CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- Pred, CostKind);
- // TODO: Should we add an OperandValueProperties::OP_Zero property?
- Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
- return Cost;
- }
+ case Intrinsic::abs:
+ ISD = ISD::ABS;
+ break;
case Intrinsic::smax:
ISD = ISD::SMAX;
break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
return Cost;
}
+ case Intrinsic::abs: {
+ // abs(X) = select(icmp(X,0),X,sub(0,X))
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+ InstructionCost Cost = 0;
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ Pred, CostKind);
+ // TODO: Should we add an OperandValueProperties::OP_Zero property?
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Sub, RetTy, CostKind,
+ {TTI::OK_UniformConstantValue, TTI::OP_None});
+ return Cost;
+ }
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index 133b95609bc15..623e02eb8239d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
@@ -112,11 +112,11 @@ define i32 @abs_nonpoison(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
@@ -204,11 +204,11 @@ define i32 @abs_poison(i32 %arg) {
; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
-; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 true)
; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 true)
@@ -262,11 +262,11 @@ define i32 @abs_poison(i32 %arg) {
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 true)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 true)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 true)
|
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) | ||
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) | ||
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) | ||
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this demonstrating? 2 does not seem like the right cost for any VALU/SALU operation on v32i16.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ISD::ABS v32i16 is set to CUSTOM lowering in SIISelLowering.cpp so the cost used is LT.first * 2
- this assumption appears to be the cause of most regression in this patch series
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of the custom lowerings are working around the default legalization actions being to scalarize, rather than decompose into 2 x vectors
330c0e2
to
df2b6b7
Compare
ca78bfb
to
85c14e0
Compare
85c14e0
to
949edfe
Compare
8c88a8f
to
8180759
Compare
949edfe
to
49db2b2
Compare
8180759
to
45aed42
Compare
49db2b2
to
b448d7d
Compare
45aed42
to
19f7331
Compare
b448d7d
to
6a73464
Compare
19f7331
to
a2900f1
Compare
6a73464
to
5bb2cad
Compare
a2900f1
to
2746c43
Compare
5bb2cad
to
574affb
Compare
2746c43
to
88fe51a
Compare
574affb
to
bbd9d3d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
88fe51a
to
d428b60
Compare
bbd9d3d
to
65c4d58
Compare
d428b60
to
50af0bb
Compare
65c4d58
to
9c21872
Compare
50af0bb
to
b5f3942
Compare
9c21872
to
ef1f347
Compare
Merge activity
|
b5f3942
to
cdac929
Compare
c653e64
to
7c7e93f
Compare
Also adjust the AMDGPU cost.
7c7e93f
to
ea74c99
Compare
No description provided.