-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[PowePC] using MTVSRBMI instruction instead of constant pool in power10+ #144084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-powerpc Author: zhijian lin (diggerlin) ChangesThe instruction MTVSRBMI move 0x00(or 0xFF) to each byte of VSR based on the bits mask. Using the instruction instead of constant pool can reduce the asm code size and instructions in power10. Full diff: https://github.com/llvm/llvm-project/pull/144084.diff 2 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0f8e5e57c58b7..60c89aff155f3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9580,6 +9580,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return false;
}
+bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) {
+ unsigned int NumOps = BVN.getNumOperands();
+ assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+
+ BMI.clearAllBits();
+ EVT VT = BVN.getValueType(0);
+ APInt ConstValue(VT.getSizeInBits(), 0);
+
+ unsigned EltWidth = VT.getScalarSizeInBits();
+
+ for (unsigned j = 0; j < NumOps; ++j) {
+ SDValue OpVal = BVN.getOperand(j);
+ unsigned BitPos = j * EltWidth;
+ auto *CN = dyn_cast<ConstantSDNode>(OpVal);
+
+ if (!CN)
+ return false;
+
+ ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ }
+
+ for (unsigned J = 0; J < 16; J++) {
+ APInt ExtractValue = ConstValue.extractBits(8, J * 8);
+ if (ExtractValue != 0x00 && ExtractValue != 0xFF)
+ return false;
+ if (ExtractValue == 0xFF)
+ BMI.setBit(J);
+ }
+ return true;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9591,6 +9622,24 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ if(Subtarget.hasP10Vector()) {
+ APInt BMI(32, 0);
+ // If the value of the vector is all zeros or all ones,
+ // we do not convert it to MTVSRBMI.
+ // The xxleqv instruction sets a vector with all ones.
+ // The xxlxor instruction sets a vector with all zeros.
+ if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) {
+ SDValue SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32);
+ MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
+ SDValue SDV = SDValue(MSDNode,0);
+ EVT DVT = BVN->getValueType(0);
+ EVT SVT = SDV.getValueType();
+ if (SVT != DVT ) {
+ SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
+ }
+ return SDV;
+ }
+ }
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 5486dc02faf90..232014db9a012 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -10,28 +10,13 @@
; RUN: | FileCheck %s --check-prefix=CHECK
define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK: L..CPI0_0:
-; CHECK-NEXT: .byte 255 # 0xff
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
+; CHECK-NOT: L..CPI0_0:
+; CHECK-NOT: .byte 255 # 0xff
+; CHECK-NOT: .byte 0 # 0x0
; CHECK-LABEL: _Z5v00FFv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lwz r3, L..C0(r2) # %const.0
-; CHECK-NEXT: lxv vs34, 0(r3)
+; CHECK-NEXT: mtvsrbmi v2, 1
; CHECK-NEXT: blr
entry:
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/PowerPC/PPCISelLowering.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5813e84c4..f359d3416 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9624,7 +9624,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
- if(Subtarget.hasP10Vector()) {
+ if (Subtarget.hasP10Vector()) {
APInt BitMask(32, 0);
// If the value of the vector is all zeros or all ones,
// we do not convert it to MTVSRBMI.
@@ -9632,11 +9632,12 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// The xxlxor instruction sets a vector with all zeros.
if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
- MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
+ MachineSDNode *MSDNode =
+ DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
SDValue SDV = SDValue(MSDNode, 0);
EVT DVT = BVN->getValueType(0);
EVT SVT = SDV.getValueType();
- if (SVT != DVT ) {
+ if (SVT != DVT) {
SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
}
return SDV;
|
EVT DVT = BVN->getValueType(0); | ||
EVT SVT = SDV.getValueType(); | ||
if (SVT != DVT ) { | ||
SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang format
|
||
unsigned EltWidth = VT.getScalarSizeInBits(); | ||
|
||
for (unsigned j = 0; j < NumOps; ++j) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since BuildVectorSDNode is super class of SDNode, can we not use range based for loop to iterate over the operands?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The index is used.
@@ -9580,6 +9580,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, | |||
return false; | |||
} | |||
|
|||
bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) { | |||
unsigned int NumOps = BVN.getNumOperands(); | |||
assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This assert text seems confusing. The routine is not called from isConstantSplat. Maybe just say unexpected 0 size build vector.
|
||
unsigned EltWidth = VT.getScalarSizeInBits(); | ||
|
||
for (unsigned j = 0; j < NumOps; ++j) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The index is used.
|
||
unsigned EltWidth = VT.getScalarSizeInBits(); | ||
|
||
for (unsigned j = 0; j < NumOps; ++j) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This loop uses lowercase j, the loop below uppercase J. Let's pick one and stick with it.
ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); | ||
} | ||
|
||
for (unsigned J = 0; J < 16; J++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's stick with pre-increment like the above j loop.
@@ -9580,6 +9580,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, | |||
return false; | |||
} | |||
|
|||
bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The instruction uses bmi but instruction names are short. Maybe use BitMask for parameter.
// The xxleqv instruction sets a vector with all ones. | ||
// The xxlxor instruction sets a vector with all zeros. | ||
if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) { | ||
SDValue SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spacing looks off.
// The xxlxor instruction sets a vector with all zeros. | ||
if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) { | ||
SDValue SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32); | ||
MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spacing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spacing around dl,MVT::v16i8 still looks off.
if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) { | ||
SDValue SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32); | ||
MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant); | ||
SDValue SDV = SDValue(MSDNode,0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spacing.
The instruction MTVSRBMI set 0x00(or 0xFF) to each byte of VSR based on the bits mask. Using the instruction instead of constant pool can reduce the asm code size and instructions in power10.