[PowePC] using MTVSRBMI instruction instead of constant pool in power10+ #144084

diggerlin · 2025-06-13T14:32:51Z

The instruction MTVSRBMI set 0x00(or 0xFF) to each byte of VSR based on the bits mask. Using the instruction instead of constant pool can reduce the asm code size and instructions in power10.

llvmbot · 2025-06-13T14:33:31Z

@llvm/pr-subscribers-backend-powerpc

Author: zhijian lin (diggerlin)

Changes

The instruction MTVSRBMI move 0x00(or 0xFF) to each byte of VSR based on the bits mask. Using the instruction instead of constant pool can reduce the asm code size and instructions in power10.

Full diff: https://github.com/llvm/llvm-project/pull/144084.diff

2 Files Affected:

(modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+49)
(modified) llvm/test/CodeGen/PowerPC/mtvsrbmi.ll (+4-19)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0f8e5e57c58b7..60c89aff155f3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9580,6 +9580,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
   return false;
 }
 
+bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) {
+  unsigned int NumOps = BVN.getNumOperands();
+  assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+
+  BMI.clearAllBits();
+  EVT VT = BVN.getValueType(0);
+  APInt ConstValue(VT.getSizeInBits(), 0);
+
+  unsigned EltWidth = VT.getScalarSizeInBits();
+
+  for (unsigned j = 0; j < NumOps; ++j) {
+    SDValue OpVal = BVN.getOperand(j);
+    unsigned BitPos = j * EltWidth;
+    auto *CN = dyn_cast<ConstantSDNode>(OpVal);
+
+    if (!CN)
+      return false;
+
+    ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+  }
+
+  for (unsigned J = 0; J < 16; J++) {
+    APInt ExtractValue = ConstValue.extractBits(8, J * 8);
+    if (ExtractValue != 0x00 && ExtractValue != 0xFF)
+      return false;
+    if (ExtractValue == 0xFF)
+      BMI.setBit(J);
+  }
+  return true;
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -9591,6 +9622,24 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
 
+  if(Subtarget.hasP10Vector()) {
+    APInt BMI(32, 0);
+    // If the value of the vector is all zeros or all ones,
+    // we do not convert it to MTVSRBMI.
+    // The xxleqv instruction sets a vector with all ones.
+    // The xxlxor instruction sets a vector with all zeros.
+    if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) {
+      SDValue  SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32);
+      MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
+      SDValue  SDV = SDValue(MSDNode,0);
+      EVT DVT = BVN->getValueType(0);
+      EVT SVT = SDV.getValueType();
+      if (SVT != DVT ) {
+	SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
+      }
+      return SDV;
+    }
+  }
   // Check if this is a splat of a constant value.
   APInt APSplatBits, APSplatUndef;
   unsigned SplatBitSize;
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 5486dc02faf90..232014db9a012 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -10,28 +10,13 @@
 ; RUN:   | FileCheck %s --check-prefix=CHECK
 
 define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK:      L..CPI0_0:
-; CHECK-NEXT:   .byte   255                             # 0xff
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
+; CHECK-NOT:      L..CPI0_0:
+; CHECK-NOT:   .byte   255                             # 0xff
+; CHECK-NOT:   .byte   0                               # 0x0
 
 ; CHECK-LABEL: _Z5v00FFv:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lwz r3, L..C0(r2) # %const.0
-; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    mtvsrbmi v2, 1
 ; CHECK-NEXT:    blr
 entry:
   ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>

github-actions · 2025-06-13T14:35:57Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/PowerPC/PPCISelLowering.cpp

View the diff from clang-format here.

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5813e84c4..f359d3416 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9624,7 +9624,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
 
-  if(Subtarget.hasP10Vector()) {
+  if (Subtarget.hasP10Vector()) {
     APInt BitMask(32, 0);
     // If the value of the vector is all zeros or all ones,
     // we do not convert it to MTVSRBMI.
@@ -9632,11 +9632,12 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     // The xxlxor instruction sets a vector with all zeros.
     if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
       SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
-      MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
+      MachineSDNode *MSDNode =
+          DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
       SDValue SDV = SDValue(MSDNode, 0);
       EVT DVT = BVN->getValueType(0);
       EVT SVT = SDV.getValueType();
-      if (SVT != DVT ) {
+      if (SVT != DVT) {
         SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
       }
       return SDV;

lei137 · 2025-06-13T20:03:08Z