[RISCV] Handle .vx/.vi pseudos in hasAllNBitUsers #67419

Merged 5 commits on Sep 27, 2023
112 changes: 112 additions & 0 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -130,6 +130,118 @@ parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {

} // namespace RISCVFeatures

bool RISCVII::vectorInstUsesNBitsOfScalarOp(uint16_t Opcode, unsigned Bits,
unsigned Log2SEW) {
// TODO: Handle Zvbb instructions
switch (Opcode) {
default:
return false;

// 11.6. Vector Single-Width Shift Instructions
case RISCV::VSLL_VX:
case RISCV::VSRL_VX:
case RISCV::VSRA_VX:
// 12.4. Vector Single-Width Scaling Shift Instructions
case RISCV::VSSRL_VX:
case RISCV::VSSRA_VX:
// Only the low lg2(SEW) bits of the shift-amount value are used.
return Log2SEW <= Bits;

// 11.7 Vector Narrowing Integer Right Shift Instructions
case RISCV::VNSRL_WX:
case RISCV::VNSRA_WX:
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
case RISCV::VNCLIPU_WX:
case RISCV::VNCLIP_WX:
// Only the low lg2(2*SEW) bits of the shift-amount value are used.
return (Log2SEW + 1) <= Bits;

// 11.1. Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VX:
case RISCV::VSUB_VX:
case RISCV::VRSUB_VX:
// 11.2. Vector Widening Integer Add/Subtract
case RISCV::VWADDU_VX:
case RISCV::VWSUBU_VX:
case RISCV::VWADD_VX:
case RISCV::VWSUB_VX:
case RISCV::VWADDU_WX:
case RISCV::VWSUBU_WX:
case RISCV::VWADD_WX:
case RISCV::VWSUB_WX:
// 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
case RISCV::VADC_VXM:
case RISCV::VADC_VIM:
case RISCV::VMADC_VXM:
case RISCV::VMADC_VIM:
case RISCV::VMADC_VX:
case RISCV::VSBC_VXM:
case RISCV::VMSBC_VXM:
case RISCV::VMSBC_VX:
// 11.5 Vector Bitwise Logical Instructions
case RISCV::VAND_VX:
case RISCV::VOR_VX:
case RISCV::VXOR_VX:
// 11.8. Vector Integer Compare Instructions
case RISCV::VMSEQ_VX:
case RISCV::VMSNE_VX:
case RISCV::VMSLTU_VX:
case RISCV::VMSLT_VX:
case RISCV::VMSLEU_VX:
case RISCV::VMSLE_VX:
case RISCV::VMSGTU_VX:
case RISCV::VMSGT_VX:
// 11.9. Vector Integer Min/Max Instructions
case RISCV::VMINU_VX:
case RISCV::VMIN_VX:
case RISCV::VMAXU_VX:
case RISCV::VMAX_VX:
// 11.10. Vector Single-Width Integer Multiply Instructions
case RISCV::VMUL_VX:
case RISCV::VMULH_VX:
case RISCV::VMULHU_VX:
case RISCV::VMULHSU_VX:
// 11.11. Vector Integer Divide Instructions
case RISCV::VDIVU_VX:
case RISCV::VDIV_VX:
case RISCV::VREMU_VX:
case RISCV::VREM_VX:
// 11.12. Vector Widening Integer Multiply Instructions
case RISCV::VWMUL_VX:
case RISCV::VWMULU_VX:
case RISCV::VWMULSU_VX:
// 11.13. Vector Single-Width Integer Multiply-Add Instructions
case RISCV::VMACC_VX:
case RISCV::VNMSAC_VX:
case RISCV::VMADD_VX:
case RISCV::VNMSUB_VX:
// 11.14. Vector Widening Integer Multiply-Add Instructions
case RISCV::VWMACCU_VX:
case RISCV::VWMACC_VX:
case RISCV::VWMACCSU_VX:
case RISCV::VWMACCUS_VX:
// 11.15. Vector Integer Merge Instructions
case RISCV::VMERGE_VXM:
// 11.16. Vector Integer Move Instructions
case RISCV::VMV_V_X:
// 12.1. Vector Single-Width Saturating Add and Subtract
case RISCV::VSADDU_VX:
case RISCV::VSADD_VX:
case RISCV::VSSUBU_VX:
case RISCV::VSSUB_VX:
// 12.2. Vector Single-Width Averaging Add and Subtract
case RISCV::VAADDU_VX:
case RISCV::VAADD_VX:
case RISCV::VASUBU_VX:
case RISCV::VASUB_VX:
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
case RISCV::VSMUL_VX:
// 16.1. Integer Scalar Move Instructions
case RISCV::VMV_S_X:
return (1 << Log2SEW) <= Bits;
}
}

// Encode VTYPE into the binary format used by the VSETVLI instruction which
// is used by our MC layer representation.
//
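
To make the three bit-width checks above concrete, here is a small walk-through with illustrative values (not part of the patch). With SEW = 64 (Log2SEW = 6), a shift such as vsll.vx reads only the low log2(64) = 6 bits of its scalar operand, a narrowing shift such as vnsrl.wx reads the low 7 bits, and an arithmetic op such as vadd.vx reads all 64 bits:

// Illustrative queries against the helper above, assuming Log2SEW = 6 (SEW = 64).
bool ShiftUses8 = RISCVII::vectorInstUsesNBitsOfScalarOp(RISCV::VSLL_VX, /*Bits=*/8, /*Log2SEW=*/6);   // true:  6 <= 8
bool NarrowUses8 = RISCVII::vectorInstUsesNBitsOfScalarOp(RISCV::VNSRL_WX, /*Bits=*/8, /*Log2SEW=*/6); // true:  6 + 1 <= 8
bool AddUses8 = RISCVII::vectorInstUsesNBitsOfScalarOp(RISCV::VADD_VX, /*Bits=*/8, /*Log2SEW=*/6);     // false: (1 << 6) = 64 > 8
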
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -241,6 +241,11 @@ static inline bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc) {
Desc.getOperandConstraint(Desc.getNumDefs(), MCOI::TIED_TO) == 0;
}

// Returns true if the .vx vector instruction \p Opcode only uses the lower \p
// Bits for a given SEW.
bool vectorInstUsesNBitsOfScalarOp(uint16_t Opcode, unsigned Bits,
unsigned Log2SEW);

// RISC-V Specific Machine Operand Flags
enum {
MO_None = 0,
26 changes: 25 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2782,8 +2782,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,

// TODO: Add more opcodes?
switch (User->getMachineOpcode()) {
default:
default: {
if (const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
RISCVVPseudosTable::getPseudoInfo(User->getMachineOpcode())) {

const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
if (!RISCVII::hasSEWOp(MCID.TSFlags))
return false;
assert(RISCVII::hasVLOp(MCID.TSFlags));

bool HasGlueOp = User->getGluedNode() != nullptr;
unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
bool HasChainOp =
User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp -
HasGlueOp - 2;
const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);

if (UI.getOperandNo() == VLIdx)
return false;
if (RISCVII::vectorInstUsesNBitsOfScalarOp(PseudoInfo->BaseInstr, Bits,
Log2SEW))
break;
}
return false;
}
case RISCV::ADDW:
case RISCV::ADDIW:
case RISCV::SUBW:
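
The operand arithmetic in the SelectionDAG hook above walks backwards from the end of the machine node's operand list: an optional glue operand, an optional chain operand, an optional policy operand, then SEW, then VL. A sketch of how that plays out on a hypothetical unmasked PseudoVADD_VX node (operand layout assumed for illustration only):

// Hypothetical unmasked PseudoVADD_VX node, layout assumed for illustration:
//   0: passthru, 1: vs2, 2: rs1 (scalar), 3: VL, 4: SEW, 5: policy, 6: chain
unsigned NumOps = 7;
bool HasGlueOp = false, HasChainOp = true, HasVecPolicyOp = true;
unsigned VLIdx = NumOps - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; // 3
unsigned SEWIdx = VLIdx + 1;                                           // 4
// A use at operand 3 (VL) is never narrowed; a use at operand 2 (rs1) is the
// scalar operand the new helper reasons about.
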
17 changes: 16 additions & 1 deletion llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -106,8 +106,23 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
unsigned OpIdx = UserOp.getOperandNo();

switch (UserMI->getOpcode()) {
default:
default: {
if (const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
RISCVVPseudosTable::getPseudoInfo(UserMI->getOpcode())) {
const MCInstrDesc &MCID = UserMI->getDesc();
if (!RISCVII::hasSEWOp(MCID.TSFlags))
return false;
assert(RISCVII::hasVLOp(MCID.TSFlags));
const unsigned Log2SEW =
UserMI->getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
return false;
if (RISCVII::vectorInstUsesNBitsOfScalarOp(PseudoInfo->BaseInstr,
Bits, Log2SEW))
break;
}
return false;
}

case RISCV::ADDIW:
case RISCV::ADDW:
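
The MachineInstr-level variant above is simpler than the SelectionDAG one because MIR operands carry no chain or glue, so the SEW and VL positions come straight from the MCInstrDesc accessors. A minimal sketch, again on a hypothetical unmasked PseudoVADD_VX with an assumed operand layout:

// Hypothetical unmasked PseudoVADD_VX MachineInstr, layout assumed for illustration:
//   0: vd (def), 1: passthru, 2: vs2, 3: rs1 (scalar), 4: VL, 5: SEW, 6: policy
const MCInstrDesc &MCID = UserMI->getDesc();
unsigned VLIdx = RISCVII::getVLOpNum(MCID);             // 4 for this layout
unsigned SEWIdx = RISCVII::getSEWOpNum(MCID);           // 5 for this layout
unsigned Log2SEW = UserMI->getOperand(SEWIdx).getImm(); // e.g. 4 for SEW = 16
// A use at operand 4 (VL) is never narrowed; only a use at operand 3 (rs1)
// is handed to vectorInstUsesNBitsOfScalarOp.
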
35 changes: 11 additions & 24 deletions llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

; These tests check that the scalable-vector version of this series of
; instructions does not get into an infinite DAGCombine loop. This was
@@ -14,26 +12,15 @@
; a constant SPLAT_VECTOR didn't follow suit.

define <2 x i16> @fixedlen(<2 x i32> %x) {
; RV32-LABEL: fixedlen:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vsrl.vi v8, v8, 16
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: vand.vx v8, v8, a0
; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: ret
;
; RV64-LABEL: fixedlen:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vsrl.vi v8, v8, 16
; RV64-NEXT: lui a0, 131071
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: ret
; CHECK-LABEL: fixedlen:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v8, v8, 16
; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: ret
%v41 = insertelement <2 x i32> poison, i32 16, i32 0
%v42 = shufflevector <2 x i32> %v41, <2 x i32> poison, <2 x i32> zeroinitializer
%v43 = lshr <2 x i32> %x, %v42
94 changes: 31 additions & 63 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -1,71 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s

; Integers

define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
; RV32-LABEL: vector_deinterleave_load_v16i1_v32i1:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT: vlm.v v0, (a0)
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v10, v8, 1, v0
; RV32-NEXT: vid.v v9
; RV32-NEXT: vadd.vv v11, v9, v9
; RV32-NEXT: vrgather.vv v9, v10, v11
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vadd.vi v12, v11, -16
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -256
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t
; RV32-NEXT: vmsne.vi v9, v9, 0
; RV32-NEXT: vadd.vi v12, v11, 1
; RV32-NEXT: vrgather.vv v13, v10, v12
; RV32-NEXT: vadd.vi v10, v11, -15
; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t
; RV32-NEXT: vmsne.vi v8, v13, 0
; RV32-NEXT: vmv.v.v v0, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vector_deinterleave_load_v16i1_v32i1:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT: vlm.v v0, (a0)
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v10, v8, 1, v0
; RV64-NEXT: vid.v v9
; RV64-NEXT: vadd.vv v11, v9, v9
; RV64-NEXT: vrgather.vv v9, v10, v11
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vadd.vi v12, v11, -16
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -256
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t
; RV64-NEXT: vmsne.vi v9, v9, 0
; RV64-NEXT: vadd.vi v12, v11, 1
; RV64-NEXT: vrgather.vv v13, v10, v12
; RV64-NEXT: vadd.vi v10, v11, -15
; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t
; RV64-NEXT: vmsne.vi v8, v13, 0
; RV64-NEXT: vmv.v.v v0, v9
; RV64-NEXT: ret
; CHECK-LABEL: vector_deinterleave_load_v16i1_v32i1:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vlm.v v0, (a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vadd.vv v11, v9, v9
; CHECK-NEXT: vrgather.vv v9, v10, v11
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vadd.vi v12, v11, -16
; CHECK-NEXT: li a0, -256
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
; CHECK-NEXT: vmsne.vi v9, v9, 0
; CHECK-NEXT: vadd.vi v12, v11, 1
; CHECK-NEXT: vrgather.vv v13, v10, v12
; CHECK-NEXT: vadd.vi v10, v11, -15
; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t
; CHECK-NEXT: vmsne.vi v8, v13, 0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: ret
%vec = load <32 x i1>, ptr %p
%retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
ret {<16 x i1>, <16 x i1>} %retval
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -585,8 +585,7 @@ define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %
;
; RV64-LABEL: vpreduce_umax_v2i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -626,8 +625,7 @@ define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %
;
; RV64-LABEL: vpreduce_umin_v2i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -727,8 +725,7 @@ define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %
;
; RV64-LABEL: vpreduce_umax_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
@@ -768,8 +765,7 @@ define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %
;
; RV64-LABEL: vpreduce_umin_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma