Skip to content

Commit 0803dba

Browse files
committed
[RISCV] Add fixed-length vector intrinsics for segment load
Inspired by reviews.llvm.org/D107790. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D119834
1 parent d537075 commit 0803dba

File tree

3 files changed

+193
-0
lines changed

3 files changed

+193
-0
lines changed

llvm/include/llvm/IR/IntrinsicsRISCV.td

+10
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,16 @@ let TargetPrefix = "riscv" in {
14991499
[llvm_anyvector_ty, llvm_anyptr_ty,
15001500
llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
15011501
[NoCapture<ArgIndex<1>>, IntrWriteMem]>;
1502+
1503+
// Segment loads for fixed vectors.
//
// For each nf in [2, 8] this defines one intrinsic, int_riscv_seg<nf>_load.
//   Results:  nf vectors that all share a single overloaded type. The first
//             result is llvm_anyvector_ty and the remaining nf-1 results are
//             LLVMMatchType<0>, i.e. constrained to match the first.
//   Operands: an overloaded pointer (llvm_anyptr_ty) followed by an
//             overloaded integer (llvm_anyint_ty).
//             NOTE(review): the integer operand is presumably the element
//             count / VL -- confirm against the lowering code.
//   Attrs:    the pointer operand (ArgIndex<0>) is NoCapture and the
//             intrinsic is marked IntrReadMem.
1504+
foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
1505+
def int_riscv_seg # nf # _load
1506+
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
1507+
!add(nf, -1))),
1508+
[llvm_anyptr_ty, llvm_anyint_ty],
1509+
[NoCapture<ArgIndex<0>>, IntrReadMem]>;
1510+
}
1511+
15021512
} // TargetPrefix = "riscv"
15031513

15041514
//===----------------------------------------------------------------------===//

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+54
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,24 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11261126
Info.size = MemoryLocation::UnknownSize;
11271127
Info.flags |= MachineMemOperand::MOStore;
11281128
return true;
1129+
case Intrinsic::riscv_seg2_load:
1130+
case Intrinsic::riscv_seg3_load:
1131+
case Intrinsic::riscv_seg4_load:
1132+
case Intrinsic::riscv_seg5_load:
1133+
case Intrinsic::riscv_seg6_load:
1134+
case Intrinsic::riscv_seg7_load:
1135+
case Intrinsic::riscv_seg8_load:
1136+
Info.opc = ISD::INTRINSIC_W_CHAIN;
1137+
Info.ptrVal = I.getArgOperand(0);
1138+
Info.memVT =
1139+
getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
1140+
Info.align =
1141+
Align(DL.getTypeSizeInBits(
1142+
I.getType()->getStructElementType(0)->getScalarType()) /
1143+
8);
1144+
Info.size = MemoryLocation::UnknownSize;
1145+
Info.flags |= MachineMemOperand::MOLoad;
1146+
return true;
11291147
}
11301148
}
11311149

@@ -4878,6 +4896,42 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
48784896
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
48794897
return DAG.getMergeValues({Result, Chain}, DL);
48804898
}
4899+
case Intrinsic::riscv_seg2_load:
4900+
case Intrinsic::riscv_seg3_load:
4901+
case Intrinsic::riscv_seg4_load:
4902+
case Intrinsic::riscv_seg5_load:
4903+
case Intrinsic::riscv_seg6_load:
4904+
case Intrinsic::riscv_seg7_load:
4905+
case Intrinsic::riscv_seg8_load: {
4906+
SDLoc DL(Op);
4907+
static const Intrinsic::ID VlsegInts[7] = {
4908+
Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
4909+
Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
4910+
Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
4911+
Intrinsic::riscv_vlseg8};
4912+
unsigned NF = Op->getNumValues() - 1;
4913+
assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
4914+
MVT XLenVT = Subtarget.getXLenVT();
4915+
MVT VT = Op->getSimpleValueType(0);
4916+
MVT ContainerVT = getContainerForFixedLengthVector(VT);
4917+
4918+
SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4919+
SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
4920+
auto *Load = cast<MemIntrinsicSDNode>(Op);
4921+
SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
4922+
ContainerVTs.push_back(MVT::Other);
4923+
SDVTList VTs = DAG.getVTList(ContainerVTs);
4924+
SDValue Result =
4925+
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
4926+
{Load->getChain(), IntID, Op.getOperand(2), VL},
4927+
Load->getMemoryVT(), Load->getMemOperand());
4928+
SmallVector<SDValue, 9> Results;
4929+
for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
4930+
Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
4931+
DAG, Subtarget));
4932+
Results.push_back(Result.getValue(NF));
4933+
return DAG.getMergeValues(Results, DL);
4934+
}
48814935
}
48824936

48834937
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple riscv64 -mattr=+zve64x -riscv-v-vector-bits-min=128 < %s \
3+
; RUN: | FileCheck %s
4+
5+
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
6+
; CHECK-LABEL: load_factor2:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
9+
; CHECK-NEXT: vlseg2e8.v v7, (a0)
10+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
11+
; CHECK-NEXT: ret
12+
%1 = bitcast <16 x i8>* %ptr to i8*
13+
%2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8)
14+
%3 = extractvalue { <8 x i8>, <8 x i8> } %2, 0
15+
%4 = extractvalue { <8 x i8>, <8 x i8> } %2, 1
16+
ret <8 x i8> %4
17+
}
18+
19+
define <8 x i8> @load_factor3(<24 x i8>* %ptr) {
20+
; CHECK-LABEL: load_factor3:
21+
; CHECK: # %bb.0:
22+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
23+
; CHECK-NEXT: vlseg3e8.v v6, (a0)
24+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8
25+
; CHECK-NEXT: ret
26+
%1 = bitcast <24 x i8>* %ptr to i8*
27+
%2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8)
28+
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
29+
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
30+
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
31+
ret <8 x i8> %5
32+
}
33+
34+
define <8 x i8> @load_factor4(<32 x i8>* %ptr) {
35+
; CHECK-LABEL: load_factor4:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
38+
; CHECK-NEXT: vlseg4e8.v v5, (a0)
39+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8
40+
; CHECK-NEXT: ret
41+
%1 = bitcast <32 x i8>* %ptr to i8*
42+
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8)
43+
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
44+
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
45+
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
46+
%6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
47+
ret <8 x i8> %6
48+
}
49+
50+
define <8 x i8> @load_factor5(<40 x i8>* %ptr) {
51+
; CHECK-LABEL: load_factor5:
52+
; CHECK: # %bb.0:
53+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
54+
; CHECK-NEXT: vlseg5e8.v v4, (a0)
55+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8
56+
; CHECK-NEXT: ret
57+
%1 = bitcast <40 x i8>* %ptr to i8*
58+
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8)
59+
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
60+
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
61+
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
62+
%6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
63+
%7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
64+
ret <8 x i8> %7
65+
}
66+
67+
define <8 x i8> @load_factor6(<48 x i8>* %ptr) {
68+
; CHECK-LABEL: load_factor6:
69+
; CHECK: # %bb.0:
70+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
71+
; CHECK-NEXT: vlseg6e8.v v3, (a0)
72+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8
73+
; CHECK-NEXT: ret
74+
%1 = bitcast <48 x i8>* %ptr to i8*
75+
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8)
76+
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
77+
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
78+
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
79+
%6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
80+
%7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
81+
%8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
82+
ret <8 x i8> %8
83+
}
84+
85+
define <8 x i8> @load_factor7(<56 x i8>* %ptr) {
86+
; CHECK-LABEL: load_factor7:
87+
; CHECK: # %bb.0:
88+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
89+
; CHECK-NEXT: vlseg7e8.v v2, (a0)
90+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8
91+
; CHECK-NEXT: ret
92+
%1 = bitcast <56 x i8>* %ptr to i8*
93+
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8)
94+
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
95+
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
96+
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
97+
%6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
98+
%7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
99+
%8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
100+
%9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6
101+
ret <8 x i8> %9
102+
}
103+
104+
define <8 x i8> @load_factor8(<64 x i8>* %ptr) {
105+
; CHECK-LABEL: load_factor8:
106+
; CHECK: # %bb.0:
107+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
108+
; CHECK-NEXT: vlseg8e8.v v1, (a0)
109+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8
110+
; CHECK-NEXT: ret
111+
%1 = bitcast <64 x i8>* %ptr to i8*
112+
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8)
113+
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
114+
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
115+
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
116+
%6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
117+
%7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
118+
%8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
119+
%9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6
120+
%10 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 7
121+
ret <8 x i8> %10
122+
}
123+
declare { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8*, i64)
124+
declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8*, i64)
125+
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8*, i64)
126+
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8*, i64)
127+
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8*, i64)
128+
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8*, i64)
129+
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8*, i64)

0 commit comments

Comments
 (0)