Skip to content

Commit 015aed1

Browse files
[SelectionDAG] WidenVecOp_INSERT_SUBVECTOR - Replace INSERT_SUBVECTOR with series of INSERT_VECTOR_ELT (#124420)
If the operands to `INSERT_SUBVECTOR` can't be widened legally, just replace the `INSERT_SUBVECTOR` with a series of `INSERT_VECTOR_ELT`. Closes #124255 (and possibly #102016)
1 parent: 606cf88 · commit: 015aed1

File tree

2 files changed: 71 additions and 3 deletions.

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7040,6 +7040,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
70407040
SDValue SubVec = N->getOperand(1);
70417041
SDValue InVec = N->getOperand(0);
70427042

7043+
EVT OrigVT = SubVec.getValueType();
70437044
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
70447045
SubVec = GetWidenedVector(SubVec);
70457046

@@ -7064,14 +7065,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
70647065
}
70657066
}
70667067

7068+
SDLoc DL(N);
7069+
70677070
// We need to make sure that the indices are still valid, otherwise we might
70687071
// widen what was previously well-defined to something undefined.
70697072
if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
7070-
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
7073+
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
70717074
N->getOperand(2));
70727075

7073-
report_fatal_error("Don't know how to widen the operands for "
7074-
"INSERT_SUBVECTOR");
7076+
if (!IndicesValid || OrigVT.isScalableVector())
7077+
report_fatal_error(
7078+
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7079+
7080+
// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
7081+
// with a series of INSERT_VECTOR_ELT
7082+
unsigned Idx = N->getConstantOperandVal(2);
7083+
7084+
SDValue InsertElt = InVec;
7085+
EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7086+
for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) {
7087+
SDValue ExtractElt =
7088+
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getVectorElementType(),
7089+
SubVec, DAG.getConstant(I, DL, VectorIdxTy));
7090+
InsertElt =
7091+
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertElt, ExtractElt,
7092+
DAG.getConstant(I + Idx, DL, VectorIdxTy));
7093+
}
7094+
7095+
return InsertElt;
70757096
}
70767097

70777098
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {

llvm/test/CodeGen/X86/pr124255.ll

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3+
4+
define <4 x i32> @insert_v2i32_in_v4i32_at_0(<4 x i32> %a, <2 x i32> %b) {
5+
; CHECK-LABEL: insert_v2i32_in_v4i32_at_0:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
8+
; CHECK-NEXT: retq
9+
%result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
10+
ret <4 x i32> %result
11+
}
12+
13+
define <4 x i32> @insert_v2i32_in_v4i32_at_2(<4 x i32> %a, <2 x i32> %b) {
14+
; CHECK-LABEL: insert_v2i32_in_v4i32_at_2:
15+
; CHECK: # %bb.0:
16+
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
17+
; CHECK-NEXT: retq
18+
%result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 2)
19+
ret <4 x i32> %result
20+
}
21+
22+
define <4 x float> @insert_v2f32_in_v4f32_at_0(<4 x float> %a, <2 x float> %b) {
23+
; CHECK-LABEL: insert_v2f32_in_v4f32_at_0:
24+
; CHECK: # %bb.0:
25+
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
26+
; CHECK-NEXT: retq
27+
%result = tail call <4 x float> @llvm.vector.insert.v4float.v2float(<4 x float> %a, <2 x float> %b, i64 0)
28+
ret <4 x float> %result
29+
}
30+
31+
define <8 x i32> @insert_v2i32_in_v8i32_at_0(<8 x i32> %a, <2 x i32> %b) {
32+
; CHECK-LABEL: insert_v2i32_in_v8i32_at_0:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
35+
; CHECK-NEXT: retq
36+
%result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 0)
37+
ret <8 x i32> %result
38+
}
39+
40+
define <8 x i32> @insert_v2i32_in_v8i32_at_6(<8 x i32> %a, <2 x i32> %b) {
41+
; CHECK-LABEL: insert_v2i32_in_v8i32_at_6:
42+
; CHECK: # %bb.0:
43+
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
44+
; CHECK-NEXT: retq
45+
%result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 6)
46+
ret <8 x i32> %result
47+
}

0 commit comments

Comments
 (0)