Patch summary (commentary only; everything before the first "diff --git"
header lies outside the hunks).

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp,
DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR:

  The subvector's value type is saved as OrigVT before SubVec may be
  replaced by its widened form.

  A single SDLoc named DL is created and used where SDLoc(N) was previously
  constructed inline.

  The fatal error "Don't know how to widen the operands for INSERT_SUBVECTOR"
  is no longer unconditional. It is now reported only when IndicesValid is
  false or OrigVT is a scalable vector.

  In the remaining case the node is expanded one element at a time. The
  running result starts as InVec. For every I from 0 up to, but excluding,
  OrigVT.getVectorNumElements(), element I is read from SubVec with
  EXTRACT_VECTOR_ELT and written into the running result at position
  I + Idx with INSERT_VECTOR_ELT, where Idx is constant operand 2 of N.
  The final running result is returned.

llvm/test/CodeGen/X86/pr124255.ll (new file):

  An llc + FileCheck test for the x86_64 triple. It calls llvm.vector.insert
  to place <2 x i32> into <4 x i32> at indices 0 and 2, <2 x float> into
  <4 x float> at index 0, and <2 x i32> into <8 x i32> at indices 0 and 6.

NOTE(review): this copy of the patch was restored from a version whose
whitespace had been collapsed. Blank lines were placed so that every hunk
matches the line counts in its "@@" header; indentation and the spacing
inside the CHECK lines follow the usual LLVM and update_llc_test_checks.py
layout and are presumed, not verified. Confirm against the original commit
before applying.

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f39d9ca15496a..1000235ab4061 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -7040,6 +7040,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
   SDValue SubVec = N->getOperand(1);
   SDValue InVec = N->getOperand(0);
 
+  EVT OrigVT = SubVec.getValueType();
   if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
     SubVec = GetWidenedVector(SubVec);
 
@@ -7064,14 +7065,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
     }
   }
 
+  SDLoc DL(N);
+
   // We need to make sure that the indices are still valid, otherwise we might
   // widen what was previously well-defined to something undefined.
   if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
-    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
                        N->getOperand(2));
 
-  report_fatal_error("Don't know how to widen the operands for "
-                     "INSERT_SUBVECTOR");
+  if (!IndicesValid || OrigVT.isScalableVector())
+    report_fatal_error(
+        "Don't know how to widen the operands for INSERT_SUBVECTOR");
+
+  // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
+  // with a series of INSERT_VECTOR_ELT
+  unsigned Idx = N->getConstantOperandVal(2);
+
+  SDValue InsertElt = InVec;
+  EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+  for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) {
+    SDValue ExtractElt =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getVectorElementType(),
+                    SubVec, DAG.getConstant(I, DL, VectorIdxTy));
+    InsertElt =
+        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertElt, ExtractElt,
+                    DAG.getConstant(I + Idx, DL, VectorIdxTy));
+  }
+
+  return InsertElt;
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
diff --git a/llvm/test/CodeGen/X86/pr124255.ll b/llvm/test/CodeGen/X86/pr124255.ll
new file mode 100644
index 0000000000000..da502442baddb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr124255.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+define <4 x i32> @insert_v2i32_in_v4i32_at_0(<4 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_v2i32_in_v4i32_at_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT:    retq
+  %result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
+  ret <4 x i32> %result
+}
+
+define <4 x i32> @insert_v2i32_in_v4i32_at_2(<4 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_v2i32_in_v4i32_at_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+  %result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 2)
+  ret <4 x i32> %result
+}
+
+define <4 x float> @insert_v2f32_in_v4f32_at_0(<4 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: insert_v2f32_in_v4f32_at_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT:    retq
+  %result = tail call <4 x float> @llvm.vector.insert.v4float.v2float(<4 x float> %a, <2 x float> %b, i64 0)
+  ret <4 x float> %result
+}
+
+define <8 x i32> @insert_v2i32_in_v8i32_at_0(<8 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_v2i32_in_v8i32_at_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; CHECK-NEXT:    retq
+  %result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 0)
+  ret <8 x i32> %result
+}
+
+define <8 x i32> @insert_v2i32_in_v8i32_at_6(<8 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_v2i32_in_v8i32_at_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retq
+  %result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 6)
+  ret <8 x i32> %result
+}