llvm · abhishek-kaushik22 · Jan 28, 2025 · Jan 25, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -7040,8 +7040,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
   SDValue SubVec = N->getOperand(1);
   SDValue InVec = N->getOperand(0);
 
-  if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
-    SubVec = GetWidenedVector(SubVec);
+  SDValue OrigSubVec;
+  if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) {
+    OrigSubVec = std::move(SubVec);
+    SubVec = GetWidenedVector(OrigSubVec);
+  }
 
   EVT SubVT = SubVec.getValueType();
 
@@ -7070,8 +7073,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
                        N->getOperand(2));
 
-  report_fatal_error("Don't know how to widen the operands for "
-                     "INSERT_SUBVECTOR");
+  // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
+  // with a series of INSERT_VECTOR_ELT
+  EVT OrigVT = OrigSubVec.getValueType();
+  unsigned Idx = N->getConstantOperandVal(2);
+
+  SDValue InsertElt;
+  SDLoc DL(N);
+  EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+  for (unsigned I = 0; I < OrigVT.getVectorNumElements(); ++I) {
+    SDValue ExtractElt =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), SubVec,
+                    DAG.getConstant(I, DL, VectorIdxTy));
+    InsertElt =
+        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, I != 0 ? InsertElt : InVec,
+                    ExtractElt, DAG.getConstant(I + Idx, DL, VectorIdxTy));
+  }
+
+  return InsertElt;
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {

diff --git a/llvm/test/CodeGen/X86/pr124255.ll b/llvm/test/CodeGen/X86/pr124255.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+define <4 x i32> @insert_i32_v2_in_v4_at_0(<4 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_i32_v2_in_v4_at_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT:    retq
+  %result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
+  ret <4 x i32> %result
+}
+
+define <4 x i32> @insert_i32_v2_in_v4_at_2(<4 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_i32_v2_in_v4_at_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+  %result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 2)
+  ret <4 x i32> %result
+}
+
+define <4 x float> @insert_f32_v2_in_v4_at_0(<4 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: insert_f32_v2_in_v4_at_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT:    retq
+  %result = tail call <4 x float> @llvm.vector.insert.v4float.v2float(<4 x float> %a, <2 x float> %b, i64 0)
+  ret <4 x float> %result
+}
+
+define <8 x i32> @insert_i32_v2_in_v8_at_0(<8 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_i32_v2_in_v8_at_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; CHECK-NEXT:    retq
+  %result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 0)
+  ret <8 x i32> %result
+}
+
+define <8 x i32> @insert_i32_v2_in_v8_at_6(<8 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: insert_i32_v2_in_v8_at_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retq
+  %result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 6)
+  ret <8 x i32> %result
+}