From 085cc302bc4dcc86414ae200a59ac3e2b6b43b98 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Sat, 25 Jan 2025 22:57:25 +0530 Subject: [PATCH 1/6] [SelectionDAG] Replace `INSERT_SUBVECTOR` with series of `INSERT_VECTOR_ELT` If the operands to `INSERT_SUBVECTOR` can't be widened legally, just replace the `INSERT_SUBVECTOR` with a series of `INSERT_VECTOR_ELT`. Closes #124255 (and possibly #102016) --- .../SelectionDAG/LegalizeVectorTypes.cpp | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f39d9ca15496a..81cf1afe746e8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -7040,8 +7040,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { SDValue SubVec = N->getOperand(1); SDValue InVec = N->getOperand(0); - if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) - SubVec = GetWidenedVector(SubVec); + SDValue OrigSubVec; + if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) { + OrigSubVec = std::move(SubVec); + SubVec = GetWidenedVector(OrigSubVec); + } EVT SubVT = SubVec.getValueType(); @@ -7070,8 +7073,23 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec, N->getOperand(2)); - report_fatal_error("Don't know how to widen the operands for " - "INSERT_SUBVECTOR"); + // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR + // with a series of INSERT_VECTOR_ELT + EVT OrigVT = OrigSubVec.getValueType(); + unsigned Idx = N->getConstantOperandVal(2); + + SDValue InsertVecElt; + SDLoc DL(N); + for (unsigned I = 0; I < OrigVT.getVectorNumElements(); ++I) { + SDValue Extract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), SubVec, + DAG.getIntPtrConstant(I, DL, /*isTarget*/ true)); + InsertVecElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, + I != 0 ? InsertVecElt : InVec, Extract, + DAG.getIntPtrConstant(I + Idx, DL, true)); + } + + return InsertVecElt; } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { From 0a5a2e31c3ab548134ee8b480092fcc754241545 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Mon, 27 Jan 2025 13:40:10 +0530 Subject: [PATCH 2/6] Use `getVectorIdxConstant` --- .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 81cf1afe746e8..30cb86a8ed092 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -7078,18 +7078,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { EVT OrigVT = OrigSubVec.getValueType(); unsigned Idx = N->getConstantOperandVal(2); - SDValue InsertVecElt; + SDValue InsertElt; SDLoc DL(N); for (unsigned I = 0; I < OrigVT.getVectorNumElements(); ++I) { - SDValue Extract = + SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), SubVec, - DAG.getIntPtrConstant(I, DL, /*isTarget*/ true)); - InsertVecElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, - I != 0 ? InsertVecElt : InVec, Extract, - DAG.getIntPtrConstant(I + Idx, DL, true)); + DAG.getVectorIdxConstant(I, DL)); + InsertElt = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, I != 0 ? InsertElt : InVec, + ExtractElt, DAG.getVectorIdxConstant(I + Idx, DL)); } - return InsertVecElt; + return InsertElt; } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { From 1cb71fab7949dca9609697dcc88ccb4483fba24f Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Mon, 27 Jan 2025 13:41:12 +0530 Subject: [PATCH 3/6] Add tests --- llvm/test/CodeGen/X86/pr124255.ll | 47 +++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr124255.ll diff --git a/llvm/test/CodeGen/X86/pr124255.ll b/llvm/test/CodeGen/X86/pr124255.ll new file mode 100644 index 0000000000000..a472c20e4d53c --- /dev/null +++ b/llvm/test/CodeGen/X86/pr124255.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s + +define <4 x i32> @insert_i32_v2_in_v4_at_0(<4 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_i32_v2_in_v4_at_0: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; CHECK-NEXT: retq + %result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0) + ret <4 x i32> %result +} + +define <4 x i32> @insert_i32_v2_in_v4_at_2(<4 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_i32_v2_in_v4_at_2: +; CHECK: # %bb.0: +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq + %result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 2) + ret <4 x i32> %result +} + +define <4 x float> @insert_f32_v2_in_v4_at_0(<4 x float> %a, <2 x float> %b) { +; CHECK-LABEL: insert_f32_v2_in_v4_at_0: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; CHECK-NEXT: retq + %result = tail call <4 x float> @llvm.vector.insert.v4float.v2float(<4 x float> %a, <2 x float> %b, i64 0) + ret <4 x float> %result +} + +define <8 x i32> @insert_i32_v2_in_v8_at_0(<8 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_i32_v2_in_v8_at_0: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; CHECK-NEXT: retq + %result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 0) + ret <8 x i32> %result +} + +define <8 x i32> @insert_i32_v2_in_v8_at_6(<8 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_i32_v2_in_v8_at_6: +; CHECK: # %bb.0: +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: retq + %result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 6) + ret <8 x i32> %result +} From 275878cc403a951aafa87ccf68c0b2a8ead99f6e Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Mon, 27 Jan 2025 14:13:28 +0530 Subject: [PATCH 4/6] Update LegalizeVectorTypes.cpp --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 30cb86a8ed092..fac3757b96bc2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -7080,13 +7080,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { SDValue InsertElt; SDLoc DL(N); + EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); for (unsigned I = 0; I < OrigVT.getVectorNumElements(); ++I) { SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), SubVec, - DAG.getVectorIdxConstant(I, DL)); + DAG.getConstant(I, DL, VectorIdxTy)); InsertElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, I != 0 ? InsertElt : InVec, - ExtractElt, DAG.getVectorIdxConstant(I + Idx, DL)); + ExtractElt, DAG.getConstant(I + Idx, DL, VectorIdxTy)); } return InsertElt; From f3f6e571efbdeba379a361f4514264ef60dcb1c6 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Mon, 27 Jan 2025 20:57:19 +0530 Subject: [PATCH 5/6] Update LegalizeVectorTypes.cpp --- .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index fac3757b96bc2..b5d3c40e76470 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -7040,10 +7040,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { SDValue SubVec = N->getOperand(1); SDValue InVec = N->getOperand(0); - SDValue OrigSubVec; + EVT OrigVT; if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) { - OrigSubVec = std::move(SubVec); - SubVec = GetWidenedVector(OrigSubVec); + OrigVT = SubVec.getValueType(); + SubVec = GetWidenedVector(SubVec); } EVT SubVT = SubVec.getValueType(); @@ -7075,10 +7075,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR // with a series of INSERT_VECTOR_ELT - EVT OrigVT = OrigSubVec.getValueType(); unsigned Idx = N->getConstantOperandVal(2); - SDValue InsertElt; + SDValue InsertElt = InVec; SDLoc DL(N); EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); for (unsigned I = 0; I < OrigVT.getVectorNumElements(); ++I) { @@ -7086,8 +7085,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), SubVec, DAG.getConstant(I, DL, VectorIdxTy)); InsertElt = - DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, I != 0 ? InsertElt : InVec, - ExtractElt, DAG.getConstant(I + Idx, DL, VectorIdxTy)); + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertElt, ExtractElt, + DAG.getConstant(I + Idx, DL, VectorIdxTy)); } return InsertElt; From 53247085144a0826dce79e4696640fed2973debc Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Tue, 28 Jan 2025 13:07:11 +0530 Subject: [PATCH 6/6] Address review comments --- .../SelectionDAG/LegalizeVectorTypes.cpp | 21 +++++++++++-------- llvm/test/CodeGen/X86/pr124255.ll | 20 +++++++++--------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index b5d3c40e76470..1000235ab4061 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -7040,11 +7040,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { SDValue SubVec = N->getOperand(1); SDValue InVec = N->getOperand(0); - EVT OrigVT; - if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) { - OrigVT = SubVec.getValueType(); + EVT OrigVT = SubVec.getValueType(); + if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) SubVec = GetWidenedVector(SubVec); - } EVT SubVT = SubVec.getValueType(); @@ -7067,23 +7065,28 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { } } + SDLoc DL(N); + // We need to make sure that the indices are still valid, otherwise we might // widen what was previously well-defined to something undefined. if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0) - return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec, + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec, N->getOperand(2)); + if (!IndicesValid || OrigVT.isScalableVector()) + report_fatal_error( + "Don't know how to widen the operands for INSERT_SUBVECTOR"); + // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR // with a series of INSERT_VECTOR_ELT unsigned Idx = N->getConstantOperandVal(2); SDValue InsertElt = InVec; - SDLoc DL(N); EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - for (unsigned I = 0; I < OrigVT.getVectorNumElements(); ++I) { + for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) { SDValue ExtractElt = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), SubVec, - DAG.getConstant(I, DL, VectorIdxTy)); + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getVectorElementType(), + SubVec, DAG.getConstant(I, DL, VectorIdxTy)); InsertElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertElt, ExtractElt, DAG.getConstant(I + Idx, DL, VectorIdxTy)); diff --git a/llvm/test/CodeGen/X86/pr124255.ll b/llvm/test/CodeGen/X86/pr124255.ll index a472c20e4d53c..da502442baddb 100644 --- a/llvm/test/CodeGen/X86/pr124255.ll +++ b/llvm/test/CodeGen/X86/pr124255.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s -define <4 x i32> @insert_i32_v2_in_v4_at_0(<4 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: insert_i32_v2_in_v4_at_0: +define <4 x i32> @insert_v2i32_in_v4i32_at_0(<4 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_v2i32_in_v4i32_at_0: ; CHECK: # %bb.0: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; CHECK-NEXT: retq @@ -10,8 +10,8 @@ define <4 x i32> @insert_i32_v2_in_v4_at_0(<4 x i32> %a, <2 x i32> %b) { ret <4 x i32> %result } -define <4 x i32> @insert_i32_v2_in_v4_at_2(<4 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: insert_i32_v2_in_v4_at_2: +define <4 x i32> @insert_v2i32_in_v4i32_at_2(<4 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_v2i32_in_v4i32_at_2: ; CHECK: # %bb.0: ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -19,8 +19,8 @@ define <4 x i32> @insert_i32_v2_in_v4_at_2(<4 x i32> %a, <2 x i32> %b) { ret <4 x i32> %result } -define <4 x float> @insert_f32_v2_in_v4_at_0(<4 x float> %a, <2 x float> %b) { -; CHECK-LABEL: insert_f32_v2_in_v4_at_0: +define <4 x float> @insert_v2f32_in_v4f32_at_0(<4 x float> %a, <2 x float> %b) { +; CHECK-LABEL: insert_v2f32_in_v4f32_at_0: ; CHECK: # %bb.0: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; CHECK-NEXT: retq @@ -28,8 +28,8 @@ define <4 x float> @insert_f32_v2_in_v4_at_0(<4 x float> %a, <2 x float> %b) { ret <4 x float> %result } -define <8 x i32> @insert_i32_v2_in_v8_at_0(<8 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: insert_i32_v2_in_v8_at_0: +define <8 x i32> @insert_v2i32_in_v8i32_at_0(<8 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_v2i32_in_v8i32_at_0: ; CHECK: # %bb.0: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; CHECK-NEXT: retq @@ -37,8 +37,8 @@ define <8 x i32> @insert_i32_v2_in_v8_at_0(<8 x i32> %a, <2 x i32> %b) { ret <8 x i32> %result } -define <8 x i32> @insert_i32_v2_in_v8_at_6(<8 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: insert_i32_v2_in_v8_at_6: +define <8 x i32> @insert_v2i32_in_v8i32_at_6(<8 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: insert_v2i32_in_v8i32_at_6: ; CHECK: # %bb.0: ; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq