From a03e473a79ee73151f67fe77b35dd05b817e2a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sat, 21 Sep 2024 14:24:11 +0200 Subject: [PATCH 1/9] [GlobalISel] Import extract/insert subvector Tests are limited to fixed-length vectors. Test: AArch64/GlobalISel/irtranslator-subvector.ll Reference: https://llvm.org/docs/LangRef.html#llvm-vector-extract-intrinsic https://llvm.org/docs/LangRef.html#llvm-vector-insert-intrinsic --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 14 ++++ .../GlobalISel/irtranslator-subvector.ll | 78 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7ff8d2446eec5..a0649f712bd64 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2588,6 +2588,20 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))); return true; + case Intrinsic::vector_extract: { + ConstantInt *Index = cast(CI.getOperand(1)); + MIRBuilder.buildExtractSubvector(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getOperand(0)), + Index->getZExtValue()); + return true; + } + case Intrinsic::vector_insert: { + ConstantInt *Index = cast(CI.getOperand(2)); + MIRBuilder.buildInsertSubvector( + getOrCreateVReg(CI), getOrCreateVReg(*CI.getOperand(0)), + getOrCreateVReg(*CI.getOperand(1)), Index->getZExtValue()); + return true; + } case Intrinsic::prefetch: { Value *Addr = CI.getOperand(0); unsigned RW = cast(CI.getOperand(1))->getZExtValue(); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll new file mode 100644 index 0000000000000..bdcd8e3d99af8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s + +define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { + ; CHECK-LABEL: name: extract_v4i32_vector_insert_const + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $q0, $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_INSERT_SUBVECTOR [[COPY]], [[COPY1]](<2 x s32>), 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[INSERT_SUBVECTOR]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0) + %d = extractelement <4 x i32> %vector, i32 1 + ret i32 %d +} + +define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) { + ; CHECK-LABEL: name: extract_v4i32_vector_insert + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $q0, $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_INSERT_SUBVECTOR [[COPY]], [[COPY1]](<2 x s32>), 0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[INSERT_SUBVECTOR]](<4 x s32>), [[ZEXT]](s64) + ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0) + %d = extractelement <4 x i32> %vector, i32 %c + ret i32 %d +} + +define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) { + ; CHECK-LABEL: name: extract_v4i32_vector_extract + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $q0, $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT_SUBVECTOR [[COPY]](<4 x s32>), 0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[EXTRACT_SUBVECTOR]](<4 x s32>), [[ZEXT]](s64) + ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0) + %d = extractelement <4 x i32> %vector, i32 %c + ret i32 %d +} + +define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { + ; CHECK-LABEL: name: extract_v4i32_vector_extract_const + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $q0, $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT_SUBVECTOR [[COPY]](<4 x s32>), 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[EXTRACT_SUBVECTOR]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0) + %d = extractelement <4 x i32> %vector, i32 0 + ret i32 %d +} From 68a015296359b7555af88c30ba59a074eee6da87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 29 Sep 2024 13:29:17 +0200 Subject: [PATCH 2/9] address review comments --- .../llvm/CodeGen/GlobalISel/IRTranslator.h | 2 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 123 +++++++++++++-- .../GlobalISel/irtranslator-subvector.ll | 146 ++++++++++++++++-- 3 files changed, 246 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 2796ea4a86617..6fd05c8fddd5f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -546,8 +546,10 @@ class IRTranslator : public MachineFunctionPass { bool translateVAArg(const User &U, MachineIRBuilder &MIRBuilder); bool translateInsertElement(const User &U, MachineIRBuilder &MIRBuilder); + bool translateInsertVector(const User &U, MachineIRBuilder &MIRBuilder); bool translateExtractElement(const User &U, MachineIRBuilder &MIRBuilder); + bool translateExtractVector(const User &U, MachineIRBuilder &MIRBuilder); bool translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a0649f712bd64..abb52e11436c6 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2588,20 +2588,10 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))); return true; - case Intrinsic::vector_extract: { - ConstantInt *Index = cast(CI.getOperand(1)); - MIRBuilder.buildExtractSubvector(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getOperand(0)), - Index->getZExtValue()); - return true; - } - case Intrinsic::vector_insert: { - ConstantInt *Index = cast(CI.getOperand(2)); - MIRBuilder.buildInsertSubvector( - getOrCreateVReg(CI), getOrCreateVReg(*CI.getOperand(0)), - getOrCreateVReg(*CI.getOperand(1)), Index->getZExtValue()); - return true; - } + case Intrinsic::vector_extract: + return translateExtractVector(CI, MIRBuilder); + case Intrinsic::vector_insert: + return translateInsertVector(CI, MIRBuilder); case Intrinsic::prefetch: { Value *Addr = CI.getOperand(0); unsigned RW = cast(CI.getOperand(1))->getZExtValue(); @@ -3163,8 +3153,7 @@ bool IRTranslator::translateInsertElement(const User &U, if (auto *CI = dyn_cast(U.getOperand(2))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); - auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); - Idx = getOrCreateVReg(*NewIdxCI); + CI = ConstantInt::get(CI->getContext(), NewIdx); } } if (!Idx) @@ -3177,6 +3166,58 @@ bool IRTranslator::translateInsertElement(const User &U, return true; } +bool IRTranslator::translateInsertVector(const User &U, + MachineIRBuilder &MIRBuilder) { + Register Dst = getOrCreateVReg(U); + Register Vec = getOrCreateVReg(*U.getOperand(0)); + Register Elt = getOrCreateVReg(*U.getOperand(1)); + + ConstantInt *CI = cast(U.getOperand(2)); + unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits(); + + // Resize Index to preferred index width. + if (CI->getBitWidth() != PreferredVecIdxWidth) { + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); + CI = ConstantInt::get(CI->getContext(), NewIdx); + } + + // If it is a <1 x Ty> vector, we have to use other means. + if (auto *ResultType = dyn_cast(U.getOperand(1)->getType()); + ResultType && ResultType->getNumElements() == 1) { + if (auto *InputType = dyn_cast(U.getOperand(0)->getType()); + InputType && InputType->getNumElements() == 1) { + // We are inserting an illegal fixed vector into an illegal + // fixed vector, use the scalar as it is not a legal vector type + // in LLT. + return translateCopy(U, *U.getOperand(0), MIRBuilder); + } + if (auto *InputType = + dyn_cast(U.getOperand(0)->getType())) { + // We are inserting an illegal fixed vector into a fixed vector, use the + // scalar as it is not a legal vector type in LLT. + Register Idx = getOrCreateVReg(*CI); + MIRBuilder.buildInsertVectorElement(Dst, Vec, Elt, Idx); + return true; + } + if (auto *InputType = + dyn_cast(U.getOperand(0)->getType())) { + // We are inserting an illegal fixed vector into a scalable vector, use + // a scalar element insert. + LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + Register Idx = getOrCreateVReg(*CI); + auto ScaledIndex = MIRBuilder.buildMul( + VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx); + MIRBuilder.buildInsertVectorElement(Dst, Vec, Elt, ScaledIndex); + return true; + } + } + + MIRBuilder.buildInsertSubvector( + getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), + getOrCreateVReg(*U.getOperand(1)), CI->getZExtValue()); + return true; +} + bool IRTranslator::translateExtractElement(const User &U, MachineIRBuilder &MIRBuilder) { // If it is a <1 x Ty> vector, use the scalar as it is @@ -3205,6 +3246,56 @@ bool IRTranslator::translateExtractElement(const User &U, return true; } +bool IRTranslator::translateExtractVector(const User &U, + MachineIRBuilder &MIRBuilder) { + Register Res = getOrCreateVReg(U); + Register Vec = getOrCreateVReg(*U.getOperand(0)); + ConstantInt *CI = cast(U.getOperand(1)); + unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits(); + + // Resize Index to preferred index width. + if (CI->getBitWidth() != PreferredVecIdxWidth) { + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); + CI = ConstantInt::get(CI->getContext(), NewIdx); + } + + // If it is a <1 x Ty> vector, we have to use other means. + if (auto *ResultType = dyn_cast(U.getType()); + ResultType && ResultType->getNumElements() == 1) { + if (auto *InputType = dyn_cast(U.getOperand(0)->getType()); + InputType && InputType->getNumElements() == 1) { + // We are extracting an illegal fixed vector from an illegal fixed vector, + // use the scalar as it is not a legal vector type in LLT. + return translateCopy(U, *U.getOperand(0), MIRBuilder); + } + if (auto *InputType = + dyn_cast(U.getOperand(0)->getType())) { + // We are extracting a fixed vector from a fixed vector, use the + // scalar as it is not a legal vector type in LLT. + Register Idx = getOrCreateVReg(*CI); + MIRBuilder.buildExtractVectorElement(Res, Vec, Idx); + return true; + } + if (auto *InputType = + dyn_cast(U.getOperand(0)->getType())) { + // We are extracting a fixed vector from a scalable vector, use + // a scalar element extract. + LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + Register Idx = getOrCreateVReg(*CI); + auto ScaledIndex = MIRBuilder.buildMul( + VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx); + MIRBuilder.buildExtractVectorElement(Res, Vec, ScaledIndex); + return true; + } + } + + ConstantInt *Index = cast(U.getOperand(1)); + MIRBuilder.buildExtractSubvector(getOrCreateVReg(U), + getOrCreateVReg(*U.getOperand(0)), + Index->getZExtValue()); + return true; +} + bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { // A ShuffleVector that operates on scalable vectors is a splat vector where diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll index bdcd8e3d99af8..7322f888e58fa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -stop-after=irtranslator -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { ; CHECK-LABEL: name: extract_v4i32_vector_insert_const @@ -58,21 +58,149 @@ entry: ret i32 %d } -define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { +define i32 @extract_v4i32_vector_extract_const( %a, i32 %c, ptr %p) { ; CHECK-LABEL: name: extract_v4i32_vector_extract_const ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $d1, $q0, $w0 + ; CHECK-NEXT: liveins: $w0, $x1, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_() = G_EXTRACT_SUBVECTOR [[COPY]](), 0 + ; CHECK-NEXT: G_STORE [[EXTRACT_SUBVECTOR]](), [[COPY2]](p0) :: (store () into %ir.p) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call @llvm.vector.extract( %a, i64 0) + store %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4i32_vector_insert_const_vscale( %a, %b, i32 %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4i32_vector_insert_const_vscale + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0, $x1, $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[COPY1]](), 0 + ; CHECK-NEXT: G_STORE [[INSERT_SUBVECTOR]](), [[COPY3]](p0) :: (store () into %ir.p) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call @llvm.vector.insert.nxv4i32.v4i32( %a, %b, i64 0) + store %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4i32_vector_extract_const_illegal_fixed(<4 x i32> %a, ptr %p) { + ; CHECK-LABEL: name: extract_v4i32_vector_extract_const_illegal_fixed + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $q0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <1 x i32> @llvm.vector.extract(<4 x i32> %a, i64 0) + store <1 x i32> %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4i32_vector_extract_const_illegal_scalable( %a, ptr %p) { + ; CHECK-LABEL: name: extract_v4i32_vector_extract_const_illegal_scalable + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $x0, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 1 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[VSCALE]], [[C]] + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[MUL]](s64) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <1 x i32> @llvm.vector.extract( %a, i64 0) + store <1 x i32> %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4i32_vector_insert_const_illegal_scalable( %a, <1 x i32> %b, i32 %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4i32_vector_insert_const_illegal_scalable + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $w0, $x1, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 1 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[VSCALE]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[UV]](s32), [[MUL]](s64) + ; CHECK-NEXT: G_STORE [[IVEC]](), [[COPY3]](p0) :: (store () into %ir.p) + ; CHECK-NEXT: $w0 = COPY [[C1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call @llvm.vector.insert.nxv4i32.v4i32( %a, <1 x i32> %b, i64 0) + store %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4i32_vector_insert_const_fixed(<4 x i32> %a, <1 x i32> %b, i32 %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4i32_vector_insert_const_fixed + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $q0, $w0, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT_SUBVECTOR [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[EXTRACT_SUBVECTOR]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[UV]](s32), [[C]](s64) + ; CHECK-NEXT: G_STORE [[IVEC]](<4 x s32>), [[COPY3]](p0) :: (store (<4 x s32>) into %ir.p) + ; CHECK-NEXT: $w0 = COPY [[C1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: - %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0) - %d = extractelement <4 x i32> %vector, i32 0 - ret i32 %d + %vector = call <4 x i32> @llvm.vector.insert.v4i32.v4i32(<4 x i32> %a, <1 x i32> %b, i64 0) + store <4 x i32> %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4i32_vector_insert_const_fixed_illegal(<1 x i32> %a, <1 x i32> %b, i32 %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4i32_vector_insert_const_fixed_illegal + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1, $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY3]](p0) :: (store (s32) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <1 x i32> @llvm.vector.insert.v1i32.v4i32(<1 x i32> %a, <1 x i32> %b, i64 0) + store <1 x i32> %vector, ptr %p, align 16 + ret i32 1 } From 391709b84458b8540a45274a428074123c8f0067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 29 Sep 2024 14:08:40 +0200 Subject: [PATCH 3/9] undo unrelated changes --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index abb52e11436c6..363563142f22f 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3153,7 +3153,8 @@ bool IRTranslator::translateInsertElement(const User &U, if (auto *CI = dyn_cast(U.getOperand(2))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); - CI = ConstantInt::get(CI->getContext(), NewIdx); + auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); + Idx = getOrCreateVReg(*NewIdxCI); } } if (!Idx) From dff0c03120aec2ccf1a48fe2d8bb091440f9f185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 30 Sep 2024 07:56:19 +0200 Subject: [PATCH 4/9] stylish fix --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 363563142f22f..a468ddc064b90 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3290,10 +3290,9 @@ bool IRTranslator::translateExtractVector(const User &U, } } - ConstantInt *Index = cast(U.getOperand(1)); MIRBuilder.buildExtractSubvector(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), - Index->getZExtValue()); + CI->getZExtValue()); return true; } From ab13ebea85138a6cacb46effa663c1086a03b996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 30 Sep 2024 16:49:29 +0200 Subject: [PATCH 5/9] downgrade dyn_cast to isa --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a468ddc064b90..664f30a47ab18 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3192,16 +3192,14 @@ bool IRTranslator::translateInsertVector(const User &U, // in LLT. return translateCopy(U, *U.getOperand(0), MIRBuilder); } - if (auto *InputType = - dyn_cast(U.getOperand(0)->getType())) { + if (isa(U.getOperand(0)->getType())) { // We are inserting an illegal fixed vector into a fixed vector, use the // scalar as it is not a legal vector type in LLT. Register Idx = getOrCreateVReg(*CI); MIRBuilder.buildInsertVectorElement(Dst, Vec, Elt, Idx); return true; } - if (auto *InputType = - dyn_cast(U.getOperand(0)->getType())) { + if (isa(U.getOperand(0)->getType())) { // We are inserting an illegal fixed vector into a scalable vector, use // a scalar element insert. LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); @@ -3269,16 +3267,14 @@ bool IRTranslator::translateExtractVector(const User &U, // use the scalar as it is not a legal vector type in LLT. return translateCopy(U, *U.getOperand(0), MIRBuilder); } - if (auto *InputType = - dyn_cast(U.getOperand(0)->getType())) { + if (isa(U.getOperand(0)->getType())) { // We are extracting a fixed vector from a fixed vector, use the // scalar as it is not a legal vector type in LLT. Register Idx = getOrCreateVReg(*CI); MIRBuilder.buildExtractVectorElement(Res, Vec, Idx); return true; } - if (auto *InputType = - dyn_cast(U.getOperand(0)->getType())) { + if (isa(U.getOperand(0)->getType())) { // We are extracting a fixed vector from a scalable vector, use // a scalar element extract. LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); From a8db95ba14effec7211f525bdea0bc8302b25290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 30 Sep 2024 17:20:00 +0200 Subject: [PATCH 6/9] improve docstrings --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 664f30a47ab18..40360b0b0f1d8 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3193,15 +3193,16 @@ bool IRTranslator::translateInsertVector(const User &U, return translateCopy(U, *U.getOperand(0), MIRBuilder); } if (isa(U.getOperand(0)->getType())) { - // We are inserting an illegal fixed vector into a fixed vector, use the - // scalar as it is not a legal vector type in LLT. + // We are inserting an illegal fixed vector into a legal fixed + // vector, use the scalar as it is not a legal vector type in + // LLT. Register Idx = getOrCreateVReg(*CI); MIRBuilder.buildInsertVectorElement(Dst, Vec, Elt, Idx); return true; } if (isa(U.getOperand(0)->getType())) { - // We are inserting an illegal fixed vector into a scalable vector, use - // a scalar element insert. + // We are inserting an illegal fixed vector into a scalable + // vector, use a scalar element insert. LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); Register Idx = getOrCreateVReg(*CI); auto ScaledIndex = MIRBuilder.buildMul( @@ -3268,15 +3269,16 @@ bool IRTranslator::translateExtractVector(const User &U, return translateCopy(U, *U.getOperand(0), MIRBuilder); } if (isa(U.getOperand(0)->getType())) { - // We are extracting a fixed vector from a fixed vector, use the - // scalar as it is not a legal vector type in LLT. + // We are extracting an illegal fixed vector from a legal fixed + // vector, use the scalar as it is not a legal vector type in + // LLT. Register Idx = getOrCreateVReg(*CI); MIRBuilder.buildExtractVectorElement(Res, Vec, Idx); return true; } if (isa(U.getOperand(0)->getType())) { - // We are extracting a fixed vector from a scalable vector, use - // a scalar element extract. + // We are extracting an illegal fixed vector from a scalable + // vector, use a scalar element extract. LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); Register Idx = getOrCreateVReg(*CI); auto ScaledIndex = MIRBuilder.buildMul( From 45ff9f2b2b187328fd8de949b0f0bda75d385819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 30 Sep 2024 17:48:14 +0200 Subject: [PATCH 7/9] float and ptr tests --- .../GlobalISel/irtranslator-subvector.ll | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll index 7322f888e58fa..b35c1057219b7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll @@ -20,6 +20,47 @@ entry: ret i32 %d } +define double @extract_v4double_vector_insert_const(<4 x double> %a, <2 x double> %b, i32 %c) { + ; CHECK-LABEL: name: extract_v4double_vector_insert_const + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $q0, $q1, $q2, $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<4 x s64>) = G_INSERT_SUBVECTOR [[CONCAT_VECTORS]], [[COPY2]](<2 x s64>), 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[INSERT_SUBVECTOR]](<4 x s64>), [[C]](s64) + ; CHECK-NEXT: $d0 = COPY [[EVEC]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %vector = call <4 x double> @llvm.vector.insert.v4double.v2double(<4 x double> %a, <2 x double> %b, i64 0) + %d = extractelement <4 x double> %vector, i32 1 + ret double %d +} + +define float @extract_v4float_vector_insert_const(<4 x float> %a, <2 x float> %b, i32 %c) { + ; CHECK-LABEL: name: extract_v4float_vector_insert_const + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d1, $q0, $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_INSERT_SUBVECTOR [[BITCAST]], [[COPY1]](<2 x s32>), 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[INSERT_SUBVECTOR]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: $s0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 +entry: + %vector = call <4 x float> @llvm.vector.insert.v4float.v2float(<4 x float> %a, <2 x float> %b, i64 0) + %d = extractelement <4 x float> %vector, i32 1 + ret float %d +} + define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) { ; CHECK-LABEL: name: extract_v4i32_vector_insert ; CHECK: bb.1.entry: @@ -77,6 +118,25 @@ entry: ret i32 1 } +define i32 @extract_v2double_vector_extract_const( %a, i32 %c, ptr %p) { + ; CHECK-LABEL: name: extract_v2double_vector_extract_const + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0, $x1, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_() = G_EXTRACT_SUBVECTOR [[COPY]](), 0 + ; CHECK-NEXT: G_STORE [[EXTRACT_SUBVECTOR]](), [[COPY2]](p0) :: (store () into %ir.p) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call @llvm.vector.extract( %a, i64 0) + store %vector, ptr %p, align 16 + ret i32 1 +} + define i32 @extract_v4i32_vector_insert_const_vscale( %a, %b, i32 %c, ptr %p) { ; CHECK-LABEL: name: extract_v4i32_vector_insert_const_vscale ; CHECK: bb.1.entry: @@ -204,3 +264,65 @@ entry: store <1 x i32> %vector, ptr %p, align 16 ret i32 1 } + +define i32 @extract_v4ptr_vector_insert_const_fixed_illegal(<1 x ptr> %a, <1 x ptr> %b, i32 %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4ptr_vector_insert_const_fixed_illegal + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1, $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: G_STORE [[COPY4]](p0), [[COPY3]](p0) :: (store (p0) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <1 x ptr> @llvm.vector.insert.v1ptr.v4ptr(<1 x ptr> %a, <1 x ptr> %b, i64 0) + store <1 x ptr> %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4ifloat_vector_insert_const_fixed_illegal(<1 x float> %a, <1 x float> %b, float %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4ifloat_vector_insert_const_fixed_illegal + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1, $s2, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $s2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY3]](p0) :: (store (s32) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <1 x float> @llvm.vector.insert.v1float.v4float(<1 x float> %a, <1 x float> %b, i64 0) + store <1 x float> %vector, ptr %p, align 16 + ret i32 1 +} + +define i32 @extract_v4iptr_vector_insert_const_fixed_illegal(<1 x ptr> %a, <1 x ptr> %b, ptr %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4iptr_vector_insert_const_fixed_illegal + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1, $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: G_STORE [[COPY4]](p0), [[COPY3]](p0) :: (store (p0) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <1 x ptr> @llvm.vector.insert.v1ptr.v4ptr(<1 x ptr> %a, <1 x ptr> %b, i64 0) + store <1 x ptr> %vector, ptr %p, align 16 + ret i32 1 +} From 2c0d294f2c3dac4eafc5cc75a43adbf99609441e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 30 Sep 2024 18:34:29 +0200 Subject: [PATCH 8/9] another ptr test --- .../GlobalISel/irtranslator-subvector.ll | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll index b35c1057219b7..369093dd6e0cf 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll @@ -326,3 +326,27 @@ entry: store <1 x ptr> %vector, ptr %p, align 16 ret i32 1 } + +define i32 @extract_v4iptr_vector_insert_const_fixed_legal(<4 x ptr> %a, <4 x ptr> %b, ptr %c, ptr %p) { + ; CHECK-LABEL: name: extract_v4iptr_vector_insert_const_fixed_legal + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $q0, $q1, $q2, $q3, $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY2]](<2 x s64>), [[COPY3]](<2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<4 x p0>) = G_INSERT_SUBVECTOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]](<4 x p0>), 0 + ; CHECK-NEXT: G_STORE [[INSERT_SUBVECTOR]](<4 x p0>), [[COPY5]](p0) :: (store (<4 x p0>) into %ir.p, align 16) + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %vector = call <4 x ptr> @llvm.vector.insert.v4ptr.v4ptr(<4 x ptr> %a, <4 x ptr> %b, i64 0) + store <4 x ptr> %vector, ptr %p, align 16 + ret i32 1 +} From 689c7bc4122f43d44f089beaaae0e9aa9cb49ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 30 Sep 2024 19:07:44 +0200 Subject: [PATCH 9/9] fix mangling --- .../CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll index 369093dd6e0cf..149bf72b053ef 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll @@ -36,7 +36,7 @@ define double @extract_v4double_vector_insert_const(<4 x double> %a, <2 x double ; CHECK-NEXT: $d0 = COPY [[EVEC]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 entry: - %vector = call <4 x double> @llvm.vector.insert.v4double.v2double(<4 x double> %a, <2 x double> %b, i64 0) + %vector = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 0) %d = extractelement <4 x double> %vector, i32 1 ret double %d } @@ -56,7 +56,7 @@ define float @extract_v4float_vector_insert_const(<4 x float> %a, <2 x float> %b ; CHECK-NEXT: $s0 = COPY [[EVEC]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $s0 entry: - %vector = call <4 x float> @llvm.vector.insert.v4float.v2float(<4 x float> %a, <2 x float> %b, i64 0) + %vector = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> %a, <2 x float> %b, i64 0) %d = extractelement <4 x float> %vector, i32 1 ret float %d } @@ -302,7 +302,7 @@ define i32 @extract_v4ifloat_vector_insert_const_fixed_illegal(<1 x float> %a, < ; CHECK-NEXT: $w0 = COPY [[C]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: - %vector = call <1 x float> @llvm.vector.insert.v1float.v4float(<1 x float> %a, <1 x float> %b, i64 0) + %vector = call <1 x float> @llvm.vector.insert.v1f32.v4f32(<1 x float> %a, <1 x float> %b, i64 0) store <1 x float> %vector, ptr %p, align 16 ret i32 1 }