Skip to content

[AArch64][GlobalISel] Reland Make G_DUP immediate 32-bits or larger (#96780) #99014

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2279,8 +2279,9 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
Register Dst = I.getOperand(0).getReg();
auto *CV = ConstantDataVector::getSplat(
MRI.getType(Dst).getNumElements(),
ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
ValAndVReg->Value));
ConstantInt::get(
Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
if (!emitConstantVector(Dst, CV, MIB, MRI))
return false;
I.eraseFromParent();
Expand Down Expand Up @@ -5559,7 +5560,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}

if (CV->getSplatValue()) {
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
APInt DefBits = APInt::getSplat(
DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
Expand Down
35 changes: 32 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
static const unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
const TargetRegisterInfo &TRI) {
Expand Down Expand Up @@ -424,6 +425,26 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(2).setReg(Ext.getReg(0));
return applyDefaultMapping(OpdMapper);
}
case AArch64::G_DUP: {
// Extend smaller gpr to 32-bits
assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
"Expected sources smaller than 32-bits");
Builder.setInsertPt(*MI.getParent(), MI.getIterator());

Register ConstReg;
auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
ConstReg =
Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
} else {
ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
.getReg(0);
}
MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
MI.getOperand(1).setReg(ConstReg);
return applyDefaultMapping(OpdMapper);
}
default:
llvm_unreachable("Don't know how to handle that operation");
}
Expand Down Expand Up @@ -792,8 +813,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
else {
if (ScalarTy.getSizeInBits() < 32 &&
getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
// Calls applyMappingImpl()
MappingID = CustomMappingID;
Comment on lines +819 to +820
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should wrap this in braces if you want the comment on its own line. Otherwise, you can append the comment to the end of the `MappingID = CustomMappingID;` line.

}
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
}
break;
}
case TargetOpcode::G_TRUNC: {
Expand Down Expand Up @@ -1014,8 +1041,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// If the type is i8/i16, and the regbank will be GPR, then we change the
// type to i32 in applyMappingImpl.
LLT Ty = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
MappingID = 1;
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
// Calls applyMappingImpl()
MappingID = CustomMappingID;
}
OpRegBankIdx[2] = PMI_FirstGPR;
}

Expand Down
75 changes: 42 additions & 33 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ body: |

; CHECK-LABEL: name: v4s32_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
Expand All @@ -37,10 +38,11 @@ body: |

; CHECK-LABEL: name: v4s64_gpr
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
Expand All @@ -58,10 +60,11 @@ body: |

; CHECK-LABEL: name: v2s32_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $w0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
Expand All @@ -79,10 +82,11 @@ body: |

; CHECK-LABEL: name: v4s32_fpr
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $s0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
Expand All @@ -100,10 +104,11 @@ body: |

; CHECK-LABEL: name: v2s64_fpr
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
Expand All @@ -121,10 +126,11 @@ body: |

; CHECK-LABEL: name: v2s32_fpr
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $s0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
Expand All @@ -142,10 +148,11 @@ body: |

; CHECK-LABEL: name: v2s64_fpr_copy
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%6:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %6(<2 x s64>)
Expand All @@ -163,11 +170,13 @@ body: |

; CHECK-LABEL: name: v416s8_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%trunc:_(s8) = G_TRUNC %0(s32)
%1:_(<16 x s8>) = G_DUP %trunc(s8)
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,22 @@ body: |
%dup:fpr(<2 x p0>) = G_DUP %cst(p0)
$q0 = COPY %dup(<2 x p0>)
RET_ReallyLR implicit $q0
...
---
# Instruction-selection test: a G_DUP whose source is an s32 G_CONSTANT
# (value 3) on the gpr bank and whose result is <4 x s16> on the fpr bank.
# The expected output below is a single MOVIv4i16 with operands 3, 0,
# i.e. the 32-bit constant is used as a 16-bit splat immediate.
name: cstv4i16gpri32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins:
; CHECK-LABEL: name: cstv4i16gpri32
; CHECK: %dup:fpr64 = MOVIv4i16 3, 0
; CHECK-NEXT: $d0 = COPY %dup
; CHECK-NEXT: RET_ReallyLR implicit $d0
%cst:gpr(s32) = G_CONSTANT i32 3
%dup:fpr(<4 x s16>) = G_DUP %cst(s32)
$d0 = COPY %dup(<4 x s16>)
RET_ReallyLR implicit $d0

...
67 changes: 47 additions & 20 deletions llvm/test/CodeGen/AArch64/arm64-dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
ret <4 x i32> %tmp4
}

; Builds a <4 x i16> splat of the constant 10 (insertelement into lane 0
; followed by a zero-mask shufflevector) and also stores the same i16
; constant 10 through %p. The %y argument is not used by the body.
; The expected assembly materializes the vector with movi.4h and the
; stored scalar with a separate mov/strh pair.
define <4 x i16> @v_dup16_const(i16 %y, ptr %p) {
; CHECK-LABEL: v_dup16_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4h v0, #10
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: strh w8, [x1]
; CHECK-NEXT: ret
%i = insertelement <4 x i16> undef, i16 10, i32 0
%lo = shufflevector <4 x i16> %i, <4 x i16> undef, <4 x i32> zeroinitializer
store i16 10, ptr %p
ret <4 x i16> %lo
}

define <4 x float> @v_dupQfloat(float %A) nounwind {
; CHECK-LABEL: v_dupQfloat:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -420,9 +433,9 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI33_0
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -443,9 +456,9 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -462,9 +475,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
Expand All @@ -481,9 +494,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
Expand All @@ -503,12 +516,12 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: disguised_dup:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI37_1
; CHECK-GI-NEXT: adrp x8, .LCPI38_1
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_1]
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: str q2, [x1]
Expand All @@ -531,8 +544,8 @@ define <2 x i32> @dup_const2(<2 x i32> %A) nounwind {
;
; CHECK-GI-LABEL: dup_const2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: ret
%tmp2 = add <2 x i32> %A, <i32 8421378, i32 8421378>
Expand All @@ -550,8 +563,8 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
;
; CHECK-GI-LABEL: dup_const4_ext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
; CHECK-GI-NEXT: add.4s v0, v0, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -575,12 +588,12 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
;
; CHECK-GI-LABEL: dup_const24:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI40_1
; CHECK-GI-NEXT: adrp x8, .LCPI41_1
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1]
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI41_1]
; CHECK-GI-NEXT: adrp x8, .LCPI41_0
; CHECK-GI-NEXT: add.2s v0, v0, v3
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0]
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI41_0]
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: add.4s v1, v2, v3
; CHECK-GI-NEXT: eor.16b v0, v1, v0
Expand Down Expand Up @@ -687,3 +700,17 @@ define <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
ret <8 x i16> %r
}

; Returns a <4 x i16> vector whose four lanes are all the constant 9211
; (0x23fb). The two check prefixes below expect different code:
; the SD prefix expects a scalar mov of 9211 followed by dup.4h, while the
; GI prefix expects the vector to be loaded via adrp/ldr from the
; .LCPI50_0 label (presumably a constant-pool entry).
define <4 x i16> @dup_i16_v4i16_constant() {
; CHECK-SD-LABEL: dup_i16_v4i16_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, #9211 // =0x23fb
; CHECK-SD-NEXT: dup.4h v0, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dup_i16_v4i16_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI50_0
; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI50_0]
; CHECK-GI-NEXT: ret
ret <4 x i16> <i16 9211, i16 9211, i16 9211, i16 9211>
}
Loading
Loading