Skip to content

[AArch64][GlobalISel] Make G_DUP immediate 32-bits or larger #96780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5536,7 +5536,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}

if (CV->getSplatValue()) {
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
APInt DefBits = APInt::getSplat(
DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
Expand Down
32 changes: 30 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
// Mapping ID assigned to instruction mappings that are handled by
// applyMappingImpl().
static constexpr unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
const TargetRegisterInfo &TRI) {
Expand Down Expand Up @@ -420,6 +421,27 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(2).setReg(Ext.getReg(0));
return applyDefaultMapping(OpdMapper);
}
case AArch64::G_DUP: {
  // The G_DUP source is a scalar narrower than 32 bits (asserted below).
  // Replace it with an s32 value on the GPR bank, then apply the default
  // mapping.
  const Register SrcReg = MI.getOperand(1).getReg();
  assert(MRI.getType(SrcReg).getSizeInBits() < 32 &&
         "Expected sources smaller than 32-bits");
  Builder.setInsertPt(*MI.getParent(), MI.getIterator());

  MachineInstr *SrcDef = MRI.getVRegDef(SrcReg);
  const bool SrcIsConstant = SrcDef->getOpcode() == TargetOpcode::G_CONSTANT;

  Register WideReg;
  if (SrcIsConstant) {
    // Constant source: build a fresh s32 G_CONSTANT holding the sign-extended
    // value instead of extending the narrow one.
    const auto &CstVal = SrcDef->getOperand(1).getCImm()->getValue();
    WideReg =
        Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
  } else {
    // Any other source: any-extend it to s32.
    WideReg = Builder.buildAnyExt(LLT::scalar(32), SrcReg).getReg(0);
  }
  MRI.setRegBank(WideReg, getRegBank(AArch64::GPRRegBankID));
  MI.getOperand(1).setReg(WideReg);

  // Only drop the narrow constant once nothing refers to it any more. It may
  // have users other than this G_DUP, and erasing it unconditionally would
  // leave them reading a register with no definition.
  if (SrcIsConstant && MRI.use_empty(SrcReg))
    SrcDef->eraseFromParent();

  return applyDefaultMapping(OpdMapper);
}
default:
llvm_unreachable("Don't know how to handle that operation");
}
Expand Down Expand Up @@ -774,8 +796,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
else {
if (ScalarTy.getSizeInBits() < 32 &&
getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank)
// Calls applyMappingImpl()
MappingID = CustomMappingID;
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
}
break;
}
case TargetOpcode::G_TRUNC: {
Expand Down Expand Up @@ -992,7 +1019,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// type to i32 in applyMappingImpl.
LLT Ty = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
MappingID = 1;
// Calls applyMappingImpl()
MappingID = CustomMappingID;
OpRegBankIdx[2] = PMI_FirstGPR;
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
;
; GISEL-LABEL: combine_vec_udiv_uniform:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI0_0
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; GISEL-NEXT: mov w8, #25645 // =0x642d
; GISEL-NEXT: dup v1.8h, w8
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
Expand Down
75 changes: 42 additions & 33 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ body: |

; CHECK-LABEL: name: v4s32_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
Expand All @@ -37,10 +38,11 @@ body: |

; CHECK-LABEL: name: v4s64_gpr
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
Expand All @@ -58,10 +60,11 @@ body: |

; CHECK-LABEL: name: v2s32_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $w0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
Expand All @@ -79,10 +82,11 @@ body: |

; CHECK-LABEL: name: v4s32_fpr
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $s0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
Expand All @@ -100,10 +104,11 @@ body: |

; CHECK-LABEL: name: v2s64_fpr
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
Expand All @@ -121,10 +126,11 @@ body: |

; CHECK-LABEL: name: v2s32_fpr
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $s0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
Expand All @@ -142,10 +148,11 @@ body: |

; CHECK-LABEL: name: v2s64_fpr_copy
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%6:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %6(<2 x s64>)
Expand All @@ -163,11 +170,13 @@ body: |

; CHECK-LABEL: name: v416s8_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%trunc:_(s8) = G_TRUNC %0(s32)
%1:_(<16 x s8>) = G_DUP %trunc(s8)
Expand Down
34 changes: 9 additions & 25 deletions llvm/test/CodeGen/AArch64/aarch64-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
;
; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: mov w8, #-999 // =0xfffffc19
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp3 = sext <8 x i8> %arg to <8 x i16>
Expand Down Expand Up @@ -1088,29 +1088,13 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {

define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: mov w8, #999 // =0x3e7
; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEON-NEXT: dup v1.8h, w8
; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: mov w8, #999 // =0x3e7
; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SVE-NEXT: dup v1.8h, w8
; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #999 // =0x3e7
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
ret <8 x i16> %tmp4
Expand Down
77 changes: 17 additions & 60 deletions llvm/test/CodeGen/AArch64/neon-mov.ll
Original file line number Diff line number Diff line change
Expand Up @@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() {
}

define <4 x i32> @movi4s_fneg() {
; CHECK-NOFP16-SD-LABEL: movi4s_fneg:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8
; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
; CHECK-NOFP16-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: movi4s_fneg:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8
; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi4s_fneg:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: movi4s_fneg:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4s, #240, lsl #8
; CHECK-NEXT: fneg v0.4s, v0.4s
; CHECK-NEXT: ret
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
}

Expand Down Expand Up @@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() {
; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8
; CHECK-NOFP16-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: mvni8h_neg:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: movi v0.8h, #240
; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h
; CHECK-FP16-SD-NEXT: ret
; CHECK-FP16-LABEL: mvni8h_neg:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: movi v0.8h, #240
; CHECK-FP16-NEXT: fneg v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
; CHECK-NOFP16-GI-NEXT: mov w8, #-32528 // =0xffff80f0
; CHECK-NOFP16-GI-NEXT: dup v0.8h, w8
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: mvni8h_neg:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: movi v0.8h, #240
; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}

Expand Down Expand Up @@ -494,29 +470,11 @@ define <2 x double> @fmov2d() {
}

define <2 x double> @fmov2d_neg0() {
; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d
; CHECK-NOFP16-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: fmov2d_neg0:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: fmov2d_neg0:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: fmov2d_neg0:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: fneg v0.2d, v0.2d
; CHECK-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}

Expand Down Expand Up @@ -581,5 +539,4 @@ define <2 x i32> @movi1d() {
ret <2 x i32> %1
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-FP16: {{.*}}
; CHECK-NOFP16: {{.*}}
Loading