diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bd802bd4b173a..477b31cb776e6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4129,6 +4129,14 @@ class TargetLowering : public TargetLoweringBase {
     return true;
   }
 
+  /// GlobalISel - return true if it's profitable to perform the combine:
+  /// shl ([sza]ext x), y => zext (shl x, y)
+  /// \p MI is the G_SHL being matched. When this returns false,
+  /// CombinerHelper::matchCombineShlOfExtend does not match.
+  virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
+    return true;
+  }
+
   // Return AndOrSETCCFoldKind::{AddAnd, ABS} if its desirable to try and
   // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
   // writing this) is:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2ce6895042409..f79944e824575 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1719,6 +1719,9 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                              RegisterImmPair &MatchData) {
   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+  // Let the target veto pulling the extend out of the shift.
+  if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
+    return false;
 
   Register LHS = MI.getOperand(1).getReg();
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e015f68dabc69..bdde4b5e8e00f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -690,6 +690,14 @@ class AArch64TargetLowering : public TargetLowering {
   bool isDesirableToCommuteWithShift(const SDNode *N,
                                      CombineLevel Level) const override;
 
+  /// GlobalISel: do not rewrite shl (ext x), y into zext (shl x, y) on
+  /// AArch64. GlobalISel/no-reduce-shl-of-ext.ll expects the zext and the
+  /// scaling shift to be selected as one extended-register operand
+  /// ("add x8, x1, w8, uxtw #1").
+  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
+    return false;
+  }
+
   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll
new file mode 100644
index 0000000000000..ab009cb7cc0e3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc %s -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.mszip_stream = type { i32, i32, i8, i32, ptr, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, [288 x i8], [32 x i8], [1152 x i16], [128 x i16], [32768 x i8], ptr, ptr }
+
+define i16 @test(i32 %bit_buffer.6.lcssa, ptr %zip, ptr %.out) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w8, w0, #0x1ff
+; CHECK-NEXT:    add x8, x1, w8, uxtw #1
+; CHECK-NEXT:    ldrh w0, [x8, #412]
+; CHECK-NEXT:    ret
+  %and274 = and i32 %bit_buffer.6.lcssa, 511
+  %idxprom275 = zext i32 %and274 to i64
+  %arrayidx276 = getelementptr inbounds %struct.mszip_stream, ptr %zip, i64 0, i32 19, i64 %idxprom275
+  %ld = load i16, ptr %arrayidx276, align 2
+  ret i16 %ld
+}