diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bd802bd4b173a..477b31cb776e6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4129,6 +4129,12 @@ class TargetLowering : public TargetLoweringBase {
     return true;
   }
 
+  /// GlobalISel - return true if it's profitable to perform the combine:
+  /// shl ([sza]ext x), y => zext (shl x, y)
+  virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
+    return true;
+  }
+
   // Return AndOrSETCCFoldKind::{AddAnd, ABS} if its desirable to try and
   // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
   // writing this) is:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2ce6895042409..f79944e824575 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1719,6 +1719,8 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                              RegisterImmPair &MatchData) {
   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+  if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
+    return false;
 
   Register LHS = MI.getOperand(1).getReg();
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e015f68dabc69..bdde4b5e8e00f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -690,6 +690,10 @@ class AArch64TargetLowering : public TargetLowering {
   bool isDesirableToCommuteWithShift(const SDNode *N,
                                      CombineLevel Level) const override;
 
+  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
+    return false;
+  }
+
   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll
new file mode 100644
index 0000000000000..ab009cb7cc0e3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc %s -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.mszip_stream = type { i32, i32, i8, i32, ptr, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, [288 x i8], [32 x i8], [1152 x i16], [128 x i16], [32768 x i8], ptr, ptr }
+
+define i16 @test(i32 %bit_buffer.6.lcssa, ptr %zip, ptr %.out) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w8, w0, #0x1ff
+; CHECK-NEXT:    add x8, x1, w8, uxtw #1
+; CHECK-NEXT:    ldrh w0, [x8, #412]
+; CHECK-NEXT:    ret
+  %and274 = and i32 %bit_buffer.6.lcssa, 511
+  %idxprom275 = zext i32 %and274 to i64
+  %arrayidx276 = getelementptr inbounds %struct.mszip_stream, ptr %zip, i64 0, i32 19, i64 %idxprom275
+  %ld = load i16, ptr %arrayidx276, align 2
+  ret i16 %ld
+}