From 8645bdc6e551feec3602593e74ca30847f0f565e Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sun, 10 Aug 2025 13:08:06 +0100
Subject: [PATCH 1/5] [LV] Use shl for (VFxUF * vscale) when creating minimum
 iter check.

Directly emit shl instead of a multiply if VFxUF is a power-of-2. The
main motivation here is to prepare the code and tests for directly
generating and expanding a SCEV expression of the minimum iteration
count. SCEVExpander will directly emit shl for multiplies with
powers-of-2. InstCombine also performs this combine, so end-to-end this
should effectively be NFC. (A standalone sketch of the rewrite follows
the diff below.)
---
 .../Transforms/Vectorize/LoopVectorize.cpp | 19 +++--
 .../AArch64/divs-with-scalable-vfs.ll | 4 +-
 .../AArch64/eliminate-tail-predication.ll | 2 +-
 .../gather-do-not-vectorize-addressing.ll | 2 +-
 .../AArch64/induction-costs-sve.ll | 2 +-
 .../AArch64/interleave-with-gaps.ll | 4 +-
 .../AArch64/interleaving-reduction.ll | 2 +-
 .../AArch64/low_trip_count_predicates.ll | 8 +--
 .../LoopVectorize/AArch64/masked-call.ll | 2 +-
 .../AArch64/outer_loop_prefer_scalable.ll | 28 ++++----
 .../AArch64/partial-reduce-chained.ll | 36 +++++-----
 .../partial-reduce-dot-product-mixed.ll | 8 +--
 .../AArch64/partial-reduce-dot-product.ll | 58 +++++++--------
 .../AArch64/partial-reduce-sub.ll | 6 +-
 .../LoopVectorize/AArch64/partial-reduce.ll | 16 ++---
 .../AArch64/pr60831-sve-inv-store-crash.ll | 2 +-
 .../AArch64/reduction-recurrence-costs-sve.ll | 6 +-
 .../AArch64/scalable-avoid-scalarization.ll | 2 +-
 .../AArch64/scalable-reduction-inloop-cond.ll | 4 +-
 .../AArch64/scalable-strict-fadd.ll | 30 ++++----
 .../AArch64/simple_early_exit.ll | 4 +-
 .../AArch64/single-early-exit-interleave.ll | 2 +-
 .../LoopVectorize/AArch64/store-costs-sve.ll | 4 +-
 .../sve-epilog-vect-inloop-reductions.ll | 2 +-
 .../AArch64/sve-epilog-vect-reductions.ll | 2 +-
 .../sve-epilog-vect-strict-reductions.ll | 2 +-
 .../LoopVectorize/AArch64/sve-epilog-vect.ll | 32 ++++-----
 .../LoopVectorize/AArch64/sve-fneg.ll | 2 +-
 .../LoopVectorize/AArch64/sve-inv-store.ll | 4 +-
 .../AArch64/sve-live-out-pointer-induction.ll | 2 +-
 .../LoopVectorize/AArch64/sve-multiexit.ll | 4 +-
 .../sve-runtime-check-size-based-threshold.ll | 2 +-
 .../LoopVectorize/AArch64/sve-tail-folding.ll | 2 +-
 .../AArch64/sve-vscale-based-trip-counts.ll | 14 ++--
 .../AArch64/sve-widen-extractvalue.ll | 2 +-
 .../LoopVectorize/AArch64/sve-widen-gep.ll | 4 +-
 .../AArch64/tail-folding-styles.ll | 2 +-
 .../LoopVectorize/RISCV/dead-ops-cost.ll | 2 +-
 .../first-order-recurrence-scalable-vf1.ll | 3 +-
 .../LoopVectorize/RISCV/fminimumnum.ll | 16 ++---
 .../LoopVectorize/RISCV/inloop-reduction.ll | 8 +--
 .../Transforms/LoopVectorize/RISCV/lmul.ll | 3 +-
 .../RISCV/partial-reduce-dot-product.ll | 16 ++---
 .../RISCV/riscv-vector-reverse.ll | 8 +--
 .../LoopVectorize/RISCV/scalable-tailfold.ll | 2 +-
 .../RISCV/tail-folding-bin-unary-ops-args.ll | 72 +++++++++----------
 .../RISCV/tail-folding-call-intrinsics.ll | 52 +++++++-------
 .../RISCV/tail-folding-cast-intrinsics.ll | 38 +++++-----
 .../RISCV/tail-folding-cond-reduction.ll | 16 ++---
 .../LoopVectorize/RISCV/tail-folding-div.ll | 8 +--
 .../tail-folding-fixed-order-recurrence.ll | 12 ++--
 .../RISCV/tail-folding-gather-scatter.ll | 2 +-
 .../RISCV/tail-folding-inloop-reduction.ll | 28 ++++----
 .../RISCV/tail-folding-interleave.ll | 14 ++--
 .../RISCV/tail-folding-intermediate-store.ll | 4 +-
 .../LoopVectorize/RISCV/tail-folding-iv32.ll | 2 +-
 .../RISCV/tail-folding-masked-loadstore.ll | 2 +-
 .../RISCV/tail-folding-ordered-reduction.ll | 2 +-
 .../RISCV/tail-folding-reduction.ll | 28 ++++----
 .../RISCV/tail-folding-reverse-load-store.ll | 4 +-
 .../RISCV/tail-folding-safe-dep-distance.ll | 4 +-
 .../LoopVectorize/RISCV/uniform-load-store.ll | 4 +-
 .../RISCV/vectorize-vp-intrinsics.ll | 2 +-
 .../first-order-recurrence-scalable-vf1.ll | 6 +-
 .../LoopVectorize/outer_loop_scalable.ll | 2 +-
 .../LoopVectorize/scalable-assume.ll | 6 +-
 .../scalable-first-order-recurrence.ll | 24 +++---
 .../LoopVectorize/scalable-iv-outside-user.ll | 2 +-
 .../LoopVectorize/scalable-lifetime.ll | 4 +-
 ...able-loop-unpredicated-body-scalar-tail.ll | 6 +-
 .../LoopVectorize/scalable-predication.ll | 2 +-
 .../scalable-reduction-inloop.ll | 2 +-
 .../scalable-trunc-min-bitwidth.ll | 4 +-
 .../vectorize-force-tail-with-evl.ll | 5 +-
 74 files changed, 379 insertions(+), 363 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 65ab8211e68c3..53f59e52771d1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2305,16 +2305,27 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
   Type *CountTy = Count->getType();
   Value *CheckMinIters = Builder.getFalse();
   auto CreateStep = [&]() -> Value * {
+    ElementCount VFTimesUF = VF.multiplyCoefficientBy(UF);
+    Value *VFxUF = nullptr;
+    if (!VFTimesUF.isScalable() ||
+        !isPowerOf2_64(VFTimesUF.getKnownMinValue())) {
+      VFxUF = createStepForVF(Builder, CountTy, VF, UF);
+    } else {
+      VFxUF = Builder.CreateShl(
+          Builder.CreateVScale(CountTy),
+          ConstantInt::get(CountTy, Log2_64(VFTimesUF.getKnownMinValue())), "",
+          true);
+    }
+
     // Create step with max(MinProTripCount, UF * VF).
-    if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
-      return createStepForVF(Builder, CountTy, VF, UF);
+    if (ElementCount::isKnownGE(VFTimesUF, MinProfitableTripCount))
+      return VFxUF;
 
     Value *MinProfTC =
         createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
     if (!VF.isScalable())
       return MinProfTC;
-    return Builder.CreateBinaryIntrinsic(
-        Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF));
+    return Builder.CreateBinaryIntrinsic(Intrinsic::umax, MinProfTC, VFxUF);
   };
 
   TailFoldingStyle Style = Cost->getTailFoldingStyle();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index bf72fea73d40b..4ecda056e6910 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -7,8 +7,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
 ; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 6a592edfc1d64..7363f86cdab7a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -10,7 +10,7 @@ define void @f1(ptr %A) #0 { ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index a8d0b37cac3c9..4ddf51b9bdd58 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -72,7 +72,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE: for.body.preheader: ; SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 ; SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SVE: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 3cef1f6e03ff9..b821593f7845f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -13,7 +13,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64 ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16 +; DEFAULT-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 4 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; DEFAULT: [[VECTOR_MEMCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 1c4b62183d939..20eb884261572 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -11,7 +11,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ITER_CHECK:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -147,7 +147,7 @@ define i64 
@main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ITER_CHECK:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index fa8d17c5c28fc..e018b2016c915 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -137,7 +137,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-LABEL: @interleave_integer_reduction( ; INTERLEAVE-4-VLA-NEXT: entry: ; INTERLEAVE-4-VLA-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; INTERLEAVE-4-VLA-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; INTERLEAVE-4-VLA-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE-4-VLA: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index bbc2e324941a1..635efad8c499a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -49,7 +49,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-VS1-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]] ; CHECK-VS1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-VS1-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 3 ; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]] ; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK-VS1: [[VECTOR_SCEVCHECK]]: @@ -64,7 +64,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK-VS1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-VS1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 16 +; CHECK-VS1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 4 ; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]] ; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS1: [[VECTOR_PH]]: @@ -149,7 +149,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-VS2-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]] ; CHECK-VS2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-VS2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK:%.*]] = 
icmp ult i64 [[TMP3]], [[TMP5]] ; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK-VS2: [[VECTOR_SCEVCHECK]]: @@ -164,7 +164,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK-VS2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-VS2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8 +; CHECK-VS2-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3 ; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]] ; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS2: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 7028678b338f0..8e6ac48a5cbc8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -904,7 +904,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFNONE-NEXT: [[ENTRY:.*]]: ; TFNONE-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; TFNONE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2 +; TFNONE-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 1 ; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index f284afc38788a..87a18ba2c18ea 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -10,7 +10,7 @@ define void @foo() { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -32,22 +32,22 @@ define void @foo() { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP7]], i32 4, splat (i1 true), poison) ; CHECK-NEXT: br label [[INNER_LOOP1:%.*]] ; CHECK: inner_loop1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP10:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP9:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP8]], i32 4, splat (i1 true), poison) -; CHECK-NEXT: [[TMP9]] = fmul [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] -; CHECK-NEXT: [[TMP10]] = add nuw nsw [[VEC_PHI]], splat (i64 1) -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq [[TMP10]], splat (i64 512) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[TMP11]], i32 0 -; CHECK-NEXT: br i1 [[TMP12]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] +; CHECK-NEXT: [[TMP8:%.*]] = phi [ zeroinitializer, [[VECTOR_BODY]] ], [ 
[[TMP12:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP11:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, [[TMP8]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, splat (i1 true), poison) +; CHECK-NEXT: [[TMP11]] = fmul [[TMP9]], [[WIDE_MASKED_GATHER2]] +; CHECK-NEXT: [[TMP12]] = add nuw nsw [[TMP8]], splat (i64 1) +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq [[TMP12]], splat (i64 512) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 +; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi [ [[TMP9]], [[INNER_LOOP1]] ] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI4]], [[TMP7]], i32 4, splat (i1 true)) +; CHECK-NEXT: [[TMP15:%.*]] = phi [ [[TMP11]], [[INNER_LOOP1]] ] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[TMP15]], [[TMP7]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index 7232fe5f019f2..330c22d736809 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -53,7 +53,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -94,7 +94,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -205,7 +205,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] 
to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -246,7 +246,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -357,7 +357,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -398,7 +398,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -513,7 +513,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -554,7 +554,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; 
CHECK-SVE-MAXBW: vector.ph: @@ -671,7 +671,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -714,7 +714,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -835,7 +835,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -878,7 +878,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -997,7 +997,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -1037,7 +1037,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; 
CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -1141,7 +1141,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -1177,7 +1177,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -1278,7 +1278,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -1318,7 +1318,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 368cb18e625f1..e14a32ff0fdb2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -10,7 +10,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 false, label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -56,7 +56,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NOI8MM-NEXT: entry: ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-NOI8MM: vector.ph: ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -125,7 +125,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -171,7 +171,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NOI8MM-NEXT: entry: ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-NOI8MM: vector.ph: ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index c1725cf498b74..e55cb19bbccf3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -11,7 +11,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -43,7 +43,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -89,7 +89,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = 
call i64 @llvm.vscale.i64() @@ -144,7 +144,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP7]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() @@ -178,7 +178,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP7]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 2 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() @@ -226,7 +226,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -285,7 +285,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP7]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() @@ -323,7 +323,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP7]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 2 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() @@ -375,7 +375,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: 
vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -794,7 +794,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -838,7 +838,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -888,7 +888,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -955,7 +955,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -993,7 +993,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1037,7 +1037,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1098,7 +1098,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr 
[[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP13]], 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP13]], 2 ; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: @@ -1164,7 +1164,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP13]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP13]], 3 ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]] ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: @@ -1286,7 +1286,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -1542,7 +1542,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -1578,7 +1578,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() @@ -1628,7 +1628,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1688,7 +1688,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], 
ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1721,7 +1721,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]] ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: @@ -1769,7 +1769,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -2076,7 +2076,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-MAXBW: for.ph: ; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -2209,7 +2209,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-MAXBW: for.ph: ; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -2271,7 +2271,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVE1: for.body.preheader: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 1 ; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: @@ -2312,7 +2312,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVED: for.body.preheader: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-INTERLEAVED-NEXT: 
[[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: @@ -2367,7 +2367,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-MAXBW: for.body.preheader: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll index 17da2afa5a2d1..6d0d6f67b7397 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll @@ -11,7 +11,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -44,7 +44,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -92,7 +92,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 9133798b270fd..c3a7f2c477db8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -61,7 +61,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 
[[TMP0]], 4
 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -245,7 +245,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
 ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-MAXBW-NEXT: entry:
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -341,7 +341,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
 ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-MAXBW-NEXT: entry:
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -437,7 +437,7 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
 ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-MAXBW-NEXT: entry:
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -719,7 +719,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
 ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2]] {
 ; CHECK-MAXBW-NEXT: entry:
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -815,7 +815,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
 ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-MAXBW-NEXT: entry:
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -932,7 +932,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
 ; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = sub i32 1024, [[D]]
 ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 16
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]]
 ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
@@ -1053,7 +1053,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
 ; CHECK-MAXBW-NEXT: entry:
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = sub i32 1024, [[D]]
 ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 16
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]]
 ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-MAXBW: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 3b43d528b28bd..94172dea8ee6a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -9,7 +9,7 @@ define void @test_invar_gep(ptr %dst) #0 {
 ; CHECK-LABEL: @test_invar_gep(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 29693785964d2..1d75b69722fb6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -51,7 +51,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
 ; VSCALEFORTUNING2-NEXT: [[ENTRY:.*]]:
 ; VSCALEFORTUNING2-NEXT: [[TMP0:%.*]] = add i64 [[Y]], 1
 ; VSCALEFORTUNING2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; VSCALEFORTUNING2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; VSCALEFORTUNING2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
 ; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VSCALEFORTUNING2: [[VECTOR_PH]]:
@@ -323,7 +323,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
 ; DEFAULT-NEXT: [[ENTRY:.*]]:
 ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
 ; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; DEFAULT-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
 ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; DEFAULT: [[VECTOR_PH]]:
@@ -379,7 +379,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
 ; VSCALEFORTUNING2-NEXT: [[ENTRY:.*]]:
 ; VSCALEFORTUNING2-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
 ; VSCALEFORTUNING2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; VSCALEFORTUNING2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; VSCALEFORTUNING2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
 ; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VSCALEFORTUNING2: [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index 8d8d427636029..62971b5ea3f88 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -15,7 +15,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 [[TMP0]])
 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[IDX]]
 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 1
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
index c78f68f4f7f0b..92b2a44967cb8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
@@ -6,7 +6,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture
 ; CHECK-LABEL: @cond_fadd(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -88,7 +88,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
 ; CHECK-LABEL: @cond_cmp_sel(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index d906918a1a741..060a0ded11f21 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -34,7 +34,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-UNORDERED-NEXT: entry:
 ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -77,7 +77,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-ORDERED-NEXT: entry:
 ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
@@ -202,7 +202,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-UNORDERED-NEXT: entry:
 ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -266,7 +266,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-ORDERED-NEXT: entry:
 ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
@@ -468,7 +468,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
 ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
 ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -536,7 +536,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
 ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
 ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
@@ -725,7 +725,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK-UNORDERED: for.body.preheader:
 ; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -782,7 +782,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-ORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK-ORDERED: for.body.preheader:
 ; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
@@ -945,7 +945,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-UNORDERED-NEXT: entry:
 ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -1000,7 +1000,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-ORDERED-NEXT: entry:
 ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
@@ -1165,7 +1165,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b,
 ; CHECK-UNORDERED-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-UNORDERED-NEXT: entry:
 ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -1300,7 +1300,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-UNORDERED-NEXT: entry:
 ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -1380,7 +1380,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-ORDERED-NEXT: entry:
 ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
@@ -1610,7 +1610,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-UNORDERED-NEXT: entry:
 ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
@@ -1690,7 +1690,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-ORDERED-NEXT: entry:
 ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 4a12aef61f607..1ff59bd3a4c15 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -14,7 +14,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
 ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -254,7 +254,7 @@ define i64 @loop_contains_safe_div() #1 {
 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
 ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
+; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2
 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP12]])
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
index cc838194309f4..e33b89dc4271e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
@@ -14,7 +14,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
 ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 64
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 6
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 510, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index c6e10c50c6f44..48c9941c842cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -10,12 +10,12 @@ define void @cost_store_i8(ptr %dst) #0 {
 ; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
 ; DEFAULT-NEXT: iter.check:
 ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]]
 ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; DEFAULT: vector.main.loop.iter.check:
 ; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP3]]
 ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; DEFAULT: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
index d89c5257a1303..c018c82476ceb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -11,7 +11,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
index 5e225328466dd..b0367fb152bff 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -11,7 +11,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
index 52117e3f03752..bcae394ffdb2e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
@@ -11,7 +11,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index a521bfad4a87c..4ebdf1f23612f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -22,12 +22,12 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-LABEL: @main_vf_vscale_x_16(
 ; CHECK-NEXT: iter.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024, [[TMP3]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -83,7 +83,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK-VF8: vector.main.loop.iter.check:
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
@@ -146,7 +146,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-LABEL: @main_vf_vscale_x_2_no_epi_iteration(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -178,7 +178,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK-VF8: vector.main.loop.iter.check:
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
@@ -258,7 +258,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -311,7 +311,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK-VF8: vector.main.loop.iter.check:
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
@@ -379,12 +379,12 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-LABEL: @test_pr57912_pointer_induction(
 ; CHECK-NEXT: iter.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 10000, [[TMP3]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -443,7 +443,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK-VF8: vector.main.loop.iter.check:
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
 ; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
@@ -513,11 +513,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
 ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -590,7 +590,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-VF8-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-VF8-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-VF8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -659,11 +659,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
 ; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
 ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -736,7 +736,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
 ; CHECK-VF8-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-VF8-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-VF8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
index 863dae7a5593a..f499665d322b3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
@@ -16,7 +16,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
 ; CHECK: for.body.preheader:
 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
index c8bbbdc64c631..7dc6a6aa890f9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -7,7 +7,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N)
 ; CHECK-LABEL: @inv_store_i16(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -54,7 +54,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64
 ; CHECK-LABEL: @cond_inv_store_i32(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
index 76f33cfd744ec..ff0c430a7a12a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -11,7 +11,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
index 26a164967add9..4610860538e0b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
@@ -16,7 +16,7 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 {
 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[N:%.*]], i32 999)
 ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
@@ -86,7 +86,7 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 {
 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[N:%.*]], i32 999)
 ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
index b8f44f691d447..034285204cc9b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
@@ -14,7 +14,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
 ; CHECK-NEXT: [[DST_21:%.*]] = ptrtoint ptr [[DST_2:%.*]] to i64
 ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index d4ffb0310fa2e..fcc7886a71cee 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -537,7 +537,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-LABEL: @simple_memset_trip1024(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index db941a33c4ac2..020308a4713b4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -10,7 +10,7 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
@@ -63,7 +63,7 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -129,7 +129,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
@@ -197,7 +197,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
@@ -278,7 +278,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
@@ -361,7 +361,7 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
@@ -443,7 +443,7 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index b007db973ba5c..0c6a490ddf4ba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -8,7 +8,7 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
 ; CHECK-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 1
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 1000, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 1012c100f9c83..1dd8dd531e172 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -18,7 +18,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
 ; CHECK-LABEL: @pointer_induction_used_as_vector(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
@@ -102,7 +102,7 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 1
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
index 6afeb4836ffda..4d816b23ef93f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
@@ -14,7 +14,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
 ; NONE-NEXT: entry:
 ; NONE-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
 ; NONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; NONE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; NONE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; NONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP1]]
 ; NONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; NONE: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 4b7c385d34959..29d901f084bdb 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -84,7 +84,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
 ; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 6, i32 [[TMP1]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 252, [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index 7eb3d7fc5a36d..62f101e3a8d7f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -9,7 +9,8 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
 ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP2]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
index 4ab95397579c2..8b6db703d8274 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
@@ -11,7 +11,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP8]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2
 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -72,7 +72,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
 ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -156,7 +156,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP8]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2
 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -217,7 +217,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
 ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -301,7 +301,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1
 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -362,7 +362,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
 ; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
 ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -446,7 +446,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1
 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -507,7 +507,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[INPUT12:%.*]] = ptrtoint ptr [[INPUT1]] to i64
 ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
 ; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
 ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 47af9775fe9e2..4fc9b781cb20b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -15,7 +15,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; OUTLOOP-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; OUTLOOP: for.body.preheader:
 ; OUTLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; OUTLOOP-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4
+; OUTLOOP-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2
 ; OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]]
 ; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; OUTLOOP: vector.ph:
@@ -65,7 +65,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; INLOOP-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; INLOOP: for.body.preheader:
 ; INLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 8
+; INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 3
 ; INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]]
 ; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INLOOP: vector.ph:
@@ -220,7 +220,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; OUTLOOP-LABEL: @smin(
 ; OUTLOOP-NEXT: entry:
 ; OUTLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; OUTLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; OUTLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; OUTLOOP: vector.ph:
@@ -266,7 +266,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; INLOOP-LABEL: @smin(
 ; INLOOP-NEXT: entry:
 ; INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INLOOP: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll
index 3ea37fcc7b654..8e99764243027 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll
@@ -9,7 +9,8 @@ define void @load_store(ptr %p) {
 ; LMUL1-LABEL: @load_store(
 ; LMUL1-NEXT: entry:
 ; LMUL1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; LMUL1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0
+; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
 ; LMUL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; LMUL1: vector.ph:
 ; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
index bbd78a4d280c7..dff6c793897da 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
@@ -13,7 +13,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
 ; V-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 ; V-NEXT: entry:
 ; V-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; V-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; V-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; V: vector.ph:
@@ -46,7 +46,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 ; ZVQDOTQ-NEXT: entry:
 ; ZVQDOTQ-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; ZVQDOTQ-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; ZVQDOTQ-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; ZVQDOTQ: vector.ph:
@@ -173,7 +173,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
 ; V-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; V-NEXT: entry:
 ; V-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; V-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; V-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; V: vector.ph:
@@ -206,7 +206,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; ZVQDOTQ-NEXT: entry:
 ; ZVQDOTQ-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; ZVQDOTQ-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; ZVQDOTQ-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; ZVQDOTQ: vector.ph:
@@ -333,7 +333,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; V-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; V-NEXT: entry:
 ; V-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; V-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; V-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; V: vector.ph:
@@ -366,7 +366,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; ZVQDOTQ-NEXT: entry:
 ; ZVQDOTQ-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; ZVQDOTQ-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; ZVQDOTQ-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; ZVQDOTQ: vector.ph:
@@ -492,7 +492,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; V-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; V-NEXT: entry:
 ; V-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; V-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; V-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; V: vector.ph:
@@ -525,7 +525,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; ZVQDOTQ-NEXT: entry:
 ; ZVQDOTQ-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; ZVQDOTQ-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; ZVQDOTQ-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; ZVQDOTQ: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 8c87132ba2071..e49a71eb5b930 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -102,7 +102,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 ; RV64-UF2-NEXT: [[ENTRY:.*]]:
 ; RV64-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; RV64-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]]
 ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2: [[VECTOR_PH]]:
@@ -326,7 +326,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2: [[FOR_BODY_PREHEADER]]:
 ; RV64-UF2-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; RV64-UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; RV64-UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
 ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
 ; RV64-UF2: [[VECTOR_SCEVCHECK]]:
@@ -587,7 +587,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2: [[FOR_BODY_PREHEADER]]:
 ; RV64-UF2-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; RV64-UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; RV64-UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
 ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
 ; RV64-UF2: [[VECTOR_SCEVCHECK]]:
@@ -787,7 +787,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
 ; RV64-UF2-NEXT: [[ENTRY:.*]]:
 ; RV64-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; RV64-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]]
 ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2: [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index fde4e31148281..4d1f4ce2aeb39 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -280,7 +280,7 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-LABEL: @uniform_load(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index ac57ddde43cab..e6ed26cfa1109 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -60,8 +60,8 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -176,8 +176,8 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -292,8 +292,8 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -408,8 +408,8 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -524,8 +524,8 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -640,8 +640,8 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -756,8 +756,8 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -872,8 +872,8 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -988,8 +988,8 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -1104,8 +1104,8 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -1220,8 +1220,8 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -1336,8 +1336,8 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16
+; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
@@ -1452,8 +1452,8 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
 ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
-; NO-VP-NEXT: [[TMP13:%.*]] = call i64
@llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 16 +; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -1571,8 +1571,8 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 -; NO-VP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP15]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -1689,8 +1689,8 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 -; NO-VP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP15]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -1807,8 +1807,8 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 -; NO-VP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP15]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -1925,8 +1925,8 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 -; NO-VP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP15]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -2096,8 +2096,8 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 -; NO-VP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw 
i64 [[TMP15]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll index 4ac782612f38f..0ce9ef650379d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll @@ -70,9 +70,9 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64 ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP19]]) +; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -209,9 +209,9 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64 ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP19]]) +; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -348,9 +348,9 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64 ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP19]]) +; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -487,9 +487,9 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64 ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 
@llvm.umax.i64(i64 16, i64 [[TMP19]]) +; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -617,9 +617,9 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP11]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP15]]) +; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP9]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -736,9 +736,9 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP11]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP15]]) +; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP9]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -859,9 +859,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP11]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP17]]) +; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -988,9 +988,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP11]], 4 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP17]]) +; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1113,8 +1113,8 
@@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP11]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index f69872c7208e2..fb71f6c187a36 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -59,7 +59,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP9]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -177,7 +177,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP9]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -295,7 +295,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP9]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -412,8 +412,8 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; NO-VP-LABEL: define void @vp_fpext( ; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP9]], 2 +; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 1 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -530,8 +530,8 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; NO-VP-LABEL: define void @vp_fptrunc( ; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], 
i64 [[N:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP9]], 2 +; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 1 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -650,8 +650,8 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP11]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -768,8 +768,8 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP11]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -886,8 +886,8 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP11]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -1004,8 +1004,8 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP11]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] @@ -1122,9 +1122,9 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP11]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call 
i64 @llvm.umax.i64(i64 16, i64 [[TMP15]]) +; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 1 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]]) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1236,7 +1236,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP13]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP13]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index 7701abfc6c862..8bfd93b53bc7d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -106,7 +106,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-OUTLOOP-NEXT: entry: ; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: @@ -154,7 +154,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-INLOOP-NEXT: entry: ; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: @@ -317,7 +317,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { ; NO-VP-OUTLOOP-NEXT: entry: ; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: @@ -369,7 +369,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { ; NO-VP-INLOOP-NEXT: entry: ; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; 
NO-VP-INLOOP: vector.ph: @@ -548,7 +548,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { ; NO-VP-OUTLOOP-NEXT: entry: ; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: @@ -606,7 +606,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { ; NO-VP-INLOOP-NEXT: entry: ; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: @@ -798,7 +798,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { ; NO-VP-OUTLOOP-NEXT: entry: ; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: @@ -860,7 +860,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { ; NO-VP-INLOOP-NEXT: entry: ; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll index fce20dc849404..6b7fd976cbe6c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll @@ -54,7 +54,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 +; NO-VP-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP12]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -164,7 +164,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 +; NO-VP-NEXT: 
[[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP12]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -273,7 +273,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 +; NO-VP-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP12]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -382,7 +382,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: ; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 +; NO-VP-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP12]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index 23a26394af07f..c0988380f8f13 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -62,7 +62,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[TC:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -191,7 +191,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -342,7 +342,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -445,7 +445,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 
%TC) { ; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] { ; IF-EVL-NEXT: [[ENTRY:.*]]: ; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP9]], 4 +; IF-EVL-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 ; IF-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: @@ -504,7 +504,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -636,7 +636,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; NO-VP-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll index 650ddd2aa4929..e16bb64073a06 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll @@ -58,7 +58,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde ; NO-VP-LABEL: @gather_scatter( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 2 +; NO-VP-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll index c80d4d4dd6226..517055a345406 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll @@ -49,7 +49,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @add( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -252,7 +252,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @or( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -348,7 +348,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @and( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -444,7 +444,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @xor( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -541,7 +541,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @smin( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -640,7 +640,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @smax( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -739,7 +739,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @umin( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -838,7 +838,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @umax( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -936,7 +936,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-LABEL: @fadd( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1141,7 +1141,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-LABEL: @fmin( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; 
NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1242,7 +1242,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-LABEL: @fmax( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1562,7 +1562,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-LABEL: @fmuladd( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1669,7 +1669,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-LABEL: @anyof_icmp( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1774,7 +1774,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-LABEL: @anyof_fcmp( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index 0255e9eda3b81..fbd4658e18b59 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -58,7 +58,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; NO-VP-LABEL: @interleave( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -193,7 +193,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; NO-VP-LABEL: @load_factor_4_with_gap( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -334,7 +334,7 @@ define void 
@store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; NO-VP-LABEL: @store_factor_4_with_gap( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -473,7 +473,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; NO-VP-LABEL: @load_factor_4_with_tail_gap( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -615,7 +615,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; NO-VP-LABEL: @store_factor_4_with_tail_gap( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -757,7 +757,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP0]], i64 0) ; NO-VP-NEXT: [[TMP1:%.*]] = sub i64 [[N]], [[SMIN]] ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -911,7 +911,7 @@ define void @store_factor_4_reverse(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 0) ; NO-VP-NEXT: [[TMP1:%.*]] = sub i32 [[TMP10]], [[SMIN]] ; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], [[TMP3]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll index 9737cdcbb3458..5e16468d5dd6c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll @@ -120,7 +120,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-OUTLOOP-NEXT: entry: ; NO-VP-OUTLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_MEMCHECK:%.*]] ; NO-VP-OUTLOOP: vector.memcheck: @@ -173,7 +173,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-VP-INLOOP-NEXT: entry: ; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; NO-VP-INLOOP: vector.memcheck: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll index ff8c62c70dc9d..dd048ba8a43b9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll @@ -44,7 +44,7 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; NO-VP-LABEL: @iv32( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP10]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll index f8ee0083e829b..042e54a694d21 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll @@ -55,7 +55,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-VP-LABEL: @masked_loadstore( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 +; NO-VP-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP12]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll index cfbed8ce2f674..a2b2b5bc05141 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll @@ -48,7 +48,7 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; NO-VP-LABEL: @fadd( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N1:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index 2fda90d664a97..326e543276be2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -49,7 +49,7 @@ define i32 
@add(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @add( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -255,7 +255,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @or( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -354,7 +354,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @and( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -453,7 +453,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @xor( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -555,7 +555,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @smin( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -661,7 +661,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @smax( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -767,7 +767,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @umin( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -873,7 +873,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-LABEL: @umax( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], 
label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -976,7 +976,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-LABEL: @fadd( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1185,7 +1185,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-LABEL: @fmin( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1291,7 +1291,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-LABEL: @fmax( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1614,7 +1614,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-LABEL: @fmuladd( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1721,7 +1721,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-LABEL: @anyof_icmp( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -1826,7 +1826,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-LABEL: @anyof_fcmp( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 854ca6e838c4d..c804fd9ceb7a9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -63,7 +63,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; NO-VP-LABEL: @reverse_load_store( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; 
NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -206,7 +206,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-LABEL: @reverse_load_store_masked( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[ENTRY:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index 14b2104c11596..6e810f71102d3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -50,7 +50,7 @@ define void @test(ptr %p) { ; NO-VP-LABEL: @test( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: @@ -370,7 +370,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; NO-VP-LABEL: @trivial_due_max_vscale( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 165606237ccec..7e10ce6def111 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -132,7 +132,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SCALABLE-NEXT: [[ENTRY:.*]]: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; SCALABLE: [[VECTOR_PH]]: @@ -208,7 +208,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; TF-SCALABLE-NEXT: [[ENTRY:.*]]: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; TF-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; TF-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; 
TF-SCALABLE: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 26d7c2cfd5b57..4da482f594350 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -51,7 +51,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-LABEL: @foo( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll index 40587c0c8b68c..2a531c8814827 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll @@ -9,7 +9,8 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) { ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() @@ -76,7 +77,8 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) { ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 31c32481cd9f0..84f19a19f3622 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -16,7 +16,7 @@ define void @foo() { ; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index e822b9212f063..111fbb5965639 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -6,7 +6,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias 
readonly captures(none) [[B:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1600, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -80,7 +80,7 @@ define void @test2(ptr %a, ptr noalias %b) { ; CHECK-NEXT: [[PTRINT2:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[PTRINT2]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1600, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -151,7 +151,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 6cfd955afaae0..8a20ecc5966c9 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -20,7 +20,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF1: [[VECTOR_MEMCHECK]]: @@ -83,7 +83,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF2: [[VECTOR_MEMCHECK]]: @@ -186,7 +186,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; CHECK-VF4UF1-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4 ; CHECK-VF4UF1-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 
2 ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: @@ -245,7 +245,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; CHECK-VF4UF2-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4 ; CHECK-VF4UF2-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8 +; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: @@ -357,7 +357,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 ; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF1: [[VECTOR_MEMCHECK]]: @@ -433,7 +433,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 ; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF2: [[VECTOR_MEMCHECK]]: @@ -548,7 +548,7 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF1-LABEL: define i64 @constant_folded_previous_value() { ; CHECK-VF4UF1-NEXT: [[ENTRY:.*]]: ; CHECK-VF4UF1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: @@ -574,7 +574,7 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2-LABEL: define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2-NEXT: [[ENTRY:.*]]: ; CHECK-VF4UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: @@ -622,7 +622,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF1-SAME: ptr [[CVAL:%.*]], i32 [[X:%.*]]) { ; CHECK-VF4UF1-NEXT: [[ENTRY:.*]]: ; CHECK-VF4UF1-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4 +; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 
2 ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 96, [[TMP1]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: @@ -668,7 +668,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2-SAME: ptr [[CVAL:%.*]], i32 [[X:%.*]]) { ; CHECK-VF4UF2-NEXT: [[ENTRY:.*]]: ; CHECK-VF4UF2-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 8 +; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 3 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 96, [[TMP1]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: @@ -739,7 +739,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF1-NEXT: [[ENTRY:.*]]: ; CHECK-VF4UF1-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A]], align 2 ; CHECK-VF4UF1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-VF4UF1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF1: [[VECTOR_MEMCHECK]]: @@ -796,7 +796,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF2-NEXT: [[ENTRY:.*]]: ; CHECK-VF4UF2-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A]], align 2 ; CHECK-VF4UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-VF4UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF2: [[VECTOR_MEMCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index e251c2a853b9a..65e33dfd7238e 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -8,7 +8,7 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 ; CHECK-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 2000, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll index bf14c871f282b..850d6b299ca23 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -12,7 +12,7 @@ define void @test(ptr %d) { ; CHECK-NEXT: [[ARR:%.*]] = alloca [1024 x i32], align 16 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]]) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 128, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -80,7 
+80,7 @@ define void @testloopvariant(ptr %d) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARR:%.*]] = alloca [1024 x i32], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 128, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll index ee6da8f528dd2..fdacc5f4f71ef 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -2,14 +2,14 @@ ; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF1 ; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF2 -; For an interleave factor of 2, vscale is scaled by 8 instead of 4. +; For an interleave factor of 2, vscale is scaled by 8 instead of 4 (and thus shifted left by 3 instead of 2). ; There is also the increment for the next iteration, e.g. instead of indexing IDXB, it indexes at IDXB + vscale * 4. define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) { ; CHECKUF1-LABEL: define void @loop( ; CHECKUF1-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECKUF1-NEXT: [[ENTRY:.*:]] ; CHECKUF1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECKUF1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECKUF1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECKUF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECKUF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECKUF1: [[VECTOR_PH]]: @@ -37,7 +37,7 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) { ; CHECKUF2-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECKUF2-NEXT: [[ENTRY:.*:]] ; CHECKUF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECKUF2-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECKUF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECKUF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECKUF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECKUF2: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll index 9a6a378f39376..af5796747e6f4 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll @@ -9,7 +9,7 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 -257, [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll 
b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index c2ae92e80ef69..a6be33edcf587 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -7,7 +7,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_add_trunc( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP31:%.*]] = mul nuw i32 [[TMP30]], 16 +; CHECK-NEXT: [[TMP31:%.*]] = shl nuw i32 [[TMP30]], 4 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 256, [[TMP31]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll index 4495ed62ac6b1..ae96fe58caa26 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll @@ -5,7 +5,7 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64 ; CHECK-LABEL: @trunc_minimal_bitwidth( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -63,7 +63,7 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len) ; CHECK-LABEL: @trunc_minimal_bitwidths_shufflevector( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[LEN:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll index 4af9767524a0f..124bc4ec595aa 100644 --- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll @@ -37,7 +37,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-LABEL: @foo( ; NO-VP-NEXT: entry: ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP0]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP0]], 2 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP8]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: @@ -82,7 +82,8 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-DEF-LABEL: @foo( ; NO-VP-DEF-NEXT: entry: ; NO-VP-DEF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-DEF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP0]] +; NO-VP-DEF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 +; NO-VP-DEF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP2]] ; NO-VP-DEF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-DEF: vector.ph: ; NO-VP-DEF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() From 
be5cde3f21094ff36f0709df36b46b74b386f0c0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 14 Aug 2025 10:07:28 +0100 Subject: [PATCH 2/5] !fixup move to createStepForVF. --- .../Transforms/Vectorize/LoopVectorize.cpp | 32 +++--- .../AArch64/conditional-branches-cost.ll | 2 +- .../AArch64/divs-with-scalable-vfs.ll | 6 +- .../AArch64/induction-costs-sve.ll | 6 +- .../AArch64/interleave-with-gaps.ll | 4 +- .../AArch64/interleaving-reduction.ll | 4 +- .../AArch64/low_trip_count_predicates.ll | 4 +- .../LoopVectorize/AArch64/masked-call.ll | 40 ++++---- .../LoopVectorize/AArch64/optsize_minsize.ll | 6 +- .../partial-reduce-dot-product-mixed.ll | 16 +-- .../AArch64/partial-reduce-dot-product.ll | 54 +++++----- .../AArch64/partial-reduce-sub.ll | 4 +- .../AArch64/reduction-recurrence-costs-sve.ll | 8 +- .../AArch64/scalable-strict-fadd.ll | 98 +++++++++---------- .../AArch64/single-early-exit-interleave.ll | 8 +- .../LoopVectorize/AArch64/store-costs-sve.ll | 6 +- .../sve-epilog-vect-inloop-reductions.ll | 2 +- .../AArch64/sve-epilog-vect-reductions.ll | 2 +- .../sve-epilog-vect-strict-reductions.ll | 2 +- .../LoopVectorize/AArch64/sve-epilog-vect.ll | 48 ++++----- .../LoopVectorize/AArch64/sve-fneg.ll | 4 +- .../AArch64/sve-inductions-unusual-types.ll | 4 +- .../AArch64/sve-live-out-pointer-induction.ll | 2 +- .../LoopVectorize/AArch64/sve-multiexit.ll | 8 +- .../sve-runtime-check-size-based-threshold.ll | 8 +- .../AArch64/sve-tail-folding-forced.ll | 2 +- .../AArch64/sve-tail-folding-reductions.ll | 12 +-- .../AArch64/sve-tail-folding-unroll.ll | 40 ++++---- .../LoopVectorize/AArch64/sve-tail-folding.ll | 18 ++-- .../AArch64/sve-vscale-based-trip-counts.ll | 30 +++--- .../AArch64/tail-folding-styles.ll | 2 +- .../first-order-recurrence-scalable-vf1.ll | 3 +- .../Transforms/LoopVectorize/RISCV/lmul.ll | 3 +- .../first-order-recurrence-scalable-vf1.ll | 6 +- .../LoopVectorize/scalable-assume.ll | 12 +-- .../scalable-first-order-recurrence.ll | 14 +-- .../LoopVectorize/scalable-iv-outside-user.ll | 2 +- ...able-loop-unpredicated-body-scalar-tail.ll | 4 +- .../scalable-reduction-inloop.ll | 2 +- .../vectorize-force-tail-with-evl.ll | 3 +- .../AArch64/sve-interleave-vectorization.ll | 2 +- 41 files changed, 263 insertions(+), 270 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 53f59e52771d1..3665af5b6a1d6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -824,7 +824,13 @@ namespace llvm { Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step) { assert(Ty->isIntegerTy() && "Expected an integer step"); - return B.CreateElementCount(Ty, VF.multiplyCoefficientBy(Step)); + if (!VF.isScalable() || !isPowerOf2_64(VF.getKnownMinValue()) || + !isPowerOf2_64(Step)) + return B.CreateElementCount(Ty, VF.multiplyCoefficientBy(Step)); + + return B.CreateShl( + B.CreateVScale(Ty), + ConstantInt::get(Ty, Log2_64((VF * Step).getKnownMinValue())), "", true); } /// Return the runtime value for VF. @@ -2298,34 +2304,26 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF, // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. 
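+  // Note: for a scalable VF, vscale * (VF.getKnownMinValue() * Step) equals
+  // vscale << Log2_64(VF.getKnownMinValue() * Step) whenever that product is
+  // a power of 2, which is what lets createStepForVF above emit the shl
+  // directly. The InstSimplifyFolder-backed builder set up below folds
+  // degenerate steps on creation, e.g. CreateShl(vscale, 0) for scalable
+  // VF 1, UF 1 simplifies back to the plain vscale call, which is presumably
+  // why the scalable-vf1 tests change again in this patch.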
BasicBlock *const TCCheckBlock = LoopVectorPreHeader; - IRBuilder<> Builder(TCCheckBlock->getTerminator()); + IRBuilder Builder( + TCCheckBlock->getContext(), + InstSimplifyFolder(TCCheckBlock->getDataLayout())); + Builder.SetInsertPoint(TCCheckBlock->getTerminator()); // If tail is to be folded, vector loop takes care of all iterations. Value *Count = getTripCount(); Type *CountTy = Count->getType(); Value *CheckMinIters = Builder.getFalse(); auto CreateStep = [&]() -> Value * { - ElementCount VFTimesUF = VF.multiplyCoefficientBy(UF); - Value *VFxUF = nullptr; - if (!VFTimesUF.isScalable() || - !isPowerOf2_64(VFTimesUF.getKnownMinValue())) { - VFxUF = createStepForVF(Builder, CountTy, VF, UF); - } else { - VFxUF = Builder.CreateShl( - Builder.CreateVScale(CountTy), - ConstantInt::get(CountTy, Log2_64(VFTimesUF.getKnownMinValue())), "", - true); - } - // Create step with max(MinProTripCount, UF * VF). - if (ElementCount::isKnownGE(VFTimesUF, MinProfitableTripCount)) - return VFxUF; + if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue()) + return createStepForVF(Builder, CountTy, VF, UF); Value *MinProfTC = createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); if (!VF.isScalable()) return MinProfTC; - return Builder.CreateBinaryIntrinsic(Intrinsic::umax, MinProfTC, VFxUF); + return Builder.CreateBinaryIntrinsic( + Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF)); }; TailFoldingStyle Style = Cost->getTailFoldingStyle(); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index a2c6a21796e8f..b0ccc14b9807c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -702,7 +702,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] ; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 257, [[TMP7]] ; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 4ecda056e6910..1819d7f561538 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -37,7 +37,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]] ; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 2 +; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1 ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP34]], align 8 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP39]], align 8 @@ -109,7 +109,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; 
CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 @@ -225,7 +225,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index b821593f7845f..137e07336fd50 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -36,7 +36,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] ; DEFAULT-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 8 +; DEFAULT-NEXT: [[TMP24:%.*]] = shl nuw i64 [[TMP23]], 3 ; DEFAULT-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP20]], i64 [[TMP24]] ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP20]], align 1 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP25]], align 1 @@ -54,7 +54,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP37:%.*]] = trunc [[TMP35]] to ; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; DEFAULT-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], 8 +; DEFAULT-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 3 ; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP38]], i64 [[TMP42]] ; DEFAULT-NEXT: store [[TMP36]], ptr [[TMP38]], align 1 ; DEFAULT-NEXT: store [[TMP37]], ptr [[TMP43]], align 1 @@ -104,7 +104,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 16 +; PRED-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 4 ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], [[TMP12]] ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], [[TMP12]] ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 20eb884261572..8d86de521b411 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -43,7 +43,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: br 
label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP15]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -179,7 +179,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP15]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index e018b2016c915..7ff4609f8ec4b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -154,10 +154,10 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] ; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP8]] ; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8 +; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 3 ; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP11]] ; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-4-VLA-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 12 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 635efad8c499a..110685e377dd7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -92,7 +92,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS1-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; CHECK-VS1-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 8 +; CHECK-VS1-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 3 ; CHECK-VS1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP27]] ; CHECK-VS1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK-VS1: [[VEC_EPILOG_PH]]: @@ -192,7 +192,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[IND_END4:%.*]] = add i64 
[[TMP0]], [[N_VEC]] ; CHECK-VS2-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; CHECK-VS2-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 4 +; CHECK-VS2-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 2 ; CHECK-VS2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP27]] ; CHECK-VS2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK-VS2: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 8e6ac48a5cbc8..58f2af73bd04c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -76,7 +76,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -86,7 +86,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) @@ -94,13 +94,13 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP16]], i32 8, [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: 
[[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) @@ -208,7 +208,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -218,7 +218,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) @@ -232,13 +232,13 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP14]], [[TMP16]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) @@ -365,7 +365,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; 
TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -375,7 +375,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) @@ -395,13 +395,13 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP20]], [[TMP22]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP25]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP23]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP26]], i32 8, [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP29]], i64 1025) @@ -660,7 +660,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -670,7 +670,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ 
[[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) @@ -678,13 +678,13 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP16]], i32 8, [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) @@ -831,7 +831,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 @@ -844,7 +844,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], 
poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) @@ -856,7 +856,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[TMP14]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP15]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP16]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) @@ -866,7 +866,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP24]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP22]], [[TMP23]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP25]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 29c5119f0b98d..410abfbc2f2b7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -495,7 +495,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] ; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -563,7 +563,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; OPTSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; OPTSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; OPTSIZE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; OPTSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; OPTSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] ; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -631,7 +631,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; MINSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; MINSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; MINSIZE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; 
 ; MINSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]]
 ; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
 ; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
index e14a32ff0fdb2..82ea025700c37 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
@@ -24,7 +24,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1
@@ -32,7 +32,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
 ; CHECK-NEXT: [[TMP12:%.*]] = zext [[WIDE_LOAD2]] to
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]]
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 1
@@ -70,7 +70,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
 ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8
+; CHECK-NOI8MM-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
 ; CHECK-NOI8MM-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]]
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1
@@ -78,7 +78,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
 ; CHECK-NOI8MM-NEXT: [[TMP12:%.*]] = zext [[WIDE_LOAD2]] to
 ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NOI8MM-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-NOI8MM-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-NOI8MM-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]]
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 1
@@ -139,7 +139,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1
@@ -147,7 +147,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
 ; CHECK-NEXT: [[TMP12:%.*]] = sext [[WIDE_LOAD2]] to
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]]
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 1
@@ -185,7 +185,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
 ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8
+; CHECK-NOI8MM-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
 ; CHECK-NOI8MM-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]]
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1
@@ -193,7 +193,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
 ; CHECK-NOI8MM-NEXT: [[TMP12:%.*]] = sext [[WIDE_LOAD2]] to
 ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NOI8MM-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-NOI8MM-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-NOI8MM-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]]
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1
 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index e55cb19bbccf3..938d58bf5e1e3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -57,7 +57,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX1]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP14]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP14]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP20]], i64 [[TMP10]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP20]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP11]], align 1
@@ -65,7 +65,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext [[WIDE_LOAD2]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX1]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP28]], i64 [[TMP27]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP28]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP18]], align 1
@@ -195,14 +195,14 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP12]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext [[WIDE_LOAD]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext [[WIDE_LOAD3]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 [[TMP18]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[NEXT_GEP1]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP19]], align 1
@@ -344,14 +344,14 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
 ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2
 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX2]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 [[TMP14]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 2
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP30]], align 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = zext [[WIDE_LOAD]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = zext [[WIDE_LOAD4]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 [[TMP20]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[NEXT_GEP3]], align 2
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP21]], align 2
@@ -850,7 +850,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 3
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP13]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP14]], align 1
@@ -858,7 +858,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = zext [[WIDE_LOAD1]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 3
 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP17]], i64 [[TMP20]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP17]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP21]], align 1
@@ -1005,13 +1005,13 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 3
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP13]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext [[WIDE_LOAD]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 3
 ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP19]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP20]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext [[WIDE_LOAD1]] to
@@ -1195,14 +1195,14 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP56]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP56]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[TMP20]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP21]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP66:%.*]] = sext [[WIDE_LOAD]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = sext [[WIDE_LOAD8]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP25]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 [[TMP26]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP2]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load , ptr [[TMP72]], align 1
@@ -1213,14 +1213,14 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP50]] = add [[TMP30]], [[VEC_PHI6]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP33]] = add [[TMP31]], [[VEC_PHI7]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = mul nuw i64 [[TMP35]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = shl nuw i64 [[TMP35]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP36]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD11:%.*]] = load , ptr [[TMP4]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP37]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = sext [[WIDE_LOAD11]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = sext [[WIDE_LOAD12]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 [[TMP42]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP5]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP43]], align 1
@@ -1231,14 +1231,14 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP48]] = add [[TMP46]], [[VEC_PHI4]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP49]] = add [[TMP47]], [[VEC_PHI5]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = shl nuw i64 [[TMP51]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP52]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load , ptr [[TMP7]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD16:%.*]] = load , ptr [[TMP53]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = sext [[WIDE_LOAD15]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = sext [[WIDE_LOAD16]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP58:%.*]] = mul nuw i64 [[TMP57]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP58:%.*]] = shl nuw i64 [[TMP57]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP58]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD17:%.*]] = load , ptr [[TMP8]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load , ptr [[TMP59]], align 1
@@ -1249,14 +1249,14 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP64]] = add [[TMP62]], [[VEC_PHI2]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP65]] = add [[TMP63]], [[VEC_PHI3]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = mul nuw i64 [[TMP67]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = shl nuw i64 [[TMP67]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP68]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load , ptr [[TMP10]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load , ptr [[TMP69]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = sext [[WIDE_LOAD19]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = sext [[WIDE_LOAD20]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = mul nuw i64 [[TMP73]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = shl nuw i64 [[TMP73]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 [[TMP74]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load , ptr [[TMP11]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD22:%.*]] = load , ptr [[TMP75]], align 1
@@ -1412,7 +1412,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
 ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
 ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
 ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
@@ -1449,7 +1449,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
@@ -1486,7 +1486,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
 ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
 ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
 ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
@@ -1592,7 +1592,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP15]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP15]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP10]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP11]], align 1
@@ -1600,7 +1600,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext [[WIDE_LOAD2]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP17]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP8]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP18]], align 1
@@ -1736,7 +1736,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[TMP10]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP11]], align 1
@@ -1745,7 +1745,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[INDEX]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[TMP18]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP19]], align 1
@@ -2332,14 +2332,14 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP15]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = zext [[WIDE_LOAD]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = zext [[WIDE_LOAD3]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 [[TMP21]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[NEXT_GEP2]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP22]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
index 6d0d6f67b7397..89b5689845612 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
@@ -58,7 +58,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[TMP10]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP11]], align 1
@@ -66,7 +66,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext [[WIDE_LOAD2]] to
 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2
 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP17]]
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP14]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP18]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 1d75b69722fb6..8b2da8c4a7047 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -185,7 +185,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
 ; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[X]], i64 0
 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP0]], [[TMP7]]
 ; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], [[TMP7]]
 ; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0
@@ -340,7 +340,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
 ; DEFAULT-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
 ; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX]]
 ; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4
+; DEFAULT-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2
 ; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP13]], i64 [[TMP17]]
 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP13]], align 2
 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP18]], align 2
@@ -396,7 +396,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
 ; VSCALEFORTUNING2-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
 ; VSCALEFORTUNING2-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX]]
 ; VSCALEFORTUNING2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
+; VSCALEFORTUNING2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
 ; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i64 [[TMP11]]
 ; VSCALEFORTUNING2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2
 ; VSCALEFORTUNING2-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 2
@@ -439,7 +439,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
 ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
 ; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8
+; PRED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
 ; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[TMP9]]
 ; PRED-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], [[TMP9]]
 ; PRED-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index 060a0ded11f21..9583c7f174333 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -123,7 +123,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
@@ -219,10 +219,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
 ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -280,10 +280,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
 ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
 ; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -327,15 +327,15 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -354,10 +354,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
@@ -376,10 +376,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP30]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], [[TMP29]])
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP31]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = shl nuw i64 [[TMP34]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 24
@@ -604,7 +604,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP6]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP2]], [[TMP6]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
@@ -842,7 +842,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[N]], [[TMP4]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP5]], i64 0
@@ -1058,7 +1058,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
@@ -1317,10 +1317,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
 ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -1331,10 +1331,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP13]], align 4
 ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
 ; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
 ; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
 ; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
@@ -1394,10 +1394,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
 ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
 ; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -1408,10 +1408,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 4
 ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
 ; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16
+; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
 ; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
 ; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
@@ -1461,15 +1461,15 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -1488,10 +1488,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
@@ -1502,10 +1502,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, [[ACTIVE_LANE_MASK8]], poison)
 ; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP25]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP28]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 24
@@ -1528,10 +1528,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], [[TMP43]])
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = shl nuw i64 [[TMP48]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], [[TMP49]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 24
@@ -1627,10 +1627,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
 ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -1641,10 +1641,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP13]], align 4
 ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
 ; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
 ; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
 ; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
@@ -1704,10 +1704,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
 ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
 ; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -1718,10 +1718,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 4
 ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
 ; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
 ; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16
+; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
 ; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
 ; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
@@ -1771,15 +1771,15 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
@@ -1798,10 +1798,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
@@ -1812,10 +1812,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, [[ACTIVE_LANE_MASK8]], poison)
 ; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP25]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP28]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 24
@@ -1838,10 +1838,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], [[TMP43]])
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3
 ; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = shl nuw i64 [[TMP48]], 4
 ; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], [[TMP49]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 24
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
index e33b89dc4271e..46493e0bcd417 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
@@ -29,10 +29,10 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP9]]
 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP11]], 32
+; CHECK-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP11]], 5
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP27]]
 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48
@@ -43,10 +43,10 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP28]], align 1
 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16
+; CHECK-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP19]]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 32
+; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 5
 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP22]]
 ; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 48
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index 48c9941c842cc..f6de370874d12 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -28,7 +28,7 @@ define void @cost_store_i8(ptr %dst) #0 {
 ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
 ; DEFAULT-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 16
+; DEFAULT-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4
 ; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP9]], i64 [[TMP23]]
 ; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP9]], align 1
 ; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP24]], align 1
@@ -41,7 +41,7 @@ define void @cost_store_i8(ptr %dst) #0 {
 ; DEFAULT: vec.epilog.iter.check:
 ; DEFAULT-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 101, [[N_VEC]]
 ; DEFAULT-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 8
+; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 3
 ; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP13]]
 ; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; DEFAULT: vec.epilog.ph:
@@ -82,7 +82,7 @@ define void @cost_store_i8(ptr %dst) #0 {
 ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
 ; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16
+; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
 ; PRED-NEXT: [[TMP9:%.*]] = sub i64 101, [[TMP8]]
 ; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 101, [[TMP8]]
 ; PRED-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
index c018c82476ceb..793813e55409e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -26,7 +26,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
index b0367fb152bff..44a8eba84a1d0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -26,7 +26,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
index bcae394ffdb2e..4c4c9e57b4ffb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
@@ -25,7 +25,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP17]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 4ebdf1f23612f..04b0075e5a5b2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -40,7 +40,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]]
 ; CHECK-NEXT: store splat (i8 1), ptr [[TMP14]], align 1
 ; CHECK-NEXT: store splat (i8 1), ptr [[TMP19]], align 1
@@ -53,7 +53,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 8
+; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 3
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]]
 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK: vec.epilog.ph:
@@ -96,7 +96,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 16
+; CHECK-VF8-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4
 ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-VF8-NEXT: store splat (i8 1), ptr [[TMP12]], align 1
 ; CHECK-VF8-NEXT: store splat (i8 1), ptr [[TMP17]], align 1
@@ -158,7 +158,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]]
 ; CHECK-NEXT: store splat (i64 1), ptr [[TMP6]], align 1
 ; CHECK-NEXT: store splat (i64 1), ptr [[TMP9]], align 1
@@ -191,7 +191,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
+; CHECK-VF8-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1
 ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]]
 ; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP6]], align 1
 ; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP9]], align 1
@@ -271,7 +271,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT: store splat (i64 1), ptr [[TMP12]], align 1
 ; CHECK-NEXT: store splat (i64 1), ptr [[TMP17]], align 1
@@ -324,7 +324,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 2
+; CHECK-VF8-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
 ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP12]], align 1
 ; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP17]], align 1
@@ -397,7 +397,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP18]]
 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 1
 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP19]], align 1
@@ -411,7 +411,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 8
+; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 3
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]]
 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK: vec.epilog.ph:
@@ -456,7 +456,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]]
 ; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 16
+; CHECK-VF8-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4
 ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP12]], align 1
 ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP17]], align 1
@@ -530,20 +530,20 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] =
getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP19:%.*]] = fmul [[WIDE_LOAD2]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]] ; CHECK-NEXT: store [[TMP18]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP22]], align 4 @@ -556,7 +556,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP25]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: @@ -602,20 +602,20 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP9]] ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4 +; CHECK-VF8-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 2 ; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP14]] ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 4 ; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 +; CHECK-VF8-NEXT: [[TMP19:%.*]] = shl 
nuw i64 [[TMP18]], 2 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]] ; CHECK-VF8-NEXT: store [[TMP16]], ptr [[TMP11]], align 4 ; CHECK-VF8-NEXT: store [[TMP17]], ptr [[TMP20]], align 4 @@ -676,20 +676,20 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP19:%.*]] = fmul [[WIDE_LOAD2]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]] ; CHECK-NEXT: store [[TMP18]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP22]], align 4 @@ -702,7 +702,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP25]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: @@ -748,20 +748,20 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP9]] ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4 +; CHECK-VF8-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 2 ; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, 
ptr [[TMP11]], i64 [[TMP14]] ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 4 ; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 +; CHECK-VF8-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 2 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]] ; CHECK-VF8-NEXT: store [[TMP16]], ptr [[TMP11]], align 4 ; CHECK-VF8-NEXT: store [[TMP17]], ptr [[TMP20]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index f499665d322b3..95153a4a59f1a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -36,7 +36,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP14]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 2 @@ -44,7 +44,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[TMP17:%.*]] = fneg [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 3 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP21]] ; CHECK-NEXT: store [[TMP16]], ptr [[TMP18]], align 2 ; CHECK-NEXT: store [[TMP17]], ptr [[TMP22]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index dd9d3a1fae223..731272675940e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -36,7 +36,7 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: [[TMP23:%.*]] = zext [[TMP19]] to ; CHECK-NEXT: [[TMP24:%.*]] = zext [[TMP20]] to ; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP27]] ; CHECK-NEXT: store [[TMP23]], ptr [[TMP21]], align 8 ; CHECK-NEXT: store [[TMP24]], ptr [[TMP28]], align 8 @@ -96,7 +96,7 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: [[TMP20:%.*]] = zext [[STEP_ADD]] to ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1 ; CHECK-NEXT: [[TMP26:%.*]] = 
getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP25]] ; CHECK-NEXT: store [[TMP19]], ptr [[TMP21]], align 8 ; CHECK-NEXT: store [[TMP20]], ptr [[TMP26]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index ff0c430a7a12a..9312306ce519a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -29,7 +29,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 2 +; CHECK-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 1 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP30]], align 8 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP35]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 4610860538e0b..ed49dc5a7573f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -38,13 +38,13 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 2 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4 +; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4 ; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 @@ -108,13 +108,13 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 2 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4 +; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 
[[TMP29]] ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4 ; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index 034285204cc9b..3c667ccbdedfc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -52,12 +52,12 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 2 +; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 1 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP29]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 8 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP30]], align 8 ; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], 2 +; CHECK-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP31]], i64 [[TMP33]] ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP31]], align 8 ; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP34]], align 8 @@ -66,12 +66,12 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP43:%.*]] = mul nuw i64 [[TMP42]], 2 +; CHECK-NEXT: [[TMP43:%.*]] = shl nuw i64 [[TMP42]], 1 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP43]] ; CHECK-NEXT: store [[TMP35]], ptr [[TMP41]], align 8 ; CHECK-NEXT: store [[TMP36]], ptr [[TMP44]], align 8 ; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], 2 +; CHECK-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 1 ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP45]], i64 [[TMP47]] ; CHECK-NEXT: store [[TMP35]], ptr [[TMP45]], align 8 ; CHECK-NEXT: store [[TMP36]], ptr [[TMP48]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 9636d0aaa43e7..ad9eefd8ee6a1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -49,7 +49,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index e17f69d4c3d22..821b9fbbda78f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -15,7 +15,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -60,7 +60,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -123,7 +123,7 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -167,7 +167,7 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -228,7 +228,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -285,7 +285,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-IN-LOOP-NEXT: 
[[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index cf8cad2298c24..90224caa68cd4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -13,15 +13,15 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 4 +; CHECK-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP25]], 2 ; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP26]] ; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 8 +; CHECK-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 3 ; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP28]] ; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP30:%.*]] = mul nuw i64 [[TMP29]], 12 @@ -41,10 +41,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]] ; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 4 +; CHECK-NEXT: [[TMP53:%.*]] = shl nuw i64 [[TMP52]], 2 ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP56:%.*]] = mul nuw i64 [[TMP55]], 8 +; CHECK-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 3 ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP59:%.*]] = mul nuw i64 [[TMP58]], 12 @@ -55,10 +55,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]]) ; CHECK-NEXT: [[INDEX_NEXT10]] = add i64 [[INDEX6]], [[TMP62]] ; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP64:%.*]] = mul nuw i64 [[TMP63]], 4 +; CHECK-NEXT: [[TMP64:%.*]] = shl nuw i64 [[TMP63]], 2 ; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX6]], [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP67:%.*]] = mul nuw i64 [[TMP66]], 8 +; CHECK-NEXT: [[TMP67:%.*]] = shl nuw i64 [[TMP66]], 3 ; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[INDEX6]], [[TMP67]] ; CHECK-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP70:%.*]] = mul nuw i64 [[TMP69]], 12 @@ -95,18 +95,18 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[UMAX:%.*]] = call i64 
@llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP84:%.*]] = mul nuw i64 [[TMP83]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 4 +; CHECK-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP25]], 2 ; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP26]] ; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 8 +; CHECK-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 3 ; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP28]] ; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP30:%.*]] = mul nuw i64 [[TMP29]], 12 @@ -126,10 +126,10 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[INDEX6]] ; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 4 +; CHECK-NEXT: [[TMP53:%.*]] = shl nuw i64 [[TMP52]], 2 ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP56:%.*]] = mul nuw i64 [[TMP55]], 8 +; CHECK-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 3 ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP59:%.*]] = mul nuw i64 [[TMP58]], 12 @@ -148,10 +148,10 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[TMP72:%.*]] = select [[ACTIVE_LANE_MASK9]], [[TMP64]], zeroinitializer ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]] ; CHECK-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP75:%.*]] = mul nuw i64 [[TMP74]], 4 +; CHECK-NEXT: [[TMP75:%.*]] = shl nuw i64 [[TMP74]], 2 ; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP75]] ; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP78:%.*]] = mul nuw i64 [[TMP77]], 8 +; CHECK-NEXT: [[TMP78:%.*]] = shl nuw i64 [[TMP77]], 3 ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP78]] ; CHECK-NEXT: [[TMP80:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP81:%.*]] = mul nuw i64 [[TMP80]], 12 @@ -160,12 +160,12 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP76]], i32 4, [[TMP70]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP79]], i32 4, [[TMP71]]) ; CHECK-NEXT: call void 
@llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP82]], i32 4, [[TMP72]]) -; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP84]] +; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP6]] ; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP86:%.*]] = mul nuw i64 [[TMP85]], 4 +; CHECK-NEXT: [[TMP86:%.*]] = shl nuw i64 [[TMP85]], 2 ; CHECK-NEXT: [[TMP87:%.*]] = add i64 [[INDEX6]], [[TMP86]] ; CHECK-NEXT: [[TMP88:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP89:%.*]] = mul nuw i64 [[TMP88]], 8 +; CHECK-NEXT: [[TMP89:%.*]] = shl nuw i64 [[TMP88]], 3 ; CHECK-NEXT: [[TMP90:%.*]] = add i64 [[INDEX6]], [[TMP89]] ; CHECK-NEXT: [[TMP91:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP92:%.*]] = mul nuw i64 [[TMP91]], 12 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index fcc7886a71cee..4cd7dac62e79f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -13,7 +13,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -103,7 +103,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -155,7 +155,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 @@ -212,7 +212,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -266,7 +266,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: 
[[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -319,7 +319,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -385,7 +385,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -435,7 +435,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 @@ -489,7 +489,7 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll index 020308a4713b4..8dafa3453dcf5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll @@ -69,19 +69,19 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP10]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[A]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-NEXT: 
[[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP15]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[B]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]] ; CHECK-NEXT: store [[TMP17]], ptr [[B]], align 4 ; CHECK-NEXT: store [[TMP18]], ptr [[TMP22]], align 4 @@ -210,20 +210,20 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP10]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4 +; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]] ; CHECK-NEXT: store [[TMP17]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store [[TMP18]], ptr [[TMP21]], align 4 @@ -291,20 +291,20 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP10]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 
[[TMP15]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4 +; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]] ; CHECK-NEXT: store [[TMP17]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store [[TMP18]], ptr [[TMP21]], align 4 @@ -374,20 +374,20 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP19:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]] ; CHECK-NEXT: store [[TMP18]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP22]], align 4 @@ -453,20 +453,20 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: 
[[TMP19:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]] ; CHECK-NEXT: store [[TMP18]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP22]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 4d816b23ef93f..24d23d81b1f92 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -171,7 +171,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index 62f101e3a8d7f..7eb3d7fc5a36d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -9,8 +9,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 { ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP2]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index 8e99764243027..3ea37fcc7b654 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -9,8 +9,7 @@ define void @load_store(ptr %p) { ; LMUL1-LABEL: @load_store( ; LMUL1-NEXT: entry: ; LMUL1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; LMUL1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 -; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]] +; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] ; LMUL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL1: vector.ph: ; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll index 2a531c8814827..40587c0c8b68c 100644 --- 
a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll @@ -9,8 +9,7 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) { ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP2]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() @@ -77,8 +76,7 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) { ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP2]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index 111fbb5965639..ad8cd425cd6c2 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -19,7 +19,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 @@ -33,7 +33,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: [[TMP15:%.*]] = fadd [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] ; CHECK-NEXT: store [[TMP14]], ptr [[TMP16]], align 4 ; CHECK-NEXT: store [[TMP15]], ptr [[TMP19]], align 4 @@ -95,7 +95,7 @@ define void @test2(ptr %a, ptr noalias %b) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP12]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 4 @@ -105,7 +105,7 @@ define void @test2(ptr %a, ptr noalias %b) { ; 
CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] ; CHECK-NEXT: store [[TMP14]], ptr [[TMP16]], align 4 ; CHECK-NEXT: store [[TMP15]], ptr [[TMP19]], align 4 @@ -176,7 +176,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[PREDPHI1:%.*]] = select [[TMP10]], splat (float 2.300000e+01), splat (float 4.200000e+01) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP13]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP14]], align 4 @@ -184,7 +184,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[TMP16:%.*]] = fmul [[PREDPHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP19]] ; CHECK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 ; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 8a20ecc5966c9..37ed28993cf69 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -110,7 +110,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]] ; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP21]] ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 4 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load , ptr [[TMP22]], align 4 @@ -120,7 +120,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = add [[WIDE_LOAD]], [[TMP23]] ; CHECK-VF4UF2-NEXT: [[TMP27:%.*]] = add [[WIDE_LOAD3]], [[TMP24]] ; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = mul nuw i64 [[TMP29]], 4 +; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = shl nuw i64 [[TMP29]], 2 ; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP30]] ; CHECK-VF4UF2-NEXT: store [[TMP26]], ptr [[TMP25]], align 4 ; CHECK-VF4UF2-NEXT: store [[TMP27]], ptr [[TMP31]], align 4 @@ -265,7 +265,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 
%n) { ; CHECK-VF4UF2-NEXT: [[VEC_PHI1:%.*]] = phi [ undef, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4 +; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2 ; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP13]] ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD2]] = load , ptr [[TMP14]], align 4 @@ -470,7 +470,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4 +; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 2 ; CHECK-VF4UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i64 [[TMP22]] ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 2, !alias.scope [[META6:![0-9]+]] ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD4]] = load , ptr [[TMP23]], align 2, !alias.scope [[META6]] @@ -486,7 +486,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF2-NEXT: [[TMP33:%.*]] = fsub fast [[TMP27]], [[TMP31]] ; CHECK-VF4UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-VF4UF2-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], 4 +; CHECK-VF4UF2-NEXT: [[TMP37:%.*]] = shl nuw i64 [[TMP36]], 2 ; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[TMP37]] ; CHECK-VF4UF2-NEXT: store [[TMP32]], ptr [[TMP34]], align 8, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; CHECK-VF4UF2-NEXT: store [[TMP33]], ptr [[TMP38]], align 8, !alias.scope [[META9]], !noalias [[META6]] @@ -826,7 +826,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP12]] ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 [[TMP16]] ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP13]], align 2, !alias.scope [[META17:![0-9]+]] ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load , ptr [[TMP17]], align 2, !alias.scope [[META17]] @@ -840,7 +840,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = mul nsw [[TMP23]], [[TMP21]] ; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-VF4UF2-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4 +; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2 ; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP29]] ; CHECK-VF4UF2-NEXT: store [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; CHECK-VF4UF2-NEXT: store [[TMP25]], ptr [[TMP30]], align 4, !alias.scope 
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index 65e33dfd7238e..cb61fc6e0a046 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -31,7 +31,7 @@ define i32 @iv_live_out_wide(ptr %dst) {
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
 ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
 ; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP10]], align 2
 ; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
index fdacc5f4f71ef..bdc8734c45f2e 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -50,7 +50,7 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) {
 ; CHECKUF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECKUF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]]
 ; CHECKUF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF2-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP8]], 4
+; CHECKUF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP8]], 2
 ; CHECKUF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 [[TMP16]]
 ; CHECKUF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP7]], align 8
 ; CHECKUF2-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x double>, ptr [[TMP9]], align 8
@@ -58,7 +58,7 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) {
 ; CHECKUF2-NEXT: [[TMP11:%.*]] = fadd <vscale x 4 x double> [[WIDE_LOAD3]], splat (double 1.000000e+00)
 ; CHECKUF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
 ; CHECKUF2-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF2-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP13]], 4
+; CHECKUF2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP13]], 2
 ; CHECKUF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP17]]
 ; CHECKUF2-NEXT: store <vscale x 4 x double> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECKUF2-NEXT: store <vscale x 4 x double> [[TMP11]], ptr [[TMP14]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index a6be33edcf587..83f1d30c64321 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -24,7 +24,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], splat (i32 255)
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 3
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP11]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
index 124bc4ec595aa..199bd5b31c70d 100644
--- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
@@ -82,8 +82,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP-DEF-LABEL: @foo(
 ; NO-VP-DEF-NEXT: entry:
 ; NO-VP-DEF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-DEF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 0
-; NO-VP-DEF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP2]]
+; NO-VP-DEF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP0]]
 ; NO-VP-DEF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; NO-VP-DEF: vector.ph:
 ; NO-VP-DEF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
index ff085fc9c59e3..cd7ca1e22b499 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
@@ -16,7 +16,7 @@ define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) {
 ; CHECK-LABEL: @interleave_deinterleave(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
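
[Editorial note between patches.] The test diffs above all encode the same
rewrite: a runtime step of the form vscale * C, with C a power of two, is now
emitted as a left shift, e.g. `mul nuw i64 %0, 4` becomes `shl nuw i64 %0, 2`.
A minimal C++ sketch of that emission pattern follows; the helper name
emitScalableStep and its standalone framing are assumptions for illustration,
not code from the patch:

    // Sketch: emit vscale * KnownMinCount, preferring shl for powers of two,
    // mirroring the pattern the series introduces in createStepForVF.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Support/MathExtras.h"

    using namespace llvm;

    static Value *emitScalableStep(IRBuilderBase &B, Type *CountTy,
                                   uint64_t KnownMinCount) {
      Value *VScale = B.CreateVScale(CountTy); // runtime vscale as CountTy
      if (isPowerOf2_64(KnownMinCount))
        // vscale << log2(C) == vscale * C for power-of-2 C; nuw matches the
        // flags on the multiplies being replaced in the tests above.
        return B.CreateShl(VScale,
                           ConstantInt::get(CountTy, Log2_64(KnownMinCount)),
                           "", /*HasNUW=*/true);
      return B.CreateMul(VScale, ConstantInt::get(CountTy, KnownMinCount), "",
                         /*HasNUW=*/true);
    }

The vectorize-force-tail-with-evl.ll hunk above also shows the boundary case:
with a known minimum count of 1 the shift amount would be 0, so the check now
compares %N against vscale directly.
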
From 9915e22e2e6f34fe0c9facec41f4e967c8b4c60a Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 14 Aug 2025 12:23:38 +0100
Subject: [PATCH 3/5] !fixup add assert, multiply once

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3665af5b6a1d6..344dc75f1e46e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -824,9 +824,10 @@ namespace llvm {
 Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                        int64_t Step) {
   assert(Ty->isIntegerTy() && "Expected an integer step");
-  if (!VF.isScalable() || !isPowerOf2_64(VF.getKnownMinValue()) ||
-      !isPowerOf2_64(Step))
-    return B.CreateElementCount(Ty, VF.multiplyCoefficientBy(Step));
+  ElementCount VFxStep = VF.multiplyCoefficientBy(Step);
+  assert(isPowerOf2_64(VF.getKnownMinValue()) && "must pass power-of-2 VF");
+  if (!VF.isScalable() || !isPowerOf2_64(Step))
+    return B.CreateElementCount(Ty, VFxStep);
 
   return B.CreateShl(
       B.CreateVScale(Ty),
@@ -2319,7 +2320,7 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
       return createStepForVF(Builder, CountTy, VF, UF);
 
     Value *MinProfTC =
-        createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
+        Builder.CreateElementCount(CountTy, MinProfitableTripCount);
     if (!VF.isScalable())
       return MinProfTC;
     return Builder.CreateBinaryIntrinsic(
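
[Editorial note between patches.] After the fixup above, the minimum
profitable trip count is built with Builder.CreateElementCount and only
clamped against the VF * UF step when the VF is scalable. A hedged sketch of
that clamp; the helper name clampToMinProfitable and its parameters are
illustrative, not the patch's interface:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"

    using namespace llvm;

    static Value *clampToMinProfitable(IRBuilderBase &B, Value *VFxUF,
                                       Value *MinProfTC, bool ScalableVF) {
      // For a fixed VF both operands are compile-time constants, and this
      // path is only reached when the minimum profitable count is the
      // larger of the two.
      if (!ScalableVF)
        return MinProfTC;
      // For a scalable VF the larger value depends on vscale, so emit a
      // umax of the two steps.
      return B.CreateBinaryIntrinsic(Intrinsic::umax, MinProfTC, VFxUF);
    }
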
From f256d83a781d4020f692acab8fdc9a3c2b162546 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 14 Aug 2025 12:34:30 +0100
Subject: [PATCH 4/5] !fixup adjust order

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 344dc75f1e46e..c3472c48b63d4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -826,12 +826,13 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
   assert(Ty->isIntegerTy() && "Expected an integer step");
   ElementCount VFxStep = VF.multiplyCoefficientBy(Step);
   assert(isPowerOf2_64(VF.getKnownMinValue()) && "must pass power-of-2 VF");
-  if (!VF.isScalable() || !isPowerOf2_64(Step))
-    return B.CreateElementCount(Ty, VFxStep);
-
-  return B.CreateShl(
-      B.CreateVScale(Ty),
-      ConstantInt::get(Ty, Log2_64((VF * Step).getKnownMinValue())), "", true);
+  if (VF.isScalable() && isPowerOf2_64(Step)) {
+    return B.CreateShl(
+        B.CreateVScale(Ty),
+        ConstantInt::get(Ty, Log2_64((VF * Step).getKnownMinValue())), "",
+        true);
+  }
+  return B.CreateElementCount(Ty, VFxStep);
 }
 
 /// Return the runtime value for VF.

From d34f0bfa7cc3d086f2af76c529db090a739dfe0d Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 14 Aug 2025 18:23:27 +0100
Subject: [PATCH 5/5] !fixup Use VFxStep in additional place.

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c3472c48b63d4..a57ea995b3edd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -829,8 +829,7 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
   if (VF.isScalable() && isPowerOf2_64(Step)) {
     return B.CreateShl(
         B.CreateVScale(Ty),
-        ConstantInt::get(Ty, Log2_64((VF * Step).getKnownMinValue())), "",
-        true);
+        ConstantInt::get(Ty, Log2_64(VFxStep.getKnownMinValue())), "", true);
   }
   return B.CreateElementCount(Ty, VFxStep);
 }
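
[Editorial closing note.] To spell out the arithmetic the final form relies
on: for a scalable VF with known minimum 4 and Step 2, VFxStep has known
minimum 8 and Log2_64 gives 3, so the emitted step is
`shl nuw i64 %vscale, 3`. A self-contained check (plain C++, no LLVM APIs;
the helper names are made up for the example) that the shift and the multiply
agree:

    #include <cassert>
    #include <cstdint>

    // Stand-ins for llvm::isPowerOf2_64 and llvm::Log2_64.
    static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }
    static uint64_t log2u(uint64_t V) {
      uint64_t L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      const uint64_t VFKnownMin = 4, Step = 2;
      const uint64_t VFxStep = VFKnownMin * Step; // known minimum of VF * Step
      assert(isPow2(VFxStep) && log2u(VFxStep) == 3);
      // The shift and the multiply agree for any runtime vscale.
      for (uint64_t VScale = 1; VScale <= 1024; ++VScale)
        assert((VScale << log2u(VFxStep)) == VScale * VFxStep);
      return 0;
    }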