Skip to content

Commit 3f02b10

Browse files
committed
Revert "Mark 16i8 -> 2i64 partial reduction case as invalid"
This reverts commit 700d0d6.
1 parent f1b15d3 commit 3f02b10

File tree

2 files changed

+46
-45
lines changed

2 files changed

+46
-45
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -4923,7 +4923,9 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
49234923
return Invalid;
49244924
break;
49254925
case 16:
4926-
if (AccumEVT != MVT::i32)
4926+
if (AccumEVT == MVT::i64)
4927+
Cost *= 2;
4928+
else if (AccumEVT != MVT::i32)
49274929
return Invalid;
49284930
break;
49294931
}

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

+43-44
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,8 @@ for.exit: ; preds = %for.body
154154
ret i32 %add
155155
}
156156

157-
define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b) #1 {
158-
; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod(
157+
define i64 @dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b) #1 {
158+
; CHECK-INTERLEAVE1-LABEL: define i64 @dotp_i8_to_i64_has_neon_dotprod(
159159
; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] {
160160
; CHECK-INTERLEAVE1-NEXT: entry:
161161
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
@@ -195,7 +195,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
195195
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
196196
; CHECK-INTERLEAVE1: scalar.ph:
197197
;
198-
; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod(
198+
; CHECK-INTERLEAVED-LABEL: define i64 @dotp_i8_to_i64_has_neon_dotprod(
199199
; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] {
200200
; CHECK-INTERLEAVED-NEXT: entry:
201201
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
@@ -249,42 +249,42 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
249249
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
250250
; CHECK-INTERLEAVED: scalar.ph:
251251
;
252-
; CHECK-MAXBW-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod(
252+
; CHECK-MAXBW-LABEL: define i64 @dotp_i8_to_i64_has_neon_dotprod(
253253
; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] {
254254
; CHECK-MAXBW-NEXT: entry:
255255
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
256-
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
256+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
257257
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
258258
; CHECK-MAXBW: vector.ph:
259259
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
260-
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
260+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
261261
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
262262
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
263263
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
264-
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
264+
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
265265
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
266266
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
267267
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
268268
; CHECK-MAXBW: vector.body:
269269
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
270-
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
270+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
271271
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
272272
; CHECK-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
273273
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
274274
; CHECK-MAXBW-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
275275
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
276-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
277-
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
276+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
278277
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 0
279-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
280-
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i64>
281-
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP13]], [[TMP11]]
282-
; CHECK-MAXBW-NEXT: [[TMP15]] = add <vscale x 8 x i64> [[TMP14]], [[VEC_PHI]]
278+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP12]], align 1
279+
; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD2]] to <vscale x 16 x i64>
280+
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i64>
281+
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 16 x i64> [[TMP15]], [[TMP13]]
282+
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv16i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 16 x i64> [[TMP14]])
283283
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
284284
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
285285
; CHECK-MAXBW-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
286286
; CHECK-MAXBW: middle.block:
287-
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP15]])
287+
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[PARTIAL_REDUCE]])
288288
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
289289
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
290290
; CHECK-MAXBW: scalar.ph:
@@ -313,8 +313,8 @@ for.exit: ; preds = %for.body
313313
ret i64 %add
314314
}
315315

316-
define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b) #1 {
317-
; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod(
316+
define i64 @dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b) #1 {
317+
; CHECK-INTERLEAVE1-LABEL: define i64 @dotp_i16_to_i64_has_neon_dotprod(
318318
; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] {
319319
; CHECK-INTERLEAVE1-NEXT: entry:
320320
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
@@ -358,7 +358,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
358358
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
359359
; CHECK-INTERLEAVE1: scalar.ph:
360360
;
361-
; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod(
361+
; CHECK-INTERLEAVED-LABEL: define i64 @dotp_i16_to_i64_has_neon_dotprod(
362362
; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] {
363363
; CHECK-INTERLEAVED-NEXT: entry:
364364
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
@@ -416,7 +416,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
416416
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
417417
; CHECK-INTERLEAVED: scalar.ph:
418418
;
419-
; CHECK-MAXBW-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod(
419+
; CHECK-MAXBW-LABEL: define i64 @dotp_i16_to_i64_has_neon_dotprod(
420420
; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] {
421421
; CHECK-MAXBW-NEXT: entry:
422422
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -1957,37 +1957,36 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
19571957
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
19581958
; CHECK-MAXBW-NEXT: entry:
19591959
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1960-
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
1960+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
19611961
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 16, [[TMP1]]
19621962
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
19631963
; CHECK-MAXBW: vector.ph:
19641964
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1965-
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
1965+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
19661966
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 16, [[TMP3]]
19671967
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 16, [[N_MOD_VF]]
19681968
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1969-
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
1969+
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
19701970
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
19711971
; CHECK-MAXBW: vector.body:
19721972
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1973-
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
1973+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
19741974
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
19751975
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]]
19761976
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0
1977-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1
1978-
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
1977+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
19791978
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1
19801979
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]]
19811980
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i32 0
1982-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
1983-
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
1984-
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP13]], [[TMP9]]
1985-
; CHECK-MAXBW-NEXT: [[TMP15]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP14]]
1981+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 16 x i8>, ptr [[TMP12]], align 1
1982+
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD1]] to <vscale x 16 x i64>
1983+
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i64>
1984+
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 16 x i64> [[TMP16]], [[TMP13]]
1985+
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv16i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 16 x i64> [[TMP14]])
19861986
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
1987-
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1988-
; CHECK-MAXBW-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1987+
; CHECK-MAXBW-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
19891988
; CHECK-MAXBW: middle.block:
1990-
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP15]])
1989+
; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[PARTIAL_REDUCE]])
19911990
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, [[N_VEC]]
19921991
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
19931992
; CHECK-MAXBW: scalar.ph:
@@ -2589,41 +2588,41 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
25892588
; CHECK-MAXBW: for.body.preheader:
25902589
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
25912590
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
2592-
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
2591+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 16
25932592
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
25942593
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
25952594
; CHECK-MAXBW: vector.ph:
25962595
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
2597-
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
2596+
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
25982597
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
25992598
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
26002599
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
2601-
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
2600+
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16
26022601
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
26032602
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
26042603
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
2605-
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = insertelement <vscale x 8 x i64> zeroinitializer, i64 [[COST]], i32 0
2604+
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[COST]], i32 0
26062605
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
26072606
; CHECK-MAXBW: vector.body:
26082607
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2609-
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i64> [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
2608+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ [[TMP10]], [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
26102609
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
26112610
; CHECK-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]]
26122611
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
26132612
; CHECK-MAXBW-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
26142613
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
2615-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP13]], align 1
2616-
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
2614+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1
26172615
; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 0
2618-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
2619-
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i64>
2620-
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP16]], [[TMP14]]
2621-
; CHECK-MAXBW-NEXT: [[TMP19]] = add <vscale x 8 x i64> [[TMP17]], [[VEC_PHI]]
2616+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP15]], align 1
2617+
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD2]] to <vscale x 16 x i64>
2618+
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i64>
2619+
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nuw nsw <vscale x 16 x i64> [[TMP20]], [[TMP16]]
2620+
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv16i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 16 x i64> [[TMP17]])
26222621
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
26232622
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
26242623
; CHECK-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
26252624
; CHECK-MAXBW: middle.block:
2626-
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP19]])
2625+
; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[PARTIAL_REDUCE]])
26272626
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
26282627
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
26292628
; CHECK-MAXBW: scalar.ph:

0 commit comments

Comments
 (0)