@@ -1143,6 +1143,242 @@ exit:
1143
1143
ret i16 %for.1
1144
1144
}
1145
1145
1146
+ define i64 @print_extended_reduction (ptr nocapture readonly %x , ptr nocapture readonly %y , i32 %n ) { ; Sums zero-extended i32 loads from %x into an i64 accumulator; the expected VPlan dump below shows the extend folded into one EXTENDED-REDUCE recipe. %y is not referenced in the body.
1147
+ ; CHECK-LABEL: 'print_extended_reduction'
1148
+ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
1149
+ ; CHECK-NEXT: Live-in vp<%0> = VF * UF
1150
+ ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
1151
+ ; CHECK-NEXT: Live-in ir<%n> = original trip-count
1152
+ ; CHECK-EMPTY:
1153
+ ; CHECK-NEXT: vector.ph:
1154
+ ; CHECK-NEXT: Successor(s): vector loop
1155
+ ; CHECK-EMPTY:
1156
+ ; CHECK-NEXT: <x1> vector loop: {
1157
+ ; CHECK-NEXT: vector.body:
1158
+ ; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
1159
+ ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%r.09> = phi ir<0>, ir<%add>
1160
+ ; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
1161
+ ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<%3>
1162
+ ; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
1163
+ ; CHECK-NEXT: WIDEN ir<%load0> = load vp<%4>
1164
+ ; CHECK-NEXT: EXTENDED-REDUCE ir<%add> = ir<%r.09> + reduce.add (ir<%load0> extended to i64)
1165
+ ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
1166
+ ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
1167
+ ; CHECK-NEXT: No successors
1168
+ ; CHECK-NEXT: }
1169
+ ; CHECK-NEXT: Successor(s): middle.block
1170
+ ; CHECK-EMPTY:
1171
+ ; CHECK-NEXT: middle.block:
1172
+ ; CHECK-NEXT: EMIT vp<%6> = compute-reduction-result ir<%r.09>, ir<%add>
1173
+ ; CHECK-NEXT: EMIT vp<%7> = extract-from-end vp<%6>, ir<1>
1174
+ ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<%1>
1175
+ ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
1176
+ ; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
1177
+ ; CHECK-EMPTY:
1178
+ ; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
1179
+ ; CHECK-NEXT: IR %add.lcssa = phi i64 [ %add, %for.body ] (extra operand: vp<%7>)
1180
+ ; CHECK-NEXT: No successors
1181
+ ; CHECK-EMPTY:
1182
+ ; CHECK-NEXT: scalar.ph:
1183
+ ; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%6>, ir<0>
1184
+ ; CHECK-NEXT: Successor(s): ir-bb<for.body>
1185
+ ; CHECK-EMPTY:
1186
+ ; CHECK-NEXT: ir-bb<for.body>:
1187
+ ; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1188
+ ; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx>)
1189
+ ; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.010
1190
+ ; CHECK-NEXT: IR %load0 = load i32, ptr %arrayidx, align 4
1191
+ ; CHECK-NEXT: IR %conv0 = zext i32 %load0 to i64
1192
+ ; CHECK-NEXT: IR %add = add nsw i64 %r.09, %conv0
1193
+ ; CHECK-NEXT: IR %inc = add nuw nsw i32 %i.010, 1
1194
+ ; CHECK-NEXT: IR %exitcond = icmp eq i32 %inc, %n
1195
+ ; CHECK-NEXT: No successors
1196
+ ; CHECK-NEXT: }
1197
+ ; Scalar input loop. The expected dump above refers to the values below by name, so keep the names and the instruction order in sync with it.
1198
+ entry:
1199
+ %cmp8 = icmp sgt i32 %n , 0 ; the loop is skipped entirely when %n <= 0
1200
+ br i1 %cmp8 , label %for.body , label %for.cond.cleanup
1201
+
1202
+ for.body: ; preds = %entry, %for.body
1203
+ %i.010 = phi i32 [ %inc , %for.body ], [ 0 , %entry ]
1204
+ %r.09 = phi i64 [ %add , %for.body ], [ 0 , %entry ]
1205
+ %arrayidx = getelementptr inbounds i32 , ptr %x , i32 %i.010
1206
+ %load0 = load i32 , ptr %arrayidx , align 4
1207
+ %conv0 = zext i32 %load0 to i64 ; the extend printed as "extended to i64" in the expected EXTENDED-REDUCE line
1208
+ %add = add nsw i64 %r.09 , %conv0 ; reduction update; %r.09 is the accumulator phi
1209
+ %inc = add nuw nsw i32 %i.010 , 1
1210
+ %exitcond = icmp eq i32 %inc , %n
1211
+ br i1 %exitcond , label %for.cond.cleanup , label %for.body
1212
+
1213
+ for.cond.cleanup: ; preds = %for.body, %entry
1214
+ %r.0.lcssa = phi i64 [ 0 , %entry ], [ %add , %for.body ] ; 0 when the loop was skipped, otherwise the final sum
1215
+ ret i64 %r.0.lcssa
1216
+ }
1217
+
1218
+ define i64 @print_mulacc (ptr nocapture readonly %x , ptr nocapture readonly %y , i32 %n ) { ; Sums the element-wise i64 products of %x and %y; the expected VPlan dump below shows the mul and the add reduction as one MULACC-REDUCE recipe.
1219
+ ; CHECK-LABEL: 'print_mulacc'
1220
+ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
1221
+ ; CHECK-NEXT: Live-in vp<%0> = VF * UF
1222
+ ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
1223
+ ; CHECK-NEXT: Live-in ir<%n> = original trip-count
1224
+ ; CHECK-EMPTY:
1225
+ ; CHECK-NEXT: vector.ph:
1226
+ ; CHECK-NEXT: Successor(s): vector loop
1227
+ ; CHECK-EMPTY:
1228
+ ; CHECK-NEXT: <x1> vector loop: {
1229
+ ; CHECK-NEXT: vector.body:
1230
+ ; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
1231
+ ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%r.09> = phi ir<0>, ir<%add>
1232
+ ; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
1233
+ ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<%3>
1234
+ ; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
1235
+ ; CHECK-NEXT: WIDEN ir<%load0> = load vp<%4>
1236
+ ; CHECK-NEXT: CLONE ir<%arrayidx1> = getelementptr inbounds ir<%y>, vp<%3>
1237
+ ; CHECK-NEXT: vp<%5> = vector-pointer ir<%arrayidx1>
1238
+ ; CHECK-NEXT: WIDEN ir<%load1> = load vp<%5>
1239
+ ; CHECK-NEXT: MULACC-REDUCE ir<%add> = ir<%r.09> + reduce.add (mul ir<%load0>, ir<%load1>)
1240
+ ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
1241
+ ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
1242
+ ; CHECK-NEXT: No successors
1243
+ ; CHECK-NEXT: }
1244
+ ; CHECK-NEXT: Successor(s): middle.block
1245
+ ; CHECK-EMPTY:
1246
+ ; CHECK-NEXT: middle.block:
1247
+ ; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result ir<%r.09>, ir<%add>
1248
+ ; CHECK-NEXT: EMIT vp<%8> = extract-from-end vp<%7>, ir<1>
1249
+ ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<%1>
1250
+ ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
1251
+ ; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
1252
+ ; CHECK-EMPTY:
1253
+ ; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
1254
+ ; CHECK-NEXT: IR %add.lcssa = phi i64 [ %add, %for.body ] (extra operand: vp<%8>)
1255
+ ; CHECK-NEXT: No successors
1256
+ ; CHECK-EMPTY:
1257
+ ; CHECK-NEXT: scalar.ph:
1258
+ ; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%7>, ir<0>
1259
+ ; CHECK-NEXT: Successor(s): ir-bb<for.body>
1260
+ ; CHECK-EMPTY:
1261
+ ; CHECK-NEXT: ir-bb<for.body>:
1262
+ ; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1263
+ ; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx>)
1264
+ ; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i64, ptr %x, i32 %i.010
1265
+ ; CHECK-NEXT: IR %load0 = load i64, ptr %arrayidx, align 4
1266
+ ; CHECK-NEXT: IR %arrayidx1 = getelementptr inbounds i64, ptr %y, i32 %i.010
1267
+ ; CHECK-NEXT: IR %load1 = load i64, ptr %arrayidx1, align 4
1268
+ ; CHECK-NEXT: IR %mul = mul nsw i64 %load0, %load1
1269
+ ; CHECK-NEXT: IR %add = add nsw i64 %r.09, %mul
1270
+ ; CHECK-NEXT: IR %inc = add nuw nsw i32 %i.010, 1
1271
+ ; CHECK-NEXT: IR %exitcond = icmp eq i32 %inc, %n
1272
+ ; CHECK-NEXT: No successors
1273
+ ; CHECK-NEXT: }
1274
+ ; Scalar input loop. The expected dump above refers to the values below by name, so keep the names and the instruction order in sync with it.
1275
+ entry:
1276
+ %cmp8 = icmp sgt i32 %n , 0 ; the loop is skipped entirely when %n <= 0
1277
+ br i1 %cmp8 , label %for.body , label %for.cond.cleanup
1278
+
1279
+ for.body: ; preds = %entry, %for.body
1280
+ %i.010 = phi i32 [ %inc , %for.body ], [ 0 , %entry ]
1281
+ %r.09 = phi i64 [ %add , %for.body ], [ 0 , %entry ]
1282
+ %arrayidx = getelementptr inbounds i64 , ptr %x , i32 %i.010
1283
+ %load0 = load i64 , ptr %arrayidx , align 4 ; NOTE(review): align 4 is below i64's natural alignment (same for %load1); legal, presumably inherited from the i32 variant - confirm
1284
+ %arrayidx1 = getelementptr inbounds i64 , ptr %y , i32 %i.010
1285
+ %load1 = load i64 , ptr %arrayidx1 , align 4
1286
+ %mul = mul nsw i64 %load0 , %load1 ; the multiply printed inside the expected MULACC-REDUCE line
1287
+ %add = add nsw i64 %r.09 , %mul ; reduction update; %r.09 is the accumulator phi
1288
+ %inc = add nuw nsw i32 %i.010 , 1
1289
+ %exitcond = icmp eq i32 %inc , %n
1290
+ br i1 %exitcond , label %for.cond.cleanup , label %for.body
1291
+
1292
+ for.cond.cleanup: ; preds = %for.body, %entry
1293
+ %r.0.lcssa = phi i64 [ 0 , %entry ], [ %add , %for.body ] ; 0 when the loop was skipped, otherwise the final sum
1294
+ ret i64 %r.0.lcssa
1295
+ }
1296
+
1297
+ define i64 @print_mulacc_extended (ptr nocapture readonly %x , ptr nocapture readonly %y , i32 %n ) { ; Sums products of i16 elements of %x and %y: both inputs are sign-extended to i32, multiplied, and the product is sign-extended to i64 before accumulation. The expected VPlan dump below shows all of that as one MULACC-REDUCE recipe.
1298
+ ; CHECK-LABEL: 'print_mulacc_extended'
1299
+ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
1300
+ ; CHECK-NEXT: Live-in vp<%0> = VF * UF
1301
+ ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
1302
+ ; CHECK-NEXT: Live-in ir<%n> = original trip-count
1303
+ ; CHECK-EMPTY:
1304
+ ; CHECK-NEXT: vector.ph:
1305
+ ; CHECK-NEXT: Successor(s): vector loop
1306
+ ; CHECK-EMPTY:
1307
+ ; CHECK-NEXT: <x1> vector loop: {
1308
+ ; CHECK-NEXT: vector.body:
1309
+ ; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
1310
+ ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%r.09> = phi ir<0>, ir<%add>
1311
+ ; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
1312
+ ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<%3>
1313
+ ; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
1314
+ ; CHECK-NEXT: WIDEN ir<%load0> = load vp<%4>
1315
+ ; CHECK-NEXT: CLONE ir<%arrayidx1> = getelementptr inbounds ir<%y>, vp<%3>
1316
+ ; CHECK-NEXT: vp<%5> = vector-pointer ir<%arrayidx1>
1317
+ ; CHECK-NEXT: WIDEN ir<%load1> = load vp<%5>
1318
+ ; CHECK-NEXT: MULACC-REDUCE ir<%add> = ir<%r.09> + (reduce.add (mul (ir<%load0> extended to i32), (ir<%load1> extended to i32)) extended to i64)
1319
+ ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
1320
+ ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
1321
+ ; CHECK-NEXT: No successors
1322
+ ; CHECK-NEXT: }
1323
+ ; CHECK-NEXT: Successor(s): middle.block
1324
+ ; CHECK-EMPTY:
1325
+ ; CHECK-NEXT: middle.block:
1326
+ ; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result ir<%r.09>, ir<%add>
1327
+ ; CHECK-NEXT: EMIT vp<%8> = extract-from-end vp<%7>, ir<1>
1328
+ ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<%1>
1329
+ ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
1330
+ ; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
1331
+ ; CHECK-EMPTY:
1332
+ ; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
1333
+ ; CHECK-NEXT: IR %add.lcssa = phi i64 [ %add, %for.body ] (extra operand: vp<%8>)
1334
+ ; CHECK-NEXT: No successors
1335
+ ; CHECK-EMPTY:
1336
+ ; CHECK-NEXT: scalar.ph:
1337
+ ; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%7>, ir<0>
1338
+ ; CHECK-NEXT: Successor(s): ir-bb<for.body>
1339
+ ; CHECK-EMPTY:
1340
+ ; CHECK-NEXT: ir-bb<for.body>:
1341
+ ; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1342
+ ; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx>)
1343
+ ; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.010
1344
+ ; CHECK-NEXT: IR %load0 = load i16, ptr %arrayidx, align 4
1345
+ ; CHECK-NEXT: IR %arrayidx1 = getelementptr inbounds i16, ptr %y, i32 %i.010
1346
+ ; CHECK-NEXT: IR %load1 = load i16, ptr %arrayidx1, align 4
1347
+ ; CHECK-NEXT: IR %conv0 = sext i16 %load0 to i32
1348
+ ; CHECK-NEXT: IR %conv1 = sext i16 %load1 to i32
1349
+ ; CHECK-NEXT: IR %mul = mul nsw i32 %conv0, %conv1
1350
+ ; CHECK-NEXT: IR %conv = sext i32 %mul to i64
1351
+ ; CHECK-NEXT: IR %add = add nsw i64 %r.09, %conv
1352
+ ; CHECK-NEXT: IR %inc = add nuw nsw i32 %i.010, 1
1353
+ ; CHECK-NEXT: IR %exitcond = icmp eq i32 %inc, %n
1354
+ ; CHECK-NEXT: No successors
1355
+ ; CHECK-NEXT: }
1356
+ ; Scalar input loop. The expected dump above refers to the values below by name, so keep the names and the instruction order in sync with it.
1357
+ entry:
1358
+ %cmp8 = icmp sgt i32 %n , 0 ; the loop is skipped entirely when %n <= 0
1359
+ br i1 %cmp8 , label %for.body , label %for.cond.cleanup
1360
+
1361
+ for.body: ; preds = %entry, %for.body
1362
+ %i.010 = phi i32 [ %inc , %for.body ], [ 0 , %entry ]
1363
+ %r.09 = phi i64 [ %add , %for.body ], [ 0 , %entry ]
1364
+ %arrayidx = getelementptr inbounds i16 , ptr %x , i32 %i.010
1365
+ %load0 = load i16 , ptr %arrayidx , align 4 ; NOTE(review): align 4 on i16 elements addressed at a 2-byte stride looks over-stated (same for %load1); changing it also requires updating the echoed IR lines in the expected dump - confirm intended
1366
+ %arrayidx1 = getelementptr inbounds i16 , ptr %y , i32 %i.010
1367
+ %load1 = load i16 , ptr %arrayidx1 , align 4
1368
+ %conv0 = sext i16 %load0 to i32 ; inner extend of the first multiply operand ("extended to i32")
1369
+ %conv1 = sext i16 %load1 to i32 ; inner extend of the second multiply operand ("extended to i32")
1370
+ %mul = mul nsw i32 %conv0 , %conv1
1371
+ %conv = sext i32 %mul to i64 ; outer extend of the product ("extended to i64")
1372
+ %add = add nsw i64 %r.09 , %conv ; reduction update; %r.09 is the accumulator phi
1373
+ %inc = add nuw nsw i32 %i.010 , 1
1374
+ %exitcond = icmp eq i32 %inc , %n
1375
+ br i1 %exitcond , label %for.cond.cleanup , label %for.body
1376
+
1377
+ for.cond.cleanup: ; preds = %for.body, %entry
1378
+ %r.0.lcssa = phi i64 [ 0 , %entry ], [ %add , %for.body ] ; 0 when the loop was skipped, otherwise the final sum
1379
+ ret i64 %r.0.lcssa
1380
+ }
1381
+
1146
1382
!llvm.dbg.cu = !{!0 }
1147
1383
!llvm.module.flags = !{!3 , !4 }
1148
1384
0 commit comments