@@ -267,5 +267,233 @@ define void @histogram_i16_8_lane(ptr %base, <vscale x 8 x i32> %indices, i16 %i
267
267
ret void
268
268
}
269
269
270
; Test: histogram add over i8 buckets addressed as %base + zext(i32 %indices).
; The expected assembly uses the 32-bit index vector directly with `uxtw`
; addressing in ld1b/st1b (no separate extend instruction is listed), and
; splats the run-time increment %inc from w1.
+ define void @histogram_i8_zext (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask , i8 %inc ) #0 {
271
+ ; CHECK-LABEL: histogram_i8_zext:
272
+ ; CHECK: // %bb.0:
273
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
274
+ ; CHECK-NEXT: mov z3.s, w1
275
+ ; CHECK-NEXT: ld1b { z2.s }, p0/z, [x0, z0.s, uxtw]
276
+ ; CHECK-NEXT: ptrue p1.s
277
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
278
+ ; CHECK-NEXT: st1b { z1.s }, p0, [x0, z0.s, uxtw]
279
+ ; CHECK-NEXT: ret
280
+ %extended = zext <vscale x 4 x i32 > %indices to <vscale x 4 x i64 >
281
+ %buckets = getelementptr i8 , ptr %base , <vscale x 4 x i64 > %extended
282
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i8 (<vscale x 4 x ptr > %buckets , i8 %inc , <vscale x 4 x i1 > %mask )
283
+ ret void
284
+ }
285
+
286
; Test: histogram add over i16 buckets addressed as %base + zext(i32 %indices).
; The expected assembly keeps the indices as .s elements and scales them in the
; ld1h/st1h addressing mode (`uxtw #1`); the run-time increment %inc is
; splatted from w1.
+ define void @histogram_i16_zext (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask , i16 %inc ) #0 {
287
+ ; CHECK-LABEL: histogram_i16_zext:
288
+ ; CHECK: // %bb.0:
289
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
290
+ ; CHECK-NEXT: mov z3.s, w1
291
+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, uxtw #1]
292
+ ; CHECK-NEXT: ptrue p1.s
293
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
294
+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, uxtw #1]
295
+ ; CHECK-NEXT: ret
296
+ %extended = zext <vscale x 4 x i32 > %indices to <vscale x 4 x i64 >
297
+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i64 > %extended
298
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 %inc , <vscale x 4 x i1 > %mask )
299
+ ret void
300
+ }
301
+
302
; Test: histogram add over i32 buckets with a constant increment of 1 and
; i32 indices zero-extended to i64 before the GEP. The expected ld1w/st1w use
; the .s index vector with `uxtw #2` addressing.
+ define void @histogram_i32_zext (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
303
+ ; CHECK-LABEL: histogram_i32_zext:
304
+ ; CHECK: // %bb.0:
305
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
306
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
307
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, uxtw #2]
308
+ ; CHECK-NEXT: ptrue p1.s
309
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
310
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, uxtw #2]
311
+ ; CHECK-NEXT: ret
312
+ %extended = zext <vscale x 4 x i32 > %indices to <vscale x 4 x i64 >
313
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i64 > %extended
314
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
315
+ ret void
316
+ }
317
+
318
; Test: histogram add over i32 buckets with a constant increment of 1 and
; i32 indices sign-extended to i64 before the GEP. The expected ld1w/st1w use
; the .s index vector with `sxtw #2` addressing.
+ define void @histogram_i32_sext (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
319
+ ; CHECK-LABEL: histogram_i32_sext:
320
+ ; CHECK: // %bb.0:
321
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
322
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
323
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, sxtw #2]
324
+ ; CHECK-NEXT: ptrue p1.s
325
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
326
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
327
+ ; CHECK-NEXT: ret
328
+ %extended = sext <vscale x 4 x i32 > %indices to <vscale x 4 x i64 >
329
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i64 > %extended
330
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
331
+ ret void
332
+ }
333
+
334
; Test: indices arrive as <vscale x 4 x i8> and are zero-extended to i64
; before the GEP. The expected assembly masks the index lanes with
; `and ... #0xff` and then uses `uxtw #2` addressing for the i32 buckets.
+ define void @histogram_zext_from_i8_to_i64 (ptr %base , <vscale x 4 x i8 > %indices , <vscale x 4 x i1 > %mask ) #0 {
335
+ ; CHECK-LABEL: histogram_zext_from_i8_to_i64:
336
+ ; CHECK: // %bb.0:
337
+ ; CHECK-NEXT: and z0.s, z0.s, #0xff
338
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
339
+ ; CHECK-NEXT: ptrue p1.s
340
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
341
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, uxtw #2]
342
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
343
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, uxtw #2]
344
+ ; CHECK-NEXT: ret
345
+ %extended = zext <vscale x 4 x i8 > %indices to <vscale x 4 x i64 >
346
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i64 > %extended
347
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
348
+ ret void
349
+ }
350
+
351
; Test: indices arrive as <vscale x 4 x i16> and are zero-extended to i64
; before the GEP. The expected assembly masks the index lanes with
; `and ... #0xffff` and then uses `uxtw #2` addressing for the i32 buckets.
+ define void @histogram_zext_from_i16_to_i64 (ptr %base , <vscale x 4 x i16 > %indices , <vscale x 4 x i1 > %mask ) #0 {
352
+ ; CHECK-LABEL: histogram_zext_from_i16_to_i64:
353
+ ; CHECK: // %bb.0:
354
+ ; CHECK-NEXT: and z0.s, z0.s, #0xffff
355
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
356
+ ; CHECK-NEXT: ptrue p1.s
357
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
358
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, uxtw #2]
359
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
360
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, uxtw #2]
361
+ ; CHECK-NEXT: ret
362
+ %extended = zext <vscale x 4 x i16 > %indices to <vscale x 4 x i64 >
363
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i64 > %extended
364
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
365
+ ret void
366
+ }
367
+
368
; Test: indices arrive as <vscale x 4 x i16> and are sign-extended to i64
; before the GEP. The expected assembly sign-extends the lanes in place with
; `sxth` predicated on p1 (set by `ptrue p1.s`) and then uses `sxtw #2`
; addressing for the i32 buckets.
+ define void @histogram_sext_from_i16_to_i64 (ptr %base , <vscale x 4 x i16 > %indices , <vscale x 4 x i1 > %mask ) #0 {
369
+ ; CHECK-LABEL: histogram_sext_from_i16_to_i64:
370
+ ; CHECK: // %bb.0:
371
+ ; CHECK-NEXT: ptrue p1.s
372
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
373
+ ; CHECK-NEXT: sxth z0.s, p1/m, z0.s
374
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
375
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, sxtw #2]
376
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
377
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
378
+ ; CHECK-NEXT: ret
379
+ %extended = sext <vscale x 4 x i16 > %indices to <vscale x 4 x i64 >
380
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i64 > %extended
381
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
382
+ ret void
383
+ }
384
+
385
; Test: indices are zero-extended from i8 only as far as i32, and the GEP is
; indexed by that i32 vector. The expected assembly is an `and ... #0xff`
; mask of the index lanes followed by `uxtw #2` gather/scatter addressing.
+ define void @histogram_zext_from_i8_to_i32 (ptr %base , <vscale x 4 x i8 > %indices , <vscale x 4 x i1 > %mask ) #0 {
386
+ ; CHECK-LABEL: histogram_zext_from_i8_to_i32:
387
+ ; CHECK: // %bb.0:
388
+ ; CHECK-NEXT: and z0.s, z0.s, #0xff
389
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
390
+ ; CHECK-NEXT: ptrue p1.s
391
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
392
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, uxtw #2]
393
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
394
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, uxtw #2]
395
+ ; CHECK-NEXT: ret
396
+ %extended = zext <vscale x 4 x i8 > %indices to <vscale x 4 x i32 >
397
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i32 > %extended
398
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
399
+ ret void
400
+ }
401
+
402
; Test: indices are zero-extended from i16 only as far as i32, and the GEP is
; indexed by that i32 vector. The expected assembly is an `and ... #0xffff`
; mask of the index lanes followed by `uxtw #2` gather/scatter addressing.
+ define void @histogram_zext_from_i16_to_i32 (ptr %base , <vscale x 4 x i16 > %indices , <vscale x 4 x i1 > %mask ) #0 {
403
+ ; CHECK-LABEL: histogram_zext_from_i16_to_i32:
404
+ ; CHECK: // %bb.0:
405
+ ; CHECK-NEXT: and z0.s, z0.s, #0xffff
406
+ ; CHECK-NEXT: mov z3.s, #1 // =0x1
407
+ ; CHECK-NEXT: ptrue p1.s
408
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
409
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z0.s, uxtw #2]
410
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
411
+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, uxtw #2]
412
+ ; CHECK-NEXT: ret
413
+ %extended = zext <vscale x 4 x i16 > %indices to <vscale x 4 x i32 >
414
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i32 > %extended
415
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > %mask )
416
+ ret void
417
+ }
418
+
419
; Test: two-lane variant (<vscale x 2 x i32> indices zero-extended to i64).
; The expected assembly works on .d elements: a copy of the indices masked
; with #0xffffffff feeds histcnt, while the unmasked z0 is used with
; `uxtw #2` addressing in ld1w/st1w.
+ define void @histogram_2_lane_zext (ptr %base , <vscale x 2 x i32 > %indices , <vscale x 2 x i1 > %mask ) #0 {
420
+ ; CHECK-LABEL: histogram_2_lane_zext:
421
+ ; CHECK: // %bb.0:
422
+ ; CHECK-NEXT: mov z1.d, z0.d
423
+ ; CHECK-NEXT: mov z3.d, #1 // =0x1
424
+ ; CHECK-NEXT: ptrue p1.d
425
+ ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x0, z0.d, uxtw #2]
426
+ ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
427
+ ; CHECK-NEXT: histcnt z1.d, p0/z, z1.d, z1.d
428
+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
429
+ ; CHECK-NEXT: st1w { z1.d }, p0, [x0, z0.d, uxtw #2]
430
+ ; CHECK-NEXT: ret
431
+ %extended = zext <vscale x 2 x i32 > %indices to <vscale x 2 x i64 >
432
+ %buckets = getelementptr i32 , ptr %base , <vscale x 2 x i64 > %extended
433
+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i32 (<vscale x 2 x ptr > %buckets , i32 1 , <vscale x 2 x i1 > %mask )
434
+ ret void
435
+ }
436
+
437
; Test: eight-lane variant (<vscale x 8 x i32> indices zero-extended to i64).
; The expected assembly handles the input in two halves: the mask predicate is
; unpacked with punpklo/punpkhi, and a histcnt/ld1w/mad/st1w sequence is
; listed for each of z0 and z1, both using `uxtw #2` addressing.
+ define void @histogram_8_lane_zext (ptr %base , <vscale x 8 x i32 > %indices , <vscale x 8 x i1 > %mask ) #0 {
438
+ ; CHECK-LABEL: histogram_8_lane_zext:
439
+ ; CHECK: // %bb.0:
440
+ ; CHECK-NEXT: punpklo p1.h, p0.b
441
+ ; CHECK-NEXT: mov z4.s, #1 // =0x1
442
+ ; CHECK-NEXT: ptrue p2.s
443
+ ; CHECK-NEXT: histcnt z2.s, p1/z, z0.s, z0.s
444
+ ; CHECK-NEXT: ld1w { z3.s }, p1/z, [x0, z0.s, uxtw #2]
445
+ ; CHECK-NEXT: punpkhi p0.h, p0.b
446
+ ; CHECK-NEXT: mad z2.s, p2/m, z4.s, z3.s
447
+ ; CHECK-NEXT: st1w { z2.s }, p1, [x0, z0.s, uxtw #2]
448
+ ; CHECK-NEXT: histcnt z0.s, p0/z, z1.s, z1.s
449
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z1.s, uxtw #2]
450
+ ; CHECK-NEXT: mad z0.s, p2/m, z4.s, z2.s
451
+ ; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
452
+ ; CHECK-NEXT: ret
453
+ %extended = zext <vscale x 8 x i32 > %indices to <vscale x 8 x i64 >
454
+ %buckets = getelementptr i32 , ptr %base , <vscale x 8 x i64 > %extended
455
+ call void @llvm.experimental.vector.histogram.add.nxv8p0.i32 (<vscale x 8 x ptr > %buckets , i32 1 , <vscale x 8 x i1 > %mask )
456
+ ret void
457
+ }
458
+
459
; Test: eight-lane variant (<vscale x 8 x i32> indices sign-extended to i64).
; The expected assembly handles the input in two halves: the mask predicate is
; unpacked with punpklo/punpkhi, and a histcnt/ld1w/mad/st1w sequence is
; listed for each of z0 and z1, both using `sxtw #2` addressing.
+ define void @histogram_8_lane_sext (ptr %base , <vscale x 8 x i32 > %indices , <vscale x 8 x i1 > %mask ) #0 {
460
+ ; CHECK-LABEL: histogram_8_lane_sext:
461
+ ; CHECK: // %bb.0:
462
+ ; CHECK-NEXT: punpklo p1.h, p0.b
463
+ ; CHECK-NEXT: mov z4.s, #1 // =0x1
464
+ ; CHECK-NEXT: ptrue p2.s
465
+ ; CHECK-NEXT: histcnt z2.s, p1/z, z0.s, z0.s
466
+ ; CHECK-NEXT: ld1w { z3.s }, p1/z, [x0, z0.s, sxtw #2]
467
+ ; CHECK-NEXT: punpkhi p0.h, p0.b
468
+ ; CHECK-NEXT: mad z2.s, p2/m, z4.s, z3.s
469
+ ; CHECK-NEXT: st1w { z2.s }, p1, [x0, z0.s, sxtw #2]
470
+ ; CHECK-NEXT: histcnt z0.s, p0/z, z1.s, z1.s
471
+ ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z1.s, sxtw #2]
472
+ ; CHECK-NEXT: mad z0.s, p2/m, z4.s, z2.s
473
+ ; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
474
+ ; CHECK-NEXT: ret
475
+ %extended = sext <vscale x 8 x i32 > %indices to <vscale x 8 x i64 >
476
+ %buckets = getelementptr i32 , ptr %base , <vscale x 8 x i64 > %extended
477
+ call void @llvm.experimental.vector.histogram.add.nxv8p0.i32 (<vscale x 8 x ptr > %buckets , i32 1 , <vscale x 8 x i1 > %mask )
478
+ ret void
479
+ }
480
+
481
; Test: the histogram intrinsic is called with a constant zeroinitializer
; (all-false) mask; the %mask argument is not used. The expected assembly is
; a bare `ret`, i.e. no histogram code is listed.
+ define void @histogram_zero_mask (<vscale x 2 x ptr > %buckets , i64 %inc , <vscale x 2 x i1 > %mask ) #0 {
482
+ ; CHECK-LABEL: histogram_zero_mask:
483
+ ; CHECK: // %bb.0:
484
+ ; CHECK-NEXT: ret
485
+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i64 (<vscale x 2 x ptr > %buckets , i64 %inc , <vscale x 2 x i1 > zeroinitializer )
486
+ ret void
487
+ }
488
+
489
; Test: sign-extended indices and a GEP feed a histogram call whose mask is a
; constant zeroinitializer (the %mask argument is not used). The expected
; assembly is a bare `ret`: neither the extend nor the histogram is listed.
+ define void @histogram_sext_zero_mask (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
490
+ ; CHECK-LABEL: histogram_sext_zero_mask:
491
+ ; CHECK: // %bb.0:
492
+ ; CHECK-NEXT: ret
493
+ %extended = sext <vscale x 4 x i32 > %indices to <vscale x 4 x i64 >
494
+ %buckets = getelementptr i32 , ptr %base , <vscale x 4 x i64 > %extended
495
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32 (<vscale x 4 x ptr > %buckets , i32 1 , <vscale x 4 x i1 > zeroinitializer )
496
+ ret void
497
+ }
270
498
271
499
; Attribute group #0, referenced by the function definitions in this file:
; sets the "+sve2" target feature and declares vscale_range(1, 16).
attributes #0 = { "target-features" ="+sve2" vscale_range(1 , 16 ) }
0 commit comments