@@ -494,3 +494,186 @@ if.else:
494
494
%vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.pmull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 )
495
495
ret <8 x i16 > %vmull1
496
496
}
497
+
498
+ declare <8 x half > @llvm.fma.v8f16 (<8 x half >, <8 x half >, <8 x half >)
499
+
500
+ ; Test: eight splat shuffles of %a (one per lane, 0-7) are created in the entry
+ ; block. Each one is used as the second operand of an llvm.fma.v8f16 call,
+ ; lanes 0-3 in if.then and lanes 4-7 in if.else.
+ ; NOTE(review): despite the "sink_" name, the CHECK lines below expect every
+ ; shuffle to remain in the entry block -- presumably a baseline recorded before
+ ; the sinking transform is enabled; confirm and regenerate the checks then.
+ define <8 x half > @sink_shufflevector_fma_v8f16 (i1 %c , <8 x half > %a , <8 x half > %b ) {
501
+ ; CHECK-LABEL: @sink_shufflevector_fma_v8f16(
502
+ ; CHECK-NEXT: entry:
503
+ ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
504
+ ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
505
+ ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
506
+ ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
507
+ ; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
508
+ ; CHECK-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
509
+ ; CHECK-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
510
+ ; CHECK-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
511
+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
512
+ ; CHECK: if.then:
513
+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
514
+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
515
+ ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
516
+ ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
517
+ ; CHECK-NEXT: ret <8 x half> [[R_3]]
518
+ ; CHECK: if.else:
519
+ ; CHECK-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
520
+ ; CHECK-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
521
+ ; CHECK-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
522
+ ; CHECK-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
523
+ ; CHECK-NEXT: ret <8 x half> [[R_7]]
524
+ ;
525
+ entry:
526
+ %s0 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > zeroinitializer
527
+ %s1 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >
528
+ %s2 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >
529
+ %s3 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 , i32 3 , i32 3 , i32 3 , i32 3 >
530
+ %s4 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
531
+ %s5 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 5 , i32 5 , i32 5 , i32 5 , i32 5 , i32 5 , i32 5 , i32 5 >
532
+ %s6 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 6 , i32 6 , i32 6 , i32 6 , i32 6 , i32 6 , i32 6 , i32 6 >
533
+ %s7 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 , i32 7 , i32 7 , i32 7 , i32 7 >
534
+ br i1 %c , label %if.then , label %if.else
535
+
536
+ if.then:
537
+ %r.0 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %b , <8 x half > %s0 , <8 x half > %b )
538
+ %r.1 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %r.0 , <8 x half > %s1 , <8 x half > %b )
539
+ %r.2 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %r.1 , <8 x half > %s2 , <8 x half > %b )
540
+ %r.3 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %r.2 , <8 x half > %s3 , <8 x half > %b )
541
+ ret <8 x half > %r.3
542
+
543
+ if.else:
544
+ %r.4 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %b , <8 x half > %s4 , <8 x half > %b )
545
+ %r.5 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %r.4 , <8 x half > %s5 , <8 x half > %b )
546
+ %r.6 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %r.5 , <8 x half > %s6 , <8 x half > %b )
547
+ %r.7 = tail call fast <8 x half > @llvm.fma.v8f16 (<8 x half > %r.6 , <8 x half > %s7 , <8 x half > %b )
548
+ ret <8 x half > %r.7
549
+ }
550
+
551
+ declare <4 x float > @llvm.fma.v4f32 (<4 x float >, <4 x float >, <4 x float >)
552
+
553
+ ; Test: four <4 x float> splat shuffles (lanes 0-3) are taken from the
+ ; <8 x float> input %a in the entry block. Each one is used as the second
+ ; operand of an llvm.fma.v4f32 call, lanes 0-1 in if.then and lanes 2-3 in
+ ; if.else.
+ ; NOTE(review): despite the "sink_" name, the CHECK lines below expect every
+ ; shuffle to remain in the entry block -- presumably a baseline recorded before
+ ; the sinking transform is enabled; confirm and regenerate the checks then.
+ define <4 x float > @sink_shufflevector_fma_v4f32 (i1 %c , <8 x float > %a , <4 x float > %b ) {
554
+ ; CHECK-LABEL: @sink_shufflevector_fma_v4f32(
555
+ ; CHECK-NEXT: entry:
556
+ ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
557
+ ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
558
+ ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
559
+ ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
560
+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
561
+ ; CHECK: if.then:
562
+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S0]], <4 x float> [[B]])
563
+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[S1]], <4 x float> [[B]])
564
+ ; CHECK-NEXT: ret <4 x float> [[R_1]]
565
+ ; CHECK: if.else:
566
+ ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[S2]], <4 x float> [[B]])
567
+ ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[S3]], <4 x float> [[B]])
568
+ ; CHECK-NEXT: ret <4 x float> [[R_3]]
569
+ ;
570
+ entry:
571
+ %s0 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > zeroinitializer
572
+ %s1 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
573
+ %s2 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 >
574
+ %s3 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 >
575
+ br i1 %c , label %if.then , label %if.else
576
+
577
+ if.then:
578
+ %r.0 = tail call fast <4 x float > @llvm.fma.v4f32 (<4 x float > %b , <4 x float > %s0 , <4 x float > %b )
579
+ %r.1 = tail call fast <4 x float > @llvm.fma.v4f32 (<4 x float > %r.0 , <4 x float > %s1 , <4 x float > %b )
580
+ ret <4 x float > %r.1
581
+
582
+ if.else:
583
+ %r.2 = tail call fast <4 x float > @llvm.fma.v4f32 (<4 x float > %b , <4 x float > %s2 , <4 x float > %b )
584
+ %r.3 = tail call fast <4 x float > @llvm.fma.v4f32 (<4 x float > %r.2 , <4 x float > %s3 , <4 x float > %b )
585
+ ret <4 x float > %r.3
586
+ }
587
+
588
+ declare <2 x double > @llvm.fma.v2f64 (<2 x double >, <2 x double >, <2 x double >)
589
+
590
+ ; Test: two <2 x double> splat shuffles of %a (lane 0 and lane 1) are created
+ ; in the entry block. Each one is used as the second operand of a single
+ ; llvm.fma.v2f64 call, lane 0 in if.then and lane 1 in if.else.
+ ; NOTE(review): despite the "sink_" name, the CHECK lines below expect both
+ ; shuffles to remain in the entry block -- presumably a baseline recorded before
+ ; the sinking transform is enabled; confirm and regenerate the checks then.
+ define <2 x double > @sink_shufflevector_fma_v2f64 (i1 %c , <2 x double > %a , <2 x double > %b ) {
591
+ ; CHECK-LABEL: @sink_shufflevector_fma_v2f64(
592
+ ; CHECK-NEXT: entry:
593
+ ; CHECK-NEXT: [[S0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
594
+ ; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
595
+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
596
+ ; CHECK: if.then:
597
+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[S0]], <2 x double> [[B]])
598
+ ; CHECK-NEXT: ret <2 x double> [[R_0]]
599
+ ; CHECK: if.else:
600
+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[S1]], <2 x double> [[B]])
601
+ ; CHECK-NEXT: ret <2 x double> [[R_1]]
602
+ ;
603
+ entry:
604
+ %s0 = shufflevector <2 x double > %a , <2 x double > poison, <2 x i32 > zeroinitializer
605
+ %s1 = shufflevector <2 x double > %a , <2 x double > poison, <2 x i32 > <i32 1 , i32 1 >
606
+ br i1 %c , label %if.then , label %if.else
607
+
608
+ if.then:
609
+ %r.0 = tail call fast <2 x double > @llvm.fma.v2f64 (<2 x double > %b , <2 x double > %s0 , <2 x double > %b )
610
+ ret <2 x double > %r.0
611
+
612
+ if.else:
613
+ %r.1 = tail call fast <2 x double > @llvm.fma.v2f64 (<2 x double > %b , <2 x double > %s1 , <2 x double > %b )
614
+ ret <2 x double > %r.1
615
+ }
616
+
617
+ ; Negative test: a <4 x float> splat of lane 4 of the <8 x float> input %a is
+ ; created in the entry block and used only by the llvm.fma.v4f32 call in
+ ; if.then; if.else returns zeroinitializer without using it. The CHECK lines
+ ; expect the shuffle to stay in the entry block.
+ ; NOTE(review): "out of range" presumably refers to lane index 4 lying beyond
+ ; the 4-element result width -- confirm against the transform's lane check.
+ define <4 x float > @do_not_sink_out_of_range_shufflevector_fma_v4f32 (i1 %c , <8 x float > %a , <4 x float > %b ) {
618
+ ; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fma_v4f32(
619
+ ; CHECK-NEXT: entry:
620
+ ; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
621
+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
622
+ ; CHECK: if.then:
623
+ ; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S4]], <4 x float> [[B]])
624
+ ; CHECK-NEXT: ret <4 x float> [[R]]
625
+ ; CHECK: if.else:
626
+ ; CHECK-NEXT: ret <4 x float> zeroinitializer
627
+ ;
628
+ entry:
629
+ %s4 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 4 , i32 4 , i32 4 , i32 4 >
630
+ br i1 %c , label %if.then , label %if.else
631
+
632
+ if.then:
633
+ %r = tail call fast <4 x float > @llvm.fma.v4f32 (<4 x float > %b , <4 x float > %s4 , <4 x float > %b )
634
+ ret <4 x float > %r
635
+
636
+ if.else:
637
+ ret <4 x float > zeroinitializer
638
+ }
639
+
640
+ declare <5 x float > @llvm.fma.v5f32 (<5 x float >, <5 x float >, <5 x float >)
641
+
642
+ ; Negative test on a 5-element vector type: five <5 x float> shuffles of the
+ ; <8 x float> input %a are created in the entry block and used as the second
+ ; operand of llvm.fma.v5f32 calls (%s0-%s1 in if.then, %s2-%s4 in if.else).
+ ; %s0 and %s4 use uniform masks (all 0, all 4); %s1-%s3 repeat one lane four
+ ; times and end with lane 4, so they are not splats. The CHECK lines expect
+ ; all five shuffles to stay in the entry block.
+ define <5 x float > @do_not_sink_shufflevector_fma_v5f32 (i1 %c , <8 x float > %a , <5 x float > %b ) {
643
+ ; CHECK-LABEL: @do_not_sink_shufflevector_fma_v5f32(
644
+ ; CHECK-NEXT: entry:
645
+ ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> zeroinitializer
646
+ ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
647
+ ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
648
+ ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
649
+ ; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
650
+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
651
+ ; CHECK: if.then:
652
+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[S0]], <5 x float> [[B]])
653
+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]])
654
+ ; CHECK-NEXT: ret <5 x float> [[R_1]]
655
+ ; CHECK: if.else:
656
+ ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]])
657
+ ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]])
658
+ ; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[S4]], <5 x float> [[B]])
659
+ ; CHECK-NEXT: ret <5 x float> [[R_4]]
660
+ ;
661
+ entry:
662
+ %s0 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > zeroinitializer
663
+ %s1 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 4 >
664
+ %s2 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 4 >
665
+ %s3 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 , i32 4 >
666
+ %s4 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
667
+ br i1 %c , label %if.then , label %if.else
668
+
669
+ if.then:
670
+ %r.0 = tail call fast <5 x float > @llvm.fma.v5f32 (<5 x float > %b , <5 x float > %s0 , <5 x float > %b )
671
+ %r.1 = tail call fast <5 x float > @llvm.fma.v5f32 (<5 x float > %r.0 , <5 x float > %s1 , <5 x float > %b )
672
+ ret <5 x float > %r.1
673
+
674
+ if.else:
675
+ %r.2 = tail call fast <5 x float > @llvm.fma.v5f32 (<5 x float > %b , <5 x float > %s2 , <5 x float > %b )
676
+ %r.3 = tail call fast <5 x float > @llvm.fma.v5f32 (<5 x float > %r.2 , <5 x float > %s3 , <5 x float > %b )
677
+ %r.4 = tail call fast <5 x float > @llvm.fma.v5f32 (<5 x float > %r.3 , <5 x float > %s4 , <5 x float > %b )
678
+ ret <5 x float > %r.4
679
+ }
0 commit comments