Skip to content

Commit 8661725

Browse files
committed
[AArch64] Add tests with free shuffles for indexed fma variants.
The new tests contain examples where shuffles are free, because indexed fma instructions can be used.
1 parent 84acdd3 commit 8661725

File tree

1 file changed

+183
-0
lines changed

1 file changed

+183
-0
lines changed

llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,3 +494,186 @@ if.else:
494494
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
495495
ret <8 x i16> %vmull1
496496
}
497+
498+
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
499+
500+
define <8 x half> @sink_shufflevector_fma_v8f16(i1 %c, <8 x half> %a, <8 x half> %b) {
501+
; CHECK-LABEL: @sink_shufflevector_fma_v8f16(
502+
; CHECK-NEXT: entry:
503+
; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
504+
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
505+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
506+
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
507+
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
508+
; CHECK-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
509+
; CHECK-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
510+
; CHECK-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
511+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
512+
; CHECK: if.then:
513+
; CHECK-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
514+
; CHECK-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
515+
; CHECK-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
516+
; CHECK-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
517+
; CHECK-NEXT: ret <8 x half> [[R_3]]
518+
; CHECK: if.else:
519+
; CHECK-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
520+
; CHECK-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
521+
; CHECK-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
522+
; CHECK-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
523+
; CHECK-NEXT: ret <8 x half> [[R_7]]
524+
;
525+
entry:
526+
%s0 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer
527+
%s1 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
528+
%s2 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
529+
%s3 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
530+
%s4 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
531+
%s5 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
532+
%s6 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
533+
%s7 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
534+
br i1 %c, label %if.then, label %if.else
535+
536+
if.then:
537+
%r.0 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %s0, <8 x half> %b)
538+
%r.1 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.0, <8 x half> %s1, <8 x half> %b)
539+
%r.2 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.1, <8 x half> %s2, <8 x half> %b)
540+
%r.3 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.2, <8 x half> %s3, <8 x half> %b)
541+
ret <8 x half> %r.3
542+
543+
if.else:
544+
%r.4 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %s4, <8 x half> %b)
545+
%r.5 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.4, <8 x half> %s5, <8 x half> %b)
546+
%r.6 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.5, <8 x half> %s6, <8 x half> %b)
547+
%r.7 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.6, <8 x half> %s7, <8 x half> %b)
548+
ret <8 x half> %r.7
549+
}
550+
551+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
552+
553+
define <4 x float> @sink_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
554+
; CHECK-LABEL: @sink_shufflevector_fma_v4f32(
555+
; CHECK-NEXT: entry:
556+
; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
557+
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
558+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
559+
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
560+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
561+
; CHECK: if.then:
562+
; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S0]], <4 x float> [[B]])
563+
; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[S1]], <4 x float> [[B]])
564+
; CHECK-NEXT: ret <4 x float> [[R_1]]
565+
; CHECK: if.else:
566+
; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[S2]], <4 x float> [[B]])
567+
; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[S3]], <4 x float> [[B]])
568+
; CHECK-NEXT: ret <4 x float> [[R_3]]
569+
;
570+
entry:
571+
%s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer
572+
%s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
573+
%s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
574+
%s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
575+
br i1 %c, label %if.then, label %if.else
576+
577+
if.then:
578+
%r.0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s0, <4 x float> %b)
579+
%r.1 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %r.0, <4 x float> %s1, <4 x float> %b)
580+
ret <4 x float> %r.1
581+
582+
if.else:
583+
%r.2 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s2, <4 x float> %b)
584+
%r.3 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %r.2, <4 x float> %s3, <4 x float> %b)
585+
ret <4 x float> %r.3
586+
}
587+
588+
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
589+
590+
define <2 x double> @sink_shufflevector_fma_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) {
591+
; CHECK-LABEL: @sink_shufflevector_fma_v2f64(
592+
; CHECK-NEXT: entry:
593+
; CHECK-NEXT: [[S0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
594+
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
595+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
596+
; CHECK: if.then:
597+
; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[S0]], <2 x double> [[B]])
598+
; CHECK-NEXT: ret <2 x double> [[R_0]]
599+
; CHECK: if.else:
600+
; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[S1]], <2 x double> [[B]])
601+
; CHECK-NEXT: ret <2 x double> [[R_1]]
602+
;
603+
entry:
604+
%s0 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer
605+
%s1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 1>
606+
br i1 %c, label %if.then, label %if.else
607+
608+
if.then:
609+
%r.0 = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %s0, <2 x double> %b)
610+
ret <2 x double> %r.0
611+
612+
if.else:
613+
%r.1 = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %s1, <2 x double> %b)
614+
ret <2 x double> %r.1
615+
}
616+
617+
define <4 x float> @do_not_sink_out_of_range_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
618+
; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fma_v4f32(
619+
; CHECK-NEXT: entry:
620+
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
621+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
622+
; CHECK: if.then:
623+
; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S4]], <4 x float> [[B]])
624+
; CHECK-NEXT: ret <4 x float> [[R]]
625+
; CHECK: if.else:
626+
; CHECK-NEXT: ret <4 x float> zeroinitializer
627+
;
628+
entry:
629+
%s4 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
630+
br i1 %c, label %if.then, label %if.else
631+
632+
if.then:
633+
%r = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s4, <4 x float> %b)
634+
ret <4 x float> %r
635+
636+
if.else:
637+
ret <4 x float> zeroinitializer
638+
}
639+
640+
declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>)
641+
642+
define <5 x float> @do_not_sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) {
643+
; CHECK-LABEL: @do_not_sink_shufflevector_fma_v5f32(
644+
; CHECK-NEXT: entry:
645+
; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> zeroinitializer
646+
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
647+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
648+
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
649+
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
650+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
651+
; CHECK: if.then:
652+
; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[S0]], <5 x float> [[B]])
653+
; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]])
654+
; CHECK-NEXT: ret <5 x float> [[R_1]]
655+
; CHECK: if.else:
656+
; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]])
657+
; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]])
658+
; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[S4]], <5 x float> [[B]])
659+
; CHECK-NEXT: ret <5 x float> [[R_4]]
660+
;
661+
entry:
662+
%s0 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> zeroinitializer
663+
%s1 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
664+
%s2 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
665+
%s3 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
666+
%s4 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
667+
br i1 %c, label %if.then, label %if.else
668+
669+
if.then:
670+
%r.0 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %b, <5 x float> %s0, <5 x float> %b)
671+
%r.1 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.0, <5 x float> %s1, <5 x float> %b)
672+
ret <5 x float> %r.1
673+
674+
if.else:
675+
%r.2 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %b, <5 x float> %s2, <5 x float> %b)
676+
%r.3 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.2, <5 x float> %s3, <5 x float> %b)
677+
%r.4 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.3, <5 x float> %s4, <5 x float> %b)
678+
ret <5 x float> %r.4
679+
}

0 commit comments

Comments
 (0)