; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v < %s -S | FileCheck %s

; Make sure we don't duplicate the safe divisor cost in the VPlan cost model.

define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) {
; CHECK-LABEL: define void @pr154103(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[C]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP0]], splat (i64 7)
; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 1), [[TMP1]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ -7905747460161236406, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP2]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 7, [[TMP3]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <vscale x 4 x i32> [[TMP5]], [[BROADCAST_SPLAT6]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP7]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> [[TMP8]], <vscale x 4 x i64> splat (i64 1), i32 [[TMP2]])
; CHECK-NEXT:    [[TMP10:%.*]] = sdiv <vscale x 4 x i64> zeroinitializer, [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt <vscale x 4 x i64> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 4 x i1> [[TMP11]], i32 [[TMP2]])
; CHECK-NEXT:    [[TMP13:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_GATHER7]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i64> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i64> [[TMP14]], <vscale x 4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[PREDPHI]] to <vscale x 4 x i16>
; CHECK-NEXT:    call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP15]], <vscale x 4 x ptr> align 2 [[BROADCAST_SPLAT2]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    store i32 0, ptr [[D]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], -7905747460161236406
; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br label %[[EXIT:.*]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 1, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[X]] to i64
; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 0, [[CONV]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[DIV]], 0
; CHECK-NEXT:    br i1 [[CMP]], label %[[THEN:.*]], label %[[LATCH]]
; CHECK:       [[THEN]]:
; CHECK-NEXT:    [[Y:%.*]] = load i8, ptr [[B]], align 1
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[Y]] to i64
; CHECK-NEXT:    [[NOT:%.*]] = xor i64 [[ZEXT]], 0
; CHECK-NEXT:    br label %[[LATCH]]
; CHECK:       [[LATCH]]:
; CHECK-NEXT:    [[COND:%.*]] = phi i64 [ [[NOT]], %[[THEN]] ], [ 0, %[[LOOP]] ]
; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[COND]] to i16
; CHECK-NEXT:    store i16 [[TRUNC]], ptr [[C]], align 2
; CHECK-NEXT:    store i32 0, ptr [[D]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 7
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 1, %entry ], [ %iv.next, %latch ]
  %gep = getelementptr i8, ptr %a, i64 %iv
  %x = load i8, ptr %gep, align 1
  %conv = zext i8 %x to i64
  ; sdiv needs a safe-divisor select (vp.merge) when predicated; this is the
  ; operation whose cost must not be double-counted in the VPlan cost model.
  %div = sdiv i64 0, %conv
  %cmp = icmp sgt i64 %div, 0
  br i1 %cmp, label %then, label %latch

then:
  %y = load i8, ptr %b
  %zext = zext i8 %y to i64
  %not = xor i64 %zext, 0
  br label %latch

latch:
  %cond = phi i64 [ %not, %then ], [ 0, %loop ]
  %trunc = trunc i64 %cond to i16
  store i16 %trunc, ptr %c
  store i32 0, ptr %d
  %iv.next = add i64 %iv, 7
  %done = icmp eq i64 %iv, 0
  br i1 %done, label %exit, label %loop

exit:
  ret void
}