diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-non-inc.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-non-inc.ll
index e87d6392c4c7b..287376dc319e8 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-non-inc.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-non-inc.ll
@@ -2,21 +2,22 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
 ; RUN: -mcpu=pwr9 < %s | FileCheck %s
 
-define dso_local void @test_no_inc(i32 signext %a) local_unnamed_addr nounwind align 2 {
+define dso_local void @test_no_inc(i32 signext %a, ptr %p) local_unnamed_addr nounwind align 2 {
 ; CHECK-LABEL: test_no_inc:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: srawi 4, 3, 31
+; CHECK-NEXT: srawi 5, 3, 31
 ; CHECK-NEXT: cmpwi 3, 0
 ; CHECK-NEXT: li 6, 1
 ; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: andc 4, 3, 4
-; CHECK-NEXT: addi 5, 4, 1
+; CHECK-NEXT: andc 5, 3, 5
+; CHECK-NEXT: add 4, 5, 4
+; CHECK-NEXT: addi 4, 4, 1
 ; CHECK-NEXT: b .LBB0_2
 ; CHECK-NEXT: .p2align 5
 ; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup
 ; CHECK-NEXT: #
-; CHECK-NEXT: stb 7, 0(5)
-; CHECK-NEXT: add 5, 5, 4
+; CHECK-NEXT: stb 7, 0(4)
+; CHECK-NEXT: add 4, 4, 5
 ; CHECK-NEXT: .LBB0_2: # %for.cond
 ; CHECK-NEXT: #
 ; CHECK-NEXT: bc 4, 1, .LBB0_1
@@ -38,7 +39,7 @@ for.body.preheader: ; preds = %for.cond
 
 for.cond.cleanup: ; preds = %for.body.preheader, %for.cond
   %g.1.lcssa = phi i32 [ %g.0, %for.cond ], [ %0, %for.body.preheader ]
-  %arrayidx5 = getelementptr inbounds i8, ptr null, i32 %g.1.lcssa
+  %arrayidx5 = getelementptr inbounds i8, ptr %p, i32 %g.1.lcssa
   store i8 0, ptr %arrayidx5, align 1
   br label %for.cond
 }
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
index f13ba7765e398..e99ea59bfacac 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -15,7 +15,7 @@
 ; bit of any CR field is spilled. We need to test the spilling of a CR bit
 ; other than the LT bit. Hence this test case is rather complex.
 
-define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
+define dso_local fastcc void @P10_Spill_CR_GT(ptr %p) unnamed_addr {
 ; CHECK-LABEL: P10_Spill_CR_GT:
 ; CHECK: # %bb.0: # %bb
 ; CHECK-NEXT: mfcr r12
@@ -25,32 +25,35 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-NEXT: stdu r1, -64(r1)
 ; CHECK-NEXT: .cfi_def_cfa_offset 64
 ; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r28, -32
 ; CHECK-NEXT: .cfi_offset r29, -24
 ; CHECK-NEXT: .cfi_offset r30, -16
 ; CHECK-NEXT: .cfi_offset cr2, 8
 ; CHECK-NEXT: .cfi_offset cr3, 8
 ; CHECK-NEXT: .cfi_offset cr4, 8
-; CHECK-NEXT: lwz r3, 0(r3)
-; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lwz r4, 0(r3)
 ; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill
+; CHECK-NEXT: addi r30, r3, -1
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: std r28, 32(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill
 ; CHECK-NEXT: crxor 4*cr2+eq, 4*cr2+eq, 4*cr2+eq
-; CHECK-NEXT: paddi r29, 0, .LJTI0_0@PCREL, 1
-; CHECK-NEXT: srwi r4, r3, 4
-; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: paddi r28, 0, .LJTI0_0@PCREL, 1
+; CHECK-NEXT: sldi r29, r3, 2
+; CHECK-NEXT: srwi r5, r4, 4
+; CHECK-NEXT: srwi r4, r4, 5
+; CHECK-NEXT: andi. r5, r5, 1
+; CHECK-NEXT: crmove 4*cr2+gt, gt
 ; CHECK-NEXT: andi. r4, r4, 1
 ; CHECK-NEXT: li r4, 0
-; CHECK-NEXT: crmove 4*cr2+gt, gt
-; CHECK-NEXT: andi. r3, r3, 1
-; CHECK-NEXT: li r3, 0
 ; CHECK-NEXT: crmove 4*cr2+lt, gt
-; CHECK-NEXT: sldi r30, r3, 2
 ; CHECK-NEXT: b .LBB0_2
 ; CHECK-NEXT: .LBB0_1: # %bb43
 ; CHECK-NEXT: #
 ; CHECK-NEXT: bl call_1@notoc
-; CHECK-NEXT: setnbc r3, 4*cr3+eq
-; CHECK-NEXT: li r4, 0
-; CHECK-NEXT: stb r4, 0(r3)
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: isel r4, r30, r3, 4*cr3+eq
+; CHECK-NEXT: stb r3, 0(r4)
 ; CHECK-NEXT: li r4, 0
 ; CHECK-NEXT: .p2align 4
 ; CHECK-NEXT: .LBB0_2: # %bb5
@@ -65,8 +68,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-NEXT: lwz r5, 0(r3)
 ; CHECK-NEXT: rlwinm r4, r5, 0, 21, 22
 ; CHECK-NEXT: cmpwi cr3, r4, 512
-; CHECK-NEXT: lwax r4, r29, r30
-; CHECK-NEXT: add r4, r29, r4
+; CHECK-NEXT: lwax r4, r28, r29
+; CHECK-NEXT: add r4, r28, r4
 ; CHECK-NEXT: mtctr r4
 ; CHECK-NEXT: li r4, 0
 ; CHECK-NEXT: bctr
@@ -177,6 +180,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-NEXT: .LBB0_31: # %bb9
 ; CHECK-NEXT: ld r30, 48(r1) # 8-byte Folded Reload
 ; CHECK-NEXT: ld r29, 40(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 32(r1) # 8-byte Folded Reload
 ; CHECK-NEXT: addi r1, r1, 64
 ; CHECK-NEXT: ld r0, 16(r1)
 ; CHECK-NEXT: lwz r12, 8(r1)
@@ -187,10 +191,10 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-NEXT: blr
 ; CHECK-NEXT: .LBB0_32: # %bb29
 ; CHECK-NEXT: crmove eq, 4*cr3+eq
+; CHECK-NEXT: li r29, 0
 ; CHECK-NEXT: cmpwi cr3, r5, 366
 ; CHECK-NEXT: cmpwi cr4, r3, 0
-; CHECK-NEXT: li r29, 0
-; CHECK-NEXT: setnbc r30, eq
+; CHECK-NEXT: iseleq r30, r30, r29
 ; CHECK-NEXT: bc 12, 4*cr2+lt, .LBB0_36
 ; CHECK-NEXT: .p2align 5
 ; CHECK-NEXT: .LBB0_33: # %bb36
@@ -216,34 +220,37 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-BE-NEXT: stdu r1, -144(r1)
 ; CHECK-BE-NEXT: .cfi_def_cfa_offset 144
 ; CHECK-BE-NEXT: .cfi_offset lr, 16
+; CHECK-BE-NEXT: .cfi_offset r28, -32
 ; CHECK-BE-NEXT: .cfi_offset r29, -24
 ; CHECK-BE-NEXT: .cfi_offset r30, -16
 ; CHECK-BE-NEXT: .cfi_offset cr2, 8
 ; CHECK-BE-NEXT: .cfi_offset cr2, 8
 ; CHECK-BE-NEXT: .cfi_offset cr2, 8
-; CHECK-BE-NEXT: lwz r3, 0(r3)
-; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: lwz r4, 0(r3)
 ; CHECK-BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: addi r30, r3, -1
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: std r28, 112(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT: crxor 4*cr2+eq, 4*cr2+eq, 4*cr2+eq
-; CHECK-BE-NEXT: srwi r4, r3, 4
-; CHECK-BE-NEXT: srwi r3, r3, 5
+; CHECK-BE-NEXT: sldi r29, r3, 2
+; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-BE-NEXT: ld r28, .LC0@toc@l(r3)
+; CHECK-BE-NEXT: srwi r5, r4, 4
+; CHECK-BE-NEXT: srwi r4, r4, 5
+; CHECK-BE-NEXT: andi. r5, r5, 1
+; CHECK-BE-NEXT: crmove 4*cr2+gt, gt
 ; CHECK-BE-NEXT: andi. r4, r4, 1
 ; CHECK-BE-NEXT: li r4, 0
-; CHECK-BE-NEXT: crmove 4*cr2+gt, gt
-; CHECK-BE-NEXT: andi. r3, r3, 1
-; CHECK-BE-NEXT: li r3, 0
 ; CHECK-BE-NEXT: crmove 4*cr2+lt, gt
-; CHECK-BE-NEXT: sldi r30, r3, 2
-; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
-; CHECK-BE-NEXT: ld r29, .LC0@toc@l(r3)
 ; CHECK-BE-NEXT: b .LBB0_2
 ; CHECK-BE-NEXT: .LBB0_1: # %bb43
 ; CHECK-BE-NEXT: #
 ; CHECK-BE-NEXT: bl call_1
 ; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: setnbc r3, 4*cr3+eq
-; CHECK-BE-NEXT: li r4, 0
-; CHECK-BE-NEXT: stb r4, 0(r3)
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: isel r4, r30, r3, 4*cr3+eq
+; CHECK-BE-NEXT: stb r3, 0(r4)
 ; CHECK-BE-NEXT: li r4, 0
 ; CHECK-BE-NEXT: .p2align 4
 ; CHECK-BE-NEXT: .LBB0_2: # %bb5
@@ -258,8 +265,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-BE-NEXT: lwz r5, 0(r3)
 ; CHECK-BE-NEXT: rlwinm r4, r5, 0, 21, 22
 ; CHECK-BE-NEXT: cmpwi cr3, r4, 512
-; CHECK-BE-NEXT: lwax r4, r29, r30
-; CHECK-BE-NEXT: add r4, r29, r4
+; CHECK-BE-NEXT: lwax r4, r28, r29
+; CHECK-BE-NEXT: add r4, r28, r4
 ; CHECK-BE-NEXT: mtctr r4
 ; CHECK-BE-NEXT: li r4, 0
 ; CHECK-BE-NEXT: bctr
@@ -370,6 +377,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-BE-NEXT: .LBB0_31: # %bb9
 ; CHECK-BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
 ; CHECK-BE-NEXT: ld r29, 120(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: ld r28, 112(r1) # 8-byte Folded Reload
 ; CHECK-BE-NEXT: addi r1, r1, 144
 ; CHECK-BE-NEXT: ld r0, 16(r1)
 ; CHECK-BE-NEXT: lwz r12, 8(r1)
@@ -380,10 +388,10 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-BE-NEXT: blr
 ; CHECK-BE-NEXT: .LBB0_32: # %bb29
 ; CHECK-BE-NEXT: crmove eq, 4*cr3+eq
+; CHECK-BE-NEXT: li r29, 0
 ; CHECK-BE-NEXT: cmpwi cr3, r5, 366
 ; CHECK-BE-NEXT: cmpwi cr4, r3, 0
-; CHECK-BE-NEXT: li r29, 0
-; CHECK-BE-NEXT: setnbc r30, eq
+; CHECK-BE-NEXT: iseleq r30, r30, r29
 ; CHECK-BE-NEXT: bc 12, 4*cr2+lt, .LBB0_36
 ; CHECK-BE-NEXT: .p2align 4
 ; CHECK-BE-NEXT: .LBB0_33: # %bb36
@@ -528,7 +536,7 @@ bb32: ; preds = %bb40, %bb29
   br i1 %tmp7, label %bb33, label %bb36
 
 bb33: ; preds = %bb32
-  %tmp34 = getelementptr inbounds i8, ptr null, i64 -1
+  %tmp34 = getelementptr inbounds i8, ptr %p, i64 -1
   %tmp35 = select i1 %tmp12, ptr %tmp34, ptr null
   store i8 0, ptr %tmp35, align 1
   br label %bb36
@@ -558,7 +566,7 @@ bb42: ; preds = %bb42, %bb41
 
 bb43: ; preds = %bb10, %bb10
   call void @call_1()
-  %tmp44 = getelementptr inbounds i8, ptr null, i64 -1
+  %tmp44 = getelementptr inbounds i8, ptr %p, i64 -1
   %tmp45 = select i1 %tmp12, ptr %tmp44, ptr null
   store i8 0, ptr %tmp45, align 1
   br label %bb63
diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
index 334379cda07ec..9bf619983ee78 100644
--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
@@ -4,78 +4,77 @@
 
 @.str.28 = external unnamed_addr constant [69 x i8], align 1
 
-define void @print_res() nounwind {
+define void @print_res(ptr %p) nounwind {
 ; CHECK-LABEL: print_res:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: addi 3, 3, -1
-; CHECK-NEXT: clrldi 4, 3, 32
-; CHECK-NEXT: cmplwi 3, 3
-; CHECK-NEXT: li 3, 3
-; CHECK-NEXT: isellt 3, 4, 3
-; CHECK-NEXT: li 4, 1
-; CHECK-NEXT: cmpldi 3, 1
-; CHECK-NEXT: iselgt 3, 3, 4
-; CHECK-NEXT: li 4, 0
-; CHECK-NEXT: mtctr 3
-; CHECK-NEXT: stdu 1, -128(1)
+; CHECK-NEXT: lwz 4, 0(3)
+; CHECK-NEXT: addi 4, 4, -1
+; CHECK-NEXT: clrldi 5, 4, 32
+; CHECK-NEXT: cmplwi 4, 3
+; CHECK-NEXT: li 4, 3
+; CHECK-NEXT: isellt 4, 5, 4
+; CHECK-NEXT: li 5, 1
+; CHECK-NEXT: cmpldi 4, 1
+; CHECK-NEXT: iselgt 4, 4, 5
 ; CHECK-NEXT: li 5, 0
-; CHECK-NEXT: std 0, 144(1)
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: li 7, -1
-; CHECK-NEXT: lbz 5, 0(5)
+; CHECK-NEXT: mtctr 4
+; CHECK-NEXT: li 8, -1
+; CHECK-NEXT: lbz 6, 0(3)
+; CHECK-NEXT: li 4, 1
 ; CHECK-NEXT: bdz .LBB0_6
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: xori 6, 5, 84
-; CHECK-NEXT: clrldi 5, 7, 32
-; CHECK-NEXT: addi 3, 3, 1
-; CHECK-NEXT: addi 8, 7, -1
-; CHECK-NEXT: lbz 5, 0(5)
+; CHECK-NEXT: xori 7, 6, 84
+; CHECK-NEXT: clrldi 6, 8, 32
+; CHECK-NEXT: addi 4, 4, 1
+; CHECK-NEXT: addi 9, 8, -1
+; CHECK-NEXT: lbzx 6, 3, 6
 ; CHECK-NEXT: bdz .LBB0_5
 ; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: addi 3, 3, 1
-; CHECK-NEXT: srwi 7, 6, 5
-; CHECK-NEXT: xori 6, 5, 84
-; CHECK-NEXT: clrldi 5, 8, 32
-; CHECK-NEXT: addi 8, 8, -1
-; CHECK-NEXT: lbz 5, 0(5)
+; CHECK-NEXT: cntlzw 7, 7
+; CHECK-NEXT: addi 4, 4, 1
+; CHECK-NEXT: srwi 8, 7, 5
+; CHECK-NEXT: xori 7, 6, 84
+; CHECK-NEXT: clrldi 6, 9, 32
+; CHECK-NEXT: addi 9, 9, -1
+; CHECK-NEXT: lbzx 6, 3, 6
 ; CHECK-NEXT: bdz .LBB0_4
 ; CHECK-NEXT: .p2align 4
 ; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: clrldi 10, 8, 32
-; CHECK-NEXT: cntlzw 9, 6
-; CHECK-NEXT: xori 6, 5, 84
-; CHECK-NEXT: addi 8, 8, -1
-; CHECK-NEXT: lbz 5, 0(10)
-; CHECK-NEXT: addi 3, 3, 1
-; CHECK-NEXT: add 4, 4, 7
-; CHECK-NEXT: srwi 7, 9, 5
+; CHECK-NEXT: clrldi 11, 9, 32
+; CHECK-NEXT: cntlzw 10, 7
+; CHECK-NEXT: xori 7, 6, 84
+; CHECK-NEXT: addi 9, 9, -1
+; CHECK-NEXT: lbzx 6, 3, 11
+; CHECK-NEXT: addi 4, 4, 1
+; CHECK-NEXT: add 5, 5, 8
+; CHECK-NEXT: srwi 8, 10, 5
 ; CHECK-NEXT: bdnz .LBB0_3
 ; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: add 4, 4, 7
+; CHECK-NEXT: add 5, 5, 8
 ; CHECK-NEXT: .LBB0_5:
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: srwi 6, 6, 5
-; CHECK-NEXT: add 4, 4, 6
+; CHECK-NEXT: cntlzw 3, 7
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: add 5, 5, 3
 ; CHECK-NEXT: .LBB0_6:
-; CHECK-NEXT: xori 5, 5, 84
-; CHECK-NEXT: clrldi 3, 3, 32
-; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: li 8, 3
-; CHECK-NEXT: std 3, 104(1)
-; CHECK-NEXT: cntlzw 5, 5
+; CHECK-NEXT: xori 3, 6, 84
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: add 3, 5, 3
+; CHECK-NEXT: stdu 1, -128(1)
+; CHECK-NEXT: clrldi 6, 3, 32
 ; CHECK-NEXT: addis 3, 2, .LC0@toc@ha
-; CHECK-NEXT: li 10, 0
-; CHECK-NEXT: ld 3, .LC0@toc@l(3)
-; CHECK-NEXT: srwi 5, 5, 5
-; CHECK-NEXT: add 4, 4, 5
 ; CHECK-NEXT: li 5, 0
+; CHECK-NEXT: std 0, 144(1)
+; CHECK-NEXT: ld 3, .LC0@toc@l(3)
 ; CHECK-NEXT: std 5, 120(1)
 ; CHECK-NEXT: li 5, 3
-; CHECK-NEXT: clrldi 6, 4, 32
+; CHECK-NEXT: clrldi 4, 4, 32
+; CHECK-NEXT: std 4, 104(1)
 ; CHECK-NEXT: li 4, 3
+; CHECK-NEXT: li 7, 0
+; CHECK-NEXT: li 8, 3
+; CHECK-NEXT: li 10, 0
 ; CHECK-NEXT: std 5, 96(1)
 ; CHECK-NEXT: li 5, 0
 ; CHECK-NEXT: bl printf
@@ -92,7 +91,7 @@ define void @print_res() nounwind {
   %8 = trunc i64 %6 to i32
   %9 = sub i32 0, %8
   %10 = zext i32 %9 to i64
-  %11 = getelementptr inbounds i8, ptr null, i64 %10
+  %11 = getelementptr inbounds i8, ptr %p, i64 %10
   %12 = load i8, ptr %11, align 1
   %13 = icmp eq i8 %12, 84
   %14 = zext i1 %13 to i32
diff --git a/llvm/test/CodeGen/PowerPC/sms-phi.ll b/llvm/test/CodeGen/PowerPC/sms-phi.ll
index 53a3f13c0597d..5d71423dc781e 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
 ; RUN: -mcpu=pwr9 --ppc-enable-pipeliner -debug-only=pipeliner 2>&1 \
 ; RUN: >/dev/null | FileCheck %s
-define dso_local void @sha512() #0 {
+define dso_local void @sha512(ptr %p) #0 {
 ;CHECK: prolog:
 ;CHECK: %{{[0-9]+}}:g8rc = ADD8 %{{[0-9]+}}:g8rc, %{{[0-9]+}}:g8rc
 ;CHECK: epilog:
@@ -15,7 +15,7 @@ define dso_local void @sha512() #0 {
   %2 = phi i64 [ 0, %0 ], [ %12, %1 ]
   %3 = phi i64 [ undef, %0 ], [ %11, %1 ]
   %4 = phi i64 [ undef, %0 ], [ %3, %1 ]
-  %5 = getelementptr inbounds [80 x i64], ptr null, i64 0, i64 %2
+  %5 = getelementptr inbounds [80 x i64], ptr %p, i64 0, i64 %2
   %6 = load i64, ptr %5, align 8
   %7 = add i64 0, %6
   %8 = and i64 %3, %4
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll
index bf9cf59aedd1d..12d4e285cdfc2 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll
@@ -7,104 +7,107 @@
 define dso_local void @test(ptr %arg) #0 {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I19:%.*]] = getelementptr i8, ptr [[ARG:%.*]], i64 -32
+; CHECK-NEXT: [[I21:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 48
+; CHECK-NEXT: [[I21_REPACK1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 64
 ; CHECK-NEXT: br label [[BB16:%.*]]
 ; CHECK: bb16:
-; CHECK-NEXT: [[I20:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: [[I20:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20]])
 ; CHECK-NEXT: [[I24_ELT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_1]])
 ; CHECK-NEXT: [[I24_ELT_1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_1]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_1]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_1]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_1]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_1]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_1]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_2]])
 ; CHECK-NEXT: [[I24_ELT_2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_2]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_2]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_2]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_2]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_2]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_2]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_3:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_3]])
 ; CHECK-NEXT: [[I24_ELT_3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_3]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_3]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_3]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_3]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_3]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_3]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_4:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_4]])
 ; CHECK-NEXT: [[I24_ELT_4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_4]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_4]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_4]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_4]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_4]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_5:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_4]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_5:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_5:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_5]])
 ; CHECK-NEXT: [[I24_ELT_5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_5]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_5]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_5]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_5]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_5]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_6:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_5]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_6:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_6:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_6]])
 ; CHECK-NEXT: [[I24_ELT_6:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_6]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_6]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_6]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_6:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_6]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_6]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_7:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_6]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_7:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_7:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_7]])
 ; CHECK-NEXT: [[I24_ELT_7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_7]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_7]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_7]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_7]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_7]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_8:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_7]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_8:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_8:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_8]])
 ; CHECK-NEXT: [[I24_ELT_8:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_8]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_8]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_8]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_8:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_8]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_8]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_9:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_8]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_9:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_9:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_9]])
 ; CHECK-NEXT: [[I24_ELT_9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_9]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_9]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_9]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_9]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_9]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_10:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_9]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_10:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_10:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_10]])
 ; CHECK-NEXT: [[I24_ELT_10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_10]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_10]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_10]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_10]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_10]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_11:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_10]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_11:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_11:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_11]])
 ; CHECK-NEXT: [[I24_ELT_11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_11]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_11]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_11]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_11]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_11]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_12:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_11]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_12:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_12:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_12]])
 ; CHECK-NEXT: [[I24_ELT_12:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_12]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_12]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_12]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_12:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_12]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_12]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_13:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_12]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_13:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_13:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_13]])
 ; CHECK-NEXT: [[I24_ELT_13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_13]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_13]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_13]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_13]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_13]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_14:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_13]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_14:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_14:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_14]])
 ; CHECK-NEXT: [[I24_ELT_14:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_14]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_14]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_14]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_14:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_14]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_14]], ptr inttoptr (i64 64 to ptr), align 64
-; CHECK-NEXT: [[I20_15:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull inttoptr (i64 -32 to ptr))
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_14]], ptr [[I21_REPACK1]], align 16
+; CHECK-NEXT: [[I20_15:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[I19]])
 ; CHECK-NEXT: [[I24_15:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_15]])
 ; CHECK-NEXT: [[I24_ELT_15:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_15]], 0
-; CHECK-NEXT: store <16 x i8> [[I24_ELT_15]], ptr inttoptr (i64 48 to ptr), align 16
+; CHECK-NEXT: store <16 x i8> [[I24_ELT_15]], ptr [[I21]], align 16
 ; CHECK-NEXT: [[I24_ELT1_15:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_15]], 1
-; CHECK-NEXT: store <16 x i8> [[I24_ELT1_15]], ptr inttoptr (i64 64 to ptr), align 64
+; CHECK-NEXT: store <16 x i8> [[I24_ELT1_15]], ptr [[I21_REPACK1]], align 16
 ; CHECK-NEXT: br label [[BB16]], !llvm.loop [[LOOP0:![0-9]+]]
 ;
 bb:
@@ -131,10 +134,10 @@ bb16: ; preds = %bb16, %bb
   %i17 = load i64, ptr %i5, align 8
   %i18 = icmp sge i64 %i17, 1
-  %i19 = getelementptr i8, ptr null, i64 -32
+  %i19 = getelementptr i8, ptr %arg, i64 -32
   %i20 = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i19)
   store <256 x i1> %i20, ptr %i7, align 32
-  %i21 = getelementptr inbounds i8, ptr null, i64 48
+  %i21 = getelementptr inbounds i8, ptr %arg, i64 48
   %i23 = load <256 x i1>, ptr %i7, align 32
   %i24 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i23)
   store { <16 x i8>, <16 x i8> } %i24, ptr %i21, align 16