diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 88b7e6d6585f7..93b8c16a46d3c 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -106,6 +106,8 @@ Changes to the MIPS Backend Changes to the PowerPC Backend ------------------------------ +* `half` now uses a soft float ABI, which works correctly in more cases. + Changes to the RISC-V Backend ----------------------------- diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 2cad36eff9c88..f84e6c8291cce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -20,6 +20,7 @@ #include "LegalizeTypes.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -3729,10 +3730,20 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo); break; case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::LLRINT: + case ISD::LLROUND: + case ISD::LRINT: + case ISD::LROUND: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break; + case ISD::STRICT_LLRINT: + case ISD::STRICT_LLROUND: + case ISD::STRICT_LRINT: + case ISD::STRICT_LROUND: + Res = SoftPromoteHalfOp_UnaryOp(N); + break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break; @@ -3811,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op); } -SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_UnaryOp(SDNode *N) { EVT RVT = 
N->getValueType(0); bool IsStrict = N->isStrictFPOpcode(); SDValue Op = N->getOperand(IsStrict ? 1 : 0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 63544e63e1da1..8eb3cec8bc87a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -840,7 +840,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N); - SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N); + SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N); SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N); SDValue SoftPromoteHalfOp_SETCC(SDNode *N); SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 9755f0e272d16..fedeb41dc5d37 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -801,6 +801,8 @@ namespace llvm { bool useSoftFloat() const override; + bool softPromoteHalfType() const override { return true; } + bool hasSPE() const; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll index 9aa95112af533..848b14e48f2d1 100644 --- a/llvm/test/CodeGen/ARM/lrint-conv.ll +++ b/llvm/test/CodeGen/ARM/lrint-conv.ll @@ -1,12 +1,15 @@ ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP -; FIXME: crash -; define i32 @testmswh_builtin(half %x) { -; entry: -; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) -; ret i32 %0 -; } +; SOFTFP-LABEL: testmswh_builtin: +; SOFTFP: bl lrintf +; HARDFP-LABEL: testmswh_builtin: +; HARDFP: bl lrintf +define i32 
@testmswh_builtin(half %x) { +entry: + %0 = tail call i32 @llvm.lrint.i32.f16(half %x) + ret i32 %0 +} ; SOFTFP-LABEL: testmsws_builtin: ; SOFTFP: bl lrintf diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll index f4ea5b5b30621..9249343cb67b0 100644 --- a/llvm/test/CodeGen/Generic/half.ll +++ b/llvm/test/CodeGen/Generic/half.ll @@ -30,9 +30,9 @@ ; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %} -; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %} -; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %} -; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %} +; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %} diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll 
b/llvm/test/CodeGen/LoongArch/lrint-conv.ll index 85de820025614..262d1c16a6486 100644 --- a/llvm/test/CodeGen/LoongArch/lrint-conv.ll +++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll @@ -5,16 +5,31 @@ ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64 -; FIXME: crash -; define ITy @test_lrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) -; ret ITy %res -; } +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; LA32-LABEL: test_lrint_ixx_f16: +; LA32: bl lrintf +; +; LA64-I32-LABEL: test_lrint_ixx_f16: +; LA64-I32: pcaddu18i $ra, %call36(lrintf) +; +; LA64-I64-LABEL: test_lrint_ixx_f16: +; LA64-I64: pcaddu18i $t8, %call36(lrintf) + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret ITy %res +} -; define ITy @test_llrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) -; ret ITy %res -; } +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; LA32-LABEL: test_llrint_ixx_f16: +; LA32: bl llrintf +; +; LA64-I32-LABEL: test_llrint_ixx_f16: +; LA64-I32: pcaddu18i $ra, %call36(llrintf) +; +; LA64-I64-LABEL: test_llrint_ixx_f16: +; LA64-I64: pcaddu18i $t8, %call36(llrintf) + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} define ITy @test_lrint_ixx_f32(float %x) nounwind { ; LA32-LABEL: test_lrint_ixx_f32: diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll index 592d40c0f65aa..8eaef5d4135bb 100644 --- a/llvm/test/CodeGen/Mips/llrint-conv.ll +++ b/llvm/test/CodeGen/Mips/llrint-conv.ll @@ -1,19 +1,18 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s ; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s -; FIXME: crash -; define signext i32 @testmswh(half %x) { -; entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; %conv = trunc i64 %0 to i32 -; ret i32 %conv -; 
} +define signext i32 @testmswh(half %x) { +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} -; define i64 @testmsxh(half %x) { -; entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @testmsxh(half %x) { +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + ret i64 %0 +} define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll index 6d2e392675f1c..64c5cb9ac5b07 100644 --- a/llvm/test/CodeGen/Mips/lrint-conv.ll +++ b/llvm/test/CodeGen/Mips/lrint-conv.ll @@ -1,19 +1,22 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s ; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s -; FIXME: crash -; define signext i32 @testmswh(half %x) { -; entry: -; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) -; %conv = trunc i64 %0 to i32 -; ret i32 %conv -; } +define signext i32 @testmswh(half %x) { +; CHECK-LABEL: testmswh: +; CHECK: jal lrintf +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} -; define i64 @testmsxh(half %x) { -; entry: -; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @testmsxh(half %x) { +; CHECK-LABEL: testmsxh: +; CHECK: jal lrintf +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + ret i64 %0 +} define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index 40786057ead5f..1aaf0310879d7 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) { ; Atomic CmpXchg define i8 @cas_strong_i8_sc_sc(ptr %mem) { ; PPC32-LABEL: cas_strong_i8_sc_sc: -; PPC32: # %bb.0: +; PPC32: # %bb.0: # %cmpxchg.start ; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 ; PPC32-NEXT: 
lwarx r4, 0, r5 -; PPC32-NEXT: not r3, r3 +; PPC32-NEXT: not r3, r3 ; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28 ; PPC32-NEXT: srw r6, r4, r3 ; PPC32-NEXT: andi. r6, r6, 255 -; PPC32-NEXT: bne cr0, .LBB8_4 -; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore +; PPC32-NEXT: bne cr0, .LBB8_4 +; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC32-NEXT: li r6, 255 ; PPC32-NEXT: li r7, 1 ; PPC32-NEXT: slw r6, r6, r3 -; PPC32-NEXT: not r6, r6 +; PPC32-NEXT: not r6, r6 ; PPC32-NEXT: slw r7, r7, r3 ; PPC32-NEXT: sync -; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore -; PPC32-NEXT: # =>This Inner Loop Header: Depth=1 +; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore +; PPC32-NEXT: # ; PPC32-NEXT: and r8, r4, r6 ; PPC32-NEXT: or r8, r8, r7 ; PPC32-NEXT: stwcx. r8, 0, r5 -; PPC32-NEXT: beq cr0, .LBB8_4 -; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload -; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1 +; PPC32-NEXT: beq cr0, .LBB8_4 +; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload +; PPC32-NEXT: # ; PPC32-NEXT: lwarx r4, 0, r5 ; PPC32-NEXT: srw r8, r4, r3 ; PPC32-NEXT: andi. r8, r8, 255 -; PPC32-NEXT: beq cr0, .LBB8_2 -; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore +; PPC32-NEXT: beq cr0, .LBB8_2 +; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore ; PPC32-NEXT: srw r3, r4, r3 ; PPC32-NEXT: lwsync ; PPC32-NEXT: blr ; ; PPC64-LABEL: cas_strong_i8_sc_sc: -; PPC64: # %bb.0: +; PPC64: # %bb.0: # %cmpxchg.start ; PPC64-NEXT: rldicr r5, r3, 0, 61 -; PPC64-NEXT: not r3, r3 +; PPC64-NEXT: not r3, r3 ; PPC64-NEXT: lwarx r4, 0, r5 ; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28 ; PPC64-NEXT: srw r6, r4, r3 ; PPC64-NEXT: andi. 
r6, r6, 255 -; PPC64-NEXT: bne cr0, .LBB8_4 -; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore +; PPC64-NEXT: bne cr0, .LBB8_4 +; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64-NEXT: li r6, 255 ; PPC64-NEXT: li r7, 1 ; PPC64-NEXT: slw r6, r6, r3 -; PPC64-NEXT: not r6, r6 +; PPC64-NEXT: not r6, r6 ; PPC64-NEXT: slw r7, r7, r3 ; PPC64-NEXT: sync -; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore -; PPC64-NEXT: # =>This Inner Loop Header: Depth=1 +; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore +; PPC64-NEXT: # ; PPC64-NEXT: and r8, r4, r6 ; PPC64-NEXT: or r8, r8, r7 ; PPC64-NEXT: stwcx. r8, 0, r5 -; PPC64-NEXT: beq cr0, .LBB8_4 -; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload -; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1 +; PPC64-NEXT: beq cr0, .LBB8_4 +; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload +; PPC64-NEXT: # ; PPC64-NEXT: lwarx r4, 0, r5 ; PPC64-NEXT: srw r8, r4, r3 ; PPC64-NEXT: andi. r8, r8, 255 -; PPC64-NEXT: beq cr0, .LBB8_2 -; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore +; PPC64-NEXT: beq cr0, .LBB8_2 +; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore ; PPC64-NEXT: srw r3, r4, r3 ; PPC64-NEXT: lwsync ; PPC64-NEXT: blr @@ -208,24 +208,24 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) { } define i16 @cas_weak_i16_acquire_acquire(ptr %mem) { ; PPC32-LABEL: cas_weak_i16_acquire_acquire: -; PPC32: # %bb.0: +; PPC32: # %bb.0: # %cmpxchg.start ; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29 ; PPC32-NEXT: lwarx r5, 0, r4 -; PPC32-NEXT: clrlwi r3, r3, 30 +; PPC32-NEXT: clrlwi r3, r3, 30 ; PPC32-NEXT: xori r3, r3, 2 ; PPC32-NEXT: slwi r6, r3, 3 ; PPC32-NEXT: srw r3, r5, r6 ; PPC32-NEXT: andi. 
r7, r3, 65535 -; PPC32-NEXT: beq cr0, .LBB9_2 -; PPC32-NEXT: # %bb.1: # %cmpxchg.failure +; PPC32-NEXT: beq cr0, .LBB9_2 +; PPC32-NEXT: # %bb.1: # %cmpxchg.failure ; PPC32-NEXT: lwsync ; PPC32-NEXT: blr -; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore +; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore ; PPC32-NEXT: lis r7, 0 ; PPC32-NEXT: ori r7, r7, 65535 ; PPC32-NEXT: slw r7, r7, r6 ; PPC32-NEXT: li r8, 1 -; PPC32-NEXT: not r7, r7 +; PPC32-NEXT: not r7, r7 ; PPC32-NEXT: slw r6, r8, r6 ; PPC32-NEXT: and r5, r5, r7 ; PPC32-NEXT: or r5, r5, r6 @@ -234,24 +234,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) { ; PPC32-NEXT: blr ; ; PPC64-LABEL: cas_weak_i16_acquire_acquire: -; PPC64: # %bb.0: -; PPC64-NEXT: rldicr r4, r3, 0, 61 -; PPC64-NEXT: clrlwi r3, r3, 30 +; PPC64: # %bb.0: # %cmpxchg.start +; PPC64-NEXT: rldicr r4, r3, 0, 61 +; PPC64-NEXT: clrlwi r3, r3, 30 ; PPC64-NEXT: lwarx r5, 0, r4 ; PPC64-NEXT: xori r3, r3, 2 ; PPC64-NEXT: slwi r6, r3, 3 ; PPC64-NEXT: srw r3, r5, r6 ; PPC64-NEXT: andi. 
r7, r3, 65535 -; PPC64-NEXT: beq cr0, .LBB9_2 -; PPC64-NEXT: # %bb.1: # %cmpxchg.failure +; PPC64-NEXT: beq cr0, .LBB9_2 +; PPC64-NEXT: # %bb.1: # %cmpxchg.failure ; PPC64-NEXT: lwsync ; PPC64-NEXT: blr -; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore +; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore ; PPC64-NEXT: lis r7, 0 ; PPC64-NEXT: ori r7, r7, 65535 ; PPC64-NEXT: slw r7, r7, r6 ; PPC64-NEXT: li r8, 1 -; PPC64-NEXT: not r7, r7 +; PPC64-NEXT: not r7, r7 ; PPC64-NEXT: slw r6, r8, r6 ; PPC64-NEXT: and r5, r5, r7 ; PPC64-NEXT: or r5, r5, r6 @@ -264,24 +264,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) { } define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) { ; CHECK-LABEL: cas_strong_i32_acqrel_acquire: -; CHECK: # %bb.0: -; CHECK-NEXT: mr r4, r3 +; CHECK: # %bb.0: # %cmpxchg.start +; CHECK-NEXT: mr r4, r3 ; CHECK-NEXT: lwarx r3, 0, r3 -; CHECK-NEXT: cmplwi r3, 0 -; CHECK-NEXT: bne cr0, .LBB10_4 -; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: bne cr0, .LBB10_4 +; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: lwsync -; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 
r5, 0, r4 -; CHECK-NEXT: beq cr0, .LBB10_4 -; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload -; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1 +; CHECK-NEXT: beq cr0, .LBB10_4 +; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload +; CHECK-NEXT: # ; CHECK-NEXT: lwarx r3, 0, r4 -; CHECK-NEXT: cmplwi r3, 0 -; CHECK-NEXT: beq cr0, .LBB10_2 -; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: beq cr0, .LBB10_2 +; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire @@ -313,12 +313,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) { ; PPC32-NEXT: blr ; ; PPC64-LABEL: cas_weak_i64_release_monotonic: -; PPC64: # %bb.0: -; PPC64-NEXT: mr r4, r3 +; PPC64: # %bb.0: # %cmpxchg.start +; PPC64-NEXT: mr r4, r3 ; PPC64-NEXT: ldarx r3, 0, r3 -; PPC64-NEXT: cmpldi r3, 0 -; PPC64-NEXT: bnelr cr0 -; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore +; PPC64-NEXT: cmpldi r3, 0 +; PPC64-NEXT: bnelr cr0 +; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64-NEXT: li r5, 1 ; PPC64-NEXT: lwsync ; PPC64-NEXT: stdcx. 
r5, 0, r4 @@ -473,39 +473,20 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) { define half @load_atomic_f16__seq_cst(ptr %ptr) { ; PPC32-LABEL: load_atomic_f16__seq_cst: ; PPC32: # %bb.0: -; PPC32-NEXT: mflr r0 -; PPC32-NEXT: stwu r1, -16(r1) -; PPC32-NEXT: stw r0, 20(r1) -; PPC32-NEXT: .cfi_def_cfa_offset 16 -; PPC32-NEXT: .cfi_offset lr, 4 ; PPC32-NEXT: sync ; PPC32-NEXT: lhz r3, 0(r3) ; PPC32-NEXT: cmpw cr7, r3, r3 ; PPC32-NEXT: bne- cr7, .+4 ; PPC32-NEXT: isync -; PPC32-NEXT: bl __extendhfsf2 -; PPC32-NEXT: lwz r0, 20(r1) -; PPC32-NEXT: addi r1, r1, 16 -; PPC32-NEXT: mtlr r0 ; PPC32-NEXT: blr ; ; PPC64-LABEL: load_atomic_f16__seq_cst: ; PPC64: # %bb.0: -; PPC64-NEXT: mflr r0 -; PPC64-NEXT: stdu r1, -112(r1) -; PPC64-NEXT: std r0, 128(r1) -; PPC64-NEXT: .cfi_def_cfa_offset 112 -; PPC64-NEXT: .cfi_offset lr, 16 ; PPC64-NEXT: sync ; PPC64-NEXT: lhz r3, 0(r3) ; PPC64-NEXT: cmpd cr7, r3, r3 ; PPC64-NEXT: bne- cr7, .+4 ; PPC64-NEXT: isync -; PPC64-NEXT: bl __extendhfsf2 -; PPC64-NEXT: nop -; PPC64-NEXT: addi r1, r1, 112 -; PPC64-NEXT: ld r0, 16(r1) -; PPC64-NEXT: mtlr r0 ; PPC64-NEXT: blr %val = load atomic half, ptr %ptr seq_cst, align 2 ret half %val @@ -579,44 +560,11 @@ define double @load_atomic_f64__seq_cst(ptr %ptr) { } define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) { -; PPC32-LABEL: store_atomic_f16__seq_cst: -; PPC32: # %bb.0: -; PPC32-NEXT: mflr r0 -; PPC32-NEXT: stwu r1, -16(r1) -; PPC32-NEXT: stw r0, 20(r1) -; PPC32-NEXT: .cfi_def_cfa_offset 16 -; PPC32-NEXT: .cfi_offset lr, 4 -; PPC32-NEXT: .cfi_offset r30, -8 -; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill -; PPC32-NEXT: mr r30, r3 -; PPC32-NEXT: bl __truncsfhf2 -; PPC32-NEXT: sync -; PPC32-NEXT: sth r3, 0(r30) -; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload -; PPC32-NEXT: lwz r0, 20(r1) -; PPC32-NEXT: addi r1, r1, 16 -; PPC32-NEXT: mtlr r0 -; PPC32-NEXT: blr -; -; PPC64-LABEL: store_atomic_f16__seq_cst: -; PPC64: # %bb.0: -; PPC64-NEXT: mflr r0 -; PPC64-NEXT: stdu r1, 
-128(r1) -; PPC64-NEXT: std r0, 144(r1) -; PPC64-NEXT: .cfi_def_cfa_offset 128 -; PPC64-NEXT: .cfi_offset lr, 16 -; PPC64-NEXT: .cfi_offset r30, -16 -; PPC64-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; PPC64-NEXT: mr r30, r3 -; PPC64-NEXT: bl __truncsfhf2 -; PPC64-NEXT: nop -; PPC64-NEXT: sync -; PPC64-NEXT: sth r3, 0(r30) -; PPC64-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; PPC64-NEXT: addi r1, r1, 128 -; PPC64-NEXT: ld r0, 16(r1) -; PPC64-NEXT: mtlr r0 -; PPC64-NEXT: blr +; CHECK-LABEL: store_atomic_f16__seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr store atomic half %val1, ptr %ptr seq_cst, align 2 ret void } diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll index f8b2861156db4..080843217e8c9 100644 --- a/llvm/test/CodeGen/PowerPC/f128-conv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll @@ -1349,9 +1349,6 @@ define half @trunc(fp128 %a) nounwind { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: bl __trunckfhf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -1364,9 +1361,6 @@ define half @trunc(fp128 %a) nounwind { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: bl __trunckfhf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: clrldi r3, r3, 48 -; CHECK-P8-NEXT: bl __extendhfsf2 -; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1379,7 +1373,9 @@ entry: define fp128 @ext(half %a) nounwind { ; CHECK-LABEL: ext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscpsgndp v2, f1, f1 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp v2, f0 ; CHECK-NEXT: xscvdpqp v2, v2 ; CHECK-NEXT: blr ; @@ -1387,7 +1383,10 @@ define fp128 @ext(half %a) nounwind { ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 ; CHECK-P8-NEXT: stdu r1, -32(r1) +; 
CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: bl __extendhfsf2 +; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll new file mode 100644 index 0000000000000..3cd4f8b5ff9b8 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/half.ll @@ -0,0 +1,2277 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=PPC32 +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=P8 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=SOFT +; RUN: llc -mtriple=powerpc64-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=BE + +; Tests for various operations on half precision float. Much of the test is +; copied from test/CodeGen/X86/half.ll. 
+ +define void @store(half %x, ptr %p) nounwind { +; PPC32-LABEL: store: +; PPC32: # %bb.0: +; PPC32-NEXT: sth r3, 0(r4) +; PPC32-NEXT: blr +; +; P8-LABEL: store: +; P8: # %bb.0: +; P8-NEXT: sth r3, 0(r4) +; P8-NEXT: blr +; +; CHECK-LABEL: store: +; CHECK: # %bb.0: +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: store: +; SOFT: # %bb.0: +; SOFT-NEXT: sth r3, 0(r4) +; SOFT-NEXT: blr +; +; BE-LABEL: store: +; BE: # %bb.0: +; BE-NEXT: sth r3, 0(r4) +; BE-NEXT: blr + store half %x, ptr %p + ret void +} + +define half @return(ptr %p) nounwind { +; PPC32-LABEL: return: +; PPC32: # %bb.0: +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: blr +; +; P8-LABEL: return: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: blr +; +; CHECK-LABEL: return: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: return: +; SOFT: # %bb.0: +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: blr +; +; BE-LABEL: return: +; BE: # %bb.0: +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: blr + %r = load half, ptr %p + ret half %r +} + +define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { +; PPC32-LABEL: loadd: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 2(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: loadd: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 2(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: loadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi r3, r3, 2 +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: loadd: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; 
SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 2(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: loadd: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 2(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 + %0 = load i16, ptr %arrayidx, align 2 + %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) + ret double %1 +} + +declare double @llvm.convert.from.fp16.f64(i16) + +define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { +; PPC32-LABEL: loadf: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 2(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: loadf: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 2(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: loadf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi r3, r3, 2 +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: loadf: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 2(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: loadf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; 
BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 2(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 + %0 = load i16, ptr %arrayidx, align 2 + %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) + ret float %1 +} + +declare float @llvm.convert.from.fp16.f32(i16) + +define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { +; PPC32-LABEL: stored: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: stored: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: stored: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r3 +; CHECK-NEXT: blr +; +; SOFT-LABEL: stored: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: mr r3, r4 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; 
BE-LABEL: stored: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) + store i16 %0, ptr %a, align 2 + ret void +} + +declare i16 @llvm.convert.to.fp16.f64(double) + +define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { +; PPC32-LABEL: storef: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: storef: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: storef: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r3 +; CHECK-NEXT: blr +; +; SOFT-LABEL: storef: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: clrldi r3, r4, 32 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; 
SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: storef: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) + store i16 %0, ptr %a, align 2 + ret void +} + +declare i16 @llvm.convert.to.fp16.f32(float) +define void @test_load_store(ptr %in, ptr %out) nounwind { +; PPC32-LABEL: test_load_store: +; PPC32: # %bb.0: +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: sth r3, 0(r4) +; PPC32-NEXT: blr +; +; P8-LABEL: test_load_store: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: sth r3, 0(r4) +; P8-NEXT: blr +; +; CHECK-LABEL: test_load_store: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_load_store: +; SOFT: # %bb.0: +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: sth r3, 0(r4) +; SOFT-NEXT: blr +; +; BE-LABEL: test_load_store: +; BE: # %bb.0: +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: sth r3, 0(r4) +; BE-NEXT: blr + %val = load half, ptr %in + store half %val, ptr %out + ret void +} +define i16 @test_bitcast_from_half(ptr %addr) nounwind { +; PPC32-LABEL: test_bitcast_from_half: +; PPC32: # %bb.0: +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: blr +; +; P8-LABEL: test_bitcast_from_half: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: blr +; +; CHECK-LABEL: test_bitcast_from_half: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_bitcast_from_half: +; SOFT: # %bb.0: +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: blr +; +; BE-LABEL: test_bitcast_from_half: +; BE: # %bb.0: +; BE-NEXT: lhz r3, 
0(r3) +; BE-NEXT: blr + %val = load half, ptr %addr + %val_int = bitcast half %val to i16 + ret i16 %val_int +} +define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind { +; PPC32-LABEL: test_bitcast_to_half: +; PPC32: # %bb.0: +; PPC32-NEXT: sth r4, 0(r3) +; PPC32-NEXT: blr +; +; P8-LABEL: test_bitcast_to_half: +; P8: # %bb.0: +; P8-NEXT: sth r4, 0(r3) +; P8-NEXT: blr +; +; CHECK-LABEL: test_bitcast_to_half: +; CHECK: # %bb.0: +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_bitcast_to_half: +; SOFT: # %bb.0: +; SOFT-NEXT: sth r4, 0(r3) +; SOFT-NEXT: blr +; +; BE-LABEL: test_bitcast_to_half: +; BE: # %bb.0: +; BE-NEXT: sth r4, 0(r3) +; BE-NEXT: blr + %val_fp = bitcast i16 %in to half + store half %val_fp, ptr %addr + ret void +} + + +; Checks for https://github.com/llvm/llvm-project/issues/97981 +define half @from_bits(i16 %x) nounwind { +; PPC32-LABEL: from_bits: +; PPC32: # %bb.0: +; PPC32-NEXT: blr +; +; P8-LABEL: from_bits: +; P8: # %bb.0: +; P8-NEXT: blr +; +; CHECK-LABEL: from_bits: +; CHECK: # %bb.0: +; CHECK-NEXT: blr +; +; SOFT-LABEL: from_bits: +; SOFT: # %bb.0: +; SOFT-NEXT: blr +; +; BE-LABEL: from_bits: +; BE: # %bb.0: +; BE-NEXT: blr + %res = bitcast i16 %x to half + ret half %res +} + +define i16 @to_bits(half %x) nounwind { +; PPC32-LABEL: to_bits: +; PPC32: # %bb.0: +; PPC32-NEXT: blr +; +; P8-LABEL: to_bits: +; P8: # %bb.0: +; P8-NEXT: blr +; +; CHECK-LABEL: to_bits: +; CHECK: # %bb.0: +; CHECK-NEXT: blr +; +; SOFT-LABEL: to_bits: +; SOFT: # %bb.0: +; SOFT-NEXT: blr +; +; BE-LABEL: to_bits: +; BE: # %bb.0: +; BE-NEXT: blr + %res = bitcast half %x to i16 + ret i16 %res +} + +define float @test_extend32(ptr %addr) nounwind { +; PPC32-LABEL: test_extend32: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr 
+; +; P8-LABEL: test_extend32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend32: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to float + ret float %val32 +} +define double @test_extend64(ptr %addr) nounwind { +; PPC32-LABEL: test_extend64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_extend64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend64: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std 
r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to double + ret double %val32 +} +define void @test_trunc32(float %in, ptr %addr) nounwind { +; PPC32-LABEL: test_trunc32: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc32: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; 
SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = fptrunc float %in to half + store half %val16, ptr %addr + ret void +} +define void @test_trunc64(double %in, ptr %addr) nounwind { +; PPC32-LABEL: test_trunc64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc64: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; 
SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = fptrunc double %in to half + store half %val16, ptr %addr + ret void +} +define i64 @test_fptosi_i64(ptr %p) nounwind { +; PPC32-LABEL: test_fptosi_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: bl __fixsfdi +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_fptosi_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xscvdpsxds f0, f1 +; P8-NEXT: mffprd r3, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_fptosi_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdpsxds f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_fptosi_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __fixsfdi +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_fptosi_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std 
r0, 144(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fctidz f0, f1 +; BE-NEXT: stfd f0, 120(r1) +; BE-NEXT: ld r3, 120(r1) +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load half, ptr %p, align 2 + %r = fptosi half %a to i64 + ret i64 %r +} +define void @test_sitofp_i64(i64 %a, ptr %p) nounwind { +; PPC32-LABEL: test_sitofp_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r5 +; PPC32-NEXT: bl __floatdisf +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_sitofp_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: xscvsxdsp f1, f0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_sitofp_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvsxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_sitofp_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatdisf +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, 
-16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_sitofp_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: sradi r5, r3, 53 +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: addi r5, r5, 1 +; BE-NEXT: cmpldi r5, 1 +; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: ble cr0, .LBB16_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: clrldi r4, r3, 53 +; BE-NEXT: addi r4, r4, 2047 +; BE-NEXT: or r3, r4, r3 +; BE-NEXT: rldicr r3, r3, 0, 52 +; BE-NEXT: .LBB16_2: +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: lfd f0, 120(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f1, f0 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %r = sitofp i64 %a to half + store half %r, ptr %p + ret void +} +define i64 @test_fptoui_i64(ptr %p) nounwind { +; PPC32-LABEL: test_fptoui_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: bl __fixunssfdi +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_fptoui_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xscvdpuxds f0, f1 +; P8-NEXT: mffprd r3, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_fptoui_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdpuxds f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_fptoui_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: 
std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __fixunssfdi +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_fptoui_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; BE-NEXT: lfs f0, .LCPI17_0@toc@l(r3) +; BE-NEXT: fsubs f2, f1, f0 +; BE-NEXT: fcmpu cr0, f1, f0 +; BE-NEXT: fctidz f2, f2 +; BE-NEXT: stfd f2, 120(r1) +; BE-NEXT: fctidz f2, f1 +; BE-NEXT: stfd f2, 112(r1) +; BE-NEXT: blt cr0, .LBB17_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: ld r3, 120(r1) +; BE-NEXT: li r4, 1 +; BE-NEXT: rldic r4, r4, 63, 0 +; BE-NEXT: xor r3, r3, r4 +; BE-NEXT: b .LBB17_3 +; BE-NEXT: .LBB17_2: +; BE-NEXT: ld r3, 112(r1) +; BE-NEXT: .LBB17_3: +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load half, ptr %p, align 2 + %r = fptoui half %a to i64 + ret i64 %r +} +define void @test_uitofp_i64(i64 %a, ptr %p) nounwind { +; PPC32-LABEL: test_uitofp_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r5 +; PPC32-NEXT: bl __floatundisf +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_uitofp_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: xscvuxdsp f1, f0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld 
r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_uitofp_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvuxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_uitofp_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatundisf +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_uitofp_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: sradi r5, r3, 53 +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: addi r5, r5, 1 +; BE-NEXT: cmpldi r5, 1 +; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bgt cr0, .LBB18_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: mr r4, r3 +; BE-NEXT: b .LBB18_3 +; BE-NEXT: .LBB18_2: +; BE-NEXT: clrldi r4, r3, 53 +; BE-NEXT: addi r4, r4, 2047 +; BE-NEXT: or r4, r4, r3 +; BE-NEXT: rldicr r4, r4, 0, 52 +; BE-NEXT: .LBB18_3: +; BE-NEXT: rldicl r5, r3, 10, 54 +; BE-NEXT: clrldi r6, r3, 63 +; BE-NEXT: std r4, 112(r1) +; BE-NEXT: addi r5, r5, 1 +; BE-NEXT: cmpldi r5, 1 +; BE-NEXT: rldicl r5, r3, 63, 1 +; BE-NEXT: or r4, r6, r5 +; BE-NEXT: ble cr0, .LBB18_5 +; BE-NEXT: # %bb.4: +; BE-NEXT: clrldi r4, r4, 53 +; BE-NEXT: addi r4, r4, 2047 +; BE-NEXT: or r4, r4, r5 +; BE-NEXT: rldicl r4, r4, 53, 11 +; BE-NEXT: rldicl r4, r4, 11, 1 +; BE-NEXT: .LBB18_5: +; BE-NEXT: cmpdi r3, 0 +; BE-NEXT: std r4, 120(r1) +; BE-NEXT: bc 12, lt, .LBB18_7 +; BE-NEXT: # %bb.6: +; BE-NEXT: lfd f0, 112(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f1, f0 +; BE-NEXT: b .LBB18_8 +; BE-NEXT: 
.LBB18_7: +; BE-NEXT: lfd f0, 120(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f0, f0 +; BE-NEXT: fadds f1, f0, f0 +; BE-NEXT: .LBB18_8: +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %r = uitofp i64 %a to half + store half %r, ptr %p + ret void +} +define <4 x float> @test_extend32_vec4(ptr %p) nounwind { +; PPC32-LABEL: test_extend32_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -48(r1) +; PPC32-NEXT: stw r0, 52(r1) +; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: stfd f29, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 2(r30) +; PPC32-NEXT: fmr f31, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 4(r30) +; PPC32-NEXT: fmr f30, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 6(r30) +; PPC32-NEXT: fmr f29, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fmr f4, f1 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: fmr f2, f30 +; PPC32-NEXT: fmr f3, f29 +; PPC32-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 52(r1) +; PPC32-NEXT: addi r1, r1, 48 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_extend32_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -144(r1) +; P8-NEXT: li r4, 80 +; P8-NEXT: std r0, 160(r1) +; P8-NEXT: std r29, 120(r1) # 8-byte Folded Spill +; P8-NEXT: std r30, 128(r1) # 8-byte Folded Spill +; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 96 +; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill 
+; P8-NEXT: lwz r4, 4(r3) +; P8-NEXT: stw r4, 64(r1) +; P8-NEXT: lwz r3, 0(r3) +; P8-NEXT: stw r3, 48(r1) +; P8-NEXT: addi r3, r1, 64 +; P8-NEXT: lxvd2x vs62, 0, r3 +; P8-NEXT: addi r3, r1, 48 +; P8-NEXT: lxvd2x vs0, 0, r3 +; P8-NEXT: mffprd r30, f0 +; P8-NEXT: clrldi r3, r30, 48 +; P8-NEXT: clrlwi r3, r3, 16 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: mfvsrd r29, vs62 +; P8-NEXT: xxlor vs63, f1, f1 +; P8-NEXT: clrldi r3, r29, 48 +; P8-NEXT: clrlwi r3, r3, 16 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: rldicl r3, r30, 48, 48 +; P8-NEXT: xxmrghd vs0, vs1, vs63 +; P8-NEXT: clrlwi r3, r3, 16 +; P8-NEXT: xvcvdpsp vs62, vs0 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: rldicl r3, r29, 48, 48 +; P8-NEXT: xxlor vs63, f1, f1 +; P8-NEXT: clrlwi r3, r3, 16 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xxmrghd vs0, vs1, vs63 +; P8-NEXT: li r3, 96 +; P8-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 120(r1) # 8-byte Folded Reload +; P8-NEXT: xvcvdpsp vs34, vs0 +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 80 +; P8-NEXT: vmrgew v2, v2, v30 +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 144 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend32_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: lwz r4, 4(r3) +; CHECK-NEXT: stw r4, -16(r1) +; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: lxv vs34, -16(r1) +; CHECK-NEXT: stw r3, -32(r1) +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: lxv vs35, -32(r1) +; CHECK-NEXT: vextuhrx r4, r3, v3 +; CHECK-NEXT: vextuhrx r3, r3, v2 +; CHECK-NEXT: clrlwi r4, r4, 16 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: mtfprwz f1, r3 +; CHECK-NEXT: li r3, 2 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: vextuhrx r4, r3, v3 +; CHECK-NEXT: vextuhrx r3, r3, v2 +; CHECK-NEXT: clrlwi r4, r4, 16 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: xxmrghd vs0, 
vs1, vs0 +; CHECK-NEXT: mtfprwz f1, r3 +; CHECK-NEXT: xvcvdpsp vs36, vs0 +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-NEXT: xvcvdpsp vs34, vs0 +; CHECK-NEXT: vmrgew v2, v2, v4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend32_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 2(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: lhz r3, 4(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: lhz r3, 6(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r6, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: mr r4, r28 +; SOFT-NEXT: mr r5, r27 +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend32_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -192(r1) +; BE-NEXT: std r0, 208(r1) +; BE-NEXT: lwz r4, 0(r3) +; BE-NEXT: stw r4, 160(r1) +; BE-NEXT: lwz r3, 4(r3) +; BE-NEXT: stw r3, 176(r1) +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lvx v2, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lvx v3, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: stvx v3, 0, r3 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stvx v2, 0, r3 +; BE-NEXT: lhz r3, 130(r1) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; 
BE-NEXT: lhz r3, 128(r1) +; BE-NEXT: stfs f1, 156(r1) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 114(r1) +; BE-NEXT: stfs f1, 152(r1) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 112(r1) +; BE-NEXT: stfs f1, 148(r1) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: stfs f1, 144(r1) +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lvx v2, 0, r3 +; BE-NEXT: addi r1, r1, 192 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load <4 x half>, ptr %p, align 8 + %b = fpext <4 x half> %a to <4 x float> + ret <4 x float> %b +} +define <4 x double> @test_extend64_vec4(ptr %p) nounwind { +; PPC32-LABEL: test_extend64_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -48(r1) +; PPC32-NEXT: stw r0, 52(r1) +; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: stfd f29, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 2(r30) +; PPC32-NEXT: fmr f31, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 4(r30) +; PPC32-NEXT: fmr f30, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 6(r30) +; PPC32-NEXT: fmr f29, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fmr f4, f1 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: fmr f2, f30 +; PPC32-NEXT: fmr f3, f29 +; PPC32-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 52(r1) +; PPC32-NEXT: addi r1, r1, 48 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_extend64_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: std r0, 128(r1) +; P8-NEXT: li r4, 48 +; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; P8-NEXT: lhz 
r28, 2(r3) +; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; P8-NEXT: lhz r30, 6(r3) +; P8-NEXT: lhz r29, 4(r3) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 64 +; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: mr r3, r28 +; P8-NEXT: xxlor vs63, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: mr r3, r29 +; P8-NEXT: xxmrghd vs63, vs1, vs63 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: mr r3, r30 +; P8-NEXT: xxlor vs62, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: li r3, 64 +; P8-NEXT: vmr v2, v31 +; P8-NEXT: xxmrghd vs35, vs1, vs62 +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend64_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r4, 6(r3) +; CHECK-NEXT: lhz r5, 4(r3) +; CHECK-NEXT: lhz r6, 2(r3) +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: mtfprwz f1, r6 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs34, vs1, vs0 +; CHECK-NEXT: mtfprwz f0, r5 +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs35, vs1, vs0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend64_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: std r0, 96(r1) +; 
SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 2(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: lhz r3, 4(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: lhz r3, 6(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r6, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: mr r4, r28 +; SOFT-NEXT: mr r5, r27 +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend64_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r28, 120(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 128(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r30, 6(r3) +; BE-NEXT: lhz r29, 4(r3) +; BE-NEXT: lhz r28, 2(r3) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: mr r3, r28 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: mr r3, r29 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: mr r3, r30 +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fmr f4, f1 +; BE-NEXT: fmr f1, f31 +; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 
136(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f2, f30 +; BE-NEXT: fmr f3, f29 +; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 128(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 120(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 176 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load <4 x half>, ptr %p, align 8 + %b = fpext <4 x half> %a to <4 x double> + ret <4 x double> %b +} +define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind { +; PPC32-LABEL: test_trunc32_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -64(r1) +; PPC32-NEXT: stw r0, 68(r1) +; PPC32-NEXT: stw r27, 20(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r28, 24(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r29, 28(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r30, 32(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f29, f2 +; PPC32-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f30, f3 +; PPC32-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f31, f4 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: fmr f1, f29 +; PPC32-NEXT: mr r29, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: fmr f1, f30 +; PPC32-NEXT: mr r28, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: mr r27, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r27, 4(r30) +; PPC32-NEXT: sth r28, 2(r30) +; PPC32-NEXT: sth r3, 6(r30) +; PPC32-NEXT: sth r29, 0(r30) +; PPC32-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 48(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 32(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r29, 28(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r28, 24(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r27, 20(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 68(r1) +; PPC32-NEXT: 
addi r1, r1, 64 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc32_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: xxsldwi vs0, vs34, vs34, 3 +; P8-NEXT: li r3, 48 +; P8-NEXT: std r0, 128(r1) +; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill +; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill +; P8-NEXT: mr r30, r5 +; P8-NEXT: vmr v31, v2 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: xxswapd vs0, vs63 +; P8-NEXT: mr r29, r3 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: xxsldwi vs0, vs63, vs63, 1 +; P8-NEXT: mr r28, r3 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: xscvspdpn f1, vs63 +; P8-NEXT: mr r27, r3 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 6(r30) +; P8-NEXT: li r3, 48 +; P8-NEXT: sth r27, 4(r30) +; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload +; P8-NEXT: sth r28, 2(r30) +; P8-NEXT: sth r29, 0(r30) +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc32_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 +; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvspdpn f1, vs1 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: mffprwz r4, f1 +; CHECK-NEXT: xscvspdpn f1, vs34 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: sth r4, 4(r5) +; 
CHECK-NEXT: mffprwz r4, f0 +; CHECK-NEXT: sth r3, 0(r5) +; CHECK-NEXT: sth r4, 2(r5) +; CHECK-NEXT: mffprwz r6, f1 +; CHECK-NEXT: sth r6, 6(r5) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc32_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r7 +; SOFT-NEXT: mr r29, r6 +; SOFT-NEXT: mr r28, r5 +; SOFT-NEXT: mr r27, r4 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r26, r3 +; SOFT-NEXT: clrldi r3, r27, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r28, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r29, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 6(r30) +; SOFT-NEXT: sth r28, 4(r30) +; SOFT-NEXT: sth r27, 2(r30) +; SOFT-NEXT: sth r26, 0(r30) +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc32_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r27, 136(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r5 +; BE-NEXT: stvx v2, 0, r3 +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: bl 
__truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r27, 4(r30) +; BE-NEXT: sth r28, 2(r30) +; BE-NEXT: sth r3, 6(r30) +; BE-NEXT: sth r29, 0(r30) +; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 136(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 176 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %v = fptrunc <4 x float> %a to <4 x half> + store <4 x half> %v, ptr %p + ret void +} +define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind { +; PPC32-LABEL: test_trunc64_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -64(r1) +; PPC32-NEXT: stw r0, 68(r1) +; PPC32-NEXT: stw r27, 20(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r28, 24(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r29, 28(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r30, 32(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f29, f2 +; PPC32-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f30, f3 +; PPC32-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f31, f4 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: fmr f1, f29 +; PPC32-NEXT: mr r29, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: fmr f1, f30 +; PPC32-NEXT: mr r28, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: mr r27, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: sth r27, 4(r30) +; PPC32-NEXT: sth r28, 2(r30) +; PPC32-NEXT: sth r3, 6(r30) +; PPC32-NEXT: sth r29, 0(r30) +; PPC32-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 48(r1) # 8-byte 
Folded Reload +; PPC32-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 32(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r29, 28(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r28, 24(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r27, 20(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 68(r1) +; PPC32-NEXT: addi r1, r1, 64 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc64_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: li r3, 48 +; P8-NEXT: std r0, 144(r1) +; P8-NEXT: xxswapd vs1, vs34 +; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill +; P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill +; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill +; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: mr r30, r7 +; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 64 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxswapd vs1, vs63 +; P8-NEXT: mr r29, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxlor f1, vs62, vs62 +; P8-NEXT: mr r28, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxlor f1, vs63, vs63 +; P8-NEXT: mr r27, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 6(r30) +; P8-NEXT: li r3, 64 +; P8-NEXT: sth r27, 2(r30) +; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload +; P8-NEXT: sth r28, 4(r30) +; P8-NEXT: sth r29, 0(r30) +; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 128 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc64_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: xscvdphp f0, f0 +; 
CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: xxswapd vs0, vs35 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: xscvdphp f1, vs34 +; CHECK-NEXT: mffprwz r4, f1 +; CHECK-NEXT: xscvdphp f1, vs35 +; CHECK-NEXT: sth r3, 0(r7) +; CHECK-NEXT: sth r4, 2(r7) +; CHECK-NEXT: mffprwz r4, f0 +; CHECK-NEXT: sth r4, 4(r7) +; CHECK-NEXT: mffprwz r5, f1 +; CHECK-NEXT: sth r5, 6(r7) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc64_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r7 +; SOFT-NEXT: mr r29, r6 +; SOFT-NEXT: mr r28, r5 +; SOFT-NEXT: mr r27, r4 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r26, r3 +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 6(r30) +; SOFT-NEXT: sth r28, 4(r30) +; SOFT-NEXT: sth r27, 2(r30) +; SOFT-NEXT: sth r26, 0(r30) +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc64_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r27, 112(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 120(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 128(r1) # 8-byte Folded Spill +; 
BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r7 +; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f29, f2 +; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f30, f3 +; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f4 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r27, 4(r30) +; BE-NEXT: sth r28, 2(r30) +; BE-NEXT: sth r3, 6(r30) +; BE-NEXT: sth r29, 0(r30) +; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 128(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 120(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 176 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %v = fptrunc <4 x double> %a to <4 x half> + store <4 x half> %v, ptr %p + ret void +} +define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind { +; PPC32-LABEL: test_sitofp_fadd_i32: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -32(r1) +; PPC32-NEXT: stw r0, 36(r1) +; PPC32-NEXT: lis r5, 17200 +; PPC32-NEXT: xoris r3, r3, 32768 +; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill +; PPC32-NEXT: lhz r30, 0(r4) +; PPC32-NEXT: stw r5, 8(r1) +; PPC32-NEXT: stw r3, 12(r1) +; PPC32-NEXT: lis r3, .LCPI23_0@ha +; PPC32-NEXT: lfd f0, 8(r1) +; PPC32-NEXT: lfs f1, .LCPI23_0@l(r3) +; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: fsub f0, f0, f1 +; PPC32-NEXT: frsp f1, f0 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: bl __extendhfsf2 +; 
PPC32-NEXT: mr r3, r30 +; PPC32-NEXT: fmr f31, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fadds f1, f1, f31 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 36(r1) +; PPC32-NEXT: addi r1, r1, 32 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_sitofp_fadd_i32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill +; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -64(r1) +; P8-NEXT: mtfprwa f0, r3 +; P8-NEXT: std r0, 80(r1) +; P8-NEXT: lhz r30, 0(r4) +; P8-NEXT: xscvsxdsp f1, f0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: mr r3, r30 +; P8-NEXT: fmr f31, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xsaddsp f1, f1, f31 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 64 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_sitofp_fadd_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprwa f0, r3 +; CHECK-NEXT: lhz r4, 0(r4) +; CHECK-NEXT: xscvsxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: mtfprwz f1, r3 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xsaddsp f0, f0, f1 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_sitofp_fadd_i32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill 
+; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -64(r1) +; SOFT-NEXT: extsw r3, r3 +; SOFT-NEXT: std r0, 80(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatsisf +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 0(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: clrldi r3, r29, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r4, r3 +; SOFT-NEXT: mr r3, r30 +; SOFT-NEXT: bl __addsf3 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 64 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_sitofp_fadd_i32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: extsw r3, r3 +; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r30, 0(r4) +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: lfd f0, 112(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill +; BE-NEXT: frsp f1, f0 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: mr r3, r30 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fadds f1, f1, f31 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %tmp0 = load half, ptr %b + %tmp1 = sitofp i32 %a to half + %tmp2 = fadd half %tmp0, %tmp1 + %tmp3 = fpext half %tmp2 to 
float + ret float %tmp3 +} +define half @PR40273(half) nounwind { +; PPC32-LABEL: PR40273: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lis r3, .LCPI24_0@ha +; PPC32-NEXT: lfs f0, .LCPI24_0@l(r3) +; PPC32-NEXT: li r3, 15360 +; PPC32-NEXT: fcmpu cr0, f1, f0 +; PPC32-NEXT: bne cr0, .LBB24_2 +; PPC32-NEXT: # %bb.1: +; PPC32-NEXT: li r3, 0 +; PPC32-NEXT: .LBB24_2: +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: PR40273: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xxlxor f0, f0, f0 +; P8-NEXT: li r3, 15360 +; P8-NEXT: fcmpu cr0, f1, f0 +; P8-NEXT: iseleq r3, 0, r3 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: PR40273: +; CHECK: # %bb.0: +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: xxlxor f1, f1, f1 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: li r3, 15360 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: fcmpu cr0, f0, f1 +; CHECK-NEXT: iseleq r3, 0, r3 +; CHECK-NEXT: blr +; +; SOFT-LABEL: PR40273: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: li r4, 0 +; SOFT-NEXT: bl __nesf2 +; SOFT-NEXT: nop +; SOFT-NEXT: cmplwi r3, 0 +; SOFT-NEXT: li r3, 15360 +; SOFT-NEXT: iseleq r3, 0, r3 +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: PR40273: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha +; BE-NEXT: lfs 
f0, .LCPI24_0@toc@l(r3) +; BE-NEXT: li r3, 15360 +; BE-NEXT: fcmpu cr0, f1, f0 +; BE-NEXT: bne cr0, .LBB24_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: li r3, 0 +; BE-NEXT: .LBB24_2: +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %2 = fcmp une half %0, 0xH0000 + %3 = uitofp i1 %2 to half + ret half %3 +} + +; Trivial operations shouldn't need a libcall + +define half @fabs(half %x) nounwind { +; PPC32-LABEL: fabs: +; PPC32: # %bb.0: +; PPC32-NEXT: clrlwi r3, r3, 17 +; PPC32-NEXT: blr +; +; P8-LABEL: fabs: +; P8: # %bb.0: +; P8-NEXT: clrldi r3, r3, 49 +; P8-NEXT: blr +; +; CHECK-LABEL: fabs: +; CHECK: # %bb.0: +; CHECK-NEXT: clrldi r3, r3, 49 +; CHECK-NEXT: blr +; +; SOFT-LABEL: fabs: +; SOFT: # %bb.0: +; SOFT-NEXT: clrldi r3, r3, 49 +; SOFT-NEXT: blr +; +; BE-LABEL: fabs: +; BE: # %bb.0: +; BE-NEXT: clrldi r3, r3, 49 +; BE-NEXT: blr + %a = call half @llvm.fabs.f16(half %x) + ret half %a +} + +define half @fcopysign(half %x, half %y) nounwind { +; PPC32-LABEL: fcopysign: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwimi r3, r4, 0, 0, 16 +; PPC32-NEXT: blr +; +; P8-LABEL: fcopysign: +; P8: # %bb.0: +; P8-NEXT: rotldi r4, r4, 49 +; P8-NEXT: clrldi r3, r3, 49 +; P8-NEXT: rldimi r3, r4, 15, 32 +; P8-NEXT: blr +; +; CHECK-LABEL: fcopysign: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi r4, r4, 49 +; CHECK-NEXT: clrldi r3, r3, 49 +; CHECK-NEXT: rldimi r3, r4, 15, 32 +; CHECK-NEXT: blr +; +; SOFT-LABEL: fcopysign: +; SOFT: # %bb.0: +; SOFT-NEXT: rotldi r4, r4, 49 +; SOFT-NEXT: clrldi r3, r3, 49 +; SOFT-NEXT: rldimi r3, r4, 15, 32 +; SOFT-NEXT: blr +; +; BE-LABEL: fcopysign: +; BE: # %bb.0: +; BE-NEXT: rotldi r4, r4, 49 +; BE-NEXT: clrldi r3, r3, 49 +; BE-NEXT: rldimi r3, r4, 15, 32 +; BE-NEXT: blr + %a = call half @llvm.copysign.f16(half %x, half %y) + ret half %a +} diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll deleted file mode 100644 index 50f05cca80458..0000000000000 --- 
a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ /dev/null @@ -1,1281 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ -; RUN: --check-prefix=P8 -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \ -; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ -; RUN: --check-prefix=SOFT - -; Tests for various operations on half precison float. Much of the test is -; copied from test/CodeGen/X86/half.ll. -define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0 { -; P8-LABEL: loadd: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 2(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: loadd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi r3, r3, 2 -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: loadd: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 2(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) - ret double %1 -} - -declare double @llvm.convert.from.fp16.f64(i16) - -define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 { -; P8-LABEL: loadf: 
-; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 2(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: loadf: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi r3, r3, 2 -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: loadf: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 2(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 -} - -declare float @llvm.convert.from.fp16.f32(i16) - -define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0 { -; P8-LABEL: stored: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: stored: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r3 -; CHECK-NEXT: blr -; -; SOFT-LABEL: stored: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: mr r3, r4 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; 
SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) - store i16 %0, ptr %a, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f64(double) - -define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0 { -; P8-LABEL: storef: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: storef: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r3 -; CHECK-NEXT: blr -; -; SOFT-LABEL: storef: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: clrldi r3, r4, 32 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) - store i16 %0, ptr %a, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f32(float) -define void @test_load_store(ptr %in, ptr %out) #0 { -; P8-LABEL: test_load_store: -; P8: # %bb.0: -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: sth r3, 0(r4) -; P8-NEXT: blr -; -; CHECK-LABEL: test_load_store: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) 
-; CHECK-NEXT: sth r3, 0(r4) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_load_store: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val = load half, ptr %in - store half %val, ptr %out - ret void -} -define i16 @test_bitcast_from_half(ptr %addr) #0 { -; P8-LABEL: test_bitcast_from_half: -; P8: # %bb.0: -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: blr -; -; CHECK-LABEL: test_bitcast_from_half: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_bitcast_from_half: -; SOFT: # %bb.0: -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: blr - %val = load half, ptr %addr - %val_int = bitcast half %val to i16 - ret i16 %val_int -} -define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 { -; P8-LABEL: test_bitcast_to_half: -; P8: # %bb.0: -; P8-NEXT: sth r4, 0(r3) -; P8-NEXT: blr -; -; CHECK-LABEL: test_bitcast_to_half: -; CHECK: # %bb.0: -; CHECK-NEXT: sth r4, 0(r3) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_bitcast_to_half: -; SOFT: # %bb.0: -; SOFT-NEXT: sth r4, 0(r3) -; SOFT-NEXT: blr - %val_fp = bitcast i16 %in to half - store half %val_fp, ptr %addr - ret void -} -define float @test_extend32(ptr %addr) #0 { -; P8-LABEL: test_extend32: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend32: -; CHECK: # %bb.0: -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr 
-; -; SOFT-LABEL: test_extend32: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = load half, ptr %addr - %val32 = fpext half %val16 to float - ret float %val32 -} -define double @test_extend64(ptr %addr) #0 { -; P8-LABEL: test_extend64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend64: -; CHECK: # %bb.0: -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = load half, ptr %addr - %val32 = fpext half %val16 to double - ret double %val32 -} -define void @test_trunc32(float %in, ptr %addr) #0 { -; P8-LABEL: test_trunc32: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc32: -; CHECK: # %bb.0: -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r4 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc32: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std 
r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: clrldi r3, r3, 32 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = fptrunc float %in to half - store half %val16, ptr %addr - ret void -} -define void @test_trunc64(double %in, ptr %addr) #0 { -; P8-LABEL: test_trunc64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc64: -; CHECK: # %bb.0: -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r4 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = fptrunc double %in to half - store half %val16, ptr %addr - ret void -} -define i64 @test_fptosi_i64(ptr %p) #0 { -; P8-LABEL: test_fptosi_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) 
-; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: xscvdpsxds f0, f1 -; P8-NEXT: mffprd r3, f0 -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_fptosi_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvdpsxds f0, f0 -; CHECK-NEXT: mffprd r3, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_fptosi_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __fixsfdi -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %a = load half, ptr %p, align 2 - %r = fptosi half %a to i64 - ret i64 %r -} -define void @test_sitofp_i64(i64 %a, ptr %p) #0 { -; P8-LABEL: test_sitofp_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: mtfprd f0, r3 -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: xscvsxdsp f1, f0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_sitofp_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xscvsxdsp f0, f0 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: sth r3, 0(r4) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_sitofp_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __floatdisf -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; 
SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %r = sitofp i64 %a to half - store half %r, ptr %p - ret void -} -define i64 @test_fptoui_i64(ptr %p) #0 { -; P8-LABEL: test_fptoui_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: xscvdpuxds f0, f1 -; P8-NEXT: mffprd r3, f0 -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_fptoui_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvdpuxds f0, f0 -; CHECK-NEXT: mffprd r3, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_fptoui_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __fixunssfdi -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %a = load half, ptr %p, align 2 - %r = fptoui half %a to i64 - ret i64 %r -} -define void @test_uitofp_i64(i64 %a, ptr %p) #0 { -; P8-LABEL: test_uitofp_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: mtfprd f0, r3 -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: xscvuxdsp f1, f0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_uitofp_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: 
xscvuxdsp f0, f0 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: sth r3, 0(r4) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_uitofp_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __floatundisf -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %r = uitofp i64 %a to half - store half %r, ptr %p - ret void -} -define <4 x float> @test_extend32_vec4(ptr %p) #0 { -; P8-LABEL: test_extend32_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: li r4, 48 -; P8-NEXT: std r0, 128(r1) -; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r3 -; P8-NEXT: lhz r3, 6(r3) -; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 64 -; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 80 -; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 2(r30) -; P8-NEXT: xxlor vs63, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 4(r30) -; P8-NEXT: xxlor vs62, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 0(r30) -; P8-NEXT: xxlor vs61, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: li r3, 80 -; P8-NEXT: xxmrghd vs0, vs61, vs1 -; P8-NEXT: xxmrghd vs1, vs63, vs62 -; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 64 -; P8-NEXT: xvcvdpsp vs34, vs0 -; P8-NEXT: xvcvdpsp vs35, vs1 -; P8-NEXT: lxvd2x vs62, r1, r3 # 
16-byte Folded Reload -; P8-NEXT: li r3, 48 -; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload -; P8-NEXT: vmrgew v2, v3, v2 -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend32_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r4, 6(r3) -; CHECK-NEXT: mtfprwz f0, r4 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: lhz r4, 2(r3) -; CHECK-NEXT: mtfprwz f1, r4 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: lhz r4, 4(r3) -; CHECK-NEXT: mtfprwz f2, r4 -; CHECK-NEXT: xscvhpdp f2, f2 -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: xxmrghd vs0, vs0, vs1 -; CHECK-NEXT: mtfprwz f3, r3 -; CHECK-NEXT: xvcvdpsp vs35, vs0 -; CHECK-NEXT: xscvhpdp f3, f3 -; CHECK-NEXT: xxmrghd vs2, vs2, vs3 -; CHECK-NEXT: xvcvdpsp vs34, vs2 -; CHECK-NEXT: vmrgew v2, v3, v2 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend32_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: lhz r3, 2(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: lhz r3, 4(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: lhz r3, 6(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r6, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: mr r4, r28 -; SOFT-NEXT: mr r5, r27 -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: 
blr - %a = load <4 x half>, ptr %p, align 8 - %b = fpext <4 x half> %a to <4 x float> - ret <4 x float> %b -} -define <4 x double> @test_extend64_vec4(ptr %p) #0 { -; P8-LABEL: test_extend64_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: li r4, 48 -; P8-NEXT: std r0, 128(r1) -; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r3 -; P8-NEXT: lhz r3, 6(r3) -; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 64 -; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 80 -; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 4(r30) -; P8-NEXT: xxlor vs63, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 2(r30) -; P8-NEXT: xxlor vs62, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 0(r30) -; P8-NEXT: xxlor vs61, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: li r3, 80 -; P8-NEXT: xxmrghd vs35, vs63, vs62 -; P8-NEXT: xxmrghd vs34, vs61, vs1 -; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 64 -; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 48 -; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend64_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r4, 6(r3) -; CHECK-NEXT: lhz r5, 4(r3) -; CHECK-NEXT: lhz r6, 2(r3) -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: mtfprwz f1, r6 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: xxmrghd vs34, vs1, vs0 -; CHECK-NEXT: mtfprwz f0, r5 -; CHECK-NEXT: mtfprwz f1, r4 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: xxmrghd vs35, vs1, vs0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend64_vec4: -; SOFT: # %bb.0: -; 
SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: lhz r3, 2(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: lhz r3, 4(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: lhz r3, 6(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r6, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: mr r4, r28 -; SOFT-NEXT: mr r5, r27 -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %a = load <4 x half>, ptr %p, align 8 - %b = fpext <4 x half> %a to <4 x double> - ret <4 x double> %b -} -define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 { -; P8-LABEL: test_trunc32_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: xxsldwi vs0, vs34, vs34, 3 -; P8-NEXT: li r3, 48 -; P8-NEXT: std r0, 128(r1) -; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill -; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill -; P8-NEXT: mr r30, r5 -; P8-NEXT: vmr v31, v2 -; 
P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: xxswapd vs0, vs63 -; P8-NEXT: mr r29, r3 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: xxsldwi vs0, vs63, vs63, 1 -; P8-NEXT: mr r28, r3 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: xscvspdpn f1, vs63 -; P8-NEXT: mr r27, r3 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 6(r30) -; P8-NEXT: li r3, 48 -; P8-NEXT: sth r27, 4(r30) -; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload -; P8-NEXT: sth r28, 2(r30) -; P8-NEXT: sth r29, 0(r30) -; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc32_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: xscvspdpn f1, vs1 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: xscvdphp f1, f1 -; CHECK-NEXT: mffprwz r4, f1 -; CHECK-NEXT: xscvspdpn f1, vs34 -; CHECK-NEXT: xscvdphp f1, f1 -; CHECK-NEXT: sth r4, 4(r5) -; CHECK-NEXT: mffprwz r4, f0 -; CHECK-NEXT: sth r3, 0(r5) -; CHECK-NEXT: sth r4, 2(r5) -; CHECK-NEXT: mffprwz r6, f1 -; CHECK-NEXT: sth r6, 6(r5) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc32_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: clrldi r3, r6, 32 
-; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r7 -; SOFT-NEXT: mr r29, r5 -; SOFT-NEXT: mr r28, r4 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r26, r3 -; SOFT-NEXT: clrldi r3, r29, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: clrldi r3, r28, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: clrldi r3, r27, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: clrldi r3, r28, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: clrldi r3, r29, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: clrldi r3, r26, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 6(r30) -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 4(r30) -; SOFT-NEXT: mr r3, r28 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 2(r30) -; SOFT-NEXT: mr r3, r27 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %v = fptrunc <4 x float> %a to <4 x half> - store <4 x half> %v, ptr %p - ret void -} -define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 { -; P8-LABEL: test_trunc64_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -128(r1) -; P8-NEXT: li r3, 48 -; P8-NEXT: std r0, 144(r1) -; P8-NEXT: xxswapd vs1, vs34 -; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill -; 
P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill -; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r7 -; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 64 -; P8-NEXT: vmr v30, v2 -; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill -; P8-NEXT: vmr v31, v3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: xxswapd vs1, vs63 -; P8-NEXT: mr r29, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: xxlor f1, vs62, vs62 -; P8-NEXT: mr r28, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: xxlor f1, vs63, vs63 -; P8-NEXT: mr r27, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 6(r30) -; P8-NEXT: li r3, 64 -; P8-NEXT: sth r27, 2(r30) -; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload -; P8-NEXT: sth r28, 4(r30) -; P8-NEXT: sth r29, 0(r30) -; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 48 -; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload -; P8-NEXT: addi r1, r1, 128 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc64_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: xxswapd vs0, vs35 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: xscvdphp f1, vs34 -; CHECK-NEXT: mffprwz r4, f1 -; CHECK-NEXT: xscvdphp f1, vs35 -; CHECK-NEXT: sth r3, 0(r7) -; CHECK-NEXT: sth r4, 2(r7) -; CHECK-NEXT: mffprwz r4, f0 -; CHECK-NEXT: sth r4, 4(r7) -; CHECK-NEXT: mffprwz r5, f1 -; CHECK-NEXT: sth r5, 6(r7) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc64_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; 
SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: mr r3, r6 -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r7 -; SOFT-NEXT: mr r29, r5 -; SOFT-NEXT: mr r28, r4 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r26, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: mr r3, r28 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: mr r3, r27 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: clrldi r3, r28, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: clrldi r3, r29, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: clrldi r3, r26, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 6(r30) -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 4(r30) -; SOFT-NEXT: mr r3, r28 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 2(r30) -; SOFT-NEXT: mr r3, r27 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %v = fptrunc <4 x double> %a to <4 x half> - store <4 x half> %v, ptr %p - ret void -} -define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 { -; P8-LABEL: test_sitofp_fadd_i32: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; 
P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill -; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -64(r1) -; P8-NEXT: std r0, 80(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: lhz r3, 0(r4) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: mtfprwa f0, r30 -; P8-NEXT: fmr f31, f1 -; P8-NEXT: xscvsxdsp f1, f0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: clrldi r3, r3, 48 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: xsaddsp f1, f31, f1 -; P8-NEXT: addi r1, r1, 64 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_sitofp_fadd_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: mtfprwa f1, r3 -; CHECK-NEXT: lhz r4, 0(r4) -; CHECK-NEXT: xscvsxdsp f1, f1 -; CHECK-NEXT: mtfprwz f0, r4 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvdphp f1, f1 -; CHECK-NEXT: mffprwz r3, f1 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f1, r3 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: xsaddsp f1, f0, f1 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_sitofp_fadd_i32: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -64(r1) -; SOFT-NEXT: std r0, 80(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: lhz r3, 0(r4) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: extsw r3, r30 -; SOFT-NEXT: bl __floatsisf -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r4, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __addsf3 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 64 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr 
- %tmp0 = load half, ptr %b - %tmp1 = sitofp i32 %a to half - %tmp2 = fadd half %tmp0, %tmp1 - %tmp3 = fpext half %tmp2 to float - ret float %tmp3 -} -define half @PR40273(half) #0 { -; P8-LABEL: PR40273: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: clrldi r3, r3, 48 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: fmr f0, f1 -; P8-NEXT: xxlxor f1, f1, f1 -; P8-NEXT: fcmpu cr0, f0, f1 -; P8-NEXT: beq cr0, .LBB20_2 -; P8-NEXT: # %bb.1: -; P8-NEXT: vspltisw v2, 1 -; P8-NEXT: xvcvsxwdp vs1, vs34 -; P8-NEXT: .LBB20_2: -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: PR40273: -; CHECK: # %bb.0: -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: xxlxor f1, f1, f1 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: fcmpu cr0, f0, f1 -; CHECK-NEXT: beqlr cr0 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vspltisw v2, 1 -; CHECK-NEXT: xvcvsxwdp vs1, vs34 -; CHECK-NEXT: blr -; -; SOFT-LABEL: PR40273: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: li r4, 0 -; SOFT-NEXT: bl __nesf2 -; SOFT-NEXT: nop -; SOFT-NEXT: cmplwi r3, 0 -; SOFT-NEXT: lis r3, 16256 -; SOFT-NEXT: iseleq r3, 0, r3 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %2 = fcmp une half %0, 0xH0000 - %3 = uitofp i1 %2 to half - ret half %3 -} -attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 8d7253b5ce8e3..3c8439683cba5 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -143,15 +143,15 @@ define half @ldexp_f16(half 
%arg0, i32 %arg1) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: xscvdphp f0, f1 ; CHECK-NEXT: extsw r4, r4 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll index 95d763d7179ed..b0f9fd47a1e54 100644 --- a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll +++ b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll @@ -7,16 +7,16 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: std r0, 64(r1) -; CHECK-NEXT: xscvdphp f0, f1 ; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop -; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: lwz r4, 44(r1) +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: addi r1, r1, 48 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -30,15 +30,15 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: std r0, 64(r1) -; CHECK-NEXT: xscvdphp f0, f1 ; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: addi r1, r1, 48 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -53,11 +53,9 @@ 
define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: std r0, 64(r1) -; CHECK-NEXT: xscvdphp f0, f1 ; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf @@ -76,43 +74,42 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind ; CHECK-LABEL: test_frexp_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r29, -40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: clrlwi r29, r3, 16 +; CHECK-NEXT: clrlwi r3, r4, 16 +; CHECK-NEXT: addi r30, r1, 44 +; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: xscvdphp f0, f2 -; CHECK-NEXT: addi r30, r1, 32 ; CHECK-NEXT: mr r4, r30 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f31, f0 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop -; CHECK-NEXT: addi r29, r1, 36 -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: mtfprwz f0, r29 +; CHECK-NEXT: addi r29, r1, 40 +; CHECK-NEXT: sth r3, 50(r1) +; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: mr r4, r29 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f2, f1 -; CHECK-NEXT: lfiwzx f0, 0, r30 -; CHECK-NEXT: lfiwzx f1, 0, r29 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: li r4, 2 +; 
CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 48(r1) +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: lxv v3, 48(r1) +; CHECK-NEXT: lfiwzx f0, 0, r29 +; CHECK-NEXT: lfiwzx f1, 0, r30 ; CHECK-NEXT: xxmrghw v2, vs1, vs0 -; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: vextuhrx r3, r3, v3 +; CHECK-NEXT: vextuhrx r4, r4, v3 ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, -32(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -40(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) @@ -123,34 +120,35 @@ define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) nounwind ; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_fract: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: xscvdphp f0, f2 -; CHECK-NEXT: addi r4, r1, 40 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f31, f0 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: clrlwi r30, r3, 16 +; CHECK-NEXT: clrlwi r3, r4, 16 +; CHECK-NEXT: addi r4, r1, 44 ; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: std r0, 96(r1) ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop -; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 40 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: mtfprwz f0, r30 +; CHECK-NEXT: 
sth r3, 50(r1) +; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f2, f1 -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: li r4, 2 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 48(r1) +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: lxv v2, 48(r1) +; CHECK-NEXT: vextuhrx r3, r3, v2 +; CHECK-NEXT: vextuhrx r4, r4, v2 +; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) @@ -162,38 +160,31 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind { ; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_exp: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: xscvdphp f0, f2 -; CHECK-NEXT: addi r30, r1, 40 -; CHECK-NEXT: mr r4, r30 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f31, f0 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: addi r30, r1, 32 ; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: clrlwi r29, r4, 16 +; CHECK-NEXT: mr r4, r30 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop -; CHECK-NEXT: addi r29, r1, 44 -; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtfprwz f0, r29 +; CHECK-NEXT: addi r29, r1, 36 +; CHECK-NEXT: xscvhpdp f1, f0 ; 
CHECK-NEXT: mr r4, r29 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: nop ; CHECK-NEXT: lfiwzx f0, 0, r30 ; CHECK-NEXT: lfiwzx f1, 0, r29 ; CHECK-NEXT: xxmrghw v2, vs1, vs0 -; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, -24(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll index 1b137c786cc91..fa9082278826c 100644 --- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll +++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll @@ -10,15 +10,17 @@ define { half, half } @test_modf_f16(half %a) { ; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: addi r4, r1, 44 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl modff ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f2, 44(r1) +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: lfs f0, 44(r1) +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r4, f0 ; CHECK-NEXT: addi r1, r1, 48 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -35,14 +37,14 @@ define half @test_modf_f16_only_use_fractional_part(half %a) { ; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: addi r4, r1, 44 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl modff ; CHECK-NEXT: nop +; 
CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: addi r1, r1, 48 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -60,15 +62,15 @@ define half @test_modf_f16_only_use_integral_part(half %a) { ; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: addi r4, r1, 44 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl modff ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 44(r1) +; CHECK-NEXT: lfs f0, 44(r1) +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: addi r1, r1, 48 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -82,40 +84,53 @@ define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) { ; CHECK-LABEL: test_modf_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset r30, -24 ; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: xscvdphp f0, f2 -; CHECK-NEXT: addi r4, r1, 40 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f31, f0 -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: stdu r1, -96(r1) +; CHECK-NEXT: clrlwi r30, r3, 16 +; CHECK-NEXT: clrlwi r3, r4, 16 +; CHECK-NEXT: addi r4, r1, 44 ; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: std r0, 112(r1) ; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl modff ; CHECK-NEXT: nop -; CHECK-NEXT: addi r4, r1, 44 -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: fmr f1, f31 
+; CHECK-NEXT: lfs f0, 44(r1) +; CHECK-NEXT: addi r4, r1, 40 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: mtfprwz f0, r30 +; CHECK-NEXT: sth r3, 50(r1) +; CHECK-NEXT: xscvhpdp f1, f0 ; CHECK-NEXT: bl modff ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f3, 40(r1) -; CHECK-NEXT: fmr f2, f1 -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: lfs f4, 44(r1) -; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: lfs f0, 40(r1) +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: li r6, 2 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 48(r1) +; CHECK-NEXT: xscvdphp f0, f31 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 66(r1) +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: lxv v2, 48(r1) +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 64(r1) +; CHECK-NEXT: lxv v3, 64(r1) +; CHECK-NEXT: vextuhrx r3, r5, v3 +; CHECK-NEXT: vextuhrx r4, r6, v3 +; CHECK-NEXT: vextuhrx r5, r5, v2 +; CHECK-NEXT: vextuhrx r6, r6, v2 +; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a) diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll index b610f12159ee2..61a8ebe49e6dd 100644 --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -12,26 +12,21 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -48(r1) ; CHECK-NEXT: li r30, 0 -; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: li r4, 1 ; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: addi r3, r3, -1 +; CHECK-NEXT: addi r3, r4, -1 ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: xscvsxdsp f1, f0 ; CHECK-NEXT: bl __truncsfhf2 ; 
CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop ; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: cmpldi r30, 0 ; CHECK-NEXT: bc 12, gt, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %bb11 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop ; CHECK-NEXT: sth r3, 128(0) ; ; CHECK-P9-LABEL: julia__typed_vcat_20: @@ -39,23 +34,18 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: mtctr r3 ; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: .p2align 4 +; CHECK-P9-NEXT: .p2align 5 ; CHECK-P9-NEXT: .LBB0_1: # %bb3 ; CHECK-P9-NEXT: # ; CHECK-P9-NEXT: addi r3, r3, -1 ; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdphp f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrlwi r3, r3, 16 -; CHECK-P9-NEXT: mtfprwz f0, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xscvhpdp f0, f0 ; CHECK-P9-NEXT: bdnz .LBB0_1 ; CHECK-P9-NEXT: # %bb.2: # %bb11 -; CHECK-P9-NEXT: xscvdphp f0, f0 -; CHECK-P9-NEXT: li r3, 128 -; CHECK-P9-NEXT: stxsihx f0, 0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: sth r3, 128(0) bb: %i = load i64, ptr addrspace(11) null, align 8 %i1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %i, i64 0) @@ -147,54 +137,33 @@ bb9: ; preds = %bb3, %bb1 define void @func_48786() #0 { ; CHECK-LABEL: func_48786: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: mfocrf r12, 32 -; CHECK-NEXT: stw r12, 8(r1) -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -48(r1) -; CHECK-NEXT: std r0, 64(r1) -; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill -; CHECK-NEXT: # implicit-def: $x30 ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: cmpdi r3, 0 -; CHECK-NEXT: crnot 4*cr2+lt, eq +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: crnot 4*cr5+lt, eq ; CHECK-NEXT: b .LBB2_2 -; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB2_1: # %bb10 ; CHECK-NEXT: # -; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: cmpldi 
r30, 0 -; CHECK-NEXT: bc 4, gt, .LBB2_5 +; CHECK-NEXT: bdzlr ; CHECK-NEXT: .LBB2_2: # %bb2 ; CHECK-NEXT: # ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_1 ; CHECK-NEXT: # %bb.3: # %bb4 ; CHECK-NEXT: # -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB2_6 +; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB2_5 ; CHECK-NEXT: # %bb.4: # %bb8 ; CHECK-NEXT: # -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: lhz r3, 0(r3) ; CHECK-NEXT: sth r3, 0(0) ; CHECK-NEXT: b .LBB2_1 -; CHECK-NEXT: .LBB2_5: # %bb14 -; CHECK-NEXT: ld r30, 32(r1) # 8-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 48 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: lwz r12, 8(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: mtocrf 32, r12 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_6: # %bb15 +; CHECK-NEXT: .LBB2_5: # %bb15 ; ; CHECK-P9-LABEL: func_48786: ; CHECK-P9: # %bb.0: # %bb ; CHECK-P9-NEXT: ld r3, 0(r3) ; CHECK-P9-NEXT: cmpdi r3, 0 ; CHECK-P9-NEXT: mtctr r3 -; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: crnot 4*cr5+lt, eq ; CHECK-P9-NEXT: b .LBB2_2 ; CHECK-P9-NEXT: .p2align 5 @@ -206,13 +175,11 @@ define void @func_48786() #0 { ; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB2_1 ; CHECK-P9-NEXT: # %bb.3: # %bb4 ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: lxsihzx f0, 0, r3 -; CHECK-P9-NEXT: xscvhpdp f0, f0 ; CHECK-P9-NEXT: bc 4, 4*cr5+lt, .LBB2_5 ; CHECK-P9-NEXT: # %bb.4: # %bb8 ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: xscvdphp f0, f0 -; CHECK-P9-NEXT: stxsihx f0, 0, r3 +; CHECK-P9-NEXT: lhz r3, 0(r3) +; CHECK-P9-NEXT: sth r3, 0(0) ; CHECK-P9-NEXT: b .LBB2_1 ; CHECK-P9-NEXT: .LBB2_5: # %bb15 bb: @@ -260,41 +227,29 @@ bb15: ; preds = %bb5 define void @func_48785(half %arg) #0 { ; CHECK-LABEL: func_48785: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: 
li r3, 1 -; CHECK-NEXT: li r29, 0 -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: rldic r30, r3, 62, 1 -; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: rldic r4, r4, 62, 1 +; CHECK-NEXT: mtctr r4 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB3_1: # %bb1 ; CHECK-NEXT: # -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: sth r3, 0(r29) -; CHECK-NEXT: addi r29, r29, 24 -; CHECK-NEXT: cmpldi r30, 0 -; CHECK-NEXT: bc 12, gt, .LBB3_1 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: addi r4, r4, 24 +; CHECK-NEXT: bdnz .LBB3_1 ; CHECK-NEXT: # %bb.2: # %bb5 ; ; CHECK-P9-LABEL: func_48785: ; CHECK-P9: # %bb.0: # %bb -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rldic r3, r3, 62, 1 -; CHECK-P9-NEXT: mtctr r3 -; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: li r4, 1 +; CHECK-P9-NEXT: rldic r4, r4, 62, 1 +; CHECK-P9-NEXT: mtctr r4 +; CHECK-P9-NEXT: li r4, 0 ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB3_1: # %bb1 ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: xscvdphp f0, f1 -; CHECK-P9-NEXT: stxsihx f0, 0, r3 -; CHECK-P9-NEXT: addi r3, r3, 24 +; CHECK-P9-NEXT: sth r3, 0(r4) +; CHECK-P9-NEXT: addi r4, r4, 24 ; CHECK-P9-NEXT: bdnz .LBB3_1 ; CHECK-P9-NEXT: # %bb.2: # %bb5 bb: diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll index 7b524a6d2f69b..3c028e9005ee6 100644 --- a/llvm/test/CodeGen/PowerPC/pr49092.ll +++ b/llvm/test/CodeGen/PowerPC/pr49092.ll @@ -8,26 +8,14 @@ define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: add r3, r4, r3 -; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: addi r3, r3, 11 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: addi r1, r1, 32 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: test2: ; CHECK-P9: # 
%bb.0: # %entry ; CHECK-P9-NEXT: add r3, r4, r3 ; CHECK-P9-NEXT: addi r3, r3, 11 -; CHECK-P9-NEXT: clrlwi r3, r3, 16 -; CHECK-P9-NEXT: mtfprwz f0, r3 -; CHECK-P9-NEXT: xscvhpdp f1, f0 ; CHECK-P9-NEXT: blr entry: %add = add i64 %b, %a diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll index 8a9e48e002381..fef9c039c043e 100644 --- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll @@ -18,10 +18,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) -; BE-NEXT: std r0, 128(r1) -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop ; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r0, 128(r1) ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl llrintf @@ -35,10 +33,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) -; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop ; CHECK-NEXT: bl llrintf @@ -52,10 +48,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) -; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r0, 48(r1) ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop ; FAST-NEXT: fctid f0, f1 @@ -73,37 +67,26 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; BE-LABEL: llrint_v1i64_v2f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 +; BE-NEXT: stdu r1, -144(r1) 
+; BE-NEXT: std r0, 160(r1) +; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f31, f1 +; BE-NEXT: clrldi r3, r4, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 120(r1) +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 112(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v2, 0, r3 -; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: addi r1, r1, 144 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -112,35 +95,28 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -96(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 112(r1) -; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f2 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: li r5, 48 ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r0, 112(r1) +; CHECK-NEXT: std r29, 72(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r29, r3 ; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f31, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: bl llrintf -; 
CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 72(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v2, vs0, v31 ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 96 @@ -151,35 +127,30 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; FAST-LABEL: llrint_v1i64_v2f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -48(r1) -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f2 -; FAST-NEXT: std r0, 64(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop +; FAST-NEXT: stdu r1, -80(r1) +; FAST-NEXT: li r5, 48 ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r4 +; FAST-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop ; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: fctid f1, f30 +; FAST-NEXT: ld r30, 64(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: addi r1, r1, 48 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v2, vs0, v31 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 80 ; FAST-NEXT: ld r0, 
16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 ; FAST-NEXT: blr %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) @@ -191,73 +162,46 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f29, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r28, 152(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 192(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 200(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f4 -; BE-NEXT: fmr f30, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill ; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: clrldi r3, r4, 48 +; BE-NEXT: std r29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r6 +; BE-NEXT: mr r29, r5 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 120(r1) ; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: fmr f31, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f30, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop +; BE-NEXT: std r3, 112(r1) ; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f29, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: std r3, 
120(r1) -; BE-NEXT: bl llrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 -; BE-NEXT: std r3, 112(r1) -; BE-NEXT: bl llrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 136(r1) +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 128(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 168(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 200(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f30, 192(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 184(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 -; BE-NEXT: ld r29, 160(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 152(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v3, 0, r3 -; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: addi r1, r1, 176 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -265,79 +209,57 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -144(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 160(r1) -; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f2 -; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f3 -; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f4 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; 
CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: stdu r1, -128(r1) +; CHECK-NEXT: li r7, 48 +; CHECK-NEXT: std r0, 144(r1) ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r27, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 104(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r29, r5 +; CHECK-NEXT: mr r28, r4 +; CHECK-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill +; CHECK-NEXT: li r7, 64 +; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r6 +; CHECK-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: clrldi r3, r28, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v31, r27 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 ; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: clrldi r3, r29, 48 ; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: nop ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v30, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, 128(r1) 
# 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v3, vs0, v30 ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 144 +; CHECK-NEXT: addi r1, r1, 128 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -345,63 +267,55 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; FAST-LABEL: llrint_v4i64_v4f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -64(r1) -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f4 -; FAST-NEXT: std r0, 80(r1) -; FAST-NEXT: fmr f31, f3 -; FAST-NEXT: fmr f30, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop +; FAST-NEXT: stdu r1, -112(r1) +; FAST-NEXT: li r7, 48 +; FAST-NEXT: std r0, 128(r1) ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; FAST-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; FAST-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r6 +; FAST-NEXT: mr r29, r5 +; FAST-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill +; FAST-NEXT: li r7, 64 +; FAST-NEXT: mr r28, r4 +; FAST-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, 
r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r28, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r29, 48 +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f2, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f28 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r28, 80(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs0, vs2 -; FAST-NEXT: addi r1, r1, 64 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v3, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 112 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload ; FAST-NEXT: blr %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x 
half> %x) ret <4 x i64> %a @@ -412,145 +326,86 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f25, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r24, 184(r1) # 8-byte Folded Spill -; BE-NEXT: std r25, 192(r1) # 8-byte Folded Spill -; BE-NEXT: std r26, 200(r1) # 8-byte Folded Spill -; BE-NEXT: std r27, 208(r1) # 8-byte Folded Spill -; BE-NEXT: std r28, 216(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 224(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 232(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f26, 256(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f27, 264(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f28, 272(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f29, 280(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 288(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 296(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f8 -; BE-NEXT: fmr f30, f7 -; BE-NEXT: fmr f29, f6 -; BE-NEXT: fmr f28, f5 -; BE-NEXT: fmr f27, f4 -; BE-NEXT: fmr f26, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: mr r27, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 -; BE-NEXT: mr r26, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: mr r25, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 +; BE-NEXT: stdu r1, -240(r1) +; BE-NEXT: std r0, 256(r1) +; BE-NEXT: std r24, 176(r1) # 8-byte Folded Spill ; BE-NEXT: mr r24, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: clrldi r3, r4, 48 
+; BE-NEXT: std r25, 184(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 192(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 200(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 208(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 216(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 224(r1) # 8-byte Folded Spill +; BE-NEXT: mr r29, r10 +; BE-NEXT: mr r30, r9 +; BE-NEXT: mr r27, r8 +; BE-NEXT: mr r28, r7 +; BE-NEXT: mr r26, r6 +; BE-NEXT: mr r25, r5 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 120(r1) ; BE-NEXT: clrldi r3, r24, 48 -; BE-NEXT: fmr f31, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r25, 48 -; BE-NEXT: fmr f30, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop +; BE-NEXT: std r3, 112(r1) ; BE-NEXT: clrldi r3, r26, 48 -; BE-NEXT: fmr f29, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r27, 48 -; BE-NEXT: fmr f28, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: fmr f27, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f26, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f25, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: bl llrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 -; BE-NEXT: std r3, 120(r1) -; BE-NEXT: bl llrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: std r3, 112(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 ; BE-NEXT: std r3, 136(r1) +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 ; BE-NEXT: std r3, 128(r1) +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 ; BE-NEXT: std r3, 152(r1) +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; 
BE-NEXT: nop -; BE-NEXT: fmr f1, f30 ; BE-NEXT: std r3, 144(r1) +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 168(r1) +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 160(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 232(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 296(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f30, 288(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 280(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 224(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 216(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 208(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 200(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 192(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 184(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 176(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 -; BE-NEXT: lfd f28, 272(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f27, 264(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f26, 256(r1) # 8-byte Folded Reload -; BE-NEXT: ld r29, 224(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 216(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v3, 0, r3 ; BE-NEXT: addi r3, r1, 144 -; BE-NEXT: lfd f25, 248(r1) # 8-byte Folded Reload -; BE-NEXT: ld r27, 208(r1) # 8-byte Folded Reload -; BE-NEXT: ld r26, 200(r1) # 8-byte Folded Reload -; BE-NEXT: ld r25, 192(r1) # 8-byte Folded Reload -; BE-NEXT: ld r24, 184(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v4, 0, r3 ; BE-NEXT: addi r3, r1, 160 ; BE-NEXT: lxvd2x v5, 0, r3 -; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: addi r1, r1, 240 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -558,159 +413,107 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -240(r1) -; CHECK-NEXT: li 
r3, 48 -; CHECK-NEXT: std r0, 256(r1) -; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f25, f2 -; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f26, f3 -; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f27, f4 -; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f28, f5 -; CHECK-NEXT: stfd f29, 216(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f6 -; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f7 -; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f8 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 -; CHECK-NEXT: mr r27, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mr r26, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mr r25, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; 
CHECK-NEXT: mr r24, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: stdu r1, -192(r1) +; CHECK-NEXT: li r11, 48 +; CHECK-NEXT: std r0, 208(r1) ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r23, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 136(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r25, r5 +; CHECK-NEXT: mr r24, r4 +; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 64 +; CHECK-NEXT: std r26, 144(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r26, r6 +; CHECK-NEXT: std r27, 152(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r28, r8 +; CHECK-NEXT: mr r27, r7 +; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 80 +; CHECK-NEXT: std r29, 168(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r29, r9 +; CHECK-NEXT: std r30, 176(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r10 +; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 96 +; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r24, 48 -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r25, 48 -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r26, 48 -; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: mr r23, r3 +; CHECK-NEXT: clrldi r3, r24, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r27, 48 -; CHECK-NEXT: fmr f28, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r23 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: xxmrghd v31, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, 
r29, 48 -; CHECK-NEXT: fmr f26, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: clrldi r3, r26, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v30, r25 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mtvsrd v30, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v30, vs0, v30 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: mtvsrd v29, r27 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 ; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: clrldi r3, r29, 48 ; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: nop ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v28, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: lfd f31, 232(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 176(r1) # 8-byte Folded Reload ; CHECK-NEXT: vmr v3, v30 ; CHECK-NEXT: vmr v4, v29 -; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 168(r1) # 
8-byte Folded Reload +; CHECK-NEXT: ld r28, 160(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f26, 192(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 152(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxmrghd v5, vs0, v28 -; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v5, vs0, v28 ; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 240 +; CHECK-NEXT: addi r1, r1, 192 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -718,117 +521,103 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; FAST-LABEL: llrint_v8i64_v8f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: 
stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -96(r1) -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f8 -; FAST-NEXT: std r0, 112(r1) -; FAST-NEXT: fmr f30, f7 -; FAST-NEXT: fmr f29, f6 -; FAST-NEXT: fmr f28, f5 -; FAST-NEXT: fmr f27, f4 -; FAST-NEXT: fmr f26, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop +; FAST-NEXT: stdu r1, -176(r1) +; FAST-NEXT: li r11, 48 +; FAST-NEXT: std r0, 192(r1) ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r24, 112(r1) # 8-byte Folded Spill +; FAST-NEXT: std r25, 120(r1) # 8-byte Folded Spill +; FAST-NEXT: std r26, 128(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r26, r6 +; FAST-NEXT: mr r25, r5 +; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 64 +; FAST-NEXT: std r27, 136(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r27, r7 +; FAST-NEXT: std r28, 144(r1) # 8-byte Folded Spill +; FAST-NEXT: std r29, 152(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r29, r9 +; FAST-NEXT: mr r28, r8 +; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 80 +; FAST-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r10 +; FAST-NEXT: mr r24, r4 +; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 96 +; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r24, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r25, 48 +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: 
nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: clrldi r3, r26, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r27, 48 +; FAST-NEXT: xxmrghd v30, vs0, v30 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: clrldi r3, r28, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r29, 48 +; FAST-NEXT: xxmrghd v29, vs0, v29 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f26 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f27 -; FAST-NEXT: fctid f4, f28 -; FAST-NEXT: fctid f5, f29 -; FAST-NEXT: fctid f6, f30 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, 
f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f31 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v3, v30 +; FAST-NEXT: vmr v4, v29 +; FAST-NEXT: ld r28, 144(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r27, 136(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r26, 128(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r25, 120(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: ld r24, 112(r1) # 8-byte Folded Reload ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v5, vs0, vs6 -; FAST-NEXT: addi r1, r1, 96 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v5, vs0, v28 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 176 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload ; FAST-NEXT: blr %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -839,286 +628,166 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; BE-LABEL: llrint_v16i64_v16f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; 
BE-NEXT: stdu r1, -496(r1) -; BE-NEXT: std r0, 512(r1) -; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f20, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r16, 248(r1) # 8-byte Folded Spill -; BE-NEXT: std r17, 256(r1) # 8-byte Folded Spill -; BE-NEXT: std r18, 264(r1) # 8-byte Folded Spill -; BE-NEXT: std r19, 272(r1) # 8-byte Folded Spill -; BE-NEXT: std r20, 280(r1) # 8-byte Folded Spill -; BE-NEXT: std r21, 288(r1) # 8-byte Folded Spill -; BE-NEXT: std r22, 296(r1) # 8-byte Folded Spill -; BE-NEXT: std r23, 304(r1) # 8-byte Folded Spill -; BE-NEXT: std r24, 312(r1) # 8-byte Folded Spill -; BE-NEXT: std r25, 320(r1) # 8-byte Folded Spill -; BE-NEXT: std r26, 328(r1) # 8-byte Folded Spill -; BE-NEXT: std r27, 336(r1) # 8-byte Folded Spill -; BE-NEXT: std r28, 344(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 352(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 360(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f13 -; BE-NEXT: fmr f29, f12 -; BE-NEXT: fmr f30, f11 -; BE-NEXT: fmr f28, f10 -; BE-NEXT: fmr f27, f9 -; BE-NEXT: fmr f26, f8 -; BE-NEXT: fmr f25, f7 -; BE-NEXT: fmr f24, f6 -; BE-NEXT: fmr f23, f5 -; BE-NEXT: fmr f22, f4 -; BE-NEXT: fmr f21, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 -; BE-NEXT: mr 
r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 -; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 -; BE-NEXT: mr r27, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 -; BE-NEXT: mr r26, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: mr r25, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 +; BE-NEXT: stdu r1, -368(r1) +; BE-NEXT: std r0, 384(r1) +; BE-NEXT: std r24, 304(r1) # 8-byte Folded Spill ; BE-NEXT: mr r24, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 -; BE-NEXT: mr r23, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 -; BE-NEXT: mr r22, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: mr r21, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 -; BE-NEXT: mr r20, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 652(r1) -; BE-NEXT: mr r19, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: mr r18, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 668(r1) -; BE-NEXT: mr r17, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 660(r1) -; BE-NEXT: mr r16, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r16, 48 -; BE-NEXT: fmr f31, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r17, 48 -; BE-NEXT: fmr f30, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r18, 48 -; BE-NEXT: fmr f29, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r19, 48 -; BE-NEXT: fmr f28, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r20, 48 -; BE-NEXT: fmr f27, f1 -; BE-NEXT: bl 
__extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r21, 48 -; BE-NEXT: fmr f26, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r22, 48 -; BE-NEXT: fmr f25, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r23, 48 -; BE-NEXT: fmr f24, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r24, 48 -; BE-NEXT: fmr f23, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r25, 48 -; BE-NEXT: fmr f22, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r26, 48 -; BE-NEXT: fmr f21, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r27, 48 -; BE-NEXT: fmr f20, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: fmr f19, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f18, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f17, f1 +; BE-NEXT: lhz r3, 494(r1) +; BE-NEXT: std r16, 240(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r16, 486(r1) +; BE-NEXT: std r17, 248(r1) # 8-byte Folded Spill +; BE-NEXT: std r18, 256(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 264(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 272(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 280(r1) # 8-byte Folded Spill +; BE-NEXT: std r22, 288(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r22, 534(r1) +; BE-NEXT: lhz r21, 542(r1) +; BE-NEXT: lhz r20, 518(r1) +; BE-NEXT: lhz r19, 526(r1) +; BE-NEXT: lhz r18, 502(r1) +; BE-NEXT: lhz r17, 510(r1) +; BE-NEXT: std r23, 296(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 312(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 320(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 328(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 336(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 344(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 352(r1) # 8-byte Folded Spill +; BE-NEXT: mr r29, r10 +; BE-NEXT: mr r30, r9 +; BE-NEXT: mr r27, r8 
+; BE-NEXT: mr r28, r7 +; BE-NEXT: mr r25, r6 +; BE-NEXT: mr r26, r5 +; BE-NEXT: mr r23, r4 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f17 ; BE-NEXT: std r3, 120(r1) +; BE-NEXT: mr r3, r16 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f18 ; BE-NEXT: std r3, 112(r1) +; BE-NEXT: mr r3, r17 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f19 ; BE-NEXT: std r3, 136(r1) +; BE-NEXT: mr r3, r18 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 ; BE-NEXT: std r3, 128(r1) +; BE-NEXT: mr r3, r19 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 ; BE-NEXT: std r3, 152(r1) +; BE-NEXT: mr r3, r20 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 ; BE-NEXT: std r3, 144(r1) +; BE-NEXT: mr r3, r21 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 ; BE-NEXT: std r3, 168(r1) +; BE-NEXT: mr r3, r22 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 ; BE-NEXT: std r3, 160(r1) +; BE-NEXT: clrldi r3, r23, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 ; BE-NEXT: std r3, 184(r1) +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 ; BE-NEXT: std r3, 176(r1) +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 ; BE-NEXT: std r3, 200(r1) +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 ; BE-NEXT: std r3, 192(r1) +; BE-NEXT: clrldi r3, r27, 
48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 ; BE-NEXT: std r3, 216(r1) +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 ; BE-NEXT: std r3, 208(r1) +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 232(r1) +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 224(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 360(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: ld r30, 352(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 344(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 336(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 328(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 320(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 312(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 304(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v6, 0, r3 ; BE-NEXT: addi r3, r1, 128 -; BE-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload -; BE-NEXT: ld r29, 352(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 344(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: ld r23, 296(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 288(r1) # 8-byte Folded Reload +; BE-NEXT: ld r21, 280(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 272(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 264(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 256(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 248(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 240(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v7, 
0, r3 ; BE-NEXT: addi r3, r1, 144 -; BE-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload -; BE-NEXT: ld r27, 336(r1) # 8-byte Folded Reload -; BE-NEXT: ld r26, 328(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: lxvd2x v8, 0, r3 ; BE-NEXT: addi r3, r1, 160 -; BE-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload -; BE-NEXT: ld r25, 320(r1) # 8-byte Folded Reload -; BE-NEXT: ld r24, 312(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: lxvd2x v9, 0, r3 ; BE-NEXT: addi r3, r1, 176 -; BE-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload -; BE-NEXT: ld r23, 304(r1) # 8-byte Folded Reload -; BE-NEXT: ld r22, 296(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: lxvd2x v2, 0, r3 ; BE-NEXT: addi r3, r1, 192 -; BE-NEXT: ld r21, 288(r1) # 8-byte Folded Reload -; BE-NEXT: ld r20, 280(r1) # 8-byte Folded Reload -; BE-NEXT: ld r19, 272(r1) # 8-byte Folded Reload -; BE-NEXT: ld r18, 264(r1) # 8-byte Folded Reload -; BE-NEXT: ld r17, 256(r1) # 8-byte Folded Reload -; BE-NEXT: ld r16, 248(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: lxvd2x v3, 0, r3 ; BE-NEXT: addi r3, r1, 208 -; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: lxvd2x v4, 0, r3 ; BE-NEXT: addi r3, r1, 224 -; BE-NEXT: lxvd2x v9, 0, r3 -; BE-NEXT: addi r1, r1, 496 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r1, r1, 368 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -1126,316 +795,207 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -432(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 448(r1) -; CHECK-NEXT: std 
r16, 184(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f21, f3 -; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f24, f6 -; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded 
Spill -; CHECK-NEXT: fmr f27, f9 -; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f12 -; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: mr r27, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 -; CHECK-NEXT: mr r26, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mr r25, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mr r24, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: stdu r1, -320(r1) +; CHECK-NEXT: li r11, 48 +; CHECK-NEXT: std r0, 336(r1) +; CHECK-NEXT: std r23, 248(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r23, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: lhz r3, 416(r1) +; CHECK-NEXT: std r16, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 200(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r17, 432(r1) +; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 64 +; CHECK-NEXT: std r18, 208(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r18, 440(r1) +; CHECK-NEXT: std r19, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: 
lhz r19, 448(r1) +; CHECK-NEXT: lhz r16, 424(r1) +; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 80 +; CHECK-NEXT: std r20, 224(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r20, 456(r1) +; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 96 +; CHECK-NEXT: std r21, 232(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, 240(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r22, 472(r1) +; CHECK-NEXT: lhz r21, 464(r1) +; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 112 +; CHECK-NEXT: std r15, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 256(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 264(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r25, r5 +; CHECK-NEXT: mr r24, r4 +; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 128 +; CHECK-NEXT: std r26, 272(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 280(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 288(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 296(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r29, r9 +; CHECK-NEXT: mr r28, r8 +; CHECK-NEXT: mr r27, r7 +; CHECK-NEXT: mr r26, r6 +; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 144 +; CHECK-NEXT: std r30, 304(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r10 +; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 160 +; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 -; CHECK-NEXT: mr r22, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mr r21, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mr r15, r3 +; CHECK-NEXT: mr r3, r16 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mr r20, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mtvsrd v31, r15 +; CHECK-NEXT: bl llrintf 
; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r19, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r17 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 568(r1) -; CHECK-NEXT: mr r18, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 576(r1) ; CHECK-NEXT: mr r17, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mr r3, r18 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 584(r1) -; CHECK-NEXT: mr r16, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mtvsrd v30, r17 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r19 +; CHECK-NEXT: xxmrghd v30, vs0, v30 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r16, 48 -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r17, 48 -; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: mr r19, r3 +; CHECK-NEXT: mr r3, r20 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r18, 48 -; CHECK-NEXT: fmr f29, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v29, r19 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r19, 48 -; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r21 +; CHECK-NEXT: xxmrghd v29, vs0, v29 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r20, 48 -; CHECK-NEXT: fmr f27, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r21, 48 -; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: mr r3, r22 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r22, 48 -; CHECK-NEXT: fmr f25, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v28, r21 +; CHECK-NEXT: bl llrintf ; 
CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: clrldi r3, r23, 48 -; CHECK-NEXT: fmr f24, f1 +; CHECK-NEXT: xxmrghd v28, vs0, v28 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r23, r3 ; CHECK-NEXT: clrldi r3, r24, 48 -; CHECK-NEXT: fmr f23, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v27, r23 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: clrldi r3, r25, 48 -; CHECK-NEXT: fmr f22, f1 +; CHECK-NEXT: xxmrghd v27, vs0, v27 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r25, r3 ; CHECK-NEXT: clrldi r3, r26, 48 -; CHECK-NEXT: fmr f21, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v26, r25 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: clrldi r3, r27, 48 -; CHECK-NEXT: fmr f20, f1 +; CHECK-NEXT: xxmrghd v26, vs0, v26 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r27, r3 ; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f19, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f18, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f17, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f17 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f18 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f19 -; CHECK-NEXT: mtvsrd v30, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v30, vs0, v30 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: 
fmr f1, f21 -; CHECK-NEXT: mtvsrd v29, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v29, vs0, v29 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: mtvsrd v28, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v28, vs0, v28 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mtvsrd v27, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v27, vs0, v27 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: mtvsrd v25, r27 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v26, vs0, v26 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v25, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v25, vs0, v25 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: mtvsrd v24, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: lfd f31, 424(r1) # 8-byte Folded Reload -; CHECK-NEXT: vmr v3, v30 -; CHECK-NEXT: vmr v4, v29 -; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v6, v31 +; CHECK-NEXT: ld r30, 304(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v7, v30 +; CHECK-NEXT: vmr v8, v29 +; CHECK-NEXT: ld r29, 296(r1) 
# 8-byte Folded Reload +; CHECK-NEXT: ld r28, 288(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: vmr v5, v28 -; CHECK-NEXT: vmr v6, v27 -; CHECK-NEXT: vmr v7, v26 -; CHECK-NEXT: vmr v8, v25 -; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v9, v28 +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: vmr v3, v26 +; CHECK-NEXT: vmr v4, v25 +; CHECK-NEXT: ld r27, 280(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 272(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxmrghd v9, vs0, v24 -; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 264(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 256(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v5, vs0, v24 +; CHECK-NEXT: ld r23, 248(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 240(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 232(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f20, 336(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 224(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 208(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 200(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, 
184(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f17, 312(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r21, 224(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 432 +; CHECK-NEXT: addi r1, r1, 320 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -1443,223 +1003,199 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; FAST-LABEL: llrint_v16i64_v16f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte 
Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -160(r1) -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 312(r1) -; FAST-NEXT: std r0, 176(r1) -; FAST-NEXT: fmr f28, f13 -; FAST-NEXT: fmr f27, f12 -; FAST-NEXT: fmr f24, f11 -; FAST-NEXT: fmr f21, f10 -; FAST-NEXT: fmr f19, f9 -; FAST-NEXT: fmr f18, f8 -; FAST-NEXT: fmr f17, f7 -; FAST-NEXT: fmr f16, f6 -; FAST-NEXT: fmr f20, f5 -; FAST-NEXT: fmr f22, f4 -; FAST-NEXT: fmr f23, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: stdu r1, -304(r1) +; FAST-NEXT: li r11, 48 +; FAST-NEXT: std r0, 320(r1) +; FAST-NEXT: std r23, 232(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r23, r3 +; FAST-NEXT: lhz r3, 400(r1) +; FAST-NEXT: std r16, 176(r1) # 8-byte Folded Spill +; FAST-NEXT: std r17, 184(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r17, 416(r1) +; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 64 +; FAST-NEXT: std r18, 192(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r18, 424(r1) +; FAST-NEXT: std r19, 200(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r19, 432(r1) +; FAST-NEXT: lhz r16, 408(r1) +; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 80 +; FAST-NEXT: std r20, 208(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r20, 440(r1) +; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 96 +; FAST-NEXT: std r21, 216(r1) # 8-byte Folded Spill +; FAST-NEXT: std r22, 224(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r22, 456(r1) +; FAST-NEXT: lhz r21, 448(r1) +; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 112 +; FAST-NEXT: std r24, 240(r1) # 
8-byte Folded Spill +; FAST-NEXT: std r25, 248(r1) # 8-byte Folded Spill +; FAST-NEXT: std r26, 256(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r26, r6 +; FAST-NEXT: mr r25, r5 +; FAST-NEXT: mr r24, r4 +; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 128 +; FAST-NEXT: std r27, 264(r1) # 8-byte Folded Spill +; FAST-NEXT: std r28, 272(r1) # 8-byte Folded Spill +; FAST-NEXT: std r29, 280(r1) # 8-byte Folded Spill +; FAST-NEXT: std r30, 288(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r10 +; FAST-NEXT: mr r29, r9 +; FAST-NEXT: mr r28, r8 +; FAST-NEXT: mr r27, r7 +; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 144 +; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 160 +; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 304(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r16 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 296(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r17 +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: mr r3, r18 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; 
FAST-NEXT: mr r3, r19 +; FAST-NEXT: xxmrghd v30, vs0, v30 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: mr r3, r20 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f21 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r21 +; FAST-NEXT: xxmrghd v29, vs0, v29 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f19 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: mr r3, r22 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: fmr f1, f18 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r23, 48 +; FAST-NEXT: xxmrghd v28, vs0, v28 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: fmr f1, f17 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v27, r3 +; FAST-NEXT: clrldi r3, r24, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r25, 48 +; FAST-NEXT: xxmrghd v27, vs0, v27 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; 
FAST-NEXT: fmr f1, f20 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: clrldi r3, r26, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: fmr f1, f22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r27, 48 +; FAST-NEXT: xxmrghd v26, vs0, v26 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: fmr f1, f23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v25, r3 +; FAST-NEXT: clrldi r3, r28, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r29, 48 +; FAST-NEXT: xxmrghd v25, vs0, v25 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f22 -; FAST-NEXT: fctid f4, f20 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f17 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f9, f21 -; FAST-NEXT: fctid f10, f24 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; 
FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v5, vs7, vs6 -; FAST-NEXT: xxmrghd v6, vs9, vs8 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f27 -; FAST-NEXT: fctid f1, f29 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v7, vs0, vs10 -; FAST-NEXT: fctid f0, f28 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v8, vs1, vs0 -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f1, f31 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: vmr v6, v31 +; FAST-NEXT: ld r30, 288(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 280(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v7, v30 +; FAST-NEXT: vmr v8, v29 +; FAST-NEXT: ld r28, 272(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r27, 264(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v9, v28 +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: ld r26, 256(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r25, 248(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v3, v26 +; FAST-NEXT: vmr v4, v25 +; FAST-NEXT: ld r24, 240(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r23, 232(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r22, 224(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r21, 216(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: ld r20, 208(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r19, 200(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r18, 192(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r17, 184(r1) # 8-byte Folded Reload ; FAST-NEXT: mtfprd 
f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v9, vs1, vs0 -; FAST-NEXT: addi r1, r1, 160 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: ld r16, 176(r1) # 8-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v5, vs0, v24 +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 304 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload ; FAST-NEXT: blr %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -1670,483 +1206,295 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) 
nounwind { ; BE-LABEL: llrint_v32i64_v32f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -864(r1) -; BE-NEXT: std r0, 880(r1) -; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f20, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r14, 576(r1) # 8-byte Folded Spill -; BE-NEXT: std r15, 584(r1) # 8-byte Folded Spill -; BE-NEXT: std r16, 592(r1) # 8-byte Folded Spill -; BE-NEXT: std r17, 600(r1) # 8-byte Folded Spill -; BE-NEXT: std r18, 608(r1) # 8-byte Folded Spill -; BE-NEXT: std r19, 616(r1) # 8-byte Folded Spill -; BE-NEXT: std r20, 624(r1) # 8-byte Folded Spill -; BE-NEXT: std r21, 632(r1) # 8-byte Folded Spill -; BE-NEXT: std r22, 640(r1) # 8-byte Folded Spill -; BE-NEXT: std r23, 648(r1) # 8-byte Folded Spill -; BE-NEXT: std r24, 656(r1) # 8-byte Folded Spill -; BE-NEXT: std r25, 664(r1) # 8-byte Folded Spill -; BE-NEXT: std r26, 672(r1) # 8-byte Folded Spill -; BE-NEXT: std r27, 680(r1) # 8-byte Folded Spill -; BE-NEXT: std r28, 688(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 696(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 704(r1) # 8-byte Folded Spill -; BE-NEXT: std r31, 712(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f14, 720(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f15, 728(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f16, 736(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f17, 744(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f18, 752(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f19, 760(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f21, 776(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f22, 784(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f23, 792(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f24, 800(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f25, 808(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f26, 816(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f27, 824(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f28, 832(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f29, 840(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill -; BE-NEXT: 
stfd f31, 856(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f13 +; BE-NEXT: stdu r1, -624(r1) +; BE-NEXT: std r0, 640(r1) +; BE-NEXT: std r30, 608(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r3 -; BE-NEXT: fmr f29, f12 -; BE-NEXT: fmr f30, f11 -; BE-NEXT: fmr f28, f10 -; BE-NEXT: fmr f27, f9 -; BE-NEXT: fmr f26, f8 -; BE-NEXT: fmr f25, f7 -; BE-NEXT: fmr f24, f6 -; BE-NEXT: fmr f23, f5 -; BE-NEXT: fmr f22, f4 -; BE-NEXT: fmr f21, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 -; BE-NEXT: std r3, 304(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 -; BE-NEXT: std r3, 296(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 -; BE-NEXT: std r3, 280(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 -; BE-NEXT: std r3, 264(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 -; BE-NEXT: std r3, 248(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: std r3, 232(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 +; BE-NEXT: lhz r3, 926(r1) +; BE-NEXT: std r14, 480(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r14, 822(r1) +; BE-NEXT: std r15, 488(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 520(r1) # 8-byte Folded Spill ; BE-NEXT: std r3, 216(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 +; BE-NEXT: lhz r3, 934(r1) +; BE-NEXT: lhz r15, 814(r1) +; BE-NEXT: lhz r19, 742(r1) +; BE-NEXT: std r22, 544(r1) # 8-byte Folded Spill +; BE-NEXT: std r23, 552(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 568(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 576(r1) # 8-byte Folded Spill +; BE-NEXT: std r3, 208(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 910(r1) +; BE-NEXT: lhz r26, 766(r1) +; BE-NEXT: lhz r25, 774(r1) +; BE-NEXT: std r27, 584(r1) # 
8-byte Folded Spill +; BE-NEXT: std r28, 592(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 600(r1) # 8-byte Folded Spill +; BE-NEXT: std r31, 616(r1) # 8-byte Folded Spill ; BE-NEXT: std r3, 200(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 +; BE-NEXT: lhz r3, 918(r1) +; BE-NEXT: lhz r31, 798(r1) +; BE-NEXT: lhz r29, 806(r1) +; BE-NEXT: lhz r28, 782(r1) +; BE-NEXT: lhz r27, 790(r1) +; BE-NEXT: lhz r23, 750(r1) +; BE-NEXT: lhz r22, 758(r1) +; BE-NEXT: std r16, 496(r1) # 8-byte Folded Spill +; BE-NEXT: std r17, 504(r1) # 8-byte Folded Spill +; BE-NEXT: std r3, 192(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 894(r1) +; BE-NEXT: mr r17, r7 +; BE-NEXT: mr r16, r4 +; BE-NEXT: std r18, 512(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 528(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 536(r1) # 8-byte Folded Spill +; BE-NEXT: std r24, 560(r1) # 8-byte Folded Spill ; BE-NEXT: std r3, 184(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 +; BE-NEXT: lhz r3, 902(r1) +; BE-NEXT: mr r24, r10 +; BE-NEXT: mr r20, r9 +; BE-NEXT: mr r21, r8 +; BE-NEXT: mr r18, r6 +; BE-NEXT: std r3, 176(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 878(r1) ; BE-NEXT: std r3, 168(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 +; BE-NEXT: lhz r3, 886(r1) +; BE-NEXT: std r3, 160(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 862(r1) ; BE-NEXT: std r3, 152(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1028(r1) +; BE-NEXT: lhz r3, 870(r1) +; BE-NEXT: std r3, 144(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 846(r1) ; BE-NEXT: std r3, 136(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 +; BE-NEXT: lhz r3, 854(r1) +; BE-NEXT: std r3, 128(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 830(r1) ; BE-NEXT: std r3, 120(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; 
BE-NEXT: nop -; BE-NEXT: lfs f1, 1044(r1) +; BE-NEXT: lhz r3, 838(r1) ; BE-NEXT: std r3, 112(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1036(r1) -; BE-NEXT: mr r15, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1060(r1) -; BE-NEXT: mr r14, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1052(r1) -; BE-NEXT: mr r31, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1076(r1) -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1068(r1) -; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1092(r1) -; BE-NEXT: mr r27, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1084(r1) -; BE-NEXT: mr r26, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1108(r1) -; BE-NEXT: mr r25, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1100(r1) -; BE-NEXT: mr r24, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1124(r1) -; BE-NEXT: mr r23, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1116(r1) -; BE-NEXT: mr r22, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1140(r1) -; BE-NEXT: mr r21, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1132(r1) -; BE-NEXT: mr r20, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1156(r1) -; BE-NEXT: mr r19, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1148(r1) -; BE-NEXT: mr r18, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1172(r1) -; BE-NEXT: mr r17, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1164(r1) -; BE-NEXT: mr r16, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: clrldi r3, r5, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 424(r1) ; BE-NEXT: clrldi r3, r16, 48 -; 
BE-NEXT: stfs f1, 316(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 416(r1) ; BE-NEXT: clrldi r3, r17, 48 -; BE-NEXT: stfs f1, 312(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 440(r1) ; BE-NEXT: clrldi r3, r18, 48 -; BE-NEXT: stfs f1, 292(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r19, 48 -; BE-NEXT: stfs f1, 276(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop +; BE-NEXT: std r3, 432(r1) ; BE-NEXT: clrldi r3, r20, 48 -; BE-NEXT: stfs f1, 260(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r21, 48 -; BE-NEXT: stfs f1, 244(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r22, 48 -; BE-NEXT: stfs f1, 228(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r23, 48 -; BE-NEXT: stfs f1, 212(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r24, 48 -; BE-NEXT: stfs f1, 196(r1) # 4-byte Folded Spill +; BE-NEXT: std r3, 456(r1) +; BE-NEXT: clrldi r3, r21, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r25, 48 -; BE-NEXT: stfs f1, 180(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r26, 48 -; BE-NEXT: stfs f1, 164(r1) # 4-byte Folded Spill +; BE-NEXT: std r3, 448(r1) +; BE-NEXT: mr r3, r19 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r27, 48 -; BE-NEXT: stfs f1, 148(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: stfs f1, 132(r1) # 4-byte Folded Spill +; BE-NEXT: std r3, 472(r1) +; BE-NEXT: clrldi r3, r24, 48 ; BE-NEXT: bl 
__extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f18, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r31, 48 -; BE-NEXT: fmr f17, f1 +; BE-NEXT: std r3, 464(r1) +; BE-NEXT: mr r3, r22 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r14, 48 -; BE-NEXT: fmr f16, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r15, 48 -; BE-NEXT: fmr f15, f1 +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: mr r3, r23 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f14, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f31, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: mr r3, r25 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f30, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f29, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 248(r1) +; BE-NEXT: mr r3, r26 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f28, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f27, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 240(r1) +; BE-NEXT: mr r3, r27 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f26, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f25, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 264(r1) +; BE-NEXT: mr r3, 
r28 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 232(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f24, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 248(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f23, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 256(r1) +; BE-NEXT: mr r3, r29 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 264(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f22, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 280(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f21, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 280(r1) +; BE-NEXT: mr r3, r31 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 296(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f20, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 304(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f19, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 272(r1) +; BE-NEXT: mr r3, r14 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f19 -; BE-NEXT: std r3, 328(r1) -; BE-NEXT: bl llrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 -; BE-NEXT: std r3, 320(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 296(r1) +; BE-NEXT: mr r3, r15 +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 -; BE-NEXT: std r3, 344(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 -; BE-NEXT: std r3, 336(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 288(r1) +; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 -; BE-NEXT: std r3, 360(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 -; BE-NEXT: std r3, 352(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 312(r1) +; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; BE-NEXT: bl 
__extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 -; BE-NEXT: std r3, 376(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: std r3, 368(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 304(r1) +; BE-NEXT: ld r3, 128(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 -; BE-NEXT: std r3, 392(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 -; BE-NEXT: std r3, 384(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 328(r1) +; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: std r3, 408(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 -; BE-NEXT: std r3, 400(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 320(r1) +; BE-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: std r3, 424(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f14 -; BE-NEXT: std r3, 416(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 344(r1) +; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f15 -; BE-NEXT: std r3, 440(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f16 -; BE-NEXT: std r3, 432(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 336(r1) +; BE-NEXT: ld r3, 160(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f17 -; BE-NEXT: std r3, 456(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f18 -; BE-NEXT: std r3, 448(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 360(r1) +; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 132(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 472(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 148(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 464(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 352(r1) +; 
BE-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 164(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 488(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 180(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 480(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 376(r1) +; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 196(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 504(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 212(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 496(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 368(r1) +; BE-NEXT: ld r3, 192(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 228(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 520(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 244(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 512(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 392(r1) +; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 260(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 536(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 276(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 528(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 384(r1) +; BE-NEXT: ld r3, 208(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 292(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 552(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 312(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 544(r1) -; BE-NEXT: bl llrintf +; BE-NEXT: std r3, 408(r1) +; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 316(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 568(r1) ; BE-NEXT: bl llrintf ; BE-NEXT: nop -; BE-NEXT: std r3, 560(r1) -; BE-NEXT: addi r3, r1, 320 +; BE-NEXT: std r3, 400(r1) +; 
BE-NEXT: addi r3, r1, 416 ; BE-NEXT: lxvd2x vs0, 0, r3 -; BE-NEXT: addi r3, r1, 336 +; BE-NEXT: addi r3, r1, 432 ; BE-NEXT: lxvd2x vs1, 0, r3 -; BE-NEXT: addi r3, r1, 352 +; BE-NEXT: addi r3, r1, 448 ; BE-NEXT: lxvd2x vs2, 0, r3 -; BE-NEXT: addi r3, r1, 368 +; BE-NEXT: addi r3, r1, 464 ; BE-NEXT: lxvd2x vs3, 0, r3 -; BE-NEXT: addi r3, r1, 384 +; BE-NEXT: addi r3, r1, 224 ; BE-NEXT: lxvd2x vs4, 0, r3 -; BE-NEXT: addi r3, r1, 400 +; BE-NEXT: addi r3, r1, 240 ; BE-NEXT: lxvd2x vs5, 0, r3 -; BE-NEXT: addi r3, r1, 416 +; BE-NEXT: addi r3, r1, 256 ; BE-NEXT: lxvd2x vs6, 0, r3 -; BE-NEXT: addi r3, r1, 432 +; BE-NEXT: addi r3, r1, 272 ; BE-NEXT: lxvd2x vs7, 0, r3 -; BE-NEXT: addi r3, r1, 448 +; BE-NEXT: addi r3, r1, 288 ; BE-NEXT: lxvd2x vs8, 0, r3 -; BE-NEXT: addi r3, r1, 464 +; BE-NEXT: addi r3, r1, 304 ; BE-NEXT: lxvd2x vs9, 0, r3 -; BE-NEXT: addi r3, r1, 480 +; BE-NEXT: addi r3, r1, 320 ; BE-NEXT: lxvd2x vs10, 0, r3 -; BE-NEXT: addi r3, r1, 496 +; BE-NEXT: addi r3, r1, 336 ; BE-NEXT: lxvd2x vs11, 0, r3 -; BE-NEXT: addi r3, r1, 512 +; BE-NEXT: addi r3, r1, 352 ; BE-NEXT: lxvd2x vs12, 0, r3 -; BE-NEXT: addi r3, r1, 528 +; BE-NEXT: addi r3, r1, 368 ; BE-NEXT: lxvd2x vs13, 0, r3 -; BE-NEXT: addi r3, r1, 544 +; BE-NEXT: addi r3, r1, 384 ; BE-NEXT: lxvd2x v2, 0, r3 -; BE-NEXT: addi r3, r1, 560 +; BE-NEXT: addi r3, r1, 400 ; BE-NEXT: lxvd2x v3, 0, r3 ; BE-NEXT: li r3, 240 ; BE-NEXT: stxvd2x v3, r30, r3 @@ -2179,43 +1527,25 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-NEXT: li r3, 16 ; BE-NEXT: stxvd2x vs1, r30, r3 ; BE-NEXT: stxvd2x vs0, 0, r30 -; BE-NEXT: lfd f31, 856(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f30, 848(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 840(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f28, 832(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f27, 824(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f26, 816(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f25, 808(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f24, 800(r1) # 8-byte Folded 
Reload -; BE-NEXT: lfd f23, 792(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f22, 784(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f21, 776(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f20, 768(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f19, 760(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f18, 752(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f17, 744(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f16, 736(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f15, 728(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f14, 720(r1) # 8-byte Folded Reload -; BE-NEXT: ld r31, 712(r1) # 8-byte Folded Reload -; BE-NEXT: ld r30, 704(r1) # 8-byte Folded Reload -; BE-NEXT: ld r29, 696(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 688(r1) # 8-byte Folded Reload -; BE-NEXT: ld r27, 680(r1) # 8-byte Folded Reload -; BE-NEXT: ld r26, 672(r1) # 8-byte Folded Reload -; BE-NEXT: ld r25, 664(r1) # 8-byte Folded Reload -; BE-NEXT: ld r24, 656(r1) # 8-byte Folded Reload -; BE-NEXT: ld r23, 648(r1) # 8-byte Folded Reload -; BE-NEXT: ld r22, 640(r1) # 8-byte Folded Reload -; BE-NEXT: ld r21, 632(r1) # 8-byte Folded Reload -; BE-NEXT: ld r20, 624(r1) # 8-byte Folded Reload -; BE-NEXT: ld r19, 616(r1) # 8-byte Folded Reload -; BE-NEXT: ld r18, 608(r1) # 8-byte Folded Reload -; BE-NEXT: ld r17, 600(r1) # 8-byte Folded Reload -; BE-NEXT: ld r16, 592(r1) # 8-byte Folded Reload -; BE-NEXT: ld r15, 584(r1) # 8-byte Folded Reload -; BE-NEXT: ld r14, 576(r1) # 8-byte Folded Reload -; BE-NEXT: addi r1, r1, 864 +; BE-NEXT: ld r31, 616(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 608(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 600(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 592(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 584(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 576(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 568(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 560(r1) # 8-byte Folded Reload +; BE-NEXT: ld r23, 552(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 544(r1) # 8-byte Folded Reload +; BE-NEXT: ld 
r21, 536(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 528(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 520(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 512(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 504(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 496(r1) # 8-byte Folded Reload +; BE-NEXT: ld r15, 488(r1) # 8-byte Folded Reload +; BE-NEXT: ld r14, 480(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 624 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -2223,508 +1553,334 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -688(r1) -; CHECK-NEXT: li r4, 208 -; CHECK-NEXT: std r0, 704(r1) -; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 224 -; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 240 -; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -576(r1) +; CHECK-NEXT: std r0, 592(r1) +; CHECK-NEXT: std r30, 560(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: stxvd2x v22, 
r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 256 -; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 272 -; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f21, f3 -; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 288 -; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stfd f24, 624(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f24, f6 -; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 304 -; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stfd f27, 648(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f27, f9 -; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 320 -; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stfd f30, 672(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f12 -; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; 
CHECK-NEXT: li r4, 368 -; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: lhz r3, 864(r1) +; CHECK-NEXT: li r11, 240 +; CHECK-NEXT: std r14, 432(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, 472(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r14, 744(r1) +; CHECK-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 256 +; CHECK-NEXT: std r22, 496(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r22, 680(r1) +; CHECK-NEXT: std r3, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r3, 856(r1) +; CHECK-NEXT: lhz r19, 672(r1) +; CHECK-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 272 +; CHECK-NEXT: std r23, 504(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r23, 688(r1) +; CHECK-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: std r3, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r3, 848(r1) +; CHECK-NEXT: li r11, 288 +; CHECK-NEXT: std r25, 520(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r25, 696(r1) +; CHECK-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 304 +; CHECK-NEXT: std r26, 528(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 536(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 544(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r28, 720(r1) +; CHECK-NEXT: lhz r27, 712(r1) +; CHECK-NEXT: lhz r26, 704(r1) +; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 176(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: std r3, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: lhz r3, 840(r1) +; CHECK-NEXT: li r11, 320 +; CHECK-NEXT: std r29, 552(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r29, 728(r1) +; CHECK-NEXT: stxvd2x v25, r1, r11 # 
16-byte Folded Spill +; CHECK-NEXT: std r3, 152(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r3, 832(r1) +; CHECK-NEXT: li r11, 336 +; CHECK-NEXT: std r31, 568(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r31, 736(r1) +; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 352 +; CHECK-NEXT: std r15, 440(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r16, 448(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 456(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r17, r6 +; CHECK-NEXT: mr r16, r5 +; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 144(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: std r3, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: lhz r3, 824(r1) +; CHECK-NEXT: li r11, 368 +; CHECK-NEXT: std r18, 464(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, 480(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r20, r8 +; CHECK-NEXT: mr r18, r7 +; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: lhz r3, 816(r1) +; CHECK-NEXT: li r11, 384 +; CHECK-NEXT: std r21, 488(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 512(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r24, r10 +; CHECK-NEXT: mr r21, r9 +; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 400 ; CHECK-NEXT: std r3, 112(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: lhz r3, 808(r1) +; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 416 ; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: lhz r3, 800(r1) +; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte 
Folded Spill ; CHECK-NEXT: std r3, 96(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: lhz r3, 792(r1) ; CHECK-NEXT: std r3, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: lhz r3, 784(r1) ; CHECK-NEXT: std r3, 80(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: lhz r3, 776(r1) ; CHECK-NEXT: std r3, 72(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: lhz r3, 768(r1) ; CHECK-NEXT: std r3, 64(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 832(r1) +; CHECK-NEXT: lhz r3, 760(r1) ; CHECK-NEXT: std r3, 56(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 840(r1) +; CHECK-NEXT: lhz r3, 752(r1) ; CHECK-NEXT: std r3, 48(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 848(r1) -; CHECK-NEXT: mr r15, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 856(r1) -; CHECK-NEXT: mr r14, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 864(r1) -; CHECK-NEXT: mr r31, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 872(r1) -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 880(r1) -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 888(r1) -; CHECK-NEXT: mr r27, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 896(r1) -; CHECK-NEXT: mr r26, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 904(r1) -; CHECK-NEXT: mr r25, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 912(r1) -; CHECK-NEXT: mr r24, r3 -; 
CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 920(r1) -; CHECK-NEXT: mr r23, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 928(r1) -; CHECK-NEXT: mr r22, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 936(r1) -; CHECK-NEXT: mr r21, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 944(r1) -; CHECK-NEXT: mr r20, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 952(r1) -; CHECK-NEXT: mr r19, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 960(r1) -; CHECK-NEXT: mr r18, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 968(r1) -; CHECK-NEXT: mr r17, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 976(r1) -; CHECK-NEXT: mr r16, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: clrldi r3, r4, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: li r3, 204 -; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r15, r3 ; CHECK-NEXT: clrldi r3, r16, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: li r3, 200 -; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: mtvsrd v31, r15 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: clrldi r3, r17, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r17, r3 ; CHECK-NEXT: clrldi r3, r18, 48 -; CHECK-NEXT: fmr f29, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r19, 48 -; CHECK-NEXT: fmr f28, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r17 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop +; 
CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: clrldi r3, r20, 48 -; CHECK-NEXT: fmr f27, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r21, 48 -; CHECK-NEXT: fmr f26, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r22, 48 -; CHECK-NEXT: fmr f25, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r23, 48 -; CHECK-NEXT: fmr f24, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r24, 48 -; CHECK-NEXT: fmr f23, f1 +; CHECK-NEXT: mr r20, r3 +; CHECK-NEXT: clrldi r3, r21, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r25, 48 -; CHECK-NEXT: fmr f22, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r20 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r26, 48 -; CHECK-NEXT: fmr f21, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: mr r3, r19 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r27, 48 -; CHECK-NEXT: fmr f20, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f19, f1 +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: clrldi r3, r24, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f18, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r21 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r31, 48 -; CHECK-NEXT: fmr f17, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: xxmrghd vs0, v31, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: mr r3, r22 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: 
nop -; CHECK-NEXT: clrldi r3, r14, 48 -; CHECK-NEXT: fmr f16, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r15, 48 -; CHECK-NEXT: fmr f15, f1 +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: mr r3, r23 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload -; CHECK-NEXT: fmr f14, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r24 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r25 +; CHECK-NEXT: xxmrghd v27, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v30, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v29, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: mr r3, r26 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v28, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r25 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v27, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r27 +; CHECK-NEXT: xxmrghd v26, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v26, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v25, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: 
mr r27, r3 +; CHECK-NEXT: mr r3, r28 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v24, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r27 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v23, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r29 +; CHECK-NEXT: xxmrghd v25, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 128(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v22, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v21, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: mr r3, r31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v20, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r29 +; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r14 +; CHECK-NEXT: xxmrghd v24, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl llrintf -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: xxlor f1, v20, v20 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v21, v21 -; CHECK-NEXT: 
mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: xxlor f1, v22, v22 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v23, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v23, v23 -; CHECK-NEXT: mtvsrd v31, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: xxlor f1, v24, v24 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v25, v25 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: xxlor f1, v26, v26 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v22, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v27, v27 -; CHECK-NEXT: mtvsrd v31, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxlor f1, v28, v28 -; CHECK-NEXT: xxmrghd v27, vs0, v31 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v29, v29 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxlor f1, v30, v30 -; CHECK-NEXT: xxmrghd v29, vs0, v31 -; CHECK-NEXT: bl llrintf +; 
CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v21, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mtvsrd v31, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f14 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f15 -; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f16 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v30, vs0, v30 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v20, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f17 -; CHECK-NEXT: mtvsrd v28, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f18 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v28, vs0, v28 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f19 -; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v26, vs0, v26 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: mtvsrd v24, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v24, vs0, v24 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: 
mtvsrd v22, r3 +; CHECK-NEXT: mtvsrd v30, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v22, vs0, v22 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: ld r3, 152(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mtvsrd v20, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v20, vs0, v20 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: mtvsrd v29, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v21, vs0, v21 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: ld r3, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v23, r3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 200 -; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload -; CHECK-NEXT: xxmrghd v23, vs0, v23 -; CHECK-NEXT: bl llrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v25, r3 -; CHECK-NEXT: li r3, 204 -; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; CHECK-NEXT: mtvsrd v28, r29 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: xxswapd vs1, v23 +; CHECK-NEXT: xxswapd vs1, v29 ; CHECK-NEXT: li r4, 128 -; CHECK-NEXT: xxswapd vs2, v21 -; CHECK-NEXT: xxswapd vs3, v31 -; CHECK-NEXT: xxmrghd v2, vs0, v25 +; CHECK-NEXT: xxswapd vs2, v30 +; CHECK-NEXT: xxswapd vs3, v25 +; CHECK-NEXT: 
xxmrghd v2, vs0, v28 ; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 224 @@ -2732,35 +1888,35 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-NEXT: li r3, 208 ; CHECK-NEXT: stxvd2x vs2, r30, r3 ; CHECK-NEXT: li r3, 192 -; CHECK-NEXT: xxswapd vs0, v20 +; CHECK-NEXT: xxswapd vs0, v31 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: xxswapd vs1, v22 +; CHECK-NEXT: xxswapd vs1, v20 ; CHECK-NEXT: stxvd2x vs1, r30, r3 ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: xxswapd vs2, v28 -; CHECK-NEXT: xxswapd vs0, v24 +; CHECK-NEXT: xxswapd vs2, v23 +; CHECK-NEXT: xxswapd vs0, v21 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: xxswapd vs1, v26 +; CHECK-NEXT: xxswapd vs1, v22 ; CHECK-NEXT: stxvd2x vs1, r30, r3 ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stxvd2x vs2, r30, r3 ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: xxswapd vs0, v24 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stxvd2x vs3, r30, r3 ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 144 -; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: xxswapd vs1, v26 ; CHECK-NEXT: stxvd2x vs1, r30, r3 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: li r4, 192 ; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 176 +; CHECK-NEXT: li r4, 224 ; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload ; CHECK-NEXT: xxswapd vs0, v27 ; CHECK-NEXT: stxvd2x vs0, r30, r3 @@ -2773,69 +1929,51 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-NEXT: li r3, 16 ; CHECK-NEXT: xxswapd vs3, vs3 ; CHECK-NEXT: stxvd2x vs3, r30, r3 -; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: li r3, 416 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 ; CHECK-NEXT: lxvd2x v31, r1, r3 # 
16-byte Folded Reload -; CHECK-NEXT: li r3, 368 -; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 664(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f28, 656(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f27, 648(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f26, 640(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f25, 632(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f24, 624(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f23, 616(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f22, 608(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f21, 600(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f20, 592(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f19, 584(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: ld r31, 568(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 560(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 552(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 544(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 536(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 528(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 520(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 512(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 504(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 496(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 488(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 480(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 472(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 464(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 456(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 448(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 352 -; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f14, 544(r1) # 8-byte 
Folded Reload -; CHECK-NEXT: ld r31, 536(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: ld r15, 440(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r14, 432(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 336 -; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 488(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 368 ; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 320 -; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, 440(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 352 ; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 304 -; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 336 ; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: li r3, 320 ; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: li r3, 304 ; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: li r3, 288 ; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: li r3, 272 ; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: li r3, 256 ; 
CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: li r3, 240 ; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 688 +; CHECK-NEXT: addi r1, r1, 576 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -2843,516 +1981,410 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; FAST-LABEL: llrint_v32i64_v32f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: li r4, 128 -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -560(r1) +; FAST-NEXT: std r0, 576(r1) +; FAST-NEXT: std r30, 544(r1) # 8-byte Folded Spill ; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 -; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 144 -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 160 -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 176 -; FAST-NEXT: xxlor v22, f3, f3 -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f29, f9 -; FAST-NEXT: stfd f30, 464(r1) # 
8-byte Folded Spill -; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 192 -; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 208 -; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 224 -; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 240 -; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 256 -; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 272 -; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 288 -; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 304 -; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 44 -; FAST-NEXT: xxlor v31, f6, f6 -; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill -; FAST-NEXT: lfs f1, 768(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: lhz r3, 848(r1) +; FAST-NEXT: li r11, 224 +; FAST-NEXT: std r14, 416(r1) # 8-byte Folded Spill +; FAST-NEXT: std r15, 424(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r15, 736(r1) +; FAST-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 240 +; FAST-NEXT: std r19, 456(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r14, 728(r1) +; FAST-NEXT: std r3, 184(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 840(r1) +; FAST-NEXT: lhz r19, 656(r1) +; FAST-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 256 +; FAST-NEXT: std r21, 472(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r21, 664(r1) +; FAST-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 176(r1) # 8-byte Folded Spill +; 
FAST-NEXT: lhz r3, 832(r1) +; FAST-NEXT: li r11, 272 +; FAST-NEXT: std r23, 488(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r23, 672(r1) +; FAST-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 288 +; FAST-NEXT: std r24, 496(r1) # 8-byte Folded Spill +; FAST-NEXT: std r26, 512(r1) # 8-byte Folded Spill +; FAST-NEXT: std r27, 520(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r27, 696(r1) +; FAST-NEXT: lhz r26, 688(r1) +; FAST-NEXT: lhz r24, 680(r1) +; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 152(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 824(r1) +; FAST-NEXT: li r11, 304 +; FAST-NEXT: std r28, 528(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r28, 704(r1) +; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 144(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 816(r1) +; FAST-NEXT: li r11, 320 +; FAST-NEXT: std r29, 536(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r29, 712(r1) +; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 336 +; FAST-NEXT: std r31, 552(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r31, 720(r1) +; FAST-NEXT: std r16, 432(r1) # 8-byte Folded Spill +; FAST-NEXT: std r17, 440(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r17, r6 +; FAST-NEXT: mr r16, r5 +; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 136(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 808(r1) +; FAST-NEXT: li r11, 352 +; FAST-NEXT: std r18, 448(r1) # 8-byte Folded Spill +; FAST-NEXT: std r20, 464(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r20, r8 +; FAST-NEXT: mr r18, r7 +; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 104(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 800(r1) +; FAST-NEXT: li r11, 368 +; FAST-NEXT: std r22, 480(r1) # 8-byte Folded Spill +; FAST-NEXT: std r25, 504(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r25, r10 +; FAST-NEXT: mr r22, r9 +; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill 
+; FAST-NEXT: li r11, 384 +; FAST-NEXT: std r3, 96(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 792(r1) +; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 400 +; FAST-NEXT: std r3, 88(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 784(r1) +; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 80(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 776(r1) +; FAST-NEXT: std r3, 72(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 768(r1) +; FAST-NEXT: std r3, 64(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 760(r1) +; FAST-NEXT: std r3, 56(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 752(r1) +; FAST-NEXT: std r3, 48(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 744(r1) +; FAST-NEXT: std r3, 40(r1) # 8-byte Folded Spill +; FAST-NEXT: clrldi r3, r4, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 120 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 760(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r16, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 112 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 752(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 208 +; FAST-NEXT: xxmrghd vs0, vs0, v31 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: clrldi r3, r17, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 104 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 744(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r18, 48 ; 
FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 96 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 736(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 192 +; FAST-NEXT: xxmrghd vs0, vs0, v31 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: clrldi r3, r20, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 88 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 728(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r22, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 80 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 720(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: xxmrghd vs0, vs0, v31 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: mr r3, r19 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 72 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 712(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r25, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 704(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxmrghd 
vs0, v31, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: mr r3, r21 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 56 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 696(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r23 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 48 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 688(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r24 +; FAST-NEXT: xxmrghd v27, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: xxlor v21, f1, f1 -; FAST-NEXT: lfs f1, 680(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r26 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: xxlor v20, f1, f1 -; FAST-NEXT: lfs f1, 672(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r27 +; FAST-NEXT: xxmrghd v26, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: xxlor v24, f1, f1 -; FAST-NEXT: lfs f1, 664(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r28 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 656(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: 
mtfprd f0, r3 +; FAST-NEXT: mr r3, r29 +; FAST-NEXT: xxmrghd v25, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 648(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: lfs f1, 640(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r14 +; FAST-NEXT: xxmrghd v24, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: lfs f1, 632(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r15 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 624(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 40(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v23, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: xxlor f1, v25, v25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 48(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: xxlor f1, v26, v26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 56(r1) # 8-byte Folded Reload +; FAST-NEXT: 
xxmrghd v22, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: xxlor f1, v27, v27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 64(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: xxlor f1, v28, v28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 72(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v21, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 80(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: xxlor f1, v29, v29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 88(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v20, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: xxlor f1, v30, v30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 96(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: xxlor f1, v31, v31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 104(r1) # 8-byte 
Folded Reload +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f14 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: ld r3, 136(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f14, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v30, vs0, v30 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: xxlor f1, v22, v22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: ld r3, 152(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: xxlor f1, v23, v23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v29, vs0, v29 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 44 -; FAST-NEXT: fmr f15, f1 -; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: ld r3, 184(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f3, f15 -; FAST-NEXT: fctid f4, f17 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f14 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid 
f8, f19 -; FAST-NEXT: fctid f13, f1 -; FAST-NEXT: fctid f9, f20 -; FAST-NEXT: fctid f10, f22 -; FAST-NEXT: fctid f11, f24 -; FAST-NEXT: fctid f12, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: fctid f0, f21 -; FAST-NEXT: mtvsrd v2, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtvsrd v3, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: fctid f11, f31 -; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: xxlor f12, v24, v24 -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: fctid f13, f13 -; FAST-NEXT: xxmrghd v3, vs5, v3 -; FAST-NEXT: fctid f5, f26 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: xxmrghd v4, vs7, vs6 -; FAST-NEXT: fctid f6, f27 -; FAST-NEXT: fctid f7, f28 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: xxmrghd v2, v2, vs10 -; FAST-NEXT: fctid f10, f30 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: xxmrghd v5, vs12, vs11 -; FAST-NEXT: xxlor f11, v20, v20 -; FAST-NEXT: xxlor f12, v21, v21 -; FAST-NEXT: fctid f11, f11 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 
-; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: mtfprd f13, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v0, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload -; FAST-NEXT: mtvsrd v1, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v6, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v7, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: xxmrghd v10, vs12, vs11 -; FAST-NEXT: xxmrghd v0, v0, vs13 -; FAST-NEXT: xxswapd vs12, v0 -; FAST-NEXT: xxmrghd v0, vs9, vs8 -; FAST-NEXT: xxmrghd v7, v8, v7 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: xxswapd v7, v7 -; FAST-NEXT: xxmrghd v8, v9, v8 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: xxswapd v8, v8 -; FAST-NEXT: xxmrghd v6, v9, v6 -; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: xxswapd vs1, v29 +; FAST-NEXT: li r4, 112 +; FAST-NEXT: xxswapd vs2, v30 +; FAST-NEXT: xxswapd vs3, v25 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 240 -; FAST-NEXT: stxvd2x v8, r30, r3 +; FAST-NEXT: xxmrghd v2, vs0, v28 +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 224 -; FAST-NEXT: stxvd2x v7, r30, r3 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 208 -; FAST-NEXT: xxswapd vs11, v6 -; FAST-NEXT: xxmrghd v6, vs10, vs7 -; FAST-NEXT: stxvd2x vs11, r30, r3 
+; FAST-NEXT: stxvd2x vs2, r30, r3 ; FAST-NEXT: li r3, 192 -; FAST-NEXT: xxmrghd v1, v9, v1 -; FAST-NEXT: xxswapd vs11, v1 -; FAST-NEXT: xxmrghd v1, vs6, vs5 -; FAST-NEXT: xxswapd vs5, v10 -; FAST-NEXT: xxswapd vs6, v5 -; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: xxswapd vs0, v31 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 176 -; FAST-NEXT: stxvd2x vs12, r30, r3 +; FAST-NEXT: xxswapd vs1, v20 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 160 -; FAST-NEXT: stxvd2x vs5, r30, r3 +; FAST-NEXT: xxswapd vs2, v23 +; FAST-NEXT: xxswapd vs0, v21 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 144 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: xxswapd vs1, v22 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 128 -; FAST-NEXT: xxswapd vs5, v6 -; FAST-NEXT: stxvd2x vs5, r30, r3 -; FAST-NEXT: li r3, 112 -; FAST-NEXT: xxswapd vs2, v1 -; FAST-NEXT: xxswapd vs6, v0 ; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxswapd vs0, v24 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 96 -; FAST-NEXT: fctid f2, f29 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: stxvd2x vs3, r30, r3 ; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxmrghd v5, vs7, vs4 -; FAST-NEXT: xxswapd vs4, v2 -; FAST-NEXT: xxmrghd v0, vs0, vs3 -; FAST-NEXT: xxswapd vs0, v5 -; FAST-NEXT: xxswapd vs3, v3 -; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 160 +; FAST-NEXT: xxswapd vs1, v26 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 64 -; FAST-NEXT: xxswapd vs0, v0 +; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 192 +; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 208 +; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: xxswapd vs0, v27 ; FAST-NEXT: 
stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxmrghd v5, vs2, vs1 -; FAST-NEXT: xxswapd vs1, v4 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 32 -; FAST-NEXT: xxswapd vs2, v5 +; FAST-NEXT: xxswapd vs2, vs2 ; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 32 +; FAST-NEXT: xxswapd vs1, vs1 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 16 +; FAST-NEXT: xxswapd vs3, vs3 ; FAST-NEXT: stxvd2x vs3, r30, r3 -; FAST-NEXT: li r3, 304 +; FAST-NEXT: li r3, 400 +; FAST-NEXT: xxswapd vs4, vs4 ; FAST-NEXT: stxvd2x vs4, 0, r30 -; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload ; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 288 -; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload +; FAST-NEXT: li r3, 384 +; FAST-NEXT: ld r31, 552(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r30, 544(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 536(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r28, 528(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r27, 520(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r26, 512(r1) # 8-byte Folded 
Reload +; FAST-NEXT: ld r25, 504(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r24, 496(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r23, 488(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r22, 480(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r21, 472(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r20, 464(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r19, 456(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r18, 448(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r17, 440(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r16, 432(r1) # 8-byte Folded Reload ; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 272 +; FAST-NEXT: li r3, 368 +; FAST-NEXT: ld r15, 424(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r14, 416(r1) # 8-byte Folded Reload ; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 256 +; FAST-NEXT: li r3, 352 ; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 240 +; FAST-NEXT: li r3, 336 ; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 224 +; FAST-NEXT: li r3, 320 ; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 208 +; FAST-NEXT: li r3, 304 ; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 192 +; FAST-NEXT: li r3, 288 ; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 176 +; FAST-NEXT: li r3, 272 ; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 160 +; FAST-NEXT: li r3, 256 ; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 +; FAST-NEXT: li r3, 240 ; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 +; FAST-NEXT: li r3, 224 ; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 480 +; FAST-NEXT: addi r1, r1, 560 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 ; FAST-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index 
f4375362f861c..d0a709c884a25 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -33,10 +33,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) -; BE-NEXT: std r0, 128(r1) -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop ; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r0, 128(r1) ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl lrintf @@ -50,10 +48,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) -; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop ; CHECK-NEXT: bl lrintf @@ -67,10 +63,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) -; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r0, 48(r1) ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop ; FAST-NEXT: fctid f0, f1 @@ -88,37 +82,26 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind { ; BE-LABEL: lrint_v2f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f31, f1 +; BE-NEXT: clrldi r3, r4, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: 
nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 120(r1) +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 112(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v2, 0, r3 -; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: addi r1, r1, 144 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -127,35 +110,28 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -96(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 112(r1) -; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f2 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: li r5, 48 ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r0, 112(r1) +; CHECK-NEXT: std r29, 72(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r29, r3 ; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f31, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 72(r1) # 
8-byte Folded Reload ; CHECK-NEXT: xxmrghd v2, vs0, v31 ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 96 @@ -166,35 +142,30 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind { ; FAST-LABEL: lrint_v2f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -48(r1) -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f2 -; FAST-NEXT: std r0, 64(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop +; FAST-NEXT: stdu r1, -80(r1) +; FAST-NEXT: li r5, 48 ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r4 +; FAST-NEXT: stxvd2x v31, r1, r5 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop ; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: fctid f1, f30 +; FAST-NEXT: ld r30, 64(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: addi r1, r1, 48 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v2, vs0, v31 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 80 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 ; FAST-NEXT: blr %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) @@ -206,73 +177,46 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind { ; BE-LABEL: lrint_v4f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; 
BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f29, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r28, 152(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 192(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 200(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f4 -; BE-NEXT: fmr f30, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill ; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: clrldi r3, r4, 48 +; BE-NEXT: std r29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r6 +; BE-NEXT: mr r29, r5 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 120(r1) ; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: fmr f31, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f30, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop +; BE-NEXT: std r3, 112(r1) ; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f29, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: std r3, 120(r1) -; BE-NEXT: bl lrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 -; BE-NEXT: std r3, 112(r1) -; BE-NEXT: bl lrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 136(r1) +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 128(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 168(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 200(r1) 
# 8-byte Folded Reload -; BE-NEXT: lfd f30, 192(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 184(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 -; BE-NEXT: ld r29, 160(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 152(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v3, 0, r3 -; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: addi r1, r1, 176 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -280,79 +224,57 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind { ; CHECK-LABEL: lrint_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -144(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 160(r1) -; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f2 -; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f3 -; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f4 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: stdu r1, -128(r1) +; CHECK-NEXT: li r7, 48 +; CHECK-NEXT: std r0, 144(r1) ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r27, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 104(r1) # 8-byte Folded 
Spill +; CHECK-NEXT: mr r29, r5 +; CHECK-NEXT: mr r28, r4 +; CHECK-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill +; CHECK-NEXT: li r7, 64 +; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r6 +; CHECK-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: clrldi r3, r28, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v31, r27 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 ; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: clrldi r3, r29, 48 ; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: nop ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v30, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload 
; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v3, vs0, v30 ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 144 +; CHECK-NEXT: addi r1, r1, 128 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -360,63 +282,55 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind { ; FAST-LABEL: lrint_v4f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -64(r1) -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f4 -; FAST-NEXT: std r0, 80(r1) -; FAST-NEXT: fmr f31, f3 -; FAST-NEXT: fmr f30, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop +; FAST-NEXT: stdu r1, -112(r1) +; FAST-NEXT: li r7, 48 +; FAST-NEXT: std r0, 128(r1) ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; FAST-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; FAST-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r6 +; FAST-NEXT: mr r29, r5 +; FAST-NEXT: stxvd2x v30, r1, r7 # 16-byte Folded Spill +; FAST-NEXT: li r7, 64 +; FAST-NEXT: mr r28, r4 +; FAST-NEXT: stxvd2x v31, r1, r7 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r28, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r29, 48 +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl 
__extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f2, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f28 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r28, 80(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs0, vs2 -; FAST-NEXT: addi r1, r1, 64 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v3, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 112 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload ; FAST-NEXT: blr %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a @@ -427,145 +341,86 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind { ; BE-LABEL: lrint_v8f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f25, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r24, 184(r1) # 8-byte Folded Spill -; BE-NEXT: std r25, 192(r1) # 8-byte Folded Spill -; BE-NEXT: std r26, 200(r1) # 8-byte Folded 
Spill -; BE-NEXT: std r27, 208(r1) # 8-byte Folded Spill -; BE-NEXT: std r28, 216(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 224(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 232(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f26, 256(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f27, 264(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f28, 272(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f29, 280(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 288(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 296(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f8 -; BE-NEXT: fmr f30, f7 -; BE-NEXT: fmr f29, f6 -; BE-NEXT: fmr f28, f5 -; BE-NEXT: fmr f27, f4 -; BE-NEXT: fmr f26, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: mr r27, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 -; BE-NEXT: mr r26, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: mr r25, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 +; BE-NEXT: stdu r1, -240(r1) +; BE-NEXT: std r0, 256(r1) +; BE-NEXT: std r24, 176(r1) # 8-byte Folded Spill ; BE-NEXT: mr r24, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: clrldi r3, r4, 48 +; BE-NEXT: std r25, 184(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 192(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 200(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 208(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 216(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 224(r1) # 8-byte Folded Spill +; BE-NEXT: mr r29, r10 +; BE-NEXT: mr r30, r9 +; BE-NEXT: mr r27, r8 +; BE-NEXT: mr r28, r7 +; BE-NEXT: mr r26, r6 +; BE-NEXT: mr r25, r5 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: 
nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 120(r1) ; BE-NEXT: clrldi r3, r24, 48 -; BE-NEXT: fmr f31, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r25, 48 -; BE-NEXT: fmr f30, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop +; BE-NEXT: std r3, 112(r1) ; BE-NEXT: clrldi r3, r26, 48 -; BE-NEXT: fmr f29, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r27, 48 -; BE-NEXT: fmr f28, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: fmr f27, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f26, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f25, f1 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 -; BE-NEXT: std r3, 120(r1) -; BE-NEXT: bl lrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: std r3, 112(r1) -; BE-NEXT: bl lrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 ; BE-NEXT: std r3, 136(r1) +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 ; BE-NEXT: std r3, 128(r1) +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 ; BE-NEXT: std r3, 152(r1) +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 ; BE-NEXT: std r3, 144(r1) +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 168(r1) +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop ; BE-NEXT: std r3, 160(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 232(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 296(r1) # 8-byte Folded 
Reload -; BE-NEXT: lfd f30, 288(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 280(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 224(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 216(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 208(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 200(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 192(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 184(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 176(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 -; BE-NEXT: lfd f28, 272(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f27, 264(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f26, 256(r1) # 8-byte Folded Reload -; BE-NEXT: ld r29, 224(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 216(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v3, 0, r3 ; BE-NEXT: addi r3, r1, 144 -; BE-NEXT: lfd f25, 248(r1) # 8-byte Folded Reload -; BE-NEXT: ld r27, 208(r1) # 8-byte Folded Reload -; BE-NEXT: ld r26, 200(r1) # 8-byte Folded Reload -; BE-NEXT: ld r25, 192(r1) # 8-byte Folded Reload -; BE-NEXT: ld r24, 184(r1) # 8-byte Folded Reload ; BE-NEXT: lxvd2x v4, 0, r3 ; BE-NEXT: addi r3, r1, 160 ; BE-NEXT: lxvd2x v5, 0, r3 -; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: addi r1, r1, 240 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -573,159 +428,107 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind { ; CHECK-LABEL: lrint_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -240(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 256(r1) -; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; 
CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f25, f2 -; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f26, f3 -; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f27, f4 -; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f28, f5 -; CHECK-NEXT: stfd f29, 216(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f6 -; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f7 -; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f8 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 -; CHECK-NEXT: mr r27, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mr r26, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mr r25, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r24, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop +; CHECK-NEXT: stdu r1, -192(r1) +; CHECK-NEXT: li r11, 48 +; CHECK-NEXT: std r0, 208(r1) ; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: std r23, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 136(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r25, r5 +; CHECK-NEXT: mr r24, r4 +; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 64 +; 
CHECK-NEXT: std r26, 144(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r26, r6 +; CHECK-NEXT: std r27, 152(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r28, r8 +; CHECK-NEXT: mr r27, r7 +; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 80 +; CHECK-NEXT: std r29, 168(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r29, r9 +; CHECK-NEXT: std r30, 176(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r10 +; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 96 +; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r23, r3 ; CHECK-NEXT: clrldi r3, r24, 48 -; CHECK-NEXT: fmr f31, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v31, r23 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: clrldi r3, r25, 48 -; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: xxmrghd v31, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r25, r3 ; CHECK-NEXT: clrldi r3, r26, 48 -; CHECK-NEXT: fmr f29, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r27, 48 -; CHECK-NEXT: fmr f28, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v30, r25 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: xxmrghd v30, vs0, v30 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f26, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: clrldi r3, r28, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v29, r27 ; 
CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mtvsrd v30, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v30, vs0, v30 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v29, r3 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v29, vs0, v29 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: mtvsrd v28, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: lfd f31, 232(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 176(r1) # 8-byte Folded Reload ; CHECK-NEXT: vmr v3, v30 ; CHECK-NEXT: vmr v4, v29 -; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 168(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 160(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f26, 192(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 152(r1) # 8-byte Folded Reload +; 
CHECK-NEXT: ld r26, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxmrghd v5, vs0, v28 -; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v5, vs0, v28 ; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 240 +; CHECK-NEXT: addi r1, r1, 192 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -733,117 +536,103 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind { ; FAST-LABEL: lrint_v8f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -96(r1) -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f8 -; FAST-NEXT: std r0, 112(r1) -; FAST-NEXT: fmr f30, f7 -; FAST-NEXT: fmr f29, f6 -; FAST-NEXT: fmr f28, f5 -; FAST-NEXT: fmr f27, f4 -; FAST-NEXT: fmr f26, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop +; FAST-NEXT: stdu r1, -176(r1) +; FAST-NEXT: li r11, 48 +; FAST-NEXT: std r0, 192(r1) ; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: std r24, 112(r1) # 8-byte Folded Spill +; FAST-NEXT: std r25, 120(r1) 
# 8-byte Folded Spill +; FAST-NEXT: std r26, 128(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r26, r6 +; FAST-NEXT: mr r25, r5 +; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 64 +; FAST-NEXT: std r27, 136(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r27, r7 +; FAST-NEXT: std r28, 144(r1) # 8-byte Folded Spill +; FAST-NEXT: std r29, 152(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r29, r9 +; FAST-NEXT: mr r28, r8 +; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 80 +; FAST-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r10 +; FAST-NEXT: mr r24, r4 +; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 96 +; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r24, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r25, 48 +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: clrldi r3, r26, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r27, 48 +; 
FAST-NEXT: xxmrghd v30, vs0, v30 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: clrldi r3, r28, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r29, 48 +; FAST-NEXT: xxmrghd v29, vs0, v29 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f26 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f27 -; FAST-NEXT: fctid f4, f28 -; FAST-NEXT: fctid f5, f29 -; FAST-NEXT: fctid f6, f30 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f31 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v3, v30 +; FAST-NEXT: vmr v4, v29 +; FAST-NEXT: ld r28, 144(r1) # 8-byte Folded Reload +; 
FAST-NEXT: ld r27, 136(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r26, 128(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r25, 120(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: ld r24, 112(r1) # 8-byte Folded Reload ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v5, vs0, vs6 -; FAST-NEXT: addi r1, r1, 96 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v5, vs0, v28 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 176 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload ; FAST-NEXT: blr %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -854,286 +643,166 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind { ; BE-LABEL: lrint_v16i64_v16f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -496(r1) -; BE-NEXT: std r0, 512(r1) -; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f20, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r16, 248(r1) # 8-byte Folded Spill -; BE-NEXT: std r17, 256(r1) # 8-byte Folded Spill -; BE-NEXT: std r18, 264(r1) # 8-byte Folded Spill -; BE-NEXT: std r19, 272(r1) # 8-byte Folded Spill -; BE-NEXT: std r20, 280(r1) # 8-byte Folded Spill -; BE-NEXT: std r21, 288(r1) # 8-byte Folded Spill -; BE-NEXT: std r22, 296(r1) # 8-byte Folded Spill -; BE-NEXT: std r23, 304(r1) # 8-byte 
Folded Spill -; BE-NEXT: std r24, 312(r1) # 8-byte Folded Spill -; BE-NEXT: std r25, 320(r1) # 8-byte Folded Spill -; BE-NEXT: std r26, 328(r1) # 8-byte Folded Spill -; BE-NEXT: std r27, 336(r1) # 8-byte Folded Spill -; BE-NEXT: std r28, 344(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 352(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 360(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f13 -; BE-NEXT: fmr f29, f12 -; BE-NEXT: fmr f30, f11 -; BE-NEXT: fmr f28, f10 -; BE-NEXT: fmr f27, f9 -; BE-NEXT: fmr f26, f8 -; BE-NEXT: fmr f25, f7 -; BE-NEXT: fmr f24, f6 -; BE-NEXT: fmr f23, f5 -; BE-NEXT: fmr f22, f4 -; BE-NEXT: fmr f21, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 -; BE-NEXT: mr r30, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 -; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 -; BE-NEXT: mr r27, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 -; BE-NEXT: mr r26, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: mr r25, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 +; BE-NEXT: stdu 
r1, -368(r1) +; BE-NEXT: std r0, 384(r1) +; BE-NEXT: std r24, 304(r1) # 8-byte Folded Spill ; BE-NEXT: mr r24, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 -; BE-NEXT: mr r23, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 -; BE-NEXT: mr r22, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: mr r21, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 -; BE-NEXT: mr r20, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 652(r1) -; BE-NEXT: mr r19, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: mr r18, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 668(r1) -; BE-NEXT: mr r17, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 660(r1) -; BE-NEXT: mr r16, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r16, 48 -; BE-NEXT: fmr f31, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r17, 48 -; BE-NEXT: fmr f30, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r18, 48 -; BE-NEXT: fmr f29, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r19, 48 -; BE-NEXT: fmr f28, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r20, 48 -; BE-NEXT: fmr f27, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r21, 48 -; BE-NEXT: fmr f26, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r22, 48 -; BE-NEXT: fmr f25, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r23, 48 -; BE-NEXT: fmr f24, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r24, 48 -; BE-NEXT: fmr f23, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r25, 48 -; BE-NEXT: fmr f22, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, 
r26, 48 -; BE-NEXT: fmr f21, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r27, 48 -; BE-NEXT: fmr f20, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: fmr f19, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f18, f1 -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r30, 48 -; BE-NEXT: fmr f17, f1 +; BE-NEXT: lhz r3, 494(r1) +; BE-NEXT: std r16, 240(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r16, 486(r1) +; BE-NEXT: std r17, 248(r1) # 8-byte Folded Spill +; BE-NEXT: std r18, 256(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 264(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 272(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 280(r1) # 8-byte Folded Spill +; BE-NEXT: std r22, 288(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r22, 534(r1) +; BE-NEXT: lhz r21, 542(r1) +; BE-NEXT: lhz r20, 518(r1) +; BE-NEXT: lhz r19, 526(r1) +; BE-NEXT: lhz r18, 502(r1) +; BE-NEXT: lhz r17, 510(r1) +; BE-NEXT: std r23, 296(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 312(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 320(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 328(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 336(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 344(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 352(r1) # 8-byte Folded Spill +; BE-NEXT: mr r29, r10 +; BE-NEXT: mr r30, r9 +; BE-NEXT: mr r27, r8 +; BE-NEXT: mr r28, r7 +; BE-NEXT: mr r25, r6 +; BE-NEXT: mr r26, r5 +; BE-NEXT: mr r23, r4 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f17 ; BE-NEXT: std r3, 120(r1) +; BE-NEXT: mr r3, r16 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f18 ; BE-NEXT: std r3, 112(r1) +; BE-NEXT: mr r3, r17 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f19 ; BE-NEXT: std r3, 136(r1) +; BE-NEXT: mr r3, 
r18 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 ; BE-NEXT: std r3, 128(r1) +; BE-NEXT: mr r3, r19 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 ; BE-NEXT: std r3, 152(r1) +; BE-NEXT: mr r3, r20 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 ; BE-NEXT: std r3, 144(r1) +; BE-NEXT: mr r3, r21 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 ; BE-NEXT: std r3, 168(r1) +; BE-NEXT: mr r3, r22 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 ; BE-NEXT: std r3, 160(r1) +; BE-NEXT: clrldi r3, r23, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 ; BE-NEXT: std r3, 184(r1) +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 ; BE-NEXT: std r3, 176(r1) +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 ; BE-NEXT: std r3, 200(r1) +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 ; BE-NEXT: std r3, 192(r1) +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 ; BE-NEXT: std r3, 216(r1) +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 ; BE-NEXT: std r3, 208(r1) +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 ; BE-NEXT: std r3, 232(r1) +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop ; BE-NEXT: bl lrintf ; 
BE-NEXT: nop ; BE-NEXT: std r3, 224(r1) ; BE-NEXT: addi r3, r1, 112 -; BE-NEXT: ld r30, 360(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: ld r30, 352(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 344(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 336(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 328(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 320(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 312(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 304(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v6, 0, r3 ; BE-NEXT: addi r3, r1, 128 -; BE-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload -; BE-NEXT: ld r29, 352(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 344(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: ld r23, 296(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 288(r1) # 8-byte Folded Reload +; BE-NEXT: ld r21, 280(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 272(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 264(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 256(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 248(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 240(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v7, 0, r3 ; BE-NEXT: addi r3, r1, 144 -; BE-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload -; BE-NEXT: ld r27, 336(r1) # 8-byte Folded Reload -; BE-NEXT: ld r26, 328(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: lxvd2x v8, 0, r3 ; BE-NEXT: addi r3, r1, 160 -; BE-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload -; BE-NEXT: ld r25, 320(r1) # 
8-byte Folded Reload -; BE-NEXT: ld r24, 312(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: lxvd2x v9, 0, r3 ; BE-NEXT: addi r3, r1, 176 -; BE-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload -; BE-NEXT: ld r23, 304(r1) # 8-byte Folded Reload -; BE-NEXT: ld r22, 296(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: lxvd2x v2, 0, r3 ; BE-NEXT: addi r3, r1, 192 -; BE-NEXT: ld r21, 288(r1) # 8-byte Folded Reload -; BE-NEXT: ld r20, 280(r1) # 8-byte Folded Reload -; BE-NEXT: ld r19, 272(r1) # 8-byte Folded Reload -; BE-NEXT: ld r18, 264(r1) # 8-byte Folded Reload -; BE-NEXT: ld r17, 256(r1) # 8-byte Folded Reload -; BE-NEXT: ld r16, 248(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: lxvd2x v3, 0, r3 ; BE-NEXT: addi r3, r1, 208 -; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: lxvd2x v4, 0, r3 ; BE-NEXT: addi r3, r1, 224 -; BE-NEXT: lxvd2x v9, 0, r3 -; BE-NEXT: addi r1, r1, 496 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r1, r1, 368 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -1141,316 +810,207 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind { ; CHECK-LABEL: lrint_v16i64_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -432(r1) -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: std r0, 448(r1) -; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill -; CHECK-NEXT: std 
r25, 256(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f21, f3 -; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f24, f6 -; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f27, f9 -; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f12 -; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stxvd2x v31, r1, r3 # 
16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: mr r27, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 -; CHECK-NEXT: mr r26, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mr r25, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mr r24, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: stdu r1, -320(r1) +; CHECK-NEXT: li r11, 48 +; CHECK-NEXT: std r0, 336(r1) +; CHECK-NEXT: std r23, 248(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r23, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: lhz r3, 416(r1) +; CHECK-NEXT: std r16, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 200(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r17, 432(r1) +; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 64 +; CHECK-NEXT: std r18, 208(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r18, 440(r1) +; CHECK-NEXT: std r19, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r19, 448(r1) +; CHECK-NEXT: lhz r16, 424(r1) +; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 80 +; CHECK-NEXT: std r20, 224(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r20, 456(r1) +; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 96 +; CHECK-NEXT: std r21, 232(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, 240(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r22, 472(r1) +; CHECK-NEXT: lhz r21, 464(r1) +; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 112 +; 
CHECK-NEXT: std r15, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 256(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 264(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r25, r5 +; CHECK-NEXT: mr r24, r4 +; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 128 +; CHECK-NEXT: std r26, 272(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 280(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 288(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 296(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r29, r9 +; CHECK-NEXT: mr r28, r8 +; CHECK-NEXT: mr r27, r7 +; CHECK-NEXT: mr r26, r6 +; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 144 +; CHECK-NEXT: std r30, 304(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r10 +; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 160 +; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 -; CHECK-NEXT: mr r22, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mr r21, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mr r15, r3 +; CHECK-NEXT: mr r3, r16 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mr r20, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mtvsrd v31, r15 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mr r19, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r17 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 568(r1) -; CHECK-NEXT: mr r18, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 576(r1) ; CHECK-NEXT: mr r17, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mr r3, r18 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 
584(r1) -; CHECK-NEXT: mr r16, r3 -; CHECK-NEXT: bl __truncsfhf2 +; CHECK-NEXT: mtvsrd v30, r17 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r19 +; CHECK-NEXT: xxmrghd v30, vs0, v30 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r16, 48 -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r17, 48 -; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: mr r19, r3 +; CHECK-NEXT: mr r3, r20 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r18, 48 -; CHECK-NEXT: fmr f29, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v29, r19 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r19, 48 -; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r21 +; CHECK-NEXT: xxmrghd v29, vs0, v29 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r20, 48 -; CHECK-NEXT: fmr f27, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r21, 48 -; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: mr r3, r22 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r22, 48 -; CHECK-NEXT: fmr f25, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v28, r21 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: clrldi r3, r23, 48 -; CHECK-NEXT: fmr f24, f1 +; CHECK-NEXT: xxmrghd v28, vs0, v28 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r23, r3 ; CHECK-NEXT: clrldi r3, r24, 48 -; CHECK-NEXT: fmr f23, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v27, r23 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: clrldi r3, r25, 48 -; CHECK-NEXT: fmr f22, f1 +; CHECK-NEXT: xxmrghd v27, vs0, v27 
; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r26, 48 -; CHECK-NEXT: fmr f21, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r27, 48 -; CHECK-NEXT: fmr f20, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f19, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f18, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r30, 48 -; CHECK-NEXT: fmr f17, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f17 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f18 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f19 -; CHECK-NEXT: mtvsrd v30, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v30, vs0, v30 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: mtvsrd v29, r3 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v29, vs0, v29 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: mtvsrd v26, r25 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v28, vs0, v28 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: xxmrghd v26, vs0, v26 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mtvsrd v27, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: 
xxmrghd v27, vs0, v27 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: mtvsrd v25, r27 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v26, vs0, v26 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v25, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v25, vs0, v25 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: mtvsrd v24, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: lfd f31, 424(r1) # 8-byte Folded Reload -; CHECK-NEXT: vmr v3, v30 -; CHECK-NEXT: vmr v4, v29 -; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v6, v31 +; CHECK-NEXT: ld r30, 304(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v7, v30 +; CHECK-NEXT: vmr v8, v29 +; CHECK-NEXT: ld r29, 296(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 288(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: vmr v5, v28 -; CHECK-NEXT: vmr v6, v27 -; CHECK-NEXT: vmr v7, v26 -; CHECK-NEXT: vmr v8, v25 -; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v9, v28 +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: vmr v3, v26 +; CHECK-NEXT: vmr v4, v25 +; CHECK-NEXT: ld r27, 280(r1) # 8-byte Folded Reload +; 
CHECK-NEXT: ld r26, 272(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxmrghd v9, vs0, v24 -; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 264(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 256(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v5, vs0, v24 +; CHECK-NEXT: ld r23, 248(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 240(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 232(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f20, 336(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 224(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 208(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 200(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f17, 312(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: ld r23, 
240(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r21, 224(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 432 +; CHECK-NEXT: addi r1, r1, 320 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -1458,223 +1018,199 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind { ; FAST-LABEL: lrint_v16i64_v16f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -160(r1) -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 312(r1) -; FAST-NEXT: std r0, 176(r1) -; FAST-NEXT: fmr f28, f13 -; FAST-NEXT: fmr f27, f12 -; FAST-NEXT: fmr f24, f11 -; 
FAST-NEXT: fmr f21, f10 -; FAST-NEXT: fmr f19, f9 -; FAST-NEXT: fmr f18, f8 -; FAST-NEXT: fmr f17, f7 -; FAST-NEXT: fmr f16, f6 -; FAST-NEXT: fmr f20, f5 -; FAST-NEXT: fmr f22, f4 -; FAST-NEXT: fmr f23, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: stdu r1, -304(r1) +; FAST-NEXT: li r11, 48 +; FAST-NEXT: std r0, 320(r1) +; FAST-NEXT: std r23, 232(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r23, r3 +; FAST-NEXT: lhz r3, 400(r1) +; FAST-NEXT: std r16, 176(r1) # 8-byte Folded Spill +; FAST-NEXT: std r17, 184(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r17, 416(r1) +; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 64 +; FAST-NEXT: std r18, 192(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r18, 424(r1) +; FAST-NEXT: std r19, 200(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r19, 432(r1) +; FAST-NEXT: lhz r16, 408(r1) +; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 80 +; FAST-NEXT: std r20, 208(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r20, 440(r1) +; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 96 +; FAST-NEXT: std r21, 216(r1) # 8-byte Folded Spill +; FAST-NEXT: std r22, 224(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r22, 456(r1) +; FAST-NEXT: lhz r21, 448(r1) +; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 112 +; FAST-NEXT: std r24, 240(r1) # 8-byte Folded Spill +; FAST-NEXT: std r25, 248(r1) # 8-byte Folded Spill +; FAST-NEXT: std r26, 256(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r26, r6 +; FAST-NEXT: mr r25, r5 +; FAST-NEXT: mr r24, r4 +; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 128 +; FAST-NEXT: std r27, 264(r1) # 8-byte Folded Spill +; FAST-NEXT: std r28, 272(r1) # 8-byte Folded Spill +; FAST-NEXT: std r29, 280(r1) # 8-byte Folded Spill +; FAST-NEXT: std r30, 288(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r10 +; FAST-NEXT: 
mr r29, r9 +; FAST-NEXT: mr r28, r8 +; FAST-NEXT: mr r27, r7 +; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 144 +; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 160 +; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 304(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r16 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 296(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r17 +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: mr r3, r18 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r19 +; FAST-NEXT: xxmrghd v30, vs0, v30 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: mr r3, r20 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f21 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: 
fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r21 +; FAST-NEXT: xxmrghd v29, vs0, v29 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f19 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: mr r3, r22 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: fmr f1, f18 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r23, 48 +; FAST-NEXT: xxmrghd v28, vs0, v28 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: fmr f1, f17 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v27, r3 +; FAST-NEXT: clrldi r3, r24, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r25, 48 +; FAST-NEXT: xxmrghd v27, vs0, v27 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: fmr f1, f20 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: clrldi r3, r26, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: fmr f1, f22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r27, 48 +; FAST-NEXT: xxmrghd v26, vs0, 
v26 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: fmr f1, f23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v25, r3 +; FAST-NEXT: clrldi r3, r28, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: clrldi r3, r29, 48 +; FAST-NEXT: xxmrghd v25, vs0, v25 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: clrldi r3, r30, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f22 -; FAST-NEXT: fctid f4, f20 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f17 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f9, f21 -; FAST-NEXT: fctid f10, f24 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v5, 
vs7, vs6 -; FAST-NEXT: xxmrghd v6, vs9, vs8 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f27 -; FAST-NEXT: fctid f1, f29 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v7, vs0, vs10 -; FAST-NEXT: fctid f0, f28 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v8, vs1, vs0 -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f1, f31 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: vmr v6, v31 +; FAST-NEXT: ld r30, 288(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 280(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v7, v30 +; FAST-NEXT: vmr v8, v29 +; FAST-NEXT: ld r28, 272(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r27, 264(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v9, v28 +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: ld r26, 256(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r25, 248(r1) # 8-byte Folded Reload +; FAST-NEXT: vmr v3, v26 +; FAST-NEXT: vmr v4, v25 +; FAST-NEXT: ld r24, 240(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r23, 232(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r22, 224(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r21, 216(r1) # 8-byte Folded Reload ; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: ld r20, 208(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r19, 200(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r18, 192(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r17, 184(r1) # 8-byte Folded Reload ; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v9, vs1, vs0 -; FAST-NEXT: addi r1, r1, 160 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: ld r16, 176(r1) # 8-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 96 +; 
FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v5, vs0, v24 +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 304 ; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload ; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload ; FAST-NEXT: blr %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -1685,483 +1221,295 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-LABEL: lrint_v32i64_v32f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -864(r1) -; BE-NEXT: std r0, 880(r1) -; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f20, f1 -; BE-NEXT: fmr f1, f2 -; BE-NEXT: std r14, 576(r1) # 8-byte Folded Spill -; BE-NEXT: std r15, 584(r1) # 8-byte Folded Spill -; BE-NEXT: std r16, 592(r1) # 8-byte Folded Spill -; BE-NEXT: std r17, 600(r1) # 8-byte Folded Spill -; BE-NEXT: std r18, 608(r1) # 8-byte Folded Spill -; BE-NEXT: std r19, 616(r1) # 8-byte Folded Spill 
-; BE-NEXT: std r20, 624(r1) # 8-byte Folded Spill -; BE-NEXT: std r21, 632(r1) # 8-byte Folded Spill -; BE-NEXT: std r22, 640(r1) # 8-byte Folded Spill -; BE-NEXT: std r23, 648(r1) # 8-byte Folded Spill -; BE-NEXT: std r24, 656(r1) # 8-byte Folded Spill -; BE-NEXT: std r25, 664(r1) # 8-byte Folded Spill -; BE-NEXT: std r26, 672(r1) # 8-byte Folded Spill -; BE-NEXT: std r27, 680(r1) # 8-byte Folded Spill -; BE-NEXT: std r28, 688(r1) # 8-byte Folded Spill -; BE-NEXT: std r29, 696(r1) # 8-byte Folded Spill -; BE-NEXT: std r30, 704(r1) # 8-byte Folded Spill -; BE-NEXT: std r31, 712(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f14, 720(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f15, 728(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f16, 736(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f17, 744(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f18, 752(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f19, 760(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f21, 776(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f22, 784(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f23, 792(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f24, 800(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f25, 808(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f26, 816(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f27, 824(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f28, 832(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f29, 840(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill -; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill -; BE-NEXT: fmr f31, f13 +; BE-NEXT: stdu r1, -624(r1) +; BE-NEXT: std r0, 640(r1) +; BE-NEXT: std r30, 608(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r3 -; BE-NEXT: fmr f29, f12 -; BE-NEXT: fmr f30, f11 -; BE-NEXT: fmr f28, f10 -; BE-NEXT: fmr f27, f9 -; BE-NEXT: fmr f26, f8 -; BE-NEXT: fmr f25, f7 -; BE-NEXT: fmr f24, f6 -; BE-NEXT: fmr f23, f5 -; BE-NEXT: fmr f22, f4 -; BE-NEXT: fmr f21, f3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 -; BE-NEXT: std r3, 304(r1) # 8-byte 
Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 -; BE-NEXT: std r3, 296(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 -; BE-NEXT: std r3, 280(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 -; BE-NEXT: std r3, 264(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 -; BE-NEXT: std r3, 248(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: std r3, 232(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 +; BE-NEXT: lhz r3, 926(r1) +; BE-NEXT: std r14, 480(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r14, 822(r1) +; BE-NEXT: std r15, 488(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 520(r1) # 8-byte Folded Spill ; BE-NEXT: std r3, 216(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 +; BE-NEXT: lhz r3, 934(r1) +; BE-NEXT: lhz r15, 814(r1) +; BE-NEXT: lhz r19, 742(r1) +; BE-NEXT: std r22, 544(r1) # 8-byte Folded Spill +; BE-NEXT: std r23, 552(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 568(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 576(r1) # 8-byte Folded Spill +; BE-NEXT: std r3, 208(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 910(r1) +; BE-NEXT: lhz r26, 766(r1) +; BE-NEXT: lhz r25, 774(r1) +; BE-NEXT: std r27, 584(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 592(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 600(r1) # 8-byte Folded Spill +; BE-NEXT: std r31, 616(r1) # 8-byte Folded Spill ; BE-NEXT: std r3, 200(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 +; BE-NEXT: lhz r3, 918(r1) +; BE-NEXT: lhz r31, 798(r1) +; BE-NEXT: lhz r29, 806(r1) +; BE-NEXT: lhz r28, 782(r1) +; BE-NEXT: lhz r27, 790(r1) +; BE-NEXT: lhz r23, 750(r1) +; BE-NEXT: lhz r22, 758(r1) +; BE-NEXT: std r16, 496(r1) # 8-byte Folded Spill 
+; BE-NEXT: std r17, 504(r1) # 8-byte Folded Spill +; BE-NEXT: std r3, 192(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 894(r1) +; BE-NEXT: mr r17, r7 +; BE-NEXT: mr r16, r4 +; BE-NEXT: std r18, 512(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 528(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 536(r1) # 8-byte Folded Spill +; BE-NEXT: std r24, 560(r1) # 8-byte Folded Spill ; BE-NEXT: std r3, 184(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f29 +; BE-NEXT: lhz r3, 902(r1) +; BE-NEXT: mr r24, r10 +; BE-NEXT: mr r20, r9 +; BE-NEXT: mr r21, r8 +; BE-NEXT: mr r18, r6 +; BE-NEXT: std r3, 176(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 878(r1) ; BE-NEXT: std r3, 168(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 +; BE-NEXT: lhz r3, 886(r1) +; BE-NEXT: std r3, 160(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 862(r1) ; BE-NEXT: std r3, 152(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1028(r1) +; BE-NEXT: lhz r3, 870(r1) +; BE-NEXT: std r3, 144(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 846(r1) ; BE-NEXT: std r3, 136(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 +; BE-NEXT: lhz r3, 854(r1) +; BE-NEXT: std r3, 128(r1) # 8-byte Folded Spill +; BE-NEXT: lhz r3, 830(r1) ; BE-NEXT: std r3, 120(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1044(r1) +; BE-NEXT: lhz r3, 838(r1) ; BE-NEXT: std r3, 112(r1) # 8-byte Folded Spill -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1036(r1) -; BE-NEXT: mr r15, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1060(r1) -; BE-NEXT: mr r14, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1052(r1) -; BE-NEXT: mr r31, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1076(r1) -; BE-NEXT: mr r29, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; 
BE-NEXT: lfs f1, 1068(r1) -; BE-NEXT: mr r28, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1092(r1) -; BE-NEXT: mr r27, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1084(r1) -; BE-NEXT: mr r26, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1108(r1) -; BE-NEXT: mr r25, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1100(r1) -; BE-NEXT: mr r24, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1124(r1) -; BE-NEXT: mr r23, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1116(r1) -; BE-NEXT: mr r22, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1140(r1) -; BE-NEXT: mr r21, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1132(r1) -; BE-NEXT: mr r20, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1156(r1) -; BE-NEXT: mr r19, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1148(r1) -; BE-NEXT: mr r18, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1172(r1) -; BE-NEXT: mr r17, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: lfs f1, 1164(r1) -; BE-NEXT: mr r16, r3 -; BE-NEXT: bl __truncsfhf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: clrldi r3, r5, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 424(r1) ; BE-NEXT: clrldi r3, r16, 48 -; BE-NEXT: stfs f1, 316(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 416(r1) ; BE-NEXT: clrldi r3, r17, 48 -; BE-NEXT: stfs f1, 312(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 440(r1) ; BE-NEXT: clrldi r3, r18, 48 -; BE-NEXT: stfs f1, 292(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r19, 48 -; BE-NEXT: stfs f1, 276(r1) # 4-byte Folded Spill 
-; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop +; BE-NEXT: std r3, 432(r1) ; BE-NEXT: clrldi r3, r20, 48 -; BE-NEXT: stfs f1, 260(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r21, 48 -; BE-NEXT: stfs f1, 244(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 -; BE-NEXT: nop -; BE-NEXT: clrldi r3, r22, 48 -; BE-NEXT: stfs f1, 228(r1) # 4-byte Folded Spill ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r23, 48 -; BE-NEXT: stfs f1, 212(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r24, 48 -; BE-NEXT: stfs f1, 196(r1) # 4-byte Folded Spill +; BE-NEXT: std r3, 456(r1) +; BE-NEXT: clrldi r3, r21, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r25, 48 -; BE-NEXT: stfs f1, 180(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r26, 48 -; BE-NEXT: stfs f1, 164(r1) # 4-byte Folded Spill +; BE-NEXT: std r3, 448(r1) +; BE-NEXT: mr r3, r19 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r27, 48 -; BE-NEXT: stfs f1, 148(r1) # 4-byte Folded Spill -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r28, 48 -; BE-NEXT: stfs f1, 132(r1) # 4-byte Folded Spill +; BE-NEXT: std r3, 472(r1) +; BE-NEXT: clrldi r3, r24, 48 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r29, 48 -; BE-NEXT: fmr f18, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r31, 48 -; BE-NEXT: fmr f17, f1 +; BE-NEXT: std r3, 464(r1) +; BE-NEXT: mr r3, r22 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r14, 48 -; BE-NEXT: fmr f16, f1 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: clrldi r3, r15, 48 -; BE-NEXT: fmr f15, f1 +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: mr r3, r23 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop 
-; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f14, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f31, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: mr r3, r25 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f30, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f29, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 248(r1) +; BE-NEXT: mr r3, r26 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f28, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f27, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 240(r1) +; BE-NEXT: mr r3, r27 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f26, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f25, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 264(r1) +; BE-NEXT: mr r3, r28 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 232(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f24, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 248(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f23, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 256(r1) +; BE-NEXT: mr r3, r29 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 264(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f22, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; 
BE-NEXT: nop -; BE-NEXT: ld r3, 280(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f21, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 280(r1) +; BE-NEXT: mr r3, r31 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: ld r3, 296(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f20, f1 -; BE-NEXT: clrldi r3, r3, 48 -; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: ld r3, 304(r1) # 8-byte Folded Reload -; BE-NEXT: fmr f19, f1 -; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r3, 272(r1) +; BE-NEXT: mr r3, r14 ; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f19 -; BE-NEXT: std r3, 328(r1) -; BE-NEXT: bl lrintf -; BE-NEXT: nop -; BE-NEXT: fmr f1, f20 -; BE-NEXT: std r3, 320(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 296(r1) +; BE-NEXT: mr r3, r15 +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f21 -; BE-NEXT: std r3, 344(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f22 -; BE-NEXT: std r3, 336(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 288(r1) +; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f23 -; BE-NEXT: std r3, 360(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f24 -; BE-NEXT: std r3, 352(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 312(r1) +; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f25 -; BE-NEXT: std r3, 376(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f26 -; BE-NEXT: std r3, 368(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 304(r1) +; BE-NEXT: ld r3, 128(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f27 -; BE-NEXT: std r3, 392(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f28 -; BE-NEXT: std r3, 384(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 328(r1) +; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; 
BE-NEXT: nop -; BE-NEXT: fmr f1, f29 -; BE-NEXT: std r3, 408(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f30 -; BE-NEXT: std r3, 400(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 320(r1) +; BE-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f31 -; BE-NEXT: std r3, 424(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f14 -; BE-NEXT: std r3, 416(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 344(r1) +; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f15 -; BE-NEXT: std r3, 440(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f16 -; BE-NEXT: std r3, 432(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 336(r1) +; BE-NEXT: ld r3, 160(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: fmr f1, f17 -; BE-NEXT: std r3, 456(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: fmr f1, f18 -; BE-NEXT: std r3, 448(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 360(r1) +; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 132(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 472(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 148(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 464(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 352(r1) +; BE-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 164(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 488(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 180(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 480(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 376(r1) +; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 196(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 504(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 212(r1) # 4-byte Folded Reload -; 
BE-NEXT: std r3, 496(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 368(r1) +; BE-NEXT: ld r3, 192(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 228(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 520(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 244(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 512(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 392(r1) +; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 260(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 536(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 276(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 528(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 384(r1) +; BE-NEXT: ld r3, 208(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 292(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 552(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: lfs f1, 312(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 544(r1) -; BE-NEXT: bl lrintf +; BE-NEXT: std r3, 408(r1) +; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload +; BE-NEXT: bl __extendhfsf2 ; BE-NEXT: nop -; BE-NEXT: lfs f1, 316(r1) # 4-byte Folded Reload -; BE-NEXT: std r3, 568(r1) ; BE-NEXT: bl lrintf ; BE-NEXT: nop -; BE-NEXT: std r3, 560(r1) -; BE-NEXT: addi r3, r1, 320 +; BE-NEXT: std r3, 400(r1) +; BE-NEXT: addi r3, r1, 416 ; BE-NEXT: lxvd2x vs0, 0, r3 -; BE-NEXT: addi r3, r1, 336 +; BE-NEXT: addi r3, r1, 432 ; BE-NEXT: lxvd2x vs1, 0, r3 -; BE-NEXT: addi r3, r1, 352 +; BE-NEXT: addi r3, r1, 448 ; BE-NEXT: lxvd2x vs2, 0, r3 -; BE-NEXT: addi r3, r1, 368 +; BE-NEXT: addi r3, r1, 464 ; BE-NEXT: lxvd2x vs3, 0, r3 -; BE-NEXT: addi r3, r1, 384 +; BE-NEXT: addi r3, r1, 224 ; BE-NEXT: lxvd2x vs4, 0, r3 -; BE-NEXT: addi r3, r1, 400 +; BE-NEXT: addi r3, r1, 240 ; BE-NEXT: lxvd2x vs5, 0, r3 -; BE-NEXT: addi r3, r1, 416 +; BE-NEXT: addi r3, r1, 256 ; BE-NEXT: lxvd2x vs6, 0, r3 -; BE-NEXT: addi r3, r1, 432 +; 
BE-NEXT: addi r3, r1, 272 ; BE-NEXT: lxvd2x vs7, 0, r3 -; BE-NEXT: addi r3, r1, 448 +; BE-NEXT: addi r3, r1, 288 ; BE-NEXT: lxvd2x vs8, 0, r3 -; BE-NEXT: addi r3, r1, 464 +; BE-NEXT: addi r3, r1, 304 ; BE-NEXT: lxvd2x vs9, 0, r3 -; BE-NEXT: addi r3, r1, 480 +; BE-NEXT: addi r3, r1, 320 ; BE-NEXT: lxvd2x vs10, 0, r3 -; BE-NEXT: addi r3, r1, 496 +; BE-NEXT: addi r3, r1, 336 ; BE-NEXT: lxvd2x vs11, 0, r3 -; BE-NEXT: addi r3, r1, 512 +; BE-NEXT: addi r3, r1, 352 ; BE-NEXT: lxvd2x vs12, 0, r3 -; BE-NEXT: addi r3, r1, 528 +; BE-NEXT: addi r3, r1, 368 ; BE-NEXT: lxvd2x vs13, 0, r3 -; BE-NEXT: addi r3, r1, 544 +; BE-NEXT: addi r3, r1, 384 ; BE-NEXT: lxvd2x v2, 0, r3 -; BE-NEXT: addi r3, r1, 560 +; BE-NEXT: addi r3, r1, 400 ; BE-NEXT: lxvd2x v3, 0, r3 ; BE-NEXT: li r3, 240 ; BE-NEXT: stxvd2x v3, r30, r3 @@ -2194,43 +1542,25 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-NEXT: li r3, 16 ; BE-NEXT: stxvd2x vs1, r30, r3 ; BE-NEXT: stxvd2x vs0, 0, r30 -; BE-NEXT: lfd f31, 856(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f30, 848(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f29, 840(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f28, 832(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f27, 824(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f26, 816(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f25, 808(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f24, 800(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f23, 792(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f22, 784(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f21, 776(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f20, 768(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f19, 760(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f18, 752(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f17, 744(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f16, 736(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f15, 728(r1) # 8-byte Folded Reload -; BE-NEXT: lfd f14, 720(r1) # 8-byte Folded Reload -; BE-NEXT: ld r31, 712(r1) # 8-byte Folded Reload -; BE-NEXT: ld r30, 704(r1) # 
8-byte Folded Reload -; BE-NEXT: ld r29, 696(r1) # 8-byte Folded Reload -; BE-NEXT: ld r28, 688(r1) # 8-byte Folded Reload -; BE-NEXT: ld r27, 680(r1) # 8-byte Folded Reload -; BE-NEXT: ld r26, 672(r1) # 8-byte Folded Reload -; BE-NEXT: ld r25, 664(r1) # 8-byte Folded Reload -; BE-NEXT: ld r24, 656(r1) # 8-byte Folded Reload -; BE-NEXT: ld r23, 648(r1) # 8-byte Folded Reload -; BE-NEXT: ld r22, 640(r1) # 8-byte Folded Reload -; BE-NEXT: ld r21, 632(r1) # 8-byte Folded Reload -; BE-NEXT: ld r20, 624(r1) # 8-byte Folded Reload -; BE-NEXT: ld r19, 616(r1) # 8-byte Folded Reload -; BE-NEXT: ld r18, 608(r1) # 8-byte Folded Reload -; BE-NEXT: ld r17, 600(r1) # 8-byte Folded Reload -; BE-NEXT: ld r16, 592(r1) # 8-byte Folded Reload -; BE-NEXT: ld r15, 584(r1) # 8-byte Folded Reload -; BE-NEXT: ld r14, 576(r1) # 8-byte Folded Reload -; BE-NEXT: addi r1, r1, 864 +; BE-NEXT: ld r31, 616(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 608(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 600(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 592(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 584(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 576(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 568(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 560(r1) # 8-byte Folded Reload +; BE-NEXT: ld r23, 552(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 544(r1) # 8-byte Folded Reload +; BE-NEXT: ld r21, 536(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 528(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 520(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 512(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 504(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 496(r1) # 8-byte Folded Reload +; BE-NEXT: ld r15, 488(r1) # 8-byte Folded Reload +; BE-NEXT: ld r14, 480(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 624 ; BE-NEXT: ld r0, 16(r1) ; BE-NEXT: mtlr r0 ; BE-NEXT: blr @@ -2238,508 +1568,334 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-LABEL: lrint_v32i64_v32f16: 
; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -688(r1) -; CHECK-NEXT: li r4, 208 -; CHECK-NEXT: std r0, 704(r1) -; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 224 -; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 240 -; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -576(r1) +; CHECK-NEXT: std r0, 592(r1) +; CHECK-NEXT: std r30, 560(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 256 -; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 272 -; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f20, f2 -; 
CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f21, f3 -; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 288 -; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stfd f24, 624(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f24, f6 -; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 304 -; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stfd f27, 648(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f27, f9 -; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 320 -; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stfd f30, 672(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f30, f12 -; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill -; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 368 -; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: lhz r3, 864(r1) +; CHECK-NEXT: li r11, 240 +; CHECK-NEXT: std r14, 432(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, 472(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r14, 744(r1) +; CHECK-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 256 +; CHECK-NEXT: std r22, 496(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r22, 680(r1) +; 
CHECK-NEXT: std r3, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r3, 856(r1) +; CHECK-NEXT: lhz r19, 672(r1) +; CHECK-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 272 +; CHECK-NEXT: std r23, 504(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r23, 688(r1) +; CHECK-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: std r3, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r3, 848(r1) +; CHECK-NEXT: li r11, 288 +; CHECK-NEXT: std r25, 520(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r25, 696(r1) +; CHECK-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 304 +; CHECK-NEXT: std r26, 528(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 536(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 544(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r28, 720(r1) +; CHECK-NEXT: lhz r27, 712(r1) +; CHECK-NEXT: lhz r26, 704(r1) +; CHECK-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 176(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: std r3, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: lhz r3, 840(r1) +; CHECK-NEXT: li r11, 320 +; CHECK-NEXT: std r29, 552(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r29, 728(r1) +; CHECK-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: std r3, 152(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r3, 832(r1) +; CHECK-NEXT: li r11, 336 +; CHECK-NEXT: std r31, 568(r1) # 8-byte Folded Spill +; CHECK-NEXT: lhz r31, 736(r1) +; CHECK-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 352 +; CHECK-NEXT: std r15, 440(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r16, 448(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 456(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r17, r6 +; CHECK-NEXT: mr r16, r5 +; CHECK-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 144(r1) # 
8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: std r3, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: lhz r3, 824(r1) +; CHECK-NEXT: li r11, 368 +; CHECK-NEXT: std r18, 464(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, 480(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r20, r8 +; CHECK-NEXT: mr r18, r7 +; CHECK-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: lhz r3, 816(r1) +; CHECK-NEXT: li r11, 384 +; CHECK-NEXT: std r21, 488(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 512(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r24, r10 +; CHECK-NEXT: mr r21, r9 +; CHECK-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 400 ; CHECK-NEXT: std r3, 112(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: lhz r3, 808(r1) +; CHECK-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; CHECK-NEXT: li r11, 416 ; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: lhz r3, 800(r1) +; CHECK-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill ; CHECK-NEXT: std r3, 96(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: lhz r3, 792(r1) ; CHECK-NEXT: std r3, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: lhz r3, 784(r1) ; CHECK-NEXT: std r3, 80(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: lhz r3, 776(r1) ; CHECK-NEXT: std r3, 72(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: fmr 
f1, f31 +; CHECK-NEXT: lhz r3, 768(r1) ; CHECK-NEXT: std r3, 64(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 832(r1) +; CHECK-NEXT: lhz r3, 760(r1) ; CHECK-NEXT: std r3, 56(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 840(r1) +; CHECK-NEXT: lhz r3, 752(r1) ; CHECK-NEXT: std r3, 48(r1) # 8-byte Folded Spill -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 848(r1) -; CHECK-NEXT: mr r15, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 856(r1) -; CHECK-NEXT: mr r14, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 864(r1) -; CHECK-NEXT: mr r31, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 872(r1) -; CHECK-NEXT: mr r29, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 880(r1) -; CHECK-NEXT: mr r28, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 888(r1) -; CHECK-NEXT: mr r27, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 896(r1) -; CHECK-NEXT: mr r26, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 904(r1) -; CHECK-NEXT: mr r25, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 912(r1) -; CHECK-NEXT: mr r24, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 920(r1) -; CHECK-NEXT: mr r23, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 928(r1) -; CHECK-NEXT: mr r22, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 936(r1) -; CHECK-NEXT: mr r21, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 944(r1) -; CHECK-NEXT: mr r20, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 952(r1) -; CHECK-NEXT: mr r19, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 960(r1) -; 
CHECK-NEXT: mr r18, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 968(r1) -; CHECK-NEXT: mr r17, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: lfs f1, 976(r1) -; CHECK-NEXT: mr r16, r3 -; CHECK-NEXT: bl __truncsfhf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: clrldi r3, r4, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: li r3, 204 -; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r15, r3 ; CHECK-NEXT: clrldi r3, r16, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: li r3, 200 -; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: mtvsrd v31, r15 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: clrldi r3, r17, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mr r17, r3 ; CHECK-NEXT: clrldi r3, r18, 48 -; CHECK-NEXT: fmr f29, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r19, 48 -; CHECK-NEXT: fmr f28, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r17 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: clrldi r3, r20, 48 -; CHECK-NEXT: fmr f27, f1 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r21, 48 -; CHECK-NEXT: fmr f26, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r22, 48 -; CHECK-NEXT: fmr f25, f1 -; CHECK-NEXT: bl __extendhfsf2 -; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r23, 48 -; CHECK-NEXT: fmr f24, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; 
CHECK-NEXT: clrldi r3, r24, 48 -; CHECK-NEXT: fmr f23, f1 +; CHECK-NEXT: mr r20, r3 +; CHECK-NEXT: clrldi r3, r21, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r25, 48 -; CHECK-NEXT: fmr f22, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r20 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r26, 48 -; CHECK-NEXT: fmr f21, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: mr r3, r19 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r27, 48 -; CHECK-NEXT: fmr f20, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r28, 48 -; CHECK-NEXT: fmr f19, f1 +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: clrldi r3, r24, 48 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r29, 48 -; CHECK-NEXT: fmr f18, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r21 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r31, 48 -; CHECK-NEXT: fmr f17, f1 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: xxmrghd vs0, v31, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: mr r3, r22 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r14, 48 -; CHECK-NEXT: fmr f16, f1 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: clrldi r3, r15, 48 -; CHECK-NEXT: fmr f15, f1 +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: mr r3, r23 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload -; CHECK-NEXT: fmr f14, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r24 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload -; CHECK-NEXT: fmr f30, f1 -; CHECK-NEXT: clrldi r3, 
r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r25 +; CHECK-NEXT: xxmrghd v27, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v30, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v29, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: mr r3, r26 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v28, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r25 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v27, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r27 +; CHECK-NEXT: xxmrghd v26, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v26, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v25, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: mr r3, r28 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v24, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r27 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v23, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r29 +; CHECK-NEXT: xxmrghd v25, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 128(r1) # 8-byte Folded Reload -; 
CHECK-NEXT: xxlor v22, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v21, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: mr r3, r31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: xxlor v20, f1, f1 -; CHECK-NEXT: clrldi r3, r3, 48 -; CHECK-NEXT: bl __extendhfsf2 +; CHECK-NEXT: mtvsrd v31, r29 +; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload -; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mr r3, r14 +; CHECK-NEXT: xxmrghd v24, vs0, v31 ; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl lrintf -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: xxlor f1, v20, v20 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v21, v21 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: xxlor f1, v22, v22 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v23, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v23, v23 -; CHECK-NEXT: mtvsrd v31, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: xxlor f1, v24, v24 -; 
CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v25, v25 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: xxlor f1, v26, v26 -; CHECK-NEXT: xxmrghd vs0, vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v22, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v27, v27 -; CHECK-NEXT: mtvsrd v31, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxlor f1, v28, v28 -; CHECK-NEXT: xxmrghd v27, vs0, v31 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: xxlor f1, v29, v29 -; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxlor f1, v30, v30 -; CHECK-NEXT: xxmrghd v29, vs0, v31 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v21, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f30 -; CHECK-NEXT: mtvsrd v31, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f14 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f15 -; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f16 ; 
CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v30, vs0, v30 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v20, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f17 -; CHECK-NEXT: mtvsrd v28, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f18 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v28, vs0, v28 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f19 -; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: mtvsrd v31, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f20 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v26, vs0, v26 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f21 -; CHECK-NEXT: mtvsrd v24, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f22 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v24, vs0, v24 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f23 -; CHECK-NEXT: mtvsrd v22, r3 +; CHECK-NEXT: mtvsrd v30, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f24 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v22, vs0, v22 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 152(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f25 -; CHECK-NEXT: mtvsrd v20, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f26 -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v20, vs0, v20 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; 
CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f27 -; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: mtvsrd v29, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f28 ; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xxmrghd v21, vs0, v21 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: ld r3, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: fmr f1, f29 -; CHECK-NEXT: mtvsrd v23, r3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 200 -; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload -; CHECK-NEXT: xxmrghd v23, vs0, v23 -; CHECK-NEXT: bl lrintf +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: ld r3, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl __extendhfsf2 ; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v25, r3 -; CHECK-NEXT: li r3, 204 -; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; CHECK-NEXT: mtvsrd v28, r29 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: xxswapd vs1, v23 +; CHECK-NEXT: xxswapd vs1, v29 ; CHECK-NEXT: li r4, 128 -; CHECK-NEXT: xxswapd vs2, v21 -; CHECK-NEXT: xxswapd vs3, v31 -; CHECK-NEXT: xxmrghd v2, vs0, v25 +; CHECK-NEXT: xxswapd vs2, v30 +; CHECK-NEXT: xxswapd vs3, v25 +; CHECK-NEXT: xxmrghd v2, vs0, v28 ; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 224 @@ -2747,35 +1903,35 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-NEXT: li r3, 208 ; CHECK-NEXT: stxvd2x vs2, r30, r3 ; CHECK-NEXT: li r3, 192 -; CHECK-NEXT: xxswapd vs0, v20 +; CHECK-NEXT: xxswapd vs0, v31 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: xxswapd vs1, v22 +; CHECK-NEXT: xxswapd vs1, v20 ; CHECK-NEXT: stxvd2x vs1, r30, r3 ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: xxswapd vs2, v28 -; CHECK-NEXT: xxswapd vs0, v24 +; CHECK-NEXT: xxswapd vs2, v23 +; CHECK-NEXT: xxswapd vs0, 
v21 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: xxswapd vs1, v26 +; CHECK-NEXT: xxswapd vs1, v22 ; CHECK-NEXT: stxvd2x vs1, r30, r3 ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stxvd2x vs2, r30, r3 ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: xxswapd vs0, v24 ; CHECK-NEXT: stxvd2x vs0, r30, r3 ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stxvd2x vs3, r30, r3 ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 144 -; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: xxswapd vs1, v26 ; CHECK-NEXT: stxvd2x vs1, r30, r3 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: li r4, 192 ; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 176 +; CHECK-NEXT: li r4, 224 ; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload ; CHECK-NEXT: xxswapd vs0, v27 ; CHECK-NEXT: stxvd2x vs0, r30, r3 @@ -2788,69 +1944,51 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-NEXT: li r3, 16 ; CHECK-NEXT: xxswapd vs3, vs3 ; CHECK-NEXT: stxvd2x vs3, r30, r3 -; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: li r3, 416 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 ; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 368 -; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f29, 664(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f28, 656(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f27, 648(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f26, 640(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f25, 632(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f24, 624(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f23, 616(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f22, 608(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f21, 600(r1) # 8-byte Folded Reload -; 
CHECK-NEXT: lfd f20, 592(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f19, 584(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: ld r31, 568(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 560(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 552(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 544(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 536(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 528(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 520(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 512(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 504(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 496(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 488(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 480(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 472(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 464(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 456(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 448(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 352 -; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload -; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r31, 536(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: ld r15, 440(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r14, 432(r1) # 8-byte Folded Reload ; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 336 -; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 488(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded 
Reload -; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 368 ; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 320 -; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, 440(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 352 ; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 304 -; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload +; CHECK-NEXT: li r3, 336 ; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: li r3, 320 ; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: li r3, 304 ; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: li r3, 288 ; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: li r3, 272 ; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: li r3, 256 ; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: li r3, 240 ; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 688 +; CHECK-NEXT: addi r1, r1, 576 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr @@ -2858,516 +1996,410 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; FAST-LABEL: lrint_v32i64_v32f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: li r4, 128 -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -560(r1) +; FAST-NEXT: std r0, 
576(r1) +; FAST-NEXT: std r30, 544(r1) # 8-byte Folded Spill ; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 -; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 144 -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 160 -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 176 -; FAST-NEXT: xxlor v22, f3, f3 -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f29, f9 -; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 192 -; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 208 -; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 224 -; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 240 -; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 256 -; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stxvd2x v28, r1, r4 # 
16-byte Folded Spill -; FAST-NEXT: li r4, 272 -; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 288 -; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 304 -; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 44 -; FAST-NEXT: xxlor v31, f6, f6 -; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill -; FAST-NEXT: lfs f1, 768(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: lhz r3, 848(r1) +; FAST-NEXT: li r11, 224 +; FAST-NEXT: std r14, 416(r1) # 8-byte Folded Spill +; FAST-NEXT: std r15, 424(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r15, 736(r1) +; FAST-NEXT: stxvd2x v20, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 240 +; FAST-NEXT: std r19, 456(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r14, 728(r1) +; FAST-NEXT: std r3, 184(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 840(r1) +; FAST-NEXT: lhz r19, 656(r1) +; FAST-NEXT: stxvd2x v21, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 256 +; FAST-NEXT: std r21, 472(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r21, 664(r1) +; FAST-NEXT: stxvd2x v22, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 176(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 832(r1) +; FAST-NEXT: li r11, 272 +; FAST-NEXT: std r23, 488(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r23, 672(r1) +; FAST-NEXT: stxvd2x v23, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 288 +; FAST-NEXT: std r24, 496(r1) # 8-byte Folded Spill +; FAST-NEXT: std r26, 512(r1) # 8-byte Folded Spill +; FAST-NEXT: std r27, 520(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r27, 696(r1) +; FAST-NEXT: lhz r26, 688(r1) +; FAST-NEXT: lhz r24, 680(r1) +; FAST-NEXT: stxvd2x v24, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 152(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 824(r1) +; FAST-NEXT: li r11, 304 +; FAST-NEXT: std 
r28, 528(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r28, 704(r1) +; FAST-NEXT: stxvd2x v25, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 144(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 816(r1) +; FAST-NEXT: li r11, 320 +; FAST-NEXT: std r29, 536(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r29, 712(r1) +; FAST-NEXT: stxvd2x v26, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 336 +; FAST-NEXT: std r31, 552(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r31, 720(r1) +; FAST-NEXT: std r16, 432(r1) # 8-byte Folded Spill +; FAST-NEXT: std r17, 440(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r17, r6 +; FAST-NEXT: mr r16, r5 +; FAST-NEXT: stxvd2x v27, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 136(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 808(r1) +; FAST-NEXT: li r11, 352 +; FAST-NEXT: std r18, 448(r1) # 8-byte Folded Spill +; FAST-NEXT: std r20, 464(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r20, r8 +; FAST-NEXT: mr r18, r7 +; FAST-NEXT: stxvd2x v28, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 104(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 800(r1) +; FAST-NEXT: li r11, 368 +; FAST-NEXT: std r22, 480(r1) # 8-byte Folded Spill +; FAST-NEXT: std r25, 504(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r25, r10 +; FAST-NEXT: mr r22, r9 +; FAST-NEXT: stxvd2x v29, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 384 +; FAST-NEXT: std r3, 96(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 792(r1) +; FAST-NEXT: stxvd2x v30, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: li r11, 400 +; FAST-NEXT: std r3, 88(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 784(r1) +; FAST-NEXT: stxvd2x v31, r1, r11 # 16-byte Folded Spill +; FAST-NEXT: std r3, 80(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 776(r1) +; FAST-NEXT: std r3, 72(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 768(r1) +; FAST-NEXT: std r3, 64(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 760(r1) +; FAST-NEXT: std r3, 56(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 752(r1) +; 
FAST-NEXT: std r3, 48(r1) # 8-byte Folded Spill +; FAST-NEXT: lhz r3, 744(r1) +; FAST-NEXT: std r3, 40(r1) # 8-byte Folded Spill +; FAST-NEXT: clrldi r3, r4, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 120 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 760(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r16, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 112 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 752(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 208 +; FAST-NEXT: xxmrghd vs0, vs0, v31 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: clrldi r3, r17, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 104 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 744(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r18, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 96 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 736(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 192 +; FAST-NEXT: xxmrghd vs0, vs0, v31 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: clrldi r3, r20, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 88 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 728(r1) -; FAST-NEXT: bl __truncsfhf2 -; 
FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r22, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 80 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 720(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: xxmrghd vs0, vs0, v31 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: mr r3, r19 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 72 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 712(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: clrldi r3, r25, 48 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 704(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxmrghd vs0, v31, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: mr r3, r21 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 56 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 696(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r23 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 48 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 688(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: 
clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r24 +; FAST-NEXT: xxmrghd v27, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: xxlor v21, f1, f1 -; FAST-NEXT: lfs f1, 680(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r26 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: xxlor v20, f1, f1 -; FAST-NEXT: lfs f1, 672(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r27 +; FAST-NEXT: xxmrghd v26, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: xxlor v24, f1, f1 -; FAST-NEXT: lfs f1, 664(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r28 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 656(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, r29 +; FAST-NEXT: xxmrghd v25, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 648(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: lfs f1, 640(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mr r3, 
r14 +; FAST-NEXT: xxmrghd v24, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: lfs f1, 632(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: mr r3, r15 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 624(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 40(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v23, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: xxlor f1, v25, v25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 48(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: xxlor f1, v26, v26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 56(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v22, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: xxlor f1, v27, v27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 64(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: xxlor f1, v28, v28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 72(r1) # 8-byte Folded 
Reload +; FAST-NEXT: xxmrghd v21, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 80(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: xxlor f1, v29, v29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 88(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v20, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: xxlor f1, v30, v30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: ld r3, 96(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: xxlor f1, v31, v31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 104(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v31, vs0, v31 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f14 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: ld r3, 136(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f14, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 144(r1) # 
8-byte Folded Reload +; FAST-NEXT: xxmrghd v30, vs0, v30 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: xxlor f1, v22, v22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: ld r3, 152(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: xxlor f1, v23, v23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; FAST-NEXT: xxmrghd v29, vs0, v29 ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: li r3, 44 -; FAST-NEXT: fmr f15, f1 -; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: ld r3, 184(r1) # 8-byte Folded Reload ; FAST-NEXT: bl __extendhfsf2 ; FAST-NEXT: nop -; FAST-NEXT: fctid f3, f15 -; FAST-NEXT: fctid f4, f17 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f14 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f13, f1 -; FAST-NEXT: fctid f9, f20 -; FAST-NEXT: fctid f10, f22 -; FAST-NEXT: fctid f11, f24 -; FAST-NEXT: fctid f12, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: fctid f0, f21 -; FAST-NEXT: mtvsrd v2, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtvsrd v3, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f11 -; 
FAST-NEXT: fctid f11, f31 -; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: xxlor f12, v24, v24 -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: fctid f13, f13 -; FAST-NEXT: xxmrghd v3, vs5, v3 -; FAST-NEXT: fctid f5, f26 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: xxmrghd v4, vs7, vs6 -; FAST-NEXT: fctid f6, f27 -; FAST-NEXT: fctid f7, f28 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: xxmrghd v2, v2, vs10 -; FAST-NEXT: fctid f10, f30 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: xxmrghd v5, vs12, vs11 -; FAST-NEXT: xxlor f11, v20, v20 -; FAST-NEXT: xxlor f12, v21, v21 -; FAST-NEXT: fctid f11, f11 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: mtfprd f13, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v0, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload -; FAST-NEXT: mtvsrd v1, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v6, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: 
mtvsrd v7, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: xxmrghd v10, vs12, vs11 -; FAST-NEXT: xxmrghd v0, v0, vs13 -; FAST-NEXT: xxswapd vs12, v0 -; FAST-NEXT: xxmrghd v0, vs9, vs8 -; FAST-NEXT: xxmrghd v7, v8, v7 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: xxswapd v7, v7 -; FAST-NEXT: xxmrghd v8, v9, v8 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: xxswapd v8, v8 -; FAST-NEXT: xxmrghd v6, v9, v6 -; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: xxswapd vs1, v29 +; FAST-NEXT: li r4, 112 +; FAST-NEXT: xxswapd vs2, v30 +; FAST-NEXT: xxswapd vs3, v25 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 240 -; FAST-NEXT: stxvd2x v8, r30, r3 +; FAST-NEXT: xxmrghd v2, vs0, v28 +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 224 -; FAST-NEXT: stxvd2x v7, r30, r3 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 208 -; FAST-NEXT: xxswapd vs11, v6 -; FAST-NEXT: xxmrghd v6, vs10, vs7 -; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: stxvd2x vs2, r30, r3 ; FAST-NEXT: li r3, 192 -; FAST-NEXT: xxmrghd v1, v9, v1 -; FAST-NEXT: xxswapd vs11, v1 -; FAST-NEXT: xxmrghd v1, vs6, vs5 -; FAST-NEXT: xxswapd vs5, v10 -; FAST-NEXT: xxswapd vs6, v5 -; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: xxswapd vs0, v31 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 176 -; FAST-NEXT: stxvd2x vs12, r30, r3 +; FAST-NEXT: xxswapd vs1, v20 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 160 -; FAST-NEXT: stxvd2x vs5, r30, r3 +; FAST-NEXT: xxswapd vs2, v23 +; FAST-NEXT: xxswapd vs0, v21 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 144 -; FAST-NEXT: stxvd2x vs6, r30, 
r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: xxswapd vs1, v22 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 128 -; FAST-NEXT: xxswapd vs5, v6 -; FAST-NEXT: stxvd2x vs5, r30, r3 -; FAST-NEXT: li r3, 112 -; FAST-NEXT: xxswapd vs2, v1 -; FAST-NEXT: xxswapd vs6, v0 ; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxswapd vs0, v24 +; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 96 -; FAST-NEXT: fctid f2, f29 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: stxvd2x vs3, r30, r3 ; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxmrghd v5, vs7, vs4 -; FAST-NEXT: xxswapd vs4, v2 -; FAST-NEXT: xxmrghd v0, vs0, vs3 -; FAST-NEXT: xxswapd vs0, v5 -; FAST-NEXT: xxswapd vs3, v3 -; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 160 +; FAST-NEXT: xxswapd vs1, v26 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 64 -; FAST-NEXT: xxswapd vs0, v0 +; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 192 +; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 208 +; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: xxswapd vs0, v27 ; FAST-NEXT: stxvd2x vs0, r30, r3 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxmrghd v5, vs2, vs1 -; FAST-NEXT: xxswapd vs1, v4 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 32 -; FAST-NEXT: xxswapd vs2, v5 +; FAST-NEXT: xxswapd vs2, vs2 ; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 32 +; FAST-NEXT: xxswapd vs1, vs1 +; FAST-NEXT: stxvd2x vs1, r30, r3 ; FAST-NEXT: li r3, 16 +; FAST-NEXT: xxswapd vs3, vs3 ; FAST-NEXT: stxvd2x vs3, r30, r3 -; FAST-NEXT: li r3, 304 +; FAST-NEXT: li r3, 400 +; FAST-NEXT: xxswapd vs4, vs4 ; FAST-NEXT: stxvd2x vs4, 0, r30 -; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded 
Reload -; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload ; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 288 -; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload +; FAST-NEXT: li r3, 384 +; FAST-NEXT: ld r31, 552(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r30, 544(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r29, 536(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r28, 528(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r27, 520(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r26, 512(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r25, 504(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r24, 496(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r23, 488(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r22, 480(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r21, 472(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r20, 464(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r19, 456(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r18, 448(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r17, 440(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r16, 432(r1) # 8-byte Folded Reload ; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 272 +; FAST-NEXT: li r3, 368 +; FAST-NEXT: 
ld r15, 424(r1) # 8-byte Folded Reload +; FAST-NEXT: ld r14, 416(r1) # 8-byte Folded Reload ; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 256 +; FAST-NEXT: li r3, 352 ; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 240 +; FAST-NEXT: li r3, 336 ; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 224 +; FAST-NEXT: li r3, 320 ; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 208 +; FAST-NEXT: li r3, 304 ; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 192 +; FAST-NEXT: li r3, 288 ; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 176 +; FAST-NEXT: li r3, 272 ; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 160 +; FAST-NEXT: li r3, 256 ; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 +; FAST-NEXT: li r3, 240 ; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 +; FAST-NEXT: li r3, 224 ; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 480 +; FAST-NEXT: addi r1, r1, 560 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 ; FAST-NEXT: blr diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll index d3af2153588a1..ecb6bd0932ef3 100644 --- a/llvm/test/CodeGen/RISCV/lrint-conv.ll +++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll @@ -5,14 +5,25 @@ ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 -; FIXME: crash -; define ITy @test_lrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) -; } +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; RV32-LABEL: test_lrint_ixx_f16: +; RV32: call lrintf +; +; RV64-LABEL: test_lrint_ixx_f16: +; RV64: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret 
ITy %res +} -; define ITy @test_llrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) -; } +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; RV32-LABEL: test_llrint_ixx_f16: +; RV32: call llrintf +; +; RV64-LABEL: test_llrint_ixx_f16: +; RV64: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} define ITy @test_lrint_ixx_f32(float %x) nounwind { ; RV32-LABEL: test_lrint_ixx_f32: diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll index 3c50aea1095f4..5c0a64f1477e6 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll @@ -7,12 +7,52 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -; FIXME: crash -; define i32 @testmswh(half %x) nounwind { -; entry: -; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) -; ret i32 %0 -; } +define i32 @testmswh(half %x) nounwind { +; X86-NOSSE-LABEL: testmswh: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: addl $4, %esp +; X86-NOSSE-NEXT: fistpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmswh: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll rintf +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll __truncsfhf2 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; 
X86-SSE2-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: testmswh: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq rintf@PLT +; X64-SSE-NEXT: callq __truncsfhf2@PLT +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: cvttss2si %xmm0, %eax +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +entry: + %0 = tail call i32 @llvm.lrint.i32.f16(half %x) + ret i32 %0 +} define i32 @testmsws(float %x) nounwind { ; X86-NOSSE-LABEL: testmsws: