From e81188d58202ee7b887e48bc3e4b102fc5f45619 Mon Sep 17 00:00:00 2001 From: Kai Yan Date: Wed, 24 Jul 2024 12:06:35 +0800 Subject: [PATCH 1/5] [llvm][CodeGen] Added missing initialization failure information for window scheduler (#99449) Added missing initialization failure information for window scheduler. --- llvm/lib/CodeGen/WindowScheduler.cpp | 5 ++++- llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 0777480499e55..3fe8a1aaafd12 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -232,8 +232,11 @@ bool WindowScheduler::initialize() { return false; } for (auto &Def : MI.all_defs()) - if (Def.isReg() && Def.getReg().isPhysical()) + if (Def.isReg() && Def.getReg().isPhysical()) { + LLVM_DEBUG(dbgs() << "Physical registers are not supported in " + "window scheduling!\n"); return false; + } } if (SchedInstrNum <= WindowRegionLimit) { LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir index 601b98dca8e20..be75301b016ed 100644 --- a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir +++ b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir @@ -3,6 +3,7 @@ # RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ # RUN: | FileCheck %s +# CHECK: Physical registers are not supported in window scheduling! # CHECK: The WindowScheduler failed to initialize! --- From 7b86034dcb8c7fd7ea125cec43f0117cd4a428b6 Mon Sep 17 00:00:00 2001 From: Kai Yan Date: Wed, 24 Jul 2024 12:11:58 +0800 Subject: [PATCH 2/5] [llvm][CodeGen] Added a new restriction for II by pragma in window scheduler (#99448) Added a new restriction for window scheduling. Window scheduling is disabled when llvm.loop.pipeline.initiationinterval is set. --- llvm/lib/CodeGen/MachinePipeliner.cpp | 12 ++- ...swp-ws-pragma-initiation-interval-fail.mir | 83 +++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 497e282bb9768..5c68711ff6193 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -528,8 +528,16 @@ bool MachinePipeliner::useSwingModuloScheduler() { } bool MachinePipeliner::useWindowScheduler(bool Changed) { - // WindowScheduler does not work when it is off or when SwingModuloScheduler - // is successfully scheduled. + // WindowScheduler does not work for following cases: + // 1. when it is off. + // 2. when SwingModuloScheduler is successfully scheduled. + // 3. when pragma II is enabled. + if (II_setByPragma) { + LLVM_DEBUG(dbgs() << "Window scheduling is disabled when " + "llvm.loop.pipeline.initiationinterval is set.\n"); + return false; + } + return WindowSchedulingOption == WindowSchedulingFlag::WS_Force || (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed); } diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir new file mode 100644 index 0000000000000..6e69a76290fb1 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir @@ -0,0 +1,83 @@ +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ +# RUN: | FileCheck %s +# REQUIRES: asserts + +# Test that checks no window scheduler is performed if the II set by pragma was +# enabled + +# CHECK: Window scheduling is disabled when llvm.loop.pipeline.initiationinterval is set. + +--- | + define void @test_pragma_ii_fail(ptr %a0, i32 %a1) { + b0: + %v0 = icmp sgt i32 %a1, 1 + br i1 %v0, label %b1, label %b4 + + b1: ; preds = %b0 + %v1 = load i32, ptr %a0, align 4 + %v2 = add i32 %v1, 10 + %v4 = add i32 %a1, -1 + %cgep = getelementptr i32, ptr %a0, i32 1 + br label %b2 + + b2: ; preds = %b2, %b1 + %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ] + %v6 = phi ptr [ %cgep2, %b2 ], [ %cgep, %b1 ] + %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ] + store i32 %v7, ptr %v6, align 4 + %v8 = add i32 %v7, 10 + %cgep1 = getelementptr i32, ptr %v6, i32 -1 + store i32 %v8, ptr %cgep1, align 4 + %v10 = add i32 %v7, 10 + %v12 = add i32 %v5, -1 + %v13 = icmp eq i32 %v12, 0 + %cgep2 = getelementptr i32, ptr %v6, i32 1 + br i1 %v13, label %b4, label %b2, !llvm.loop !0 + + b4: ; preds = %b2, %b0 + ret void + } + + !0 = distinct !{!0, !1} + !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 2} +... +--- +name: test_pragma_ii_fail +tracksRegLiveness: true +body: | + bb.0.b0: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $r0, $r1 + + %0:intregs = COPY $r1 + %1:intregs = COPY $r0 + %2:predregs = C2_cmpgti %0, 1 + J2_jumpf %2, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.b1: + successors: %bb.2(0x80000000) + + %3:intregs, %4:intregs = L2_loadri_pi %1, 4 + %5:intregs = A2_addi killed %3, 10 + %6:intregs = A2_addi %0, -1 + %7:intregs = COPY %6 + J2_loop0r %bb.2, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2.b2 (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %8:intregs = PHI %4, %bb.1, %9, %bb.2 + %10:intregs = PHI %5, %bb.1, %11, %bb.2 + S2_storeri_io %8, 0, %10 + %11:intregs = A2_addi %10, 10 + S2_storeri_io %8, -4, %11 + %9:intregs = A2_addi %8, 4 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3.b4: + PS_jmpret $r31, implicit-def dead $pc + +... From 06d009789f771e8cef82714549c3136e320312be Mon Sep 17 00:00:00 2001 From: Kai Yan Date: Thu, 25 Jul 2024 19:16:23 +0800 Subject: [PATCH 3/5] [llvm][CodeGen] Fixed a bug in stall cycle calculation for window scheduler (#99451) Fixed a bug in stall cycle calculation. When a register defined by an instruction in the current iteration is used by an instruction in the next iteration, we have modified the number of stall cycle that need to be inserted. --- llvm/lib/CodeGen/WindowScheduler.cpp | 7 ++- .../CodeGen/Hexagon/swp-ws-stall-cycle.mir | 59 +++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 3fe8a1aaafd12..02275afff4e6d 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -488,6 +488,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, // ======================================== int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { int MaxStallCycle = 0; + int CurrentII = MaxCycle + 1; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = TripleDAG->getSUnit(&MI); @@ -495,8 +496,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { for (auto &Succ : SU->Succs) { if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) continue; - // If the expected cycle does not exceed MaxCycle, no check is needed. - if (DefCycle + (int)Succ.getLatency() <= MaxCycle) + // If the expected cycle does not exceed CurrentII, no check is needed. + if (DefCycle + (int)Succ.getLatency() <= CurrentII) continue; // If the cycle of the scheduled MI A is less than that of the scheduled // MI B, the scheduling will fail because the lifetime of the @@ -506,7 +507,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { if (DefCycle < UseCycle) return WindowIILimit; // Get the stall cycle introduced by the register between two trips. - int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle; + int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; MaxStallCycle = std::max(MaxStallCycle, StallCycle); } } diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir new file mode 100644 index 0000000000000..ddba67d78eb58 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir @@ -0,0 +1,59 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs \ +# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \ +# RUN: 2>&1 | FileCheck %s + +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 + +--- +name: test_window_stall_cycle +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $r0, $r1 + + %0:intregs = COPY $r1 + %1:intregs = COPY $r0 + %2:intregs = nsw A2_add %0, %1 + %3:intregs = S2_lsr_i_r_acc %2, %2, 31 + %4:intregs = S2_asr_i_r killed %3, 1 + %5:predregs = C2_cmpgt %1, %4 + %6:intregs = A2_tfrsi 0 + J2_jumpt killed %5, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2(0x80000000) + + %7:intregs = A2_addi %4, 2 + %8:intregs = A2_tfrsi 0 + %9:intregs = A2_sub %4, %1 + %10:intregs = A2_addi %9, 1 + %11:intregs = COPY %10 + J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2 (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %12:intregs = PHI %7, %bb.1, %13, %bb.2 + %14:intregs = PHI %8, %bb.1, %15, %bb.2 + %16:intregs = PHI %8, %bb.1, %17, %bb.2 + %18:intregs, %13:intregs = L2_loadri_pi %12, -4 + %17:intregs = nsw A2_add killed %18, %16 + %15:intregs = A2_max %17, %14 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3: + %19:intregs = PHI %6, %bb.0, %15, %bb.2 + $r0 = COPY %19 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... From 5a164a28e37fe3cda99236595167f7762b47c76d Mon Sep 17 00:00:00 2001 From: Kai Yan Date: Wed, 24 Jul 2024 12:06:10 +0800 Subject: [PATCH 4/5] [llvm][CodeGen] Fixed max cycle calculation with zero-cost instructions for window scheduler (#99454) We discovered some scheduling failures occurring when zero-cost instructions were involved. This issue will be addressed by this patch. --- llvm/lib/CodeGen/WindowScheduler.cpp | 16 ++++--- .../test/CodeGen/Hexagon/swp-ws-zero-cost.mir | 45 +++++++++++++++++++ 2 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 02275afff4e6d..6a2c6537470db 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -440,12 +440,16 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, int PredCycle = getOriCycle(PredMI); ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); } - // ResourceManager can be used to detect resource conflicts between the - // current MI and the previously inserted MIs. - while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { - ++CurCycle; - if (CurCycle == (int)WindowIILimit) - return CurCycle; + // Zero cost instructions do not need to check resource. + if (!TII->isZeroCost(MI.getOpcode())) { + // ResourceManager can be used to detect resource conflicts between the + // current MI and the previously inserted MIs. + while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { + ++CurCycle; + if (CurCycle == (int)WindowIILimit) + return CurCycle; + } + RM.reserveResources(*SU, CurCycle); } RM.reserveResources(*SU, CurCycle); OriToCycle[getOriMI(&MI)] = CurCycle; diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir new file mode 100644 index 0000000000000..ecf49a83c69e1 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir @@ -0,0 +1,45 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ +# RUN: | FileCheck %s + +# CHECK-NOT: Can't find a valid II. Keep searching... +# CHECK: Start analyzing II +# CHECK: Start scheduling Phis +# CHECK: Current window Offset is {{[0-9]+}} and II is {{[0-9]+}} + +--- +name: relu +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2 + %0:intregs = COPY $r2 + %1:intregs = COPY $r1 + %2:intregs = COPY $r0 + %3:predregs = C2_cmpeqi %2, 0 + J2_jumpt killed %3, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + bb.1: + successors: %bb.3(0x80000000) + %4:hvxvr = V6_vd0 + %5:intregs = A2_addi %2, 31 + %6:intregs = S2_lsr_i_r %5, 5 + %7:intregs = COPY %6 + J2_loop0r %bb.3, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + J2_jump %bb.3, implicit-def dead $pc + bb.2: + PS_jmpret $r31, implicit-def dead $pc + bb.3 (machine-block-address-taken): + successors: %bb.3(0x7c000000), %bb.2(0x04000000) + %8:intregs = PHI %1, %bb.1, %9, %bb.3 + %10:intregs = PHI %0, %bb.1, %14, %bb.3 + %11:hvxvr, %9:intregs = V6_vL32b_pi %8, 128 + %12:intregs = COPY %10 + %13:hvxvr = V6_vmaxw killed %11, %4 + %14:intregs = V6_vS32b_pi %12, 128, killed %13 + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc +... + From 54579830d81a67cfa52b90c34bcf9a631f53fcc5 Mon Sep 17 00:00:00 2001 From: Kai Yan Date: Mon, 5 Aug 2024 17:44:05 +0800 Subject: [PATCH 5/5] [llvm][CodeGen] Address the issue of multiple resource reservations In window scheduling (#101665) Address the issue of multiple resource reservations in window scheduling. --- llvm/lib/CodeGen/WindowScheduler.cpp | 1 - .../Hexagon/swp-ws-resource-reserve.mir | 100 ++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 6a2c6537470db..f1658e36ae1e9 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -451,7 +451,6 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, } RM.reserveResources(*SU, CurCycle); } - RM.reserveResources(*SU, CurCycle); OriToCycle[getOriMI(&MI)] = CurCycle; LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir b/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir new file mode 100644 index 0000000000000..4a9a09c4148cb --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir @@ -0,0 +1,100 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ +# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \ +# RUN: | FileCheck %s + +# We want to verify that all three V6_vaddw instructions are emitted in the same cycle. +# CHECK-LABEL: Current window Offset is 2 +# CHECK: Cycle [[CycleNum:[0-9]+]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr +# CHECK-LABEL: Current window Offset is 3 + +--- | + define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) { + entry: + %isZeroLength = icmp eq i32 %N, 0 + br i1 %isZeroLength, label %loop.exit, label %loop.preheader + + loop.preheader: ; preds = %entry + %half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608) + %one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216) + %two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824) + br label %loop.body + + loop.exit: ; preds = %loop.body, %entry + ret void + + loop.body: ; preds = %loop.body, %loop.preheader + %lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ] + %lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ] + %index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ] + %vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128 + %vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1) + %vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1) + %vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1) + %vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2) + %vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3) + %vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4) + store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128 + %index.next = add nuw i32 %index, 32 + %continue = icmp ult i32 %index.next, %N + %cgep1 = getelementptr i8, ptr %lsr.iv, i32 128 + %cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128 + br i1 %continue, label %loop.body, label %loop.exit + } + + declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) + declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>) +... +--- +name: add_parallel +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2 + + %0:intregs = COPY $r2 + %1:intregs = COPY $r1 + %2:intregs = COPY $r0 + %3:predregs = C2_cmpeqi %2, 0 + J2_jumpt killed %3, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.loop.preheader: + successors: %bb.3(0x80000000) + + %4:intregs = A2_tfrsi 1056964608 + %5:hvxvr = V6_lvsplatw killed %4 + %6:intregs = A2_tfrsi 1065353216 + %7:hvxvr = V6_lvsplatw killed %6 + %8:intregs = A2_tfrsi 1073741824 + %9:hvxvr = V6_lvsplatw killed %8 + %10:intregs = A2_addi %2, 31 + %11:intregs = S2_lsr_i_r %10, 5 + %12:intregs = COPY %11 + J2_loop0r %bb.3, %12, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + J2_jump %bb.3, implicit-def dead $pc + + bb.2.loop.exit: + PS_jmpret $r31, implicit-def dead $pc + + bb.3.loop.body (machine-block-address-taken): + successors: %bb.3(0x7c000000), %bb.2(0x04000000) + + %13:intregs = PHI %1, %bb.1, %14, %bb.3 + %15:intregs = PHI %0, %bb.1, %16, %bb.3 + %17:hvxvr, %14:intregs = V6_vL32b_pi %13, 128 :: (load (s1024) from %ir.lsr.iv1) + %18:hvxvr = V6_vaddw %7, %17 + %19:hvxvr = V6_vaddw %5, %17 + %20:hvxvr = V6_vaddw %9, %17 + %21:hvxvr = V6_vaddw %18, killed %19 + %22:hvxvr = V6_vaddw %18, killed %20 + %23:hvxvr = V6_vaddw killed %22, killed %21 + %16:intregs = V6_vS32b_pi %15, 128, killed %23 :: (store (s1024) into %ir.lsr.iv) + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc + +...