Skip to content

Commit be6f5e1

Browse files
jmmartinez authored and DanielCChen committed
[AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: use BranchProbability and TargetSchedModel (llvm#109818)
Remove s_cbranch_execz branches if the transformation is profitable according to `BranchProbability` and `TargetSchedModel`.
1 parent a34393d commit be6f5e1

21 files changed

+154
-106
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,13 @@
1515
#include "GCNSubtarget.h"
1616
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1717
#include "llvm/CodeGen/MachineFunctionPass.h"
18+
#include "llvm/CodeGen/TargetSchedule.h"
19+
#include "llvm/Support/BranchProbability.h"
1820

1921
using namespace llvm;
2022

2123
#define DEBUG_TYPE "si-pre-emit-peephole"
2224

23-
static unsigned SkipThreshold;
24-
25-
static cl::opt<unsigned, true> SkipThresholdFlag(
26-
"amdgpu-skip-threshold", cl::Hidden,
27-
cl::desc(
28-
"Number of instructions before jumping over divergent control flow"),
29-
cl::location(SkipThreshold), cl::init(12));
30-
3125
namespace {
3226

3327
class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4135
MachineBasicBlock *&TrueMBB,
4236
MachineBasicBlock *&FalseMBB,
4337
SmallVectorImpl<MachineOperand> &Cond);
44-
bool mustRetainExeczBranch(const MachineBasicBlock &From,
38+
bool mustRetainExeczBranch(const MachineInstr &Branch,
39+
const MachineBasicBlock &From,
4540
const MachineBasicBlock &To) const;
4641
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
4742

@@ -304,11 +299,61 @@ bool SIPreEmitPeephole::getBlockDestinations(
304299
return true;
305300
}
306301

302+
namespace {
303+
// Cost model used to decide whether a conditional branch is worth keeping.
// It accumulates the scheduling-model latency of the instructions handed to
// isProfitable() and compares that running total against the expected cost of
// keeping the branch, weighted by the probability of the edge from the
// branch's block to `Succ`.
class BranchWeightCostModel {
304+
const SIInstrInfo &TII;
305+
const TargetSchedModel &SchedModel;
306+
// Probability of the edge from the branch's parent block to `Succ`.
BranchProbability BranchProb;
307+
// Fixed cost assumed for the branch when it is not taken.
static constexpr uint64_t BranchNotTakenCost = 1;
308+
// Latency of the branch instruction as reported by the scheduling model.
uint64_t BranchTakenCost;
309+
// Running sum of the latencies of all instructions passed to isProfitable().
uint64_t ThenCyclesCost = 0;
310+
311+
public:
312+
// `Branch` is the branch under consideration and `Succ` must be a successor
// of the block containing it (asserted below).
// NOTE(review): callers appear to pass the fall-through 'then' block as
// `Succ` - confirm against mustRetainExeczBranch.
BranchWeightCostModel(const SIInstrInfo &TII, const MachineInstr &Branch,
313+
const MachineBasicBlock &Succ)
314+
: TII(TII), SchedModel(TII.getSchedModel()) {
315+
const MachineBasicBlock &Head = *Branch.getParent();
316+
const auto *FromIt = find(Head.successors(), &Succ);
317+
assert(FromIt != Head.succ_end());
318+
319+
BranchProb = Head.getSuccProbability(FromIt);
320+
// An unknown probability is treated as zero, which makes isProfitable()
// reduce to `ThenCyclesCost <= BranchTakenCost`.
if (BranchProb.isUnknown())
321+
BranchProb = BranchProbability::getZero();
322+
BranchTakenCost = SchedModel.computeInstrLatency(&Branch);
323+
}
324+
325+
// Adds the latency of `MI` to the running total and returns true while
// removing the branch is still considered profitable. This is stateful: the
// result depends on every instruction passed in so far. Wait-count
// instructions are always reported as not profitable.
bool isProfitable(const MachineInstr &MI) {
326+
if (TII.isWaitcnt(MI.getOpcode()))
327+
return false;
328+
329+
ThenCyclesCost += SchedModel.computeInstrLatency(&MI);
330+
331+
// Consider `P = N/D` to be the probability of execz being false, i.e. of
332+
// entering the then-block. The transformation is profitable if always
333+
// executing the 'then' block is cheaper than sometimes executing 'then' and
334+
// always executing s_cbranch_execz:
335+
// * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336+
// * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337+
// * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338+
// BranchNotTakenCost
339+
// Both sides are multiplied by D so the comparison stays in integers.
uint64_t Numerator = BranchProb.getNumerator();
340+
uint64_t Denominator = BranchProb.getDenominator();
341+
return (Denominator - Numerator) * ThenCyclesCost <=
342+
((Denominator - Numerator) * BranchTakenCost +
343+
Numerator * BranchNotTakenCost);
344+
}
345+
};
346+
307347
bool SIPreEmitPeephole::mustRetainExeczBranch(
308-
const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309-
unsigned NumInstr = 0;
310-
const MachineFunction *MF = From.getParent();
348+
const MachineInstr &Branch, const MachineBasicBlock &From,
349+
const MachineBasicBlock &To) const {
350+
351+
const MachineBasicBlock &Head = *Branch.getParent();
352+
assert(is_contained(Head.successors(), &From));
353+
354+
BranchWeightCostModel CostModel{*TII, Branch, From};
311355

356+
const MachineFunction *MF = From.getParent();
312357
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
313358
MBBI != End && MBBI != ToI; ++MBBI) {
314359
const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +371,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326371
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
327372
return true;
328373

329-
// These instructions are potentially expensive even if EXEC = 0.
330-
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
331-
TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
332-
return true;
333-
334-
++NumInstr;
335-
if (NumInstr >= SkipThreshold)
374+
if (!CostModel.isProfitable(MI))
336375
return true;
337376
}
338377
}
339378

340379
return false;
341380
}
381+
} // namespace
342382

343383
// Returns true if the skip branch instruction is removed.
344384
bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
345385
MachineBasicBlock &SrcMBB) {
386+
387+
if (!TII->getSchedModel().hasInstrSchedModel())
388+
return false;
389+
346390
MachineBasicBlock *TrueMBB = nullptr;
347391
MachineBasicBlock *FalseMBB = nullptr;
348392
SmallVector<MachineOperand, 1> Cond;
@@ -351,8 +395,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351395
return false;
352396

353397
// Consider only the forward branches.
354-
if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
355-
mustRetainExeczBranch(*FalseMBB, *TrueMBB))
398+
if (SrcMBB.getNumber() >= TrueMBB->getNumber())
399+
return false;
400+
401+
// Consider only when it is legal and profitable
402+
if (mustRetainExeczBranch(MI, *FalseMBB, *TrueMBB))
356403
return false;
357404

358405
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);

llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
292292
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293293
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
294294
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
295-
; GFX9-NEXT: s_cbranch_execz .LBB5_2
296295
; GFX9-NEXT: ; %bb.1: ; %if.then
297296
; GFX9-NEXT: s_mov_b32 s11, s18
298297
; GFX9-NEXT: s_mov_b32 s10, s17
@@ -301,7 +300,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
301300
; GFX9-NEXT: v_mov_b32_e32 v0, s6
302301
; GFX9-NEXT: v_mov_b32_e32 v1, s19
303302
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
304-
; GFX9-NEXT: .LBB5_2: ; %if.end
303+
; GFX9-NEXT: ; %bb.2: ; %if.end
305304
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
306305
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307306
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -311,7 +310,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
311310
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312311
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
313312
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
314-
; GFX1010-NEXT: s_cbranch_execz .LBB5_2
315313
; GFX1010-NEXT: ; %bb.1: ; %if.then
316314
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
317315
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
@@ -320,7 +318,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
320318
; GFX1010-NEXT: s_mov_b32 s9, s16
321319
; GFX1010-NEXT: s_mov_b32 s8, s7
322320
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
323-
; GFX1010-NEXT: .LBB5_2: ; %if.end
321+
; GFX1010-NEXT: ; %bb.2: ; %if.end
324322
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
325323
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
326324
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -331,7 +329,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
331329
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332330
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
333331
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
334-
; GFX1030-NEXT: s_cbranch_execz .LBB5_2
335332
; GFX1030-NEXT: ; %bb.1: ; %if.then
336333
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
337334
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
@@ -340,7 +337,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
340337
; GFX1030-NEXT: s_mov_b32 s9, s16
341338
; GFX1030-NEXT: s_mov_b32 s8, s7
342339
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
343-
; GFX1030-NEXT: .LBB5_2: ; %if.end
340+
; GFX1030-NEXT: ; %bb.2: ; %if.end
344341
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
345342
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346343
; GFX1030-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/branch-condition-and.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@ define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
1717
; GCN-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v1
1818
; GCN-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
1919
; GCN-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
20-
; GCN-NEXT: s_cbranch_execz .LBB0_2
2120
; GCN-NEXT: ; %bb.1: ; %bb4
2221
; GCN-NEXT: v_mov_b32_e32 v0, 4
2322
; GCN-NEXT: s_mov_b32 m0, -1
2423
; GCN-NEXT: ds_write_b32 v0, v0
2524
; GCN-NEXT: ; divergent unreachable
26-
; GCN-NEXT: .LBB0_2: ; %UnifiedReturnBlock
25+
; GCN-NEXT: ; %bb.2: ; %UnifiedReturnBlock
2726
; GCN-NEXT: s_endpgm
2827
bb:
2928
%tmp = fcmp ogt float %arg, 0.000000e+00

llvm/test/CodeGen/AMDGPU/else.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ end:
3030
; CHECK-NEXT: s_and_b64 exec, exec, [[INIT_EXEC]]
3131
; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
3232
; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
33-
; CHECK-NEXT: s_cbranch_execz
3433
define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) #0 {
3534
main_body:
3635
%cc = icmp sgt i32 %z, 5

llvm/test/CodeGen/AMDGPU/fptoi.i128.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
15781578
; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
15791579
; SDAG-NEXT: .LBB6_4: ; %Flow
15801580
; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
1581+
; SDAG-NEXT: s_cbranch_execz .LBB6_6
15811582
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12
15821583
; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5
15831584
; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
@@ -1589,7 +1590,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
15891590
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
15901591
; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v8
15911592
; SDAG-NEXT: v_mov_b32_e32 v3, v2
1592-
; SDAG-NEXT: ; %bb.6: ; %Flow1
1593+
; SDAG-NEXT: .LBB6_6: ; %Flow1
15931594
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
15941595
; SDAG-NEXT: .LBB6_7: ; %Flow2
15951596
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
@@ -1929,6 +1930,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
19291930
; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
19301931
; SDAG-NEXT: .LBB7_4: ; %Flow
19311932
; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
1933+
; SDAG-NEXT: s_cbranch_execz .LBB7_6
19321934
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12
19331935
; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5
19341936
; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
@@ -1940,7 +1942,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
19401942
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
19411943
; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v8
19421944
; SDAG-NEXT: v_mov_b32_e32 v3, v2
1943-
; SDAG-NEXT: ; %bb.6: ; %Flow1
1945+
; SDAG-NEXT: .LBB7_6: ; %Flow1
19441946
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
19451947
; SDAG-NEXT: .LBB7_7: ; %Flow2
19461948
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]

llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@ define amdgpu_ps void @i1_copy_from_loop(ptr addrspace(8) inreg %rsrc, i32 %tid)
3636
; SI-NEXT: v_cmp_le_f32_e32 vcc, 0, v1
3737
; SI-NEXT: s_mov_b64 s[8:9], -1
3838
; SI-NEXT: s_and_saveexec_b64 s[12:13], vcc
39+
; SI-NEXT: s_cbranch_execz .LBB0_6
3940
; SI-NEXT: ; %bb.5: ; %end.loop
4041
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
4142
; SI-NEXT: s_add_i32 s14, s14, 1
4243
; SI-NEXT: s_xor_b64 s[8:9], exec, -1
43-
; SI-NEXT: ; %bb.6: ; %Flow1
44+
; SI-NEXT: .LBB0_6: ; %Flow1
4445
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
4546
; SI-NEXT: s_or_b64 exec, exec, s[12:13]
4647
; SI-NEXT: s_branch .LBB0_2

llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir renamed to llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
33

44
---
55

66
name: skip_execz_flat
77
body: |
88
; CHECK-LABEL: name: skip_execz_flat
99
; CHECK: bb.0:
10-
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
11-
; CHECK-NEXT: {{ $}}
12-
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
10+
; CHECK-NEXT: successors: %bb.1(0x7fffffff)
1311
; CHECK-NEXT: {{ $}}
1412
; CHECK-NEXT: bb.1:
1513
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -20,7 +18,7 @@ body: |
2018
; CHECK-NEXT: bb.2:
2119
; CHECK-NEXT: S_ENDPGM 0
2220
bb.0:
23-
successors: %bb.1, %bb.2
21+
successors: %bb.1(0x70000000), %bb.2(0x00000001)
2422
S_CBRANCH_EXECZ %bb.2, implicit $exec
2523
2624
bb.1:
@@ -38,9 +36,7 @@ name: skip_execz_mubuf
3836
body: |
3937
; CHECK-LABEL: name: skip_execz_mubuf
4038
; CHECK: bb.0:
41-
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
42-
; CHECK-NEXT: {{ $}}
43-
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
39+
; CHECK-NEXT: successors: %bb.1(0x7fffffff)
4440
; CHECK-NEXT: {{ $}}
4541
; CHECK-NEXT: bb.1:
4642
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -51,7 +47,7 @@ body: |
5147
; CHECK-NEXT: bb.2:
5248
; CHECK-NEXT: S_ENDPGM 0
5349
bb.0:
54-
successors: %bb.1, %bb.2
50+
successors: %bb.1(0x70000000), %bb.2(0x00000001)
5551
S_CBRANCH_EXECZ %bb.2, implicit $exec
5652
5753
bb.1:
@@ -69,9 +65,7 @@ name: skip_execz_ds
6965
body: |
7066
; CHECK-LABEL: name: skip_execz_ds
7167
; CHECK: bb.0:
72-
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
73-
; CHECK-NEXT: {{ $}}
74-
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
68+
; CHECK-NEXT: successors: %bb.1(0x7fffffff)
7569
; CHECK-NEXT: {{ $}}
7670
; CHECK-NEXT: bb.1:
7771
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -82,7 +76,7 @@ body: |
8276
; CHECK-NEXT: bb.2:
8377
; CHECK-NEXT: S_ENDPGM 0
8478
bb.0:
85-
successors: %bb.1, %bb.2
79+
successors: %bb.1(0x70000000), %bb.2(0x00000001)
8680
S_CBRANCH_EXECZ %bb.2, implicit $exec
8781
8882
bb.1:

llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
33

44
---
55
name: skip_waitcnt_vscnt

llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
33

44
---
55
name: skip_wait_loadcnt

llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
33
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
44

55
---

llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,34 @@
1-
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass si-pre-emit-peephole %s -o - | FileCheck %s
23

34
---
45

5-
# CHECK-LABEL: name: no_count_dbg_value
6-
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
7-
# CHECK-NOT: S_CBRANCH_EXECZ
86
name: no_count_dbg_value
97
body: |
8+
; CHECK-LABEL: name: no_count_dbg_value
9+
; CHECK: bb.0:
10+
; CHECK-NEXT: successors: %bb.1(0x40000000)
11+
; CHECK-NEXT: {{ $}}
12+
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: bb.1:
15+
; CHECK-NEXT: successors: %bb.2(0x80000000)
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
18+
; CHECK-NEXT: DBG_VALUE
19+
; CHECK-NEXT: DBG_VALUE
20+
; CHECK-NEXT: DBG_VALUE
21+
; CHECK-NEXT: DBG_VALUE
22+
; CHECK-NEXT: DBG_VALUE
23+
; CHECK-NEXT: DBG_VALUE
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: bb.2:
26+
; CHECK-NEXT: successors: %bb.3(0x80000000)
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: bb.3:
31+
; CHECK-NEXT: S_ENDPGM 0
1032
bb.0:
1133
successors: %bb.1, %bb.2
1234

0 commit comments

Comments
 (0)