Skip to content

[MachineSink] Clear kill flags of sunk addressing mode registers #75072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,11 +500,6 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
return false;

// Now we know we can fold the instruction in all its users.
if (UsedRegA)
MRI->clearKillFlags(UsedRegA);
if (UsedRegB)
MRI->clearKillFlags(UsedRegB);

for (auto &[SinkDst, MaybeAM] : SinkInto) {
MachineInstr *New = nullptr;
LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
Expand All @@ -527,9 +522,25 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
New = &*std::prev(InsertPt);
if (!New->getDebugLoc())
New->setDebugLoc(SinkDst->getDebugLoc());

// The operand registers of the "sunk" instruction have their live range
// extended and their kill flags may no longer be correct. Conservatively
// clear the kill flags.
if (UsedRegA)
MRI->clearKillFlags(UsedRegA);
if (UsedRegB)
MRI->clearKillFlags(UsedRegB);
} else {
// Fold instruction into the addressing mode of a memory instruction.
New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

// The registers of the addressing mode may have their live range extended
// and their kill flags may no longer be correct. Conservatively clear the
// kill flags.
if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
MRI->clearKillFlags(R);
if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
MRI->clearKillFlags(R);
}
LLVM_DEBUG(dbgs() << "yielding"; New->dump());
// Clear the StoreInstrCache, since we may invalidate it by erasing.
Expand Down
193 changes: 193 additions & 0 deletions llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s

# Test that the "killed" flags are cleared in the ORRWrs and SUBSWrr instructions
# in @f and @g, respectively.

--- |
source_filename = "crash.ll"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux"

define i32 @f(ptr %image, i32 %i) {
entry:
%add = add i32 %i, 1
%idx = zext i32 %add to i64
br label %A

A: ; preds = %B, %A, %entry
%sunkaddr = getelementptr i8, ptr %image, i64 %idx
%0 = load i8, ptr %sunkaddr, align 1
%cmp153 = icmp eq i8 %0, 0
br i1 %cmp153, label %B, label %A

B: ; preds = %A
store i32 0, ptr %image, align 1
br label %A
}

; %add (= %i + %j) and %neg (= 0 - %i) are both computed in the entry block.
; %add is passed to @h in loop block A, and %neg is passed to @h in block B,
; so both values are only consumed inside the loop.
define i32 @g(i32 %i, i32 %j) {
entry:
%add = add i32 %i, %j
%neg = sub i32 0, %i
br label %A

A: ; preds = %B, %A, %entry
%0 = call i8 @h(i32 %add)
%c = icmp eq i8 %0, 0
br i1 %c, label %B, label %A

B: ; preds = %A
%1 = call i8 @h(i32 %neg)
br label %A
}

declare i8 @h(i32)

...
---
# Machine function for @f. In bb.0, %3 (the ADDWri result) is widened through
# ORRWrs + SUBREG_TO_REG into %0, and %0 is only used as the register offset of
# the LDRBBroX load in bb.1. The CHECK lines expect that load to become an
# LDRBBroW that reads %3 directly inside the loop, and expect the ORRWrs left
# in bb.0 to read %3 without a "killed" flag (the input below has
# "killed %3" on it).
name: f
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr64, preferred-register: '' }
- { id: 1, class: gpr64common, preferred-register: '' }
- { id: 2, class: gpr32common, preferred-register: '' }
- { id: 3, class: gpr32common, preferred-register: '' }
- { id: 4, class: gpr32, preferred-register: '' }
- { id: 5, class: gpr32, preferred-register: '' }
- { id: 6, class: gpr32, preferred-register: '' }
liveins:
- { reg: '$x0', virtual-reg: '%1' }
- { reg: '$w1', virtual-reg: '%2' }
body: |
; CHECK-LABEL: name: f
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $x0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 1, 0
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWri]], 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.A:
; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LDRBBroW:%[0-9]+]]:gpr32 = LDRBBroW [[COPY1]], [[ADDWri]], 0, 0 :: (load (s8) from %ir.sunkaddr)
; CHECK-NEXT: CBNZW killed [[LDRBBroW]], %bb.1
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.B:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
; CHECK-NEXT: STRWui [[COPY2]], [[COPY1]], 0 :: (store (s32) into %ir.image, align 1)
; CHECK-NEXT: B %bb.1
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $x0, $w1

%2:gpr32common = COPY $w1
%1:gpr64common = COPY $x0
%3:gpr32common = ADDWri %2, 1, 0
%4:gpr32 = ORRWrs $wzr, killed %3, 0
%0:gpr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32

bb.1.A:
successors: %bb.2(0x30000000), %bb.1(0x50000000)

%5:gpr32 = LDRBBroX %1, %0, 0, 0 :: (load (s8) from %ir.sunkaddr)
CBNZW killed %5, %bb.1
B %bb.2

bb.2.B:
successors: %bb.1(0x80000000)

%6:gpr32 = COPY $wzr
STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1)
B %bb.1
...
---
# Machine function for @g. %2 (the copy of $w0) is read by both ADDWrr and
# SUBSWrr in bb.0, and the SUBSWrr carries a "killed" flag on it. The CHECK
# lines expect the ADDWrr to be moved into bb.1, where it writes $w0 directly
# in place of the "$w0 = COPY %0", which makes %2 (and %3) live into the loop.
# The SUBSWrr remaining in bb.0 must therefore no longer mark %2 as killed, and
# the sunk ADDWrr must not mark %3 as killed.
name: g
alignment: 4
registers:
- { id: 0, class: gpr32all, preferred-register: '' }
- { id: 1, class: gpr32all, preferred-register: '' }
- { id: 2, class: gpr32, preferred-register: '' }
- { id: 3, class: gpr32, preferred-register: '' }
- { id: 4, class: gpr32, preferred-register: '' }
- { id: 5, class: gpr32, preferred-register: '' }
- { id: 6, class: gpr32, preferred-register: '' }
- { id: 7, class: gpr32, preferred-register: '' }
- { id: 8, class: gpr32common, preferred-register: '' }
- { id: 9, class: gpr32all, preferred-register: '' }
liveins:
- { reg: '$w0', virtual-reg: '%2' }
- { reg: '$w1', virtual-reg: '%3' }
body: |
; CHECK-LABEL: name: g
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY2]], [[COPY1]], implicit-def dead $nzcv
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[SUBSWrr]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.A:
; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $w0 = ADDWrr [[COPY1]], [[COPY]]
; CHECK-NEXT: BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: $wzr = ANDSWri [[COPY4]], 7, implicit-def $nzcv
; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.B:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $w0 = COPY [[COPY3]]
; CHECK-NEXT: BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: B %bb.1
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $w0, $w1

%3:gpr32 = COPY $w1
%2:gpr32 = COPY $w0
%4:gpr32 = ADDWrr %2, killed %3
%0:gpr32all = COPY %4
%5:gpr32 = COPY $wzr
%6:gpr32 = SUBSWrr %5, killed %2, implicit-def dead $nzcv
%1:gpr32all = COPY %6

bb.1.A:
successors: %bb.2(0x30000000), %bb.1(0x50000000)

ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
$w0 = COPY %0
BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
%7:gpr32 = COPY $w0
$wzr = ANDSWri %7, 7, implicit-def $nzcv
Bcc 1, %bb.1, implicit $nzcv
B %bb.2

bb.2.B:
successors: %bb.1(0x80000000)

ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
$w0 = COPY %1
BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
B %bb.1

...