From e9fcb408f9fb6106502ca3ff3c8863b825367994 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 11 Dec 2023 16:31:35 +0000 Subject: [PATCH 1/3] [MachineSink] Clear kill flags of sunk addressing mode registers When doing sink-and-fold, the MachineSink clears the "killed" flags of the operands of the sunk (and deleted) instruction. However, this is not always sufficient. In some cases we can create the new load/store instruction with operands other than the ones present in the deleted instruction. One such example is folding a zero word extend into a memory load on AArch64. The zero-extend is represented by a pair of instructions - `MOV` (i.e. `ORRwrs`) followed by a `SUBREG_TO_REG`. The `SUBREG_TO_REG` is deleted (it is the sunk instruction), but the new load instruction mentions operands "killed" in the `MOV`, which is no longer correct. To fix this, clear the "killed" flags of the registers paritipating in the adressing mode. --- llvm/lib/CodeGen/MachineSink.cpp | 13 +- .../sink-and-fold-clear-kill-flags.mir | 134 ++++++++++++++++++ 2 files changed, 142 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 83d775055dfd7..6441b456cc39d 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -500,11 +500,6 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, return false; // Now we know we can fold the instruction in all its users. - if (UsedRegA) - MRI->clearKillFlags(UsedRegA); - if (UsedRegB) - MRI->clearKillFlags(UsedRegB); - for (auto &[SinkDst, MaybeAM] : SinkInto) { MachineInstr *New = nullptr; LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into"; @@ -530,6 +525,14 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, } else { // Fold instruction into the addressing mode of a memory instruction. New = TII->emitLdStWithAddr(*SinkDst, MaybeAM); + + // The registers of the addressing mode may have their live range extended + // and their kill flags may no longer be correct. Conservatively clear the + // kill flags. + if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual()) + MRI->clearKillFlags(R); + if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual()) + MRI->clearKillFlags(R); } LLVM_DEBUG(dbgs() << "yielding"; New->dump()); // Clear the StoreInstrCache, since we may invalidate it by erasing. diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir new file mode 100644 index 0000000000000..effc346848b75 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir @@ -0,0 +1,134 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s + +# Test that the "killed" flags is not present in the ORRWrs instruction below +--- | + source_filename = "crash.ll" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-linux" + + define i32 @f(ptr %image, i32 %i) { + entry: + %add = add i32 %i, 1 + %idx = zext i32 %add to i64 + br label %A + + A: ; preds = %B, %A, %entry + %sunkaddr = getelementptr i8, ptr %image, i64 %idx + %0 = load i8, ptr %sunkaddr, align 1 + %cmp153 = icmp eq i8 %0, 0 + br i1 %cmp153, label %B, label %A + + B: ; preds = %A + store i32 0, ptr %image, align 1 + br label %A + } + +... +--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gpr64, preferred-register: '' } + - { id: 1, class: gpr64common, preferred-register: '' } + - { id: 2, class: gpr32common, preferred-register: '' } + - { id: 3, class: gpr32common, preferred-register: '' } + - { id: 4, class: gpr32, preferred-register: '' } + - { id: 5, class: gpr32, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%1' } + - { reg: '$w1', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 1, 0 + ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWri]], 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.A: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LDRBBroW:%[0-9]+]]:gpr32 = LDRBBroW [[COPY1]], [[ADDWri]], 0, 0 :: (load (s8) from %ir.sunkaddr) + ; CHECK-NEXT: CBNZW killed [[LDRBBroW]], %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.B: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: STRWui [[COPY2]], [[COPY1]], 0 :: (store (s32) into %ir.image, align 1) + ; CHECK-NEXT: B %bb.1 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $x0, $w1 + + %2:gpr32common = COPY $w1 + %1:gpr64common = COPY $x0 + %3:gpr32common = ADDWri %2, 1, 0 + %4:gpr32 = ORRWrs $wzr, killed %3, 0 + %0:gpr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32 + + bb.1.A: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + + %5:gpr32 = LDRBBroX %1, %0, 0, 0 :: (load (s8) from %ir.sunkaddr) + CBNZW killed %5, %bb.1 + B %bb.2 + + bb.2.B: + successors: %bb.1(0x80000000) + + %6:gpr32 = COPY $wzr + STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1) + B %bb.1 + +... From 3e487cc634ff4fe7744e10678cf1b4f8731b5115 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 11 Dec 2023 22:15:40 +0000 Subject: [PATCH 2/3] [fixup] Remove some cruft from a test --- .../sink-and-fold-clear-kill-flags.mir | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir index effc346848b75..732af0890d718 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir +++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir @@ -28,22 +28,7 @@ --- name: f alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false registers: - { id: 0, class: gpr64, preferred-register: '' } - { id: 1, class: gpr64common, preferred-register: '' } @@ -55,34 +40,6 @@ registers: liveins: - { reg: '$x0', virtual-reg: '%1' } - { reg: '$w1', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} body: | ; CHECK-LABEL: name: f ; CHECK: bb.0.entry: From d23a5e40dfcdfb34c60e416c6ef3ee37248f43a2 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 12 Dec 2023 11:28:31 +0000 Subject: [PATCH 3/3] [fixup] Clear "killed" flags when sinking into a copy as well --- llvm/lib/CodeGen/MachineSink.cpp | 8 ++ .../sink-and-fold-clear-kill-flags.mir | 104 +++++++++++++++++- 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 6441b456cc39d..e7e8f60268348 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -522,6 +522,14 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, New = &*std::prev(InsertPt); if (!New->getDebugLoc()) New->setDebugLoc(SinkDst->getDebugLoc()); + + // The operand registers of the "sunk" instruction have their live range + // extended and their kill flags may no longer be correct. Conservatively + // clear the kill flags. + if (UsedRegA) + MRI->clearKillFlags(UsedRegA); + if (UsedRegB) + MRI->clearKillFlags(UsedRegB); } else { // Fold instruction into the addressing mode of a memory instruction. New = TII->emitLdStWithAddr(*SinkDst, MaybeAM); diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir index 732af0890d718..1303776230f1a 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir +++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir @@ -1,7 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 # RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s -# Test that the "killed" flags is not present in the ORRWrs instruction below +# Test that the "killed" flags are cleared in the ORRWrs and SUBSWrr instructions +# in 'f and @g, respectively + --- | source_filename = "crash.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -24,6 +26,24 @@ br label %A } + define i32 @g(i32 %i, i32 %j) { + entry: + %add = add i32 %i, %j + %neg = sub i32 0, %i + br label %A + + A: ; preds = %B, %A, %entry + %0 = call i8 @h(i32 %add) + %c = icmp eq i8 %0, 0 + br i1 %c, label %B, label %A + + B: ; preds = %A + %1 = call i8 @h(i32 %neg) + br label %A + } + + declare i8 @h(i32) + ... --- name: f @@ -87,5 +107,87 @@ body: | %6:gpr32 = COPY $wzr STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1) B %bb.1 +... +--- +name: g +alignment: 4 +registers: + - { id: 0, class: gpr32all, preferred-register: '' } + - { id: 1, class: gpr32all, preferred-register: '' } + - { id: 2, class: gpr32, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32, preferred-register: '' } + - { id: 5, class: gpr32, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: gpr32, preferred-register: '' } + - { id: 8, class: gpr32common, preferred-register: '' } + - { id: 9, class: gpr32all, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%2' } + - { reg: '$w1', virtual-reg: '%3' } +body: | + ; CHECK-LABEL: name: g + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY2]], [[COPY1]], implicit-def dead $nzcv + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[SUBSWrr]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.A: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $w0 = ADDWrr [[COPY1]], [[COPY]] + ; CHECK-NEXT: BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: $wzr = ANDSWri [[COPY4]], 7, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.B: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $w0 = COPY [[COPY3]] + ; CHECK-NEXT: BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.1 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $w0, $w1 + + %3:gpr32 = COPY $w1 + %2:gpr32 = COPY $w0 + %4:gpr32 = ADDWrr %2, killed %3 + %0:gpr32all = COPY %4 + %5:gpr32 = COPY $wzr + %6:gpr32 = SUBSWrr %5, killed %2, implicit-def dead $nzcv + %1:gpr32all = COPY %6 + + bb.1.A: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $w0 = COPY %0 + BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + %7:gpr32 = COPY $w0 + $wzr = ANDSWri %7, 7, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2.B: + successors: %bb.1(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $w0 = COPY %1 + BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.1 ...