Skip to content

Commit bea3684

Browse files
[AArch64] Allow only LSL to be folded into addressing mode (#69235)
There was an error in decoding the shift type, which allowed shift types other than LSL to be (incorrectly) folded into the addressing mode of a load/store instruction.
1 parent 52db7e2 commit bea3684

File tree

3 files changed

+116
-1
lines changed

3 files changed

+116
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2978,7 +2978,10 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
29782978

29792979
// Don't fold the add if the result would be slower, unless optimising for
29802980
// size.
2981-
int64_t Shift = AddrI.getOperand(3).getImm();
2981+
unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2982+
if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
2983+
return false;
2984+
Shift = AArch64_AM::getShiftValue(Shift);
29822985
if (!OptSize) {
29832986
if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast())
29842987
return false;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -global-isel --aarch64-enable-sink-fold=true < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
; Test that a non-LSL shift cannot be folded into the addressing mode.
7+
; @f stores a zero byte at %p + (%i arithmetically shifted right by 32).
; The CHECK lines expect the shifted add to remain a separate instruction and
; the strb to address memory through the add's result register alone.
; NOTE(review): the function is optsize; the C++ change in this commit shows the
; "slower result" shift-amount check is skipped when optimising for size, which
; is presumably why the attribute is needed to reach the folding decision --
; confirm.
define void @f(ptr %p, i64 %i) optsize {
8+
; CHECK-LABEL: f:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: add x8, x0, x1, asr #32
11+
; CHECK-NEXT: strb wzr, [x8]
12+
; CHECK-NEXT: ret
13+
; The index is produced by an ashr (not a shl), so the address computation
; carries a non-LSL shift.
%d = ashr i64 %i, 32
14+
%a = getelementptr i8, ptr %p, i64 %d
15+
store i8 0, ptr %a
16+
ret void
17+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc --run-pass=machine-sink --aarch64-enable-sink-fold=true %s -o - | FileCheck %s
3+
--- |
4+
source_filename = "../llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll"
5+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6+
target triple = "aarch64-linux"
7+
8+
; IR-level reference for the machine function `f` in this file: stores a zero
; byte at %p + (%i ashr 32). The machine store's memory operand refers to %a
; below (as %ir.a). Attribute group #0 marks the function optsize.
define void @f(ptr %p, i64 %i) #0 {
9+
%d = ashr i64 %i, 32
10+
%a = getelementptr i8, ptr %p, i64 %d
11+
store i8 0, ptr %a, align 1
12+
ret void
13+
}
14+
15+
attributes #0 = { optsize }
16+
17+
...
18+
---
19+
name: f
20+
alignment: 4
21+
exposesReturnsTwice: false
22+
legalized: true
23+
regBankSelected: true
24+
selected: true
25+
failedISel: false
26+
tracksRegLiveness: true
27+
hasWinCFI: false
28+
callsEHReturn: false
29+
callsUnwindInit: false
30+
hasEHCatchret: false
31+
hasEHScopes: false
32+
hasEHFunclets: false
33+
isOutlined: false
34+
debugInstrRef: false
35+
failsVerification: false
36+
tracksDebugUserValues: false
37+
registers:
38+
- { id: 0, class: gpr64, preferred-register: '' }
39+
- { id: 1, class: gpr64, preferred-register: '' }
40+
- { id: 2, class: gpr, preferred-register: '' }
41+
- { id: 3, class: gpr, preferred-register: '' }
42+
- { id: 4, class: gpr64common, preferred-register: '' }
43+
- { id: 5, class: _, preferred-register: '' }
44+
- { id: 6, class: gpr, preferred-register: '' }
45+
- { id: 7, class: gpr64, preferred-register: '' }
46+
liveins:
47+
- { reg: '$x0', virtual-reg: '' }
48+
- { reg: '$x1', virtual-reg: '' }
49+
frameInfo:
50+
isFrameAddressTaken: false
51+
isReturnAddressTaken: false
52+
hasStackMap: false
53+
hasPatchPoint: false
54+
stackSize: 0
55+
offsetAdjustment: 0
56+
maxAlignment: 1
57+
adjustsStack: false
58+
hasCalls: false
59+
stackProtector: ''
60+
functionContext: ''
61+
maxCallFrameSize: 0
62+
cvBytesOfCalleeSavedRegisters: 0
63+
hasOpaqueSPAdjustment: false
64+
hasVAStart: false
65+
hasMustTailInVarArgFunc: false
66+
hasTailCall: false
67+
localFrameSize: 0
68+
savePoint: ''
69+
restorePoint: ''
70+
fixedStack: []
71+
stack: []
72+
entry_values: []
73+
callSites: []
74+
debugValueSubstitutions: []
75+
constants: []
76+
machineFunctionInfo: {}
77+
body: |
78+
bb.1 (%ir-block.0):
79+
liveins: $x0, $x1
80+
81+
; The CHECK lines below match the input instructions one-for-one: the pass
; under test is expected to leave the ADDXrs in place and keep the STRBBui
; addressing through the ADDXrs result with a zero offset.
; CHECK-LABEL: name: f
82+
; CHECK: liveins: $x0, $x1
83+
; CHECK-NEXT: {{ $}}
84+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
85+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
86+
; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY]], [[COPY1]], 160
87+
; CHECK-NEXT: STRBBui $wzr, [[ADDXrs]], 0 :: (store (s8) into %ir.a)
88+
; CHECK-NEXT: RET_ReallyLR
89+
%0:gpr64 = COPY $x0
90+
%1:gpr64 = COPY $x1
91+
; NOTE(review): the shift operand 160 presumably encodes ASR #32 (the .ll test
; named in source_filename expects `asr #32`) -- confirm against the AArch64
; shifter-immediate encoding.
%4:gpr64common = ADDXrs %0, %1, 160
92+
STRBBui $wzr, %4, 0 :: (store (s8) into %ir.a)
93+
RET_ReallyLR
94+
95+
...

0 commit comments

Comments
 (0)