Skip to content

Commit 378b159

Browse files
committed
[AArch64] Prevent (shl (srl x, c1), c2) -> (and (shift x, c3)) when the result is used as a load address
Currently, `DAGCombiner` replaces bitwise operations of the form `(shl (srl x, c1), c2)` with an `AND`. However, in certain cases, such as `(shl (srl x, c1), 2)`, the transformation to `AND` is not needed when the result is used as the address of a `Load`. Consider the following case:
```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```
In this case, we can remove the `AND` by changing the address operand of the `LDR` to `[X2, X8, LSL #2]` and changing the right-shift amount from 56 to 58. After the change:
```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```
This patch checks whether a `(shl (srl x, c1), 2)` operation that feeds a `load` address can be kept as-is instead of being transformed into an `AND`.
1 parent 5251d57 commit 378b159

File tree

2 files changed

+53
-9
lines changed

2 files changed

+53
-9
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+47-1
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ namespace {
563563
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
564564

565565
SDValue XformToShuffleWithZero(SDNode *N);
566+
bool isCanBeLoadedWithLsl(SDNode *N);
566567
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
567568
const SDLoc &DL,
568569
SDNode *N,
@@ -9893,7 +9894,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
98939894
// folding this will increase the total number of instructions.
98949895
if (N0.getOpcode() == ISD::SRL &&
98959896
(N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9896-
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9897+
TLI.shouldFoldConstantShiftPairToMask(N, Level) &&
9898+
!isCanBeLoadedWithLsl(N)) {
98979899
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
98989900
/*AllowUndefs*/ false,
98999901
/*AllowTypeMismatch*/ true)) {
@@ -28338,6 +28340,50 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
2833828340
return false;
2833928341
}
2834028342

28343+
/// Return true if \p N is a single-use `(shl (srl x, c1), 2)` whose first user
/// is an ADD, whose own first user is a load, and the target accepts both the
/// addressing-mode query and the pre-increment indexed-load query made below.
/// visitSHL uses a true result to skip folding the shift pair into a mask.
///
/// \param N the SHL node being examined.
/// \returns false as soon as any of the structural or target checks fails.
bool DAGCombiner::isCanBeLoadedWithLsl(SDNode *N) {
28344+
// Only consider a shift whose result has exactly one use.
if (!N->hasOneUse())
28345+
return false;
28346+
28347+
// SrlAmt receives the matched SRL constant; it is not read again in this
// function.
APInt SrlAmt;
28348+
// Match exactly (shl (srl <any>, <constant>), 2); other SHL amounts fall
// through to the final `return false`.
if (sd_match(N,
28349+
m_Shl(m_Srl(m_Value(), m_ConstInt(SrlAmt)), m_SpecificInt(2)))) {
28350+
// Known bits of the whole (shl (srl x, c1), 2) value; bail out if their
// width does not match the scalar width of the value type.
28351+
SDValue ShlV = SDValue(N, 0);
28352+
unsigned RegSize = ShlV.getValueType().getScalarSizeInBits();
28353+
KnownBits Known = DAG.computeKnownBits(ShlV);
28354+
if (Known.getBitWidth() != RegSize)
28355+
return false;
28356+
28357+
// Check for the shape (load (add x, (shl (srl x, c1), 2))): the single user
// of the SHL must be an ADD.
28358+
SDNode *User = N->use_begin().getUse().getUser();
28359+
if (!User || User->getOpcode() != ISD::ADD)
28360+
return false;
28361+
28362+
// NOTE(review): only the first entry of the ADD's use list is inspected, and
// the ADD is not checked for having any (or only one) use - confirm this is
// safe for every ADD reachable here.
SDNode *Load = User->use_begin().getUse().getUser();
28363+
if (!Load || Load->getOpcode() != ISD::LOAD)
28364+
return false;
28365+
28366+
auto LoadN = dyn_cast<LoadSDNode>(Load);
28367+
if (!LoadN)
28368+
return false;
28369+
28370+
// Ask the target whether base register + offset is a legal addressing mode
// for this load's memory type and address space, using the largest value the
// shifted index can take (per the known bits) as the offset.
// NOTE(review): getZExtValue() presumably requires the maximum to fit in 64
// bits - confirm for wider value types.
TargetLoweringBase::AddrMode AM;
28371+
AM.HasBaseReg = true;
28372+
AM.BaseOffs = Known.getMaxValue().getZExtValue();
28373+
EVT VT = LoadN->getMemoryVT();
28374+
unsigned AS = LoadN->getAddressSpace();
28375+
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
28376+
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
28377+
return false;
28378+
28379+
// Additionally require that the target reports pre-increment indexed loads
// of the memory type as legal.
if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT))
28380+
return false;
28381+
return true;
28382+
}
28383+
28384+
// Not a (shl (srl x, c1), 2) pattern.
return false;
28385+
}
28386+
2834128387
/// This is the entry point for the file.
2834228388
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
2834328389
CodeGenOptLevel OptLevel) {

llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll

+6-8
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,8 @@ define i32 @load_shr63(i64 %a, i64 %b, ptr %table) {
1919
; CHECK-LABEL: load_shr63:
2020
; CHECK: // %bb.0: // %entry
2121
; CHECK-NEXT: mul x8, x1, x0
22-
; CHECK-NEXT: lsr x8, x8, #61
23-
; CHECK-NEXT: and x8, x8, #0x4
24-
; CHECK-NEXT: ldr w0, [x2, x8]
22+
; CHECK-NEXT: lsr x8, x8, #63
23+
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
2524
; CHECK-NEXT: ret
2625
entry:
2726
%mul = mul i64 %b, %a
@@ -35,8 +34,8 @@ define i32 @load_shr2(i64 %a, i64 %b, ptr %table) {
3534
; CHECK-LABEL: load_shr2:
3635
; CHECK: // %bb.0: // %entry
3736
; CHECK-NEXT: mul x8, x1, x0
38-
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
39-
; CHECK-NEXT: ldr w0, [x2, x8]
37+
; CHECK-NEXT: lsr x8, x8, #2
38+
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
4039
; CHECK-NEXT: ret
4140
entry:
4241
%mul = mul i64 %b, %a
@@ -50,9 +49,8 @@ define i32 @load_shr1(i64 %a, i64 %b, ptr %table) {
5049
; CHECK-LABEL: load_shr1:
5150
; CHECK: // %bb.0: // %entry
5251
; CHECK-NEXT: mul x8, x1, x0
53-
; CHECK-NEXT: lsl x8, x8, #1
54-
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
55-
; CHECK-NEXT: ldr w0, [x2, x8]
52+
; CHECK-NEXT: lsr x8, x8, #1
53+
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
5654
; CHECK-NEXT: ret
5755
entry:
5856
%mul = mul i64 %b, %a

0 commit comments

Comments
 (0)