diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
index 0f20a33f3a755..7990997835d01 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -35,11 +35,23 @@ struct LegalityQuery;
 class MachineRegisterInfo;
 namespace GISelAddressing {
 /// Helper struct to store a base, index and offset that forms an address
-struct BaseIndexOffset {
+class BaseIndexOffset {
+private:
   Register BaseReg;
   Register IndexReg;
-  int64_t Offset = 0;
-  bool IsIndexSignExt = false;
+  std::optional<int64_t> Offset;
+
+public:
+  BaseIndexOffset() = default;
+  Register getBase() { return BaseReg; }
+  Register getBase() const { return BaseReg; }
+  Register getIndex() { return IndexReg; }
+  Register getIndex() const { return IndexReg; }
+  void setBase(Register NewBase) { BaseReg = NewBase; }
+  void setIndex(Register NewIndex) { IndexReg = NewIndex; }
+  void setOffset(std::optional<int64_t> NewOff) { Offset = NewOff; }
+  bool hasValidOffset() const { return Offset.has_value(); }
+  int64_t getOffset() const { return *Offset; }
 };
 
 /// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
@@ -89,7 +101,7 @@ class LoadStoreOpt : public MachineFunctionPass {
     // order stores are writing to incrementing consecutive addresses. So when
    // we walk the block in reverse order, the next eligible store must write to
    // an offset one store width lower than CurrentLowestOffset.
-    uint64_t CurrentLowestOffset;
+    int64_t CurrentLowestOffset;
     SmallVector<GStore *> Stores;
     // A vector of MachineInstr/unsigned pairs to denote potential aliases that
     // need to be checked before the candidate is considered safe to merge. The
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 246aa88b09acf..ee499c41c558c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                 MachineRegisterInfo &MRI) {
   BaseIndexOffset Info;
   Register PtrAddRHS;
-  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
-    Info.BaseReg = Ptr;
-    Info.IndexReg = Register();
-    Info.IsIndexSignExt = false;
+  Register BaseReg;
+  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
+    Info.setBase(Ptr);
+    Info.setOffset(0);
     return Info;
   }
-
+  Info.setBase(BaseReg);
   auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
   if (RHSCst)
-    Info.Offset = RHSCst->Value.getSExtValue();
+    Info.setOffset(RHSCst->Value.getSExtValue());
 
   // Just recognize a simple case for now. In future we'll need to match
   // indexing patterns for base + index + constant.
-  Info.IndexReg = PtrAddRHS;
-  Info.IsIndexSignExt = false;
+  Info.setIndex(PtrAddRHS);
   return Info;
 }
 
@@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
   BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
   BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
 
-  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+  if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
     return false;
 
   int64_t Size1 = LdSt1->getMemSize();
   int64_t Size2 = LdSt2->getMemSize();
 
   int64_t PtrDiff;
-  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
-    PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+  if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
+      BasePtr1.hasValidOffset()) {
+    PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
     // If the size of memory access is unknown, do not use it to do analysis.
     // One example of unknown size memory access is to load/store scalable
     // vector objects on the stack.
@@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
   // able to calculate their relative offset if at least one arises
   // from an alloca. However, these allocas cannot overlap and we
   // can infer there is no alias.
-  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
-  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+  auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
+  auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
   if (!Base0Def || !Base1Def)
     return false; // Couldn't tell anything.
 
@@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
   Register StoreAddr = StoreMI.getPointerReg();
   auto BIO = getPointerInfo(StoreAddr, *MRI);
-  Register StoreBase = BIO.BaseReg;
-  uint64_t StoreOffCst = BIO.Offset;
+  Register StoreBase = BIO.getBase();
   if (C.Stores.empty()) {
+    C.BasePtr = StoreBase;
+    if (!BIO.hasValidOffset()) {
+      C.CurrentLowestOffset = 0;
+    } else {
+      C.CurrentLowestOffset = BIO.getOffset();
+    }
     // This is the first store of the candidate.
     // If the offset can't possibly allow for a lower addressed store with the
    // same base, don't bother adding it.
-    if (StoreOffCst < ValueTy.getSizeInBytes())
+    if (BIO.hasValidOffset() &&
+        BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
       return false;
-    C.BasePtr = StoreBase;
-    C.CurrentLowestOffset = StoreOffCst;
     C.Stores.emplace_back(&StoreMI);
     LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                       << StoreMI);
@@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
   // writes to the next lowest adjacent address.
   if (C.BasePtr != StoreBase)
     return false;
-  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
-      static_cast<uint64_t>(StoreOffCst))
+  // If we don't have a valid offset, we can't guarantee that this store is
+  // writing to an adjacent offset.
+  if (!BIO.hasValidOffset())
+    return false;
+  if ((C.CurrentLowestOffset -
+       static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
     return false;
 
   // This writes to an adjacent address. Allow it.
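
Reader's note: the functional core of the change above is that BaseIndexOffset::Offset became std::optional<int64_t>, so an address whose offset is not a provable constant (e.g. base + variable index) is no longer conflated with an address at offset 0. The following is a minimal standalone sketch of the invariant that the new hasValidOffset() checks enforce; it is plain C++ with invented names (AddrInfo, adjacentBelow), not the LLVM API:

// Standalone sketch, not the LLVM API: an unknown offset must be nullopt,
// never a default of 0, or adjacency checks give wrong answers.
#include <cassert>
#include <cstdint>
#include <optional>

struct AddrInfo {
  int Base = 0;                  // stand-in for the base register
  std::optional<int64_t> Offset; // nullopt: offset is not a known constant
};

// Walking stores in reverse order, a store of Size bytes at Next extends the
// merge run only if both offsets are known constants and Next writes exactly
// Size bytes below the current lowest store.
static bool adjacentBelow(const AddrInfo &Lowest, const AddrInfo &Next,
                          int64_t Size) {
  if (Lowest.Base != Next.Base)
    return false;
  if (!Lowest.Offset || !Next.Offset) // unknown offset: adjacency unprovable
    return false;
  return *Lowest.Offset - Size == *Next.Offset;
}

int main() {
  AddrInfo AtFour{/*Base=*/0, /*Offset=*/4};
  AddrInfo AtZero{/*Base=*/0, /*Offset=*/0};
  AddrInfo Variable{/*Base=*/0, /*Offset=*/std::nullopt}; // base + index reg

  assert(adjacentBelow(AtFour, AtZero, 4));    // [0,4) below [4,8): mergeable
  assert(!adjacentBelow(AtFour, Variable, 4)); // before the fix, this offset
                                               // defaulted to 0 and matched
  return 0;
}

The pre-fix code read BIO.Offset directly, so the Variable case above behaved exactly like AtZero, which is the miscompile the new test below exercises.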
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
index 07744dada4f1f..b1166e683ec74 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
@@ -323,3 +323,22 @@ define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_p
   store i32 14, ptr %addr4
   ret i32 %safeld
 }
+
+@G = external global [10 x i32]
+
+define void @invalid_zero_offset_no_merge(i64 %0) {
+; CHECK-LABEL: invalid_zero_offset_no_merge:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x8, _G@GOTPAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    ldr x8, [x8, _G@GOTPAGEOFF]
+; CHECK-NEXT:    str wzr, [x8, x0, lsl #2]
+; CHECK-NEXT:    str wzr, [x8, #4]
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
+  %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
+  store i32 0, ptr %2, align 4
+  store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
index e98e1ce599f2f..69457a8cc0c19 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
@@ -162,6 +162,13 @@
     ret void
   }
 
+  @G = external global [10 x i32]
+  define void @invalid_zero_offset_no_merge(i64 %0) {
+    %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
+    store i32 0, ptr %2, align 4
+    store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
+    ret void
+  }
 ...
 ---
 name:            test_simple_2xs8
@@ -582,13 +589,11 @@ liveins:
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
+# The store to ptr2 prevents merging into a single store.
+# We can still merge the stores into addr1 and addr2.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0, $x1
-
-    ; The store to ptr2 prevents merging into a single store.
-    ; We can still merge the stores into addr1 and addr2.
-
     ; CHECK-LABEL: name: test_alias_4xs16
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
@@ -639,10 +644,10 @@ liveins:
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
+# Here store of 5 and 9 can be merged, others have aliasing barriers.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0, $x1, $x2
-    ; Here store of 5 and 9 can be merged, others have aliasing barriers.
     ; CHECK-LABEL: name: test_alias2_4xs16
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK-NEXT: {{  $}}
@@ -698,12 +703,11 @@ liveins:
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
+# No merging can be done here.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0, $x1, $x2, $x3
-
-    ; No merging can be done here.
 
     ; CHECK-LABEL: name: test_alias3_4xs16
     ; CHECK: liveins: $x0, $x1, $x2, $x3
     ; CHECK-NEXT: {{  $}}
@@ -767,12 +771,10 @@ stack:
   - { id: 0, name: a1, size: 24, alignment: 4 }
   - { id: 1, name: a2, size: 4, alignment: 4 }
 machineFunctionInfo: {}
+# Can merge because the load is from a different alloca and can't alias.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0
-
-    ; Can merge because the load is from a different alloca and can't alias.
-
     ; CHECK-LABEL: name: test_alias_allocas_2xs32
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
@@ -826,3 +828,43 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            invalid_zero_offset_no_merge
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1 (%ir-block.1):
+    liveins: $x0
+
+    ; CHECK-LABEL: name: invalid_zero_offset_no_merge
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64)
+    ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @G
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[SHL]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.2)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[GV]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:_(s64) = COPY $x0
+    %9:_(s64) = G_CONSTANT i64 2
+    %3:_(s64) = G_SHL %0, %9(s64)
+    %1:_(p0) = G_GLOBAL_VALUE @G
+    %4:_(p0) = G_PTR_ADD %1, %3(s64)
+    %6:_(s32) = G_CONSTANT i32 0
+    G_STORE %6(s32), %4(p0) :: (store (s32) into %ir.2)
+    %8:_(s64) = G_CONSTANT i64 4
+    %7:_(p0) = nuw G_PTR_ADD %1, %8(s64)
+    G_STORE %6(s32), %7(p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
+    RET_ReallyLR
+
+...
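
Side note on the companion type change (uint64_t to int64_t for CurrentLowestOffset, plus the static_cast<int64_t> around getSizeInBytes()): constant G_PTR_ADD offsets may be negative, and in a mixed signed/unsigned C++ expression the signed operand is converted to unsigned, silently turning a small negative offset into a huge value. A small self-contained illustration of that conversion rule, independent of the patch and with names local to the example:

// Standalone sketch: why offset arithmetic and comparisons stay in int64_t.
#include <cstdint>
#include <iostream>

int main() {
  int64_t Offset = -4; // a legal negative constant store offset
  uint64_t Size = 4;   // memory sizes come back from the API as unsigned

  // Mixed comparison: Offset converts to 2^64 - 4, so this is false.
  bool Mixed = Offset < Size;
  // All-signed comparison keeps the intended meaning: -4 < 4 is true.
  bool AllSigned = Offset < static_cast<int64_t>(Size);

  std::cout << "mixed: " << Mixed << ", signed: " << AllSigned << "\n";
  return 0;
}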