Skip to content

Commit 7fd2a71

Browse files
committed
Move all target-specific AA before BasicAA and fix issues in FlattenCFG and AMDGPUAA
1 parent b8ee0aa commit 7fd2a71

File tree

6 files changed

+124
-15
lines changed

6 files changed

+124
-15
lines changed

llvm/lib/Analysis/AliasAnalysis.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,14 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
752752
AAR.reset(
753753
new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)));
754754

755+
// If available, run an external AA providing callback first. Running a
756+
// target-specific AA early can improve compile time by leveraging
757+
// target-specific knowledge to quickly determine some alias results, thereby
758+
// reducing the workload for BasicAA.
759+
if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
760+
if (WrapperPass->CB)
761+
WrapperPass->CB(*this, F, *AAR);
762+
755763
// BasicAA is always available for function analyses. Also, we add it early
756764
// so that it can trump TBAA results when it proves MustAlias.
757765
// FIXME: TBAA should have an explicit mode to support this and then we
@@ -769,12 +777,6 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
769777
if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
770778
AAR->addAAResult(WrapperPass->getResult());
771779

772-
// If available, run an external AA providing callback over the results as
773-
// well.
774-
if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
775-
if (WrapperPass->CB)
776-
WrapperPass->CB(*this, F, *AAR);
777-
778780
// Analyses don't mutate the IR, so return false.
779781
return false;
780782
}

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,12 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
22022202
// The order in which these are registered determines their priority when
22032203
// being queried.
22042204

2205+
// Add target-specific alias analyses. Running a target-specific AA early can
2206+
// improve compile time by leveraging target-specific knowledge to quickly
2207+
// determine some alias results, thereby reducing the workload for BasicAA.
2208+
if (TM)
2209+
TM->registerDefaultAliasAnalyses(AA);
2210+
22052211
// Next we register the basic alias analysis that provides the majority of
22062212
// per-function local AA logic. This is a stateless, on-demand local set of
22072213
// AA techniques.
@@ -2219,9 +2225,5 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
22192225
if (EnableGlobalAnalyses)
22202226
AA.registerModuleAnalysis<GlobalsAA>();
22212227

2222-
// Add target-specific alias analyses.
2223-
if (TM)
2224-
TM->registerDefaultAliasAnalyses(AA);
2225-
22262228
return AA;
22272229
}

llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,12 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
4949
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
5050
const MemoryLocation &LocB, AAQueryInfo &AAQI,
5151
const Instruction *) {
52-
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
53-
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
52+
Type* TypeA = LocA.Ptr->getType();
53+
Type* TypeB = LocB.Ptr->getType();
54+
if (!TypeA->isPointerTy() || !TypeB->isPointerTy())
55+
return AliasResult::MayAlias;
56+
unsigned asA = TypeA->getPointerAddressSpace();
57+
unsigned asB = TypeB->getPointerAddressSpace();
5458

5559
if (!AMDGPU::addrspacesMayAlias(asA, asB))
5660
return AliasResult::NoAlias;

llvm/lib/Transforms/Utils/FlattenCFG.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,12 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
357357
for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
358358
if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
359359
// Check alias with Head2.
360-
if (!AA || !AA->isNoAlias(&*iter1, &*BI))
361-
return false;
360+
if (AA) {
361+
MemoryLocation Loc1 = MemoryLocation::get(&*iter1);
362+
MemoryLocation Loc2 = MemoryLocation::get(&*BI);
363+
if (!AA->isNoAlias(Loc1, Loc2))
364+
return false;
365+
}
362366
}
363367
}
364368
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
; RUN: opt -aa-pipeline=default -passes='require<aa>' -debug-pass-manager -disable-output -S < %s 2>&1 | FileCheck %s
2+
3+
; Target-specific AA should run before BasicAA to reduce compile time
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
; CHECK: Running analysis: NVPTXAA on foo
7+
; CHECK-NEXT: Running analysis: BasicAA on foo
8+
define void @foo(){
9+
entry:
10+
ret void
11+
}

llvm/test/Transforms/Util/flatten-cfg.ll

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt -passes=flatten-cfg -S < %s | FileCheck %s
2+
; RUN: opt -passes='require<aa>,flatten-cfg' -S < %s | FileCheck %s
33

44

55
; This test checks whether the pass completes without a crash.
@@ -309,3 +309,89 @@ if.then.y:
309309
exit:
310310
ret i1 %cmp.y
311311
}
312+
313+
; Test that two if-regions are not merged when there's potential aliasing
314+
; between a store in the first if-region and a load in the second if-region's header
315+
define i32 @test_alias(i32 %a, i32 %b, ptr %p1, ptr %p2) {
316+
; CHECK-LABEL: define i32 @test_alias
317+
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
318+
; CHECK-NEXT: entry:
319+
; CHECK-NEXT: store i32 42, ptr [[P1]], align 4
320+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i32 [[A]], 0
321+
; CHECK-NEXT: br i1 [[COND1]], label [[IF_THEN1:%.*]], label [[IF_END1:%.*]]
322+
; CHECK: if.then1:
323+
; CHECK-NEXT: store i32 100, ptr [[P2]], align 4
324+
; CHECK-NEXT: br label [[IF_END1]]
325+
; CHECK: if.end1:
326+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P1]], align 4
327+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[B]], 0
328+
; CHECK-NEXT: br i1 [[COND2]], label [[IF_THEN2:%.*]], label [[IF_END2:%.*]]
329+
; CHECK: if.then2:
330+
; CHECK-NEXT: store i32 100, ptr [[P2]], align 4
331+
; CHECK-NEXT: br label [[IF_END2]]
332+
; CHECK: if.end2:
333+
; CHECK-NEXT: ret i32 0
334+
;
335+
entry:
336+
store i32 42, ptr %p1
337+
%cond1 = icmp eq i32 %a, 0
338+
br i1 %cond1, label %if.then1, label %if.end1
339+
340+
if.then1:
341+
store i32 100, ptr %p2 ; May alias with the load below
342+
br label %if.end1
343+
344+
if.end1:
345+
%val = load i32, ptr %p1 ; This load prevents merging due to potential alias
346+
%cond2 = icmp eq i32 %b, 0
347+
br i1 %cond2, label %if.then2, label %if.end2
348+
349+
if.then2:
350+
store i32 100, ptr %p2
351+
br label %if.end2
352+
353+
if.end2:
354+
ret i32 0
355+
}
356+
357+
; Test that two if-regions are merged when there's no potential aliasing
358+
; between a store in the first if-region and a load in the second if-region's header
359+
define i32 @test_no_alias(i32 %a, i32 %b) {
360+
; CHECK-LABEL: define i32 @test_no_alias
361+
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
362+
; CHECK-NEXT: entry:
363+
; CHECK-NEXT: [[P:%.*]] = alloca i32, align 4
364+
; CHECK-NEXT: store i32 42, ptr [[P]], align 4
365+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i32 [[A]], 0
366+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr @g, align 4
367+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[B]], 0
368+
; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND1]], [[COND2]]
369+
; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN2:%.*]], label [[IF_END2:%.*]]
370+
; CHECK: if.then2:
371+
; CHECK-NEXT: store i32 100, ptr [[P]], align 4
372+
; CHECK-NEXT: br label [[IF_END2]]
373+
; CHECK: if.end2:
374+
; CHECK-NEXT: ret i32 0
375+
;
376+
entry:
377+
%p = alloca i32
378+
store i32 42, ptr %p
379+
%cond1 = icmp eq i32 %a, 0
380+
br i1 %cond1, label %if.then1, label %if.end1
381+
382+
if.then1:
383+
store i32 100, ptr %p ; No alias with the load below
384+
br label %if.end1
385+
386+
if.end1:
387+
%val = load i32, ptr @g
388+
%cond2 = icmp eq i32 %b, 0
389+
br i1 %cond2, label %if.then2, label %if.end2
390+
391+
if.then2:
392+
store i32 100, ptr %p
393+
br label %if.end2
394+
395+
if.end2:
396+
ret i32 0
397+
}

0 commit comments

Comments
 (0)