diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 195e2a19214e8..3ef3f838f366b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -283,12 +283,22 @@ class AMDGPULowerKernelArgumentsPass
   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
 };
 
+/// Options accepted by AMDGPUAttributorPass.
+struct AMDGPUAttributorOptions {
+  /// Copied into AttributorConfig::IsClosedWorldModule when the pass runs;
+  /// enabled by the "closed-world" pass parameter.
+  bool IsClosedWorld = false;
+};
+
 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
 private:
   TargetMachine &TM;
 
+  AMDGPUAttributorOptions Options;
+
 public:
-  AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
+  AMDGPUAttributorPass(TargetMachine &TM, AMDGPUAttributorOptions Options = {})
+      : TM(TM), Options(Options) {}
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 39c52140dfbd2..6cee897959638 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1023,7 +1023,8 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
   }
 }
 
-static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
+static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
+                    AMDGPUAttributorOptions Options) {
   SetVector<Function *> Functions;
   for (Function &F : M) {
     if (!F.isIntrinsic())
@@ -1041,6 +1042,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
        &AAUnderlyingObjects::ID, &AAAddressSpace::ID});
 
   AttributorConfig AC(CGUpdater);
+  AC.IsClosedWorldModule = Options.IsClosedWorld;
   AC.Allowed = &Allowed;
   AC.IsModulePass = true;
   AC.DefaultInitializeLiveInternals = false;
@@ -1098,7 +1100,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
 
   bool runOnModule(Module &M) override {
     AnalysisGetter AG(this);
-    return runImpl(M, AG, *TM);
+    return runImpl(M, AG, *TM, /*Options=*/{});
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1119,8 +1121,8 @@ PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
   AnalysisGetter AG(FAM);
 
   // TODO: Probably preserves CFG
-  return runImpl(M, AG, TM) ? PreservedAnalyses::none()
-                            : PreservedAnalyses::all();
+  return runImpl(M, AG, TM, Options) ? PreservedAnalyses::none()
+                                     : PreservedAnalyses::all();
 }
 
 char AMDGPUAttributorLegacy::ID = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index b6a6c33d85f83..60d1e67259819 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -17,7 +17,6 @@
 #define MODULE_PASS(NAME, CREATE_PASS)
 #endif
 MODULE_PASS("amdgpu-always-inline", AMDGPUAlwaysInlinePass())
-MODULE_PASS("amdgpu-attributor", AMDGPUAttributorPass(*this))
 MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
             AMDGPULowerBufferFatPointersPass(*this))
 MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
@@ -26,6 +25,17 @@ MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
 MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
 #undef MODULE_PASS
 
+#ifndef MODULE_PASS_WITH_PARAMS
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
+#endif
+MODULE_PASS_WITH_PARAMS(
+    "amdgpu-attributor", "AMDGPUAttributorPass",
+    [=](AMDGPUAttributorOptions Options) {
+      return AMDGPUAttributorPass(*this, Options);
+    },
+    parseAMDGPUAttributorPassOptions, "closed-world")
+#undef MODULE_PASS_WITH_PARAMS
+
 #ifndef FUNCTION_PASS
 #define FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e80daff96c431..1f4264db157b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -55,6 +55,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -662,6 +663,27 @@ Error AMDGPUTargetMachine::buildCodeGenPipeline(
   return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
 }
 
+/// Parses the ';'-separated parameter list of the "amdgpu-attributor" pass.
+/// "closed-world" is the only recognized parameter; any other name yields a
+/// StringError naming the offending parameter.
+Expected<AMDGPUAttributorOptions>
+parseAMDGPUAttributorPassOptions(StringRef Params) {
+  AMDGPUAttributorOptions Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    if (ParamName == "closed-world") {
+      Result.IsClosedWorld = true;
+    } else {
+      return make_error<StringError>(
+          formatv("invalid AMDGPUAttributor pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+
 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
 #define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
new file mode 100644
index 0000000000000..c2b43f9e00058
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='amdgpu-attributor<closed-world>' %s | FileCheck --check-prefixes=CHECK,CW %s
+
+target datalayout = "A5"
+
+@G = global i32 0, align 4
+
+;.
+; CHECK: @G = global i32 0, align 4
+;.
+define void @bar() {
+; CHECK-LABEL: define {{[^@]+}}@bar
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, ptr @G, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i32 1, ptr @G, align 4
+  ret void
+}
+
+define ptr @helper() {
+; CHECK-LABEL: define {{[^@]+}}@helper
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret ptr @bar
+;
+entry:
+  ret ptr @bar
+}
+
+define amdgpu_kernel void @foo(ptr noundef %fp) {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
+; CHECK-NEXT:    call void [[LOAD]]()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %fp.addr = alloca ptr, addrspace(5)
+  store ptr %fp, ptr addrspace(5) %fp.addr
+  %load = load ptr, ptr addrspace(5) %fp.addr
+  call void %load()
+  ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CW: {{.*}}
+; OW: {{.*}}
diff --git a/llvm/test/Other/amdgpu-pass-pipeline-parsing.ll b/llvm/test/Other/amdgpu-pass-pipeline-parsing.ll
new file mode 100644
index 0000000000000..032c2b9d297bb
--- /dev/null
+++ b/llvm/test/Other/amdgpu-pass-pipeline-parsing.ll
@@ -0,0 +1,12 @@
+; REQUIRES: amdgpu-registered-target
+
+; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes='amdgpu-attributor<random>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: amdgpu-attributor: invalid AMDGPUAttributor pass parameter 'random'
+
+define void @f() {
+entry:
+  br label %loop
+loop:
+  br label %loop
+}