Skip to content

Commit c415b83

Browse files
pow2clktmsri
authored and committed
[HLSL] set alwaysinline on HLSL functions (llvm#106588)
HLSL inlines all its functions by default. This uses the alwaysinline attribute to make the alwaysinliner pass inline any function not explicitly marked noinline by the user or autogeneration. The alwaysinline marking takes place in `SetLLVMFunctionAttributesForDefinition` where all other inlining interactions are determined. The outermost entry function is marked noinline because there's no reason to inline it. Any user calls to an entry function will instead call the internal mangled version of the entry function. Adds tests for function and constructor inlining and augments some existing tests to verify correct inlining of implicitly created functions as well. Incidentally restores the RUN line that I believe was mistakenly removed as part of llvm#88918. Fixes llvm#89282.
1 parent c5550fc commit c415b83

9 files changed

+282
-42
lines changed

clang/lib/CodeGen/CGHLSLRuntime.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
338338
NumThreadsAttr->getZ());
339339
Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
340340
}
341+
Fn->addFnAttr(llvm::Attribute::NoInline);
341342
}
342343

343344
static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {

clang/lib/CodeGen/CodeGenModule.cpp

+14-6
Original file line numberDiff line numberDiff line change
@@ -2473,11 +2473,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
24732473
B.addAttribute(llvm::Attribute::StackProtectReq);
24742474

24752475
if (!D) {
2476+
// Non-entry HLSL functions must always be inlined.
2477+
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
2478+
B.addAttribute(llvm::Attribute::AlwaysInline);
24762479
// If we don't have a declaration to control inlining, the function isn't
24772480
// explicitly marked as alwaysinline for semantic reasons, and inlining is
24782481
// disabled, mark the function as noinline.
2479-
if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
2480-
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
2482+
else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
2483+
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
24812484
B.addAttribute(llvm::Attribute::NoInline);
24822485

24832486
F->addFnAttrs(B);
@@ -2504,9 +2507,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
25042507
ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
25052508
ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
25062509

2507-
// Add optnone, but do so only if the function isn't always_inline.
2508-
if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
2509-
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
2510+
// Non-entry HLSL functions must always be inlined.
2511+
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
2512+
!D->hasAttr<NoInlineAttr>()) {
2513+
B.addAttribute(llvm::Attribute::AlwaysInline);
2514+
} else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
2515+
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
2516+
// Add optnone, but do so only if the function isn't always_inline.
25102517
B.addAttribute(llvm::Attribute::OptimizeNone);
25112518

25122519
// OptimizeNone implies noinline; we should not be inlining such functions.
@@ -2526,7 +2533,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
25262533
B.addAttribute(llvm::Attribute::NoInline);
25272534
} else if (D->hasAttr<NoDuplicateAttr>()) {
25282535
B.addAttribute(llvm::Attribute::NoDuplicate);
2529-
} else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
2536+
} else if (D->hasAttr<NoInlineAttr>() &&
2537+
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
25302538
// Add noinline if the function isn't always_inline.
25312539
B.addAttribute(llvm::Attribute::NoInline);
25322540
} else if (D->hasAttr<AlwaysInlineAttr>() &&
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
2+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
23

34
int i;
45

@@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
78
}
89

910
__attribute__((constructor)) void then_call_me(void) {
10-
i = 12;
11+
i = 13;
1112
}
1213

1314
__attribute__((destructor)) void call_me_last(void) {
@@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
2122
// CHECK-NOT:@llvm.global_ctors
2223
// CHECK-NOT:@llvm.global_dtors
2324

24-
//CHECK: define void @main()
25-
//CHECK-NEXT: entry:
26-
//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"()
27-
//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"()
28-
//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
29-
//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
30-
//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"(
31-
//CHECK-NEXT: ret void
25+
// CHECK: define void @main()
26+
// CHECK-NEXT: entry:
27+
// Verify function constructors are emitted
28+
// NOINLINE-NEXT: call void @"?call_me_first@@YAXXZ"()
29+
// NOINLINE-NEXT: call void @"?then_call_me@@YAXXZ"()
30+
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
31+
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
32+
// NOINLINE-NEXT: call void @"?call_me_last@@YAXXZ"(
33+
// NOINLINE-NEXT: ret void
34+
35+
// Verify constructor calls are inlined when AlwaysInline is run
36+
// INLINE-NEXT: alloca
37+
// INLINE-NEXT: store i32 12
38+
// INLINE-NEXT: store i32 13
39+
// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
40+
// INLINE-NEXT: store i32 %
41+
// INLINE-NEXT: store i32 0
42+
// INLINE: ret void

clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl

+19-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
2+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
23

34
// Make sure global variable for ctors exist for lib profile.
45
// CHECK:@llvm.global_ctors
@@ -11,13 +12,27 @@ void FirstEntry() {}
1112

1213
// CHECK: define void @FirstEntry()
1314
// CHECK-NEXT: entry:
14-
// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
15+
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
16+
// NOINLINE-NEXT: call void @"?FirstEntry@@YAXXZ"()
17+
// Verify inlining leaves only calls to "llvm." intrinsics
18+
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
19+
// CHECK: ret void
1520

1621
[shader("compute")]
1722
[numthreads(1,1,1)]
1823
void SecondEntry() {}
1924

2025
// CHECK: define void @SecondEntry()
2126
// CHECK-NEXT: entry:
22-
// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
23-
// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"()
27+
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
28+
// NOINLINE-NEXT: call void @"?SecondEntry@@YAXXZ"()
29+
// Verify inlining leaves only calls to "llvm." intrinsics
30+
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
31+
// CHECK: ret void
32+
33+
34+
// Verify the constructor is alwaysinline
35+
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
36+
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]
37+
38+
// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline

clang/test/CodeGenHLSL/GlobalDestructors.hlsl

+31-20
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
2-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
2+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
3+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
4+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
35

4-
// Make sure global variable for dtors exist for lib profile.
6+
// Tests that constructors and destructors are appropriately generated for globals
7+
// and that their calls are inlined when AlwaysInline is run
8+
// but global variables are retained for the library profiles
9+
10+
// Make sure global variable for ctors/dtors exist for lib profile.
11+
// LIB:@llvm.global_ctors
512
// LIB:@llvm.global_dtors
6-
// Make sure global variable for dtors removed for compute profile.
7-
// CS-NOT:llvm.global_dtors
13+
// Make sure global variable for ctors/dtors removed for compute profile.
14+
// CS-NOT:@llvm.global_ctors
15+
// CS-NOT:@llvm.global_dtors
816

917
struct Tail {
1018
Tail() {
@@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
4654
Wag();
4755
}
4856

49-
// Make sure global variable for ctors/dtors removed.
50-
// CHECK-NOT:@llvm.global_ctors
51-
// CHECK-NOT:@llvm.global_dtors
52-
//CHECK: define void @main()
53-
//CHECK-NEXT: entry:
54-
//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
55-
//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
56-
//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
57-
//CHECK-NEXT: call void @_GLOBAL__D_a()
58-
//CHECK-NEXT: ret void
57+
// CHECK: define void @main()
58+
// CHECK-NEXT: entry:
59+
// Verify destructor is emitted
60+
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
61+
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
62+
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
63+
// NOINLINE-NEXT: call void @_GLOBAL__D_a()
64+
// NOINLINE-NEXT: ret void
65+
// Verify inlining leaves only calls to "llvm." intrinsics
66+
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
67+
// INLINE: ret void
5968

6069
// This is really just a sanity check I needed for myself to verify that
6170
// function scope static variables also get destroyed properly.
6271

63-
//CHECK: define internal void @_GLOBAL__D_a()
64-
//CHECK-NEXT: entry:
65-
//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
66-
//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
67-
//CHECK-NEXT: ret void
72+
// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
73+
// NOINLINE-NEXT: entry:
74+
// NOINLINE-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
75+
// NOINLINE-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
76+
// NOINLINE-NEXT: ret void
77+
78+
// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline

clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
12
// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV
23

34
RWBuffer<float> Buf;

clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
1111
// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
1212
// and confusing to follow so the match here is pretty weak.
1313

14-
// CHECK: define internal void @"?main@@YAXI@Z"
15-
// CHECK-NOT: call
14+
// CHECK: define void @main()
15+
// Verify inlining leaves only calls to "llvm." intrinsics
16+
// CHECK-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
1617
// CHECK: ret void
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
2+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
3+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
4+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
5+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
6+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
7+
8+
// Tests that implicit constructor calls for user classes will always be inlined.
9+
10+
struct Weed {
11+
Weed() {Count += 1;}
12+
[[maybe_unused]] void pull() {Count--;}
13+
static int weedCount() { return Count; }
14+
private:
15+
static int Count;
16+
17+
} YardWeeds;
18+
19+
int Weed::Count = 1; // It begins. . .
20+
21+
struct Kitty {
22+
unsigned burrsInFur;
23+
24+
Kitty() {
25+
burrsInFur = 0;
26+
}
27+
28+
void wanderInYard(int hours) {
29+
burrsInFur = hours*Weed::weedCount()/8;
30+
}
31+
32+
void lick() {
33+
if(burrsInFur) {
34+
burrsInFur--;
35+
Weed w;
36+
}
37+
}
38+
39+
} Nion;
40+
41+
void NionsDay(int hours) {
42+
static Kitty Nion;
43+
Nion.wanderInYard(hours);
44+
while(Nion.burrsInFur) Nion.lick();
45+
}
46+
47+
// CHECK: define void @main()
48+
// CHECK-NEXT: entry:
49+
// Verify constructor is emitted
50+
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
51+
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
52+
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
53+
// Verify inlining leaves only calls to "llvm." intrinsics
54+
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
55+
// CHECK: ret void
56+
[shader("compute")]
57+
[numthreads(1,1,1)]
58+
void main(unsigned GI : SV_GroupIndex) {
59+
NionsDay(10);
60+
}
61+
62+
63+
// CHECK: define void @rainyMain()
64+
// CHECK-NEXT: entry:
65+
// Verify constructor is emitted
66+
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
67+
// NOINLINE-NEXT: call void @"?rainyMain@@YAXXZ"()
68+
// Verify inlining leaves only calls to "llvm." intrinsics
69+
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
70+
// CHECK: ret void
71+
[shader("compute")]
72+
[numthreads(1,1,1)]
73+
void rainyMain() {
74+
NionsDay(1);
75+
}
76+

0 commit comments

Comments
 (0)