Skip to content

[ThinLTO][TypeProf] Import local-linkage global var for mod1:func_foo-> mod2:local-var edge #100448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 40 additions & 25 deletions compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
// UNSUPPORTED: ppc && host-byteorder-big-endian

// RUN: rm -rf %t && mkdir %t && cd %t
// RUN: rm -rf %t && mkdir %t && split-file %s %t && cd %t

// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling lib.cpp main.cpp -o test
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test

// Show vtable profiles from raw profile.
Expand Down Expand Up @@ -37,23 +37,23 @@
// COMMON-NEXT: Number of instrumented vtables: 2
// RAW: Indirect Target Results:
// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// RAW-NEXT: VTable Results:
// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED: Indirect Target Results:
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
// INDEXED-NEXT: VTable Results:
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
// COMMON: Instrumentation level: IR entry_first = 0
// COMMON-NEXT: Functions shown: 1
Expand Down Expand Up @@ -93,27 +93,27 @@
// ICTEXT: # NumValueSites:
// ICTEXT: 2
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
// ICTEXT: _ZN8Derived14funcEii:50
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
// ICTEXT: _ZN8Derived1D0Ev:250
// ICTEXT: # ValueKind = IPVK_VTableTarget:
// ICTEXT: 2
// ICTEXT: # NumValueSites:
// ICTEXT: 2
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
// ICTEXT: _ZTV8Derived1:50
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
// ICTEXT: _ZTV8Derived1:250

// When vtable value profiles exist, pgo-instr-use pass should annotate them
// even if `-enable-vtable-value-profiling` is not explicitly on.
// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
// RUN: -mllvm -print-after=pgo-instr-use -mllvm -filter-print-funcs=main \
// RUN: -mllvm -print-module-scope %s 2>&1 | FileCheck %s --check-prefix=ANNOTATE
// RUN: -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | FileCheck %s --check-prefix=ANNOTATE

// ANNOTATE-NOT: Inconsistent number of value sites
// ANNOTATE: !{!"VP", i32 2
Expand All @@ -122,7 +122,7 @@
// if `-icp-max-num-vtables` is set to zero.
// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
// RUN: -mllvm -icp-max-num-vtables=0 -mllvm -print-after=pgo-instr-use \
// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope %s 2>&1 | \
// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | \
// RUN: FileCheck %s --check-prefix=OMIT

// OMIT: Inconsistent number of value sites
Expand All @@ -141,28 +141,29 @@
// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
// RUN: -mllvm -enable-vtable-profile-use \
// RUN: -Wl,-plugin-opt,-import-assume-unique-local \
// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \
// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
// RUN: -Wl,-mllvm,-filter-print-funcs=main lib.cpp main.cpp 2>&1 \
// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"

// For the indirect call site `ptr->func`
// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
// REMARK: main.cpp:10:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}} with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
// REMARK: main.cpp:10:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
//
// For the indirect call site `delete ptr`
// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
// REMARK: main.cpp:12:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev.llvm.{{.*}} with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
// REMARK: main.cpp:12:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}

// The IR matchers for indirect callsite `ptr->func`.
// IR-LABEL: @main
// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}, i32 16)
// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
//
// IR: [[BB1]]:
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}}
// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]]
//
// IR: [[BB2]]:
Expand All @@ -185,6 +186,7 @@
// IR: [[MERGE0]]:
// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ]

//--- lib.h
#include <stdio.h>
#include <stdlib.h>
class Base {
Expand All @@ -193,12 +195,19 @@ class Base {

virtual ~Base() {};
};

class Derived1 : public Base {
public:
int func(int a, int b) override { return a * b; }
int func(int a, int b) override;

~Derived1() {}
};

__attribute__((noinline)) Base *createType(int a);

//--- lib.cpp
#include "lib.h"

namespace {
class Derived2 : public Base {
public:
Expand All @@ -207,7 +216,10 @@ class Derived2 : public Base {
~Derived2() {}
};
} // namespace
__attribute__((noinline)) Base *createType(int a) {

int Derived1::func(int a, int b) { return a * b; }

Base *createType(int a) {
Base *base = nullptr;
if (a % 4 == 0)
base = new Derived1();
Expand All @@ -216,6 +228,9 @@ __attribute__((noinline)) Base *createType(int a) {
return base;
}

//--- main.cpp
#include "lib.h"

int main(int argc, char **argv) {
int sum = 0;
for (int i = 0; i < 1000; i++) {
Expand Down
71 changes: 45 additions & 26 deletions llvm/lib/Transforms/IPO/FunctionImport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,17 @@ static cl::opt<std::string> WorkloadDefinitions(
"}"),
cl::Hidden);

static cl::opt<bool> ImportAssumeUniqueLocal(
"import-assume-unique-local", cl::init(false),
cl::desc(
"By default, a local-linkage global variable won't be imported in the "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can the local var be promoted to global to enable importing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As shown in the updated test, with this option on, when the local-var gets imported, it will have a global name <vtable>.llvm.<mod-hash>.

"edge mod1:func -> mod2:local-var (from value profiles) since compiler "
"cannot assume mod2 is compiled with full path which gives local-var a "
"program-wide unique GUID. Set this option to true will help cross "
"module import of such variables. This is only safe if the compiler "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to learn more about this space. Can you elaborate on "This is only safe if the compiler user specify the full module path." How does user specify it? I looked at the test and it's not clear to me from it.

Copy link
Contributor Author

@mingmingl-llvm mingmingl-llvm Jul 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does user specify it?

In a monolithic repository (https://research.google/pubs/why-google-stores-billions-of-lines-of-code-in-a-single-repository/) of source code, user specifies the full path in the compile options. In practice, bazel (https://bazel.build/) configures the build rules and guarantees full path is used. Users invoke bazel to compile an executable.

Using the folder structure below as an example [1], clang -O2 <other-options> /tmp/src1/a.cpp /tmp/src1/b.cpp /tmp/src2/a.cpp /tmp/src2/b.cpp /tmp/src2/c.cpp gives the full path, and cd /tmp/src1; clang -O2 a.cpp b.cpp <other arguments to specify paths in src2> doesn't.

I looked at the test and it's not clear to me from it.

The purpose of the test is to illustrate and provide coverage for profile-gen and its use, so I saved the extra work to use full path there.

[1]

/tmp/
|── src1/
│   ├── a.cpp
│   ├── b.cpp
├── src2/
│   ├── a.cpp
│   ├── b.cpp
│   └──c.cpp

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test program itself is correct as there are only two files and they don't have conflict names.

I'm open to update it to use the full path if that is preferred or better.

Copy link
Contributor Author

@mingmingl-llvm mingmingl-llvm Aug 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I double checked whether compiler correctness can rely on the fact that full file paths are used. My takeaway is that bazel doesn't fail the executable compilation if local paths are used (if any in a unconventional way) so I don't plan to turn on import-assume-unique-local.

Yet still NumDefs == 1 condition (https://github.com/llvm/llvm-project/pull/100448/files#diff-e7cb370fee46f0f773f2b5429dfab36b75126d3909ae98ee87ff3d0e3f75c6e9R215) should hold true for many internal-linkage vtables as long as full paths are indeed used.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, thx for explanation.

"user specify the full module path."),
cl::Hidden);

namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
Expand All @@ -196,6 +207,23 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
return Result;
}

static bool shouldSkipLocalInAnotherModule(const GlobalVarSummary *RefSummary,
size_t NumDefs,
StringRef ImporterModule) {
// We can import a local from another module if all inputs are compiled
// with full paths or when there is one definition.
if (ImportAssumeUniqueLocal || NumDefs == 1)
return false;
// In other cases, make sure we import the copy in the caller's module if the
// referenced value has local linkage. The only time a local variable can
// share an entry in the index is if there is a local with the same name in
// another module that had the same source file name (in a different
// directory), where each was compiled in their own directory so there was not
// distinguishing path.
return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
RefSummary->modulePath() != ImporterModule;
}

/// Given a list of possible callee implementation for a call site, qualify the
/// legality of importing each. The return is a range of pairs. Each pair
/// corresponds to a candidate. The first value is the ImportFailureReason for
Expand Down Expand Up @@ -228,19 +256,21 @@ static auto qualifyCalleeCandidates(
if (!Summary)
return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};

// If this is a local function, make sure we import the copy
// in the caller's module. The only time a local function can
// share an entry in the index is if there is a local with the same name
// in another module that had the same source file name (in a different
// directory), where each was compiled in their own directory so there
// was not distinguishing path.
// However, do the import from another module if there is only one
// entry in the list - in that case this must be a reference due
// to indirect call profile data, since a function pointer can point to
// a local in another module.
if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
CalleeSummaryList.size() > 1 &&
Summary->modulePath() != CallerModulePath)
// If this is a local function, make sure we import the copy in the
// caller's module. The only time a local function can share an entry in
// the index is if there is a local with the same name in another module
// that had the same source file name (in a different directory), where
// each was compiled in their own directory so there was not
// distinguishing path.
// If the local function is from another module, it must be a reference
// due to indirect call profile data since a function pointer can point
// to a local in another module. Do the import from another module if
// there is only one entry in the list or when all files in the program
// are compiled with full path - in both cases the local function has
// unique PGO name and GUID.
if (shouldSkipLocalInAnotherModule(dyn_cast<GlobalVarSummary>(Summary),
CalleeSummaryList.size(),
CallerModulePath))
return {
FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
GVSummary};
Expand Down Expand Up @@ -359,18 +389,6 @@ class GlobalsImporter final {

LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");

// If this is a local variable, make sure we import the copy
// in the caller's module. The only time a local variable can
// share an entry in the index is if there is a local with the same name
// in another module that had the same source file name (in a different
// directory), where each was compiled in their own directory so there
// was not distinguishing path.
auto LocalNotInModule =
[&](const GlobalValueSummary *RefSummary) -> bool {
return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
RefSummary->modulePath() != Summary.modulePath();
};

for (const auto &RefSummary : VI.getSummaryList()) {
const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
// Functions could be referenced by global vars - e.g. a vtable; but we
Expand All @@ -379,7 +397,8 @@ class GlobalsImporter final {
// based on profile information). Should we decide to handle them here,
// we can refactor accordingly at that time.
if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
LocalNotInModule(GVS))
shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
Summary.modulePath()))
continue;

// If there isn't an entry for GUID, insert <GUID, Definition> pair.
Expand Down
Loading