Skip to content

[ELF] Fix unnecessary inclusion of unreferenced provide symbols #84512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 5 additions & 17 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2393,12 +2393,6 @@ static void readSymbolPartitionSection(InputSectionBase *s) {
sym->partition = newPart.getNumber();
}

static Symbol *addUnusedUndefined(StringRef name,
uint8_t binding = STB_GLOBAL) {
return symtab.addSymbol(
Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
}

static void markBuffersAsDontNeed(bool skipLinkedOutput) {
// With --thinlto-index-only, all buffers are nearly unused from now on
// (except symbol/section names used by infrequent passes). Mark input file
Expand Down Expand Up @@ -2485,15 +2479,15 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
continue;

Symbol *wrap =
addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
symtab.addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);

// If __real_ is referenced, pull in the symbol if it is lazy. Do this after
// processing __wrap_ as that may have referenced __real_.
StringRef realName = saver().save("__real_" + name);
if (symtab.find(realName))
addUnusedUndefined(name, sym->binding);
symtab.addUnusedUndefined(name, sym->binding);

Symbol *real = addUnusedUndefined(realName);
Symbol *real = symtab.addUnusedUndefined(realName);
v.push_back({sym, real, wrap});

// We want to tell LTO not to inline symbols to be overwritten
Expand Down Expand Up @@ -2723,7 +2717,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
// -u foo a.a b.so will extract a.a.
for (StringRef name : config->undefined)
addUnusedUndefined(name)->referenced = true;
symtab.addUnusedUndefined(name)->referenced = true;

// Add all files to the symbol table. This will add almost all
// symbols that we need to the symbol table. This process might
Expand All @@ -2748,13 +2742,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
config->hasDynSymTab =
!ctx.sharedFiles.empty() || config->isPic || config->exportDynamic;

// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
for (StringRef name : script->referencedSymbols) {
Symbol *sym = addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
}
script->addScriptReferencedSymbolsToSymTable();

// Prevent LTO from removing any definition referenced by -u.
for (StringRef name : config->undefined)
Expand Down
48 changes: 39 additions & 9 deletions lld/ELF/LinkerScript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,15 +198,7 @@ static bool shouldDefineSym(SymbolAssignment *cmd) {
if (cmd->name == ".")
return false;

if (!cmd->provide)
return true;

// If a symbol was in PROVIDE(), we need to define it only
// when it is a referenced undefined symbol.
Symbol *b = symtab.find(cmd->name);
if (b && !b->isDefined() && !b->isCommon())
return true;
return false;
return !cmd->provide || LinkerScript::shouldAddProvideSym(cmd->name);
}

// Called by processSymbolAssignments() to assign definitions to
Expand Down Expand Up @@ -1517,3 +1509,41 @@ void LinkerScript::checkFinalScriptConditions() const {
checkMemoryRegion(lmaRegion, sec, sec->getLMA());
}
}

void LinkerScript::addScriptReferencedSymbolsToSymTable() {
// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
auto reference = [](StringRef name) {
Symbol *sym = symtab.addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
};
for (StringRef name : referencedSymbols)
reference(name);

// Keeps track of references from which PROVIDE symbols have been added to the
// symbol table.
DenseSet<StringRef> added;
SmallVector<const SmallVector<StringRef, 0> *, 0> symRefsVec;
for (const auto &[name, symRefs] : provideMap)
if (LinkerScript::shouldAddProvideSym(name) && added.insert(name).second)
symRefsVec.push_back(&symRefs);
while (symRefsVec.size()) {
for (StringRef name : *symRefsVec.pop_back_val()) {
reference(name);
// Prevent the symbol from being discarded by --gc-sections.
script->referencedSymbols.push_back(name);
auto it = script->provideMap.find(name);
if (it != script->provideMap.end() &&
LinkerScript::shouldAddProvideSym(name) &&
added.insert(name).second) {
symRefsVec.push_back(&it->second);
}
}
}
}

bool LinkerScript::shouldAddProvideSym(StringRef symName) {
Symbol *sym = symtab.find(symName);
return sym && !sym->isDefined() && !sym->isCommon();
}
21 changes: 21 additions & 0 deletions lld/ELF/LinkerScript.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
Expand Down Expand Up @@ -348,6 +349,18 @@ class LinkerScript final {
// Check backward location counter assignment and memory region/LMA overflows.
void checkFinalScriptConditions() const;

// Add symbols that are referenced in the linker script to the symbol table.
// Symbols referenced in a PROVIDE command are only added to the symbol table
// if the PROVIDE command actually provides the symbol.
// It also adds the symbols referenced by the used PROVIDE symbols to the
// linker script referenced symbols list.
void addScriptReferencedSymbolsToSymTable();

// Returns true if the PROVIDE symbol should be added to the link.
// A PROVIDE symbol is added to the link only if it satisfies an
// undefined reference.
static bool shouldAddProvideSym(StringRef symName);

// SECTIONS command list.
SmallVector<SectionCommand *, 0> sectionCommands;

Expand Down Expand Up @@ -379,6 +392,14 @@ class LinkerScript final {

// Sections that will be warned/errored by --orphan-handling.
SmallVector<const InputSectionBase *, 0> orphanSections;

// Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
// expression. For example, if the PROVIDE command is:
//
// PROVIDE(v = a + b + c);
//
// then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
};

LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
Expand Down
13 changes: 12 additions & 1 deletion lld/ELF/ScriptParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "llvm/Support/TimeProfiler.h"
#include <cassert>
#include <limits>
#include <optional>
#include <vector>

using namespace llvm;
Expand Down Expand Up @@ -138,6 +139,10 @@ class ScriptParser final : ScriptLexer {

// A set to detect an INCLUDE() cycle.
StringSet<> seen;

// If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
// then this member is set to the PROVIDE symbol name.
std::optional<llvm::StringRef> activeProvideSym;
};
} // namespace

Expand Down Expand Up @@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
;
return nullptr;
}
llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
if (provide)
activeProvideSym = name;
SymbolAssignment *cmd = readSymbolAssignment(name);
cmd->provide = provide;
cmd->hidden = hidden;
Expand Down Expand Up @@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() {
tok = unquote(tok);
else if (!isValidSymbolName(tok))
setError("malformed number: " + tok);
script->referencedSymbols.push_back(tok);
if (activeProvideSym)
script->provideMap[*activeProvideSym].push_back(tok);
else
script->referencedSymbols.push_back(tok);
return [=] { return script->getSymbolValue(tok, location); };
}

Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,7 @@ void SymbolTable::scanVersionScript() {
// --dynamic-list.
handleDynamicList();
}

Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
}
3 changes: 3 additions & 0 deletions lld/ELF/SymbolTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class SymbolTable {

void handleDynamicList();

Symbol *addUnusedUndefined(StringRef name,
uint8_t binding = llvm::ELF::STB_GLOBAL);

// Set of .so files to not link the same shared object file more than once.
llvm::DenseMap<llvm::CachedHashStringRef, SharedFile *> soNames;

Expand Down
60 changes: 60 additions & 0 deletions lld/test/ELF/gc-sections-with-provide.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# REQUIRES: x86

# This test verifies that garbage-collection is correctly garbage collecting
# unused sections when the symbol of the unused section is only referred by
# an unused PROVIDE symbol.

# RUN: rm -rf %t && split-file %s %t && cd %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
# RUN: ld.lld -o a_nogc a.o -T script.t
# RUN: llvm-nm a_nogc | FileCheck -check-prefix=NOGC %s
# RUN: ld.lld -o a_gc a.o --gc-sections --print-gc-sections -T script.t | FileCheck --check-prefix=GC_LINK %s
# RUN: llvm-nm a_gc | FileCheck -check-prefix=GC %s

NOGC-NOT: another_unused
NOGC: another_used
NOGC: bar
NOGC: baz
NOGC: baz_ref
NOGC: foo
NOGC-NOT: unused
NOGC: used

GC_LINK: removing unused section a.o:(.text.bar)

GC-NOT: another_unused
GC: another_used
GC-NOT: bar
GC: baz
GC: baz_ref
GC: foo
GC-NOT: unused
GC: used

#--- a.s
.global _start
_start:
call foo
call used

.section .text.foo,"ax",@progbits
foo:
nop

.section .text.bar,"ax",@progbits
.global bar
bar:
nop

.section .text.baz,"ax",@progbits
.global baz
baz:
nop


#--- script.t
PROVIDE(unused = bar + used);
PROVIDE(used = another_used);
PROVIDE(baz_ref = baz);
PROVIDE(another_used = baz_ref);
PROVIDE(another_unused = unused + bar + 0x1);
51 changes: 44 additions & 7 deletions lld/test/ELF/linkerscript/symbolreferenced.s
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,31 @@
# RUN: ld.lld -o chain -T chain.t a.o
# RUN: llvm-nm chain | FileCheck %s

# CHECK: 0000000000001000 a f1
# CHECK-NEXT: 0000000000001000 A f2
# CHECK-NEXT: 0000000000001000 a g1
# CHECK-NEXT: 0000000000001000 A g2
# CHECK-NEXT: 0000000000001000 A newsym
# CHECK-NOT: another_unused
# CHECK: 0000000000007000 a f1
# CHECK-NEXT: 0000000000007000 A f2
# CHECK-NEXT: 0000000000007000 A f3
# CHECK-NEXT: 0000000000007000 A f4
# CHECK-NEXT: 0000000000006000 A f5
# CHECK-NEXT: 0000000000003000 A f6
# CHECK-NEXT: 0000000000001000 A f7
# CHECK-NOT: g1
# CHECK-NOT: g2
# CHECK-NEXT: 0000000000007500 A newsym
# CHECK: 0000000000002000 A u
# CHECK-NOT: unused
# CHECK-NEXT: 0000000000002000 A v
# CHECK-NEXT: 0000000000002000 A w


# RUN: ld.lld -o chain_with_cycle -T chain_with_cycle.t a.o
# RUN: llvm-nm chain_with_cycle | FileCheck %s --check-prefix=CHAIN_WITH_CYCLE

# CHAIN_WITH_CYCLE: 000 A f1
# CHAIN_WITH_CYCLE: 000 A f2
# CHAIN_WITH_CYCLE: 000 A f3
# CHAIN_WITH_CYCLE: 000 A f4
# CHAIN_WITH_CYCLE: 000 A newsym

# RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
# ERR-COUNT-3: error: chain2.t:1: symbol not found: undef
Expand All @@ -40,13 +60,30 @@ patatino:
movl newsym, %eax

#--- chain.t
PROVIDE(f2 = 0x1000);
PROVIDE(f7 = 0x1000);
PROVIDE(f5 = f6 + 0x3000);
PROVIDE(f6 = f7 + 0x2000);
PROVIDE(f4 = f5 + 0x1000);
PROVIDE(f3 = f4);
PROVIDE(f2 = f3);
PROVIDE_HIDDEN(f1 = f2);
PROVIDE(newsym = f1);
PROVIDE(newsym = f1 + 0x500);

u = v;
PROVIDE(w = 0x2000);
PROVIDE(v = w);

PROVIDE(g2 = 0x1000);
PROVIDE_HIDDEN(g1 = g2);
PROVIDE(unused = g1);
PROVIDE_HIDDEN(another_unused = g1);

#--- chain_with_cycle.t
PROVIDE(f1 = f2 + f3);
PROVIDE(f2 = f3 + f4);
PROVIDE(f3 = f4);
PROVIDE(f4 = f1);
PROVIDE(newsym = f1);

#--- chain2.t
PROVIDE(f2 = undef);
Expand Down
Loading