llvm-project/lld/MachO/SymbolTable.cpp
Nico Weber d5a70db193 [lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.

Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:

        N           Min           Max        Median           Avg        Stddev
    x  10     3.9957051     4.3496981     4.1411121      4.156837    0.10092097
    +  10      3.908154      4.169318     3.9712729     3.9846753   0.075773012
    Difference at 95.0% confidence
            -0.172162 +/- 0.083847
            -4.14165% +/- 2.01709%
            (Student's t, pooled s = 0.0892373)

Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)

Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
  Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
  (that is, catch block unwind information) and Personality Routines
  associated with weak functions still not stripped. This is wasteful,
  but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
  correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
  one symbol (eg from .alt_entry) for now

Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
  found it a bit more explicit)
- exports

Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).

This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.

I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.

Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 17:11:40 -04:00

214 lines
7.2 KiB
C++

//===- SymbolTable.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
#include "Config.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
using namespace llvm;
using namespace lld;
using namespace lld::macho;
Symbol *SymbolTable::find(CachedHashStringRef cachedName) {
auto it = symMap.find(cachedName);
if (it == symMap.end())
return nullptr;
return symVector[it->second];
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
const InputFile *file) {
auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
Symbol *sym;
if (!p.second) {
// Name already present in the symbol table.
sym = symVector[p.first->second];
} else {
// Name is a new symbol.
sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
symVector.push_back(sym);
}
sym->isUsedInRegularObj |= !file || isa<ObjFile>(file);
return {sym, p.second};
}
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb) {
Symbol *s;
bool wasInserted;
bool overridesWeakDef = false;
std::tie(s, wasInserted) = insert(name, file);
assert(!isWeakDef || (isa<BitcodeFile>(file) && !isec) ||
(isa<ObjFile>(file) && file == isec->file));
if (!wasInserted) {
if (auto *defined = dyn_cast<Defined>(s)) {
if (isWeakDef) {
// Both old and new symbol weak (e.g. inline function in two TUs):
// If one of them isn't private extern, the merged symbol isn't.
if (defined->isWeakDef()) {
defined->privateExtern &= isPrivateExtern;
// FIXME: Handle this for bitcode files.
// FIXME: We currently only do this if both symbols are weak.
// We could do this if either is weak (but getting the
// case where !isWeakDef && defined->isWeakDef() right
// requires some care and testing).
if (isec)
isec->canOmitFromOutput = true;
}
return defined;
}
if (!defined->isWeakDef())
error("duplicate symbol: " + name + "\n>>> defined in " +
toString(defined->getFile()) + "\n>>> defined in " +
toString(file));
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
}
// Defined symbols take priority over other types of symbols, so in case
// of a name conflict, we fall through to the replaceSymbol() call below.
}
Defined *defined =
replaceSymbol<Defined>(s, name, file, isec, value, size, isWeakDef,
/*isExternal=*/true, isPrivateExtern, isThumb);
defined->overridesWeakDef = overridesWeakDef;
return defined;
}
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
bool isWeakRef) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name, file);
RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
if (wasInserted)
replaceSymbol<Undefined>(s, name, file, refState);
else if (auto *lazy = dyn_cast<LazySymbol>(s))
lazy->fetchArchiveMember();
else if (auto *dynsym = dyn_cast<DylibSymbol>(s))
dynsym->refState = std::max(dynsym->refState, refState);
else if (auto *undefined = dyn_cast<Undefined>(s))
undefined->refState = std::max(undefined->refState, refState);
return s;
}
Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
uint32_t align, bool isPrivateExtern) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name, file);
if (!wasInserted) {
if (auto *common = dyn_cast<CommonSymbol>(s)) {
if (size < common->size)
return s;
} else if (isa<Defined>(s)) {
return s;
}
// Common symbols take priority over all non-Defined symbols, so in case of
// a name conflict, we fall through to the replaceSymbol() call below.
}
replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern);
return s;
}
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
bool isTlv) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name, file);
RefState refState = RefState::Unreferenced;
if (!wasInserted) {
if (auto *defined = dyn_cast<Defined>(s)) {
if (isWeakDef && !defined->isWeakDef())
defined->overridesWeakDef = true;
} else if (auto *undefined = dyn_cast<Undefined>(s)) {
refState = undefined->refState;
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
refState = dysym->refState;
}
}
bool isDynamicLookup = file == nullptr;
if (wasInserted || isa<Undefined>(s) ||
(isa<DylibSymbol>(s) &&
((!isWeakDef && s->isWeakDef()) ||
(!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup()))))
replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv);
return s;
}
Symbol *SymbolTable::addDynamicLookup(StringRef name) {
return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false);
}
Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
const object::Archive::Symbol &sym) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name, file);
if (wasInserted)
replaceSymbol<LazySymbol>(s, file, sym);
else if (isa<Undefined>(s) || (isa<DylibSymbol>(s) && s->isWeakDef()))
file->fetch(sym);
return s;
}
Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
uint64_t value, bool isPrivateExtern,
bool includeInSymtab) {
Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0,
/*isWeakDef=*/false, isPrivateExtern,
/*isThumb=*/false);
s->includeInSymtab = includeInSymtab;
return s;
}
void lld::macho::treatUndefinedSymbol(const Undefined &sym) {
auto message = [](const Undefined &sym) {
std::string message = "undefined symbol: " + toString(sym);
std::string fileName = toString(sym.getFile());
if (!fileName.empty())
message += "\n>>> referenced by " + fileName;
return message;
};
switch (config->undefinedSymbolTreatment) {
case UndefinedSymbolTreatment::error:
error(message(sym));
break;
case UndefinedSymbolTreatment::warning:
warn(message(sym));
LLVM_FALLTHROUGH;
case UndefinedSymbolTreatment::dynamic_lookup:
case UndefinedSymbolTreatment::suppress:
symtab->addDynamicLookup(sym.getName());
break;
case UndefinedSymbolTreatment::unknown:
llvm_unreachable("unknown -undefined TREATMENT");
}
}
SymbolTable *macho::symtab;