COFF: Change the order of adding symbols to the symbol table.

Previously, the order of adding symbols to the symbol table was simple.
We have a list of all input files. We read each file from the beginning
of the list and add all symbols in it to the symbol table.

This patch changes that order. Now all archive files are added to the
symbol table first, and then all the other object files are added.
This shouldn't change the behavior in single-threaded mode, and it makes
room for parallelizing this work across multiple threads.

In the first step, only lazy symbols are added to the symbol table
because archives contain only Lazy symbols. Member object files
found to be necessary are queued. In the second step, defined and
undefined symbols are added from object files. Adding an undefined
symbol to the symbol table may cause more member files to be added
to the queue. We simply continue reading all object files until the
queue is empty.

Finally, new archive or object files may be added to the queues by
object files' directive sections (which contain new command line
options).

The above process is repeated until we get no new files.

Symbols defined both in object files and in archives can make results
nondeterministic. If an archive is read before an object, a new member
file gets linked, whereas in the opposite order no new file would be added.
In my observation, that is the most common cause of nondeterministic
results or link failures. Separating the phases of adding lazy symbols and
undefined symbols makes that deterministic. Adding symbols in each
phase should be parallelizable.

llvm-svn: 241107
This commit is contained in:
Rui Ueyama 2015-06-30 19:35:21 +00:00
parent a34b7bf2eb
commit 8d3010a1a6
8 changed files with 157 additions and 82 deletions

View File

@ -532,6 +532,10 @@ bool LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
// The symbol table will take care of name resolution. // The symbol table will take care of name resolution.
for (MemoryBufferRef MB : Inputs) for (MemoryBufferRef MB : Inputs)
Symtab.addFile(createFile(MB)); Symtab.addFile(createFile(MB));
if (auto EC = Symtab.readObjects()) {
llvm::errs() << EC.message() << "\n";
return false;
}
if (auto EC = Symtab.run()) { if (auto EC = Symtab.run()) {
llvm::errs() << EC.message() << "\n"; llvm::errs() << EC.message() << "\n";
return false; return false;

View File

@ -60,7 +60,7 @@ std::error_code ArchiveFile::parse() {
size_t NumSyms = File->getNumberOfSymbols(); size_t NumSyms = File->getNumberOfSymbols();
size_t BufSize = NumSyms * sizeof(Lazy); size_t BufSize = NumSyms * sizeof(Lazy);
Lazy *Buf = (Lazy *)Alloc.Allocate(BufSize, llvm::alignOf<Lazy>()); Lazy *Buf = (Lazy *)Alloc.Allocate(BufSize, llvm::alignOf<Lazy>());
SymbolBodies.reserve(NumSyms); LazySymbols.reserve(NumSyms);
// Read the symbol table to construct Lazy objects. // Read the symbol table to construct Lazy objects.
uint32_t I = 0; uint32_t I = 0;
@ -68,7 +68,7 @@ std::error_code ArchiveFile::parse() {
auto *B = new (&Buf[I++]) Lazy(this, Sym); auto *B = new (&Buf[I++]) Lazy(this, Sym);
// Skip special symbol exists in import library files. // Skip special symbol exists in import library files.
if (B->getName() != "__NULL_IMPORT_DESCRIPTOR") if (B->getName() != "__NULL_IMPORT_DESCRIPTOR")
SymbolBodies.push_back(B); LazySymbols.push_back(B);
} }
return std::error_code(); return std::error_code();
} }

View File

@ -30,6 +30,7 @@ using llvm::object::COFFSymbolRef;
class Chunk; class Chunk;
class Defined; class Defined;
class Lazy;
class SymbolBody; class SymbolBody;
class Undefined; class Undefined;
@ -83,13 +84,17 @@ public:
// (So that we don't instantiate same members more than once.) // (So that we don't instantiate same members more than once.)
ErrorOr<MemoryBufferRef> getMember(const Archive::Symbol *Sym); ErrorOr<MemoryBufferRef> getMember(const Archive::Symbol *Sym);
// NB: All symbols returned by ArchiveFiles are of Lazy type. std::vector<Lazy *> &getLazySymbols() { return LazySymbols; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
// All symbols returned by ArchiveFiles are of Lazy type.
std::vector<SymbolBody *> &getSymbols() override {
llvm_unreachable("internal error");
}
private: private:
std::unique_ptr<Archive> File; std::unique_ptr<Archive> File;
std::string Filename; std::string Filename;
std::vector<SymbolBody *> SymbolBodies; std::vector<Lazy *> LazySymbols;
std::set<const char *> Seen; std::set<const char *> Seen;
llvm::MallocAllocator Alloc; llvm::MallocAllocator Alloc;
}; };

View File

@ -24,47 +24,96 @@ namespace lld {
namespace coff { namespace coff {
SymbolTable::SymbolTable() { SymbolTable::SymbolTable() {
resolve(new (Alloc) DefinedAbsolute("__ImageBase", Config->ImageBase)); addSymbol(new (Alloc) DefinedAbsolute("__ImageBase", Config->ImageBase));
if (!Config->EntryName.empty()) if (!Config->EntryName.empty())
resolve(new (Alloc) Undefined(Config->EntryName)); addSymbol(new (Alloc) Undefined(Config->EntryName));
} }
void SymbolTable::addFile(std::unique_ptr<InputFile> File) { void SymbolTable::addFile(std::unique_ptr<InputFile> FileP) {
Files.push_back(std::move(File)); InputFile *File = FileP.get();
Files.push_back(std::move(FileP));
if (auto *F = dyn_cast<ArchiveFile>(File)) {
ArchiveQueue.push_back(F);
return;
}
ObjectQueue.push_back(File);
if (auto *F = dyn_cast<ObjectFile>(File)) {
ObjectFiles.push_back(F);
} else if (auto *F = dyn_cast<BitcodeFile>(File)) {
BitcodeFiles.push_back(F);
} else {
ImportFiles.push_back(cast<ImportFile>(File));
}
} }
std::error_code SymbolTable::run() { std::error_code SymbolTable::run() {
while (FileIdx < Files.size()) { while (!ArchiveQueue.empty() || !ObjectQueue.empty()) {
InputFile *F = Files[FileIdx++].get(); if (auto EC = readArchives())
if (Config->Verbose)
llvm::outs() << "Reading " << F->getShortName() << "\n";
if (auto EC = F->parse())
return EC; return EC;
if (auto *P = dyn_cast<ObjectFile>(F)) { if (auto EC = readObjects())
ObjectFiles.push_back(P); return EC;
} else if (auto *P = dyn_cast<ArchiveFile>(F)) { ++Version;
ArchiveFiles.push_back(P);
} else if (auto *P = dyn_cast<BitcodeFile>(F)) {
BitcodeFiles.push_back(P);
} else {
ImportFiles.push_back(cast<ImportFile>(F));
}
for (SymbolBody *B : F->getSymbols())
if (B->isExternal())
if (auto EC = resolve(B))
return EC;
// If a object file contains .drectve section,
// read that and add files listed there.
StringRef S = F->getDirectives();
if (!S.empty())
if (auto EC = Driver->parseDirectives(S))
return EC;
} }
return std::error_code(); return std::error_code();
} }
// Drains ArchiveQueue. Every queued archive is parsed and each of its lazy
// symbols is handed to addLazy(); afterwards addMemberFile() is called for
// every symbol that addLazy() accumulated in LazySyms.
// Returns the first error reported by parse() or addMemberFile(); otherwise
// a default-constructed std::error_code.
std::error_code SymbolTable::readArchives() {
// Nothing queued: return early without touching the symbol table.
if (ArchiveQueue.empty())
return std::error_code();
// Add lazy symbols to the symbol table. Lazy symbols that conflict
// with existing undefined symbols are accumulated in LazySyms.
std::vector<Symbol *> LazySyms;
for (ArchiveFile *File : ArchiveQueue) {
if (Config->Verbose)
llvm::outs() << "Reading " << File->getShortName() << "\n";
// A parse failure returns immediately; ArchiveQueue is not cleared on
// this path.
if (auto EC = File->parse())
return EC;
for (Lazy *Sym : File->getLazySymbols())
addLazy(Sym, &LazySyms);
}
ArchiveQueue.clear();
// Add archive member files to ObjectQueue that should resolve
// existing undefined symbols.
// addLazy() only accumulates a symbol after storing the Lazy in Sym->Body,
// which is what the cast<Lazy> below relies on.
// NOTE(review): this assumes nothing replaces Sym->Body between that store
// and this loop -- confirm against addMemberFile().
for (Symbol *Sym : LazySyms)
if (auto EC = addMemberFile(cast<Lazy>(Sym->Body)))
return EC;
return std::error_code();
}
// Drains ObjectQueue. Each queued file is parsed and its external symbols
// are passed to addSymbol(); non-empty directive strings are collected and
// handed to Driver->parseDirectives() only after the queue has been cleared.
// Returns the first error reported by parse(), addSymbol() or
// parseDirectives(); otherwise a default-constructed std::error_code.
std::error_code SymbolTable::readObjects() {
// Nothing queued: return early.
if (ObjectQueue.empty())
return std::error_code();
// Add defined and undefined symbols to the symbol table.
// NOTE(review): StringRef does not own its characters; presumably the
// directive text stays valid for as long as the owning InputFile is alive
// -- confirm.
std::vector<StringRef> Directives;
// Index-based loop that re-reads size() on every iteration: per the note
// below, ObjectQueue can grow while it is being walked, so entries appended
// mid-loop are visited in this same pass.
for (size_t I = 0; I < ObjectQueue.size(); ++I) {
InputFile *File = ObjectQueue[I];
if (Config->Verbose)
llvm::outs() << "Reading " << File->getShortName() << "\n";
if (auto EC = File->parse())
return EC;
// Adding symbols may add more files to ObjectQueue
// (but not to ArchiveQueue).
// Only symbols for which isExternal() is true are added to the table.
for (SymbolBody *Sym : File->getSymbols())
if (Sym->isExternal())
if (auto EC = addSymbol(Sym))
return EC;
// Directive handling is deferred: the string is only recorded here.
StringRef S = File->getDirectives();
if (!S.empty())
Directives.push_back(S);
}
ObjectQueue.clear();
// Parse directive sections. This may add files to
// ArchiveQueue and ObjectQueue.
for (StringRef S : Directives)
if (auto EC = Driver->parseDirectives(S))
return EC;
return std::error_code();
}
bool SymbolTable::reportRemainingUndefines() { bool SymbolTable::reportRemainingUndefines() {
bool Ret = false; bool Ret = false;
for (auto &I : Symtab) { for (auto &I : Symtab) {
@ -102,40 +151,53 @@ bool SymbolTable::reportRemainingUndefines() {
return Ret; return Ret;
} }
// This function resolves conflicts if there's an existing symbol with void SymbolTable::addLazy(Lazy *New, std::vector<Symbol *> *Accum) {
// the same name. Decisions are made based on symbol type. Symbol *&Sym = Symtab[New->getName()];
std::error_code SymbolTable::resolve(SymbolBody *New) { if (!Sym) {
// Find an existing Symbol or create and insert a new one. Sym = new (Alloc) Symbol(New);
StringRef Name = New->getName(); New->setBackref(Sym);
Symbol *&Sym = Symtab[Name]; return;
}
SymbolBody *Existing = Sym->Body;
if (!isa<Undefined>(Existing))
return;
Sym->Body = New;
New->setBackref(Sym);
Accum->push_back(Sym);
}
std::error_code SymbolTable::addSymbol(SymbolBody *New) {
// Find an existing symbol or create and insert a new one.
assert(isa<Defined>(New) || isa<Undefined>(New));
Symbol *&Sym = Symtab[New->getName()];
if (!Sym) { if (!Sym) {
Sym = new (Alloc) Symbol(New); Sym = new (Alloc) Symbol(New);
New->setBackref(Sym); New->setBackref(Sym);
++Version;
return std::error_code(); return std::error_code();
} }
New->setBackref(Sym); New->setBackref(Sym);
// If we have an undefined symbol and a lazy symbol,
// let the lazy symbol to read a member file.
SymbolBody *Existing = Sym->Body;
if (auto *L = dyn_cast<Lazy>(Existing)) {
if (auto *U = dyn_cast<Undefined>(New))
if (!U->getWeakAlias())
return addMemberFile(L);
Sym->Body = New;
return std::error_code();
}
// compare() returns -1, 0, or 1 if the lhs symbol is less preferable, // compare() returns -1, 0, or 1 if the lhs symbol is less preferable,
// equivalent (conflicting), or more preferable, respectively. // equivalent (conflicting), or more preferable, respectively.
SymbolBody *Existing = Sym->Body; int Comp = Existing->compare(New);
int comp = Existing->compare(New); if (Comp == 0) {
if (comp < 0) {
Sym->Body = New;
++Version;
}
if (comp == 0) {
llvm::errs() << "duplicate symbol: " << Existing->getDebugName() llvm::errs() << "duplicate symbol: " << Existing->getDebugName()
<< " and " << New->getDebugName() << "\n"; << " and " << New->getDebugName() << "\n";
return make_error_code(LLDError::DuplicateSymbols); return make_error_code(LLDError::DuplicateSymbols);
} }
if (Comp < 0)
// If we have an Undefined symbol for a Lazy symbol, we need Sym->Body = New;
// to read an archive member to replace the Lazy symbol with
// a Defined symbol.
if (isa<Undefined>(Existing) || isa<Undefined>(New))
if (auto *B = dyn_cast<Lazy>(Sym->Body))
return addMemberFile(B);
return std::error_code(); return std::error_code();
} }
@ -221,7 +283,7 @@ std::pair<StringRef, Symbol *> SymbolTable::findMangled(StringRef S) {
} }
std::error_code SymbolTable::addUndefined(StringRef Name) { std::error_code SymbolTable::addUndefined(StringRef Name) {
return resolve(new (Alloc) Undefined(Name)); return addSymbol(new (Alloc) Undefined(Name));
} }
// Resolve To, and make From an alias to To. // Resolve To, and make From an alias to To.
@ -235,7 +297,7 @@ std::error_code SymbolTable::rename(StringRef From, StringRef To) {
if (!isa<Undefined>(Sym->Body)) if (!isa<Undefined>(Sym->Body))
return std::error_code(); return std::error_code();
SymbolBody *Body = new (Alloc) Undefined(To); SymbolBody *Body = new (Alloc) Undefined(To);
if (auto EC = resolve(Body)) if (auto EC = addSymbol(Body))
return EC; return EC;
SymbolBody *Repl = Body->getReplacement(); SymbolBody *Repl = Body->getReplacement();
if (isa<Undefined>(Repl)) if (isa<Undefined>(Repl))
@ -269,10 +331,6 @@ std::error_code SymbolTable::addCombinedLTOObject() {
return EC; return EC;
ObjectFile *Obj = FileOrErr.get(); ObjectFile *Obj = FileOrErr.get();
// Skip the combined object file as the file is processed below
// rather than by run().
++FileIdx;
for (SymbolBody *Body : Obj->getSymbols()) { for (SymbolBody *Body : Obj->getSymbols()) {
if (!Body->isExternal()) if (!Body->isExternal())
continue; continue;
@ -298,21 +356,23 @@ std::error_code SymbolTable::addCombinedLTOObject() {
return make_error_code(LLDError::BrokenFile); return make_error_code(LLDError::BrokenFile);
} }
Sym->Body = Body; Sym->Body = Body;
} else { continue;
int comp = Sym->Body->compare(Body);
if (comp < 0)
Sym->Body = Body;
if (comp == 0) {
llvm::errs() << "LTO: unexpected duplicate symbol: " << Name << "\n";
return make_error_code(LLDError::BrokenFile);
}
} }
if (auto *L = dyn_cast<Lazy>(Sym->Body)) {
// We may see new references to runtime library symbols such as __chkstk // We may see new references to runtime library symbols such as __chkstk
// here. These symbols must be wholly defined in non-bitcode files. // here. These symbols must be wholly defined in non-bitcode files.
if (auto *B = dyn_cast<Lazy>(Sym->Body)) if (auto EC = addMemberFile(L))
if (auto EC = addMemberFile(B))
return EC; return EC;
continue;
}
SymbolBody *Existing = Sym->Body;
int Comp = Existing->compare(Body);
if (Comp == 0) {
llvm::errs() << "LTO: unexpected duplicate symbol: " << Name << "\n";
return make_error_code(LLDError::BrokenFile);
}
if (Comp < 0)
Sym->Body = Body;
} }
size_t NumBitcodeFiles = BitcodeFiles.size(); size_t NumBitcodeFiles = BitcodeFiles.size();

View File

@ -44,6 +44,8 @@ public:
SymbolTable(); SymbolTable();
void addFile(std::unique_ptr<InputFile> File); void addFile(std::unique_ptr<InputFile> File);
std::error_code run(); std::error_code run();
std::error_code readArchives();
std::error_code readObjects();
size_t getVersion() { return Version; } size_t getVersion() { return Version; }
// Print an error message on undefined symbols. // Print an error message on undefined symbols.
@ -89,14 +91,18 @@ public:
std::vector<Chunk *> LocalImportChunks; std::vector<Chunk *> LocalImportChunks;
private: private:
std::error_code resolve(SymbolBody *Body); std::error_code addSymbol(SymbolBody *New);
void addLazy(Lazy *New, std::vector<Symbol *> *Accum);
std::error_code addMemberFile(Lazy *Body); std::error_code addMemberFile(Lazy *Body);
ErrorOr<ObjectFile *> createLTOObject(llvm::LTOCodeGenerator *CG); ErrorOr<ObjectFile *> createLTOObject(llvm::LTOCodeGenerator *CG);
llvm::DenseMap<StringRef, Symbol *> Symtab; llvm::DenseMap<StringRef, Symbol *> Symtab;
std::vector<std::unique_ptr<InputFile>> Files; std::vector<std::unique_ptr<InputFile>> Files;
size_t FileIdx = 0; std::vector<ArchiveFile *> ArchiveQueue;
std::vector<ArchiveFile *> ArchiveFiles; std::vector<InputFile *> ObjectQueue;
std::vector<BitcodeFile *> BitcodeFiles; std::vector<BitcodeFile *> BitcodeFiles;
std::unique_ptr<MemoryBuffer> LTOMB; std::unique_ptr<MemoryBuffer> LTOMB;
llvm::BumpPtrAllocator Alloc; llvm::BumpPtrAllocator Alloc;

View File

@ -58,8 +58,8 @@ public:
DefinedAbsoluteKind, DefinedAbsoluteKind,
DefinedBitcodeKind, DefinedBitcodeKind,
LazyKind,
UndefinedKind, UndefinedKind,
LazyKind,
LastDefinedCOFFKind = DefinedCommonKind, LastDefinedCOFFKind = DefinedCommonKind,
LastDefinedKind = DefinedBitcodeKind, LastDefinedKind = DefinedBitcodeKind,

View File

@ -9,6 +9,6 @@
CHECK: include2.test.tmp1.obj CHECK: include2.test.tmp1.obj
CHECK: include2.test.tmp2.lib CHECK: include2.test.tmp2.lib
CHECK: include2.test.tmp2.lib(include2.test.tmp2.obj) for foo
CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp3.lib
CHECK: include2.test.tmp2.lib(include2.test.tmp2.obj) for foo
CHECK: include2.test.tmp3.lib(include2.test.tmp3.obj) for bar CHECK: include2.test.tmp3.lib(include2.test.tmp3.obj) for bar

View File

@ -9,7 +9,7 @@
# RUN: FileCheck %s < %t.log # RUN: FileCheck %s < %t.log
CHECK: order.test.tmp1.obj CHECK: order.test.tmp1.obj
CHECK: order.test.tmp2.lib
CHECK: order.test.tmp2.lib(order.test.tmp2.obj) for foo
CHECK: order.test.tmp3.obj CHECK: order.test.tmp3.obj
CHECK: order.test.tmp2.lib
CHECK: order.test.tmp3.lib CHECK: order.test.tmp3.lib
CHECK: order.test.tmp2.lib(order.test.tmp2.obj) for foo