2015-08-14 14:12:54 +00:00
|
|
|
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
|
2015-07-24 21:03:07 +00:00
|
|
|
//
|
2019-01-19 08:50:56 +00:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-07-24 21:03:07 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_ELF_SYMBOL_TABLE_H
|
|
|
|
#define LLD_ELF_SYMBOL_TABLE_H
|
|
|
|
|
2019-05-28 06:33:06 +00:00
|
|
|
#include "Symbols.h"
|
2016-10-18 17:50:36 +00:00
|
|
|
#include "llvm/ADT/CachedHashString.h"
|
2016-04-14 20:42:43 +00:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2022-10-01 14:46:49 -07:00
|
|
|
#include "llvm/Support/Compiler.h"
|
2015-07-24 21:03:07 +00:00
|
|
|
|
2022-08-10 15:31:58 -04:00
|
|
|
namespace lld::elf {
|
2024-09-29 14:45:00 -07:00
|
|
|
struct Ctx;
|
2022-02-07 21:53:34 -08:00
|
|
|
class InputFile;
|
2022-02-23 21:36:45 -08:00
|
|
|
class SharedFile;
|
2022-02-07 21:53:34 -08:00
|
|
|
|
[LLD][ELF] Cortex-M Security Extensions (CMSE) Support
This commit provides linker support for Cortex-M Security Extensions (CMSE).
The specification for this feature can be found in ARM v8-M Security Extensions:
Requirements on Development Tools.
The linker synthesizes a security gateway veneer in a special section;
`.gnu.sgstubs`, when it finds non-local symbols `__acle_se_<entry>` and `<entry>`,
defined relative to the same text section and having the same address. The
address of `<entry>` is retargeted to the starting address of the
linker-synthesized security gateway veneer in section `.gnu.sgstubs`.
In summary, the linker translates input:
```
.text
entry:
__acle_se_entry:
[entry_code]
```
into:
```
.section .gnu.sgstubs
entry:
SG
B.W __acle_se_entry
.text
__acle_se_entry:
[entry_code]
```
If addresses of `__acle_se_<entry>` and `<entry>` are not equal, the linker
considers that `<entry>` already defines a secure gateway veneer so does not
synthesize one.
If `--out-implib=<out.lib>` is specified, the linker writes the list of secure
gateway veneers into a CMSE import library `<out.lib>`. The CMSE import library
will have 3 sections: `.symtab`, `.strtab`, `.shstrtab`. For every secure gateway
veneer <entry> at address `<addr>`, `.symtab` contains a `SHN_ABS` symbol `<entry>` with
value `<addr>`.
If `--in-implib=<in.lib>` is specified, the linker reads the existing CMSE import
library `<in.lib>` and preserves the entry function addresses in the resulting
executable and new import library.
Reviewed By: MaskRay, peter.smith
Differential Revision: https://reviews.llvm.org/D139092
2023-07-06 10:45:10 +01:00
|
|
|
struct ArmCmseEntryFunction {
|
|
|
|
Symbol *acleSeSym;
|
|
|
|
Symbol *sym;
|
|
|
|
};
|
|
|
|
|
2015-07-24 21:03:07 +00:00
|
|
|
// SymbolTable is a bucket of all known symbols, including defined,
|
|
|
|
// undefined, or lazy symbols (the last one is symbols in archive
|
|
|
|
// files whose archive members are not yet loaded).
|
|
|
|
//
|
|
|
|
// We put all symbols of all files into a SymbolTable, and the
|
|
|
|
// SymbolTable selects the "best" symbols if there are name
|
|
|
|
// conflicts. For example, obviously, a defined symbol is better than
|
|
|
|
// an undefined symbol. Or, if there's a conflict between a lazy and an
|
|
|
|
// undefined, it'll read an archive member to read a real definition
|
ELF: New symbol table design.
This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.
Performance numbers:
old(s) new(s)
Without debug info:
chrome 7.178 6.432 (-11.5%)
LLVMgold.so 0.505 0.502 (-0.5%)
clang 0.954 0.827 (-15.4%)
llvm-as 0.052 0.045 (-15.5%)
With debug info:
scylla 5.695 5.613 (-1.5%)
clang 14.396 14.143 (-1.8%)
Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.
The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.
In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.
I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.
This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.
[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html
Differential Revision: http://reviews.llvm.org/D19752
llvm-svn: 268178
2016-05-01 04:55:03 +00:00
|
|
|
// to replace the lazy symbol. The logic is implemented in the
|
|
|
|
// add*() functions, which are called by input files as they are parsed. There
|
|
|
|
// is one add* function per symbol type.
|
2017-07-26 18:42:48 +00:00
|
|
|
class SymbolTable {
|
2019-11-20 11:16:15 -08:00
|
|
|
public:
|
2024-09-29 14:45:00 -07:00
|
|
|
SymbolTable(Ctx &ctx) : ctx(ctx) {}
|
2022-07-29 10:16:57 -07:00
|
|
|
ArrayRef<Symbol *> getSymbols() const { return symVector; }
|
2015-09-08 19:43:27 +00:00
|
|
|
|
2019-11-20 11:16:15 -08:00
|
|
|
void wrap(Symbol *sym, Symbol *real, Symbol *wrap);
|
|
|
|
|
2019-05-17 01:55:20 +00:00
|
|
|
Symbol *insert(StringRef name);
|
ELF: New symbol table design.
This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.
Performance numbers:
old(s) new(s)
Without debug info:
chrome 7.178 6.432 (-11.5%)
LLVMgold.so 0.505 0.502 (-0.5%)
clang 0.954 0.827 (-15.4%)
llvm-as 0.052 0.045 (-15.5%)
With debug info:
scylla 5.695 5.613 (-1.5%)
clang 14.396 14.143 (-1.8%)
Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.
The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.
In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.
I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.
This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.
[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html
Differential Revision: http://reviews.llvm.org/D19752
llvm-svn: 268178
2016-05-01 04:55:03 +00:00
|
|
|
|
2022-09-28 20:01:41 -07:00
|
|
|
template <typename T> Symbol *addSymbol(const T &newSym) {
|
|
|
|
Symbol *sym = insert(newSym.getName());
|
2024-10-11 23:34:43 -07:00
|
|
|
sym->resolve(ctx, newSym);
|
2022-09-28 20:01:41 -07:00
|
|
|
return sym;
|
|
|
|
}
|
2024-10-11 23:34:43 -07:00
|
|
|
Symbol *addAndCheckDuplicate(Ctx &, const Defined &newSym);
|
2017-02-21 22:32:51 +00:00
|
|
|
|
2016-04-22 20:21:26 +00:00
|
|
|
void scanVersionScript();
|
2016-06-23 07:00:17 +00:00
|
|
|
|
2017-11-03 21:21:47 +00:00
|
|
|
Symbol *find(StringRef name);
|
2016-07-17 17:50:09 +00:00
|
|
|
|
2017-09-08 18:16:59 +00:00
|
|
|
void handleDynamicList();
|
|
|
|
|
2024-03-25 16:11:21 -07:00
|
|
|
Symbol *addUnusedUndefined(StringRef name,
|
|
|
|
uint8_t binding = llvm::ELF::STB_GLOBAL);
|
|
|
|
|
[ELF] Support --{,no-}allow-shlib-undefined
Summary:
In ld.bfd/gold, --no-allow-shlib-undefined is the default when linking
an executable. This patch implements a check to error on undefined
symbols in a shared object, if all of its DT_NEEDED entries are seen.
Our approach resembles the one used in gold, achieves a good balance to
be useful but not too smart (ld.bfd traces all DSOs and emulates the
behavior of a dynamic linker to catch more cases).
The error is issued based on the symbol table, different from undefined
reference errors issued for relocations. It is most effective when there
are DSOs that were not linked with -z defs (e.g. when static sanitizers
runtime is used).
gold has a comment that some system libraries on GNU/Linux may have
spurious undefined references and thus system libraries should be
excluded (https://sourceware.org/bugzilla/show_bug.cgi?id=6811). The
story may have changed now but we make --allow-shlib-undefined the
default for now. Its interaction with -shared can be discussed in the
future.
Reviewers: ruiu, grimar, pcc, espindola
Reviewed By: ruiu
Subscribers: joerg, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D57385
llvm-svn: 352826
2019-02-01 02:25:05 +00:00
|
|
|
// Set of .so files to not link the same shared object file more than once.
|
2022-01-16 21:19:01 -08:00
|
|
|
llvm::DenseMap<llvm::CachedHashStringRef, SharedFile *> soNames;
|
[ELF] Support --{,no-}allow-shlib-undefined
Summary:
In ld.bfd/gold, --no-allow-shlib-undefined is the default when linking
an executable. This patch implements a check to error on undefined
symbols in a shared object, if all of its DT_NEEDED entries are seen.
Our approach resembles the one used in gold, achieves a good balance to
be useful but not too smart (ld.bfd traces all DSOs and emulates the
behavior of a dynamic linker to catch more cases).
The error is issued based on the symbol table, different from undefined
reference errors issued for relocations. It is most effective when there
are DSOs that were not linked with -z defs (e.g. when static sanitizers
runtime is used).
gold has a comment that some system libraries on GNU/Linux may have
spurious undefined references and thus system libraries should be
excluded (https://sourceware.org/bugzilla/show_bug.cgi?id=6811). The
story may have changed now but we make --allow-shlib-undefined the
default for now. Its interaction with -shared can be discussed in the
future.
Reviewers: ruiu, grimar, pcc, espindola
Reviewed By: ruiu
Subscribers: joerg, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D57385
llvm-svn: 352826
2019-02-01 02:25:05 +00:00
|
|
|
|
2019-05-22 09:06:42 +00:00
|
|
|
// Comdat groups define "link once" sections. If two comdat groups have the
|
|
|
|
// same name, only one of them is linked, and the other is ignored. This map
|
|
|
|
// is used to uniquify them.
|
|
|
|
llvm::DenseMap<llvm::CachedHashStringRef, const InputFile *> comdatGroups;
|
|
|
|
|
[LLD][ELF] Cortex-M Security Extensions (CMSE) Support
This commit provides linker support for Cortex-M Security Extensions (CMSE).
The specification for this feature can be found in ARM v8-M Security Extensions:
Requirements on Development Tools.
The linker synthesizes a security gateway veneer in a special section;
`.gnu.sgstubs`, when it finds non-local symbols `__acle_se_<entry>` and `<entry>`,
defined relative to the same text section and having the same address. The
address of `<entry>` is retargeted to the starting address of the
linker-synthesized security gateway veneer in section `.gnu.sgstubs`.
In summary, the linker translates input:
```
.text
entry:
__acle_se_entry:
[entry_code]
```
into:
```
.section .gnu.sgstubs
entry:
SG
B.W __acle_se_entry
.text
__acle_se_entry:
[entry_code]
```
If addresses of `__acle_se_<entry>` and `<entry>` are not equal, the linker
considers that `<entry>` already defines a secure gateway veneer so does not
synthesize one.
If `--out-implib=<out.lib>` is specified, the linker writes the list of secure
gateway veneers into a CMSE import library `<out.lib>`. The CMSE import library
will have 3 sections: `.symtab`, `.strtab`, `.shstrtab`. For every secure gateway
veneer <entry> at address `<addr>`, `.symtab` contains a `SHN_ABS` symbol `<entry>` with
value `<addr>`.
If `--in-implib=<in.lib>` is specified, the linker reads the existing CMSE import
library `<in.lib>` and preserves the entry function addresses in the resulting
executable and new import library.
Reviewed By: MaskRay, peter.smith
Differential Revision: https://reviews.llvm.org/D139092
2023-07-06 10:45:10 +01:00
|
|
|
// The Map of __acle_se_<sym>, <sym> pairs found in the input objects.
|
|
|
|
// Key is the <sym> name.
|
|
|
|
llvm::SmallMapVector<StringRef, ArmCmseEntryFunction, 1> cmseSymMap;
|
|
|
|
|
|
|
|
// Map of symbols defined in the Arm CMSE import library. The linker must
|
|
|
|
// preserve the addresses in the output objects.
|
|
|
|
llvm::StringMap<Defined *> cmseImportLib;
|
|
|
|
|
|
|
|
// True if <sym> from the input Arm CMSE import library is written to the
|
|
|
|
// output Arm CMSE import library.
|
|
|
|
llvm::StringMap<bool> inCMSEOutImpLib;
|
|
|
|
|
2015-07-24 21:03:07 +00:00
|
|
|
private:
|
2021-12-23 16:49:38 -08:00
|
|
|
SmallVector<Symbol *, 0> findByVersion(SymbolVersion ver);
|
|
|
|
SmallVector<Symbol *, 0> findAllByVersion(SymbolVersion ver,
|
|
|
|
bool includeNonDefault);
|
2016-11-15 18:41:52 +00:00
|
|
|
|
2021-12-23 16:49:38 -08:00
|
|
|
llvm::StringMap<SmallVector<Symbol *, 0>> &getDemangledSyms();
|
2021-08-04 23:52:55 -07:00
|
|
|
bool assignExactVersion(SymbolVersion ver, uint16_t versionId,
|
|
|
|
StringRef versionName, bool includeNonDefault);
|
|
|
|
void assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
|
|
|
|
bool includeNonDefault);
|
2016-07-16 12:26:39 +00:00
|
|
|
|
2024-09-29 14:45:00 -07:00
|
|
|
Ctx &ctx;
|
|
|
|
|
2023-02-08 10:50:44 -08:00
|
|
|
// Global symbols and a map from symbol name to the index. The order is not
|
|
|
|
// defined. We can use an arbitrary order, but it has to be deterministic even
|
|
|
|
// when cross linking.
|
2017-11-27 23:16:06 +00:00
|
|
|
llvm::DenseMap<llvm::CachedHashStringRef, int> symMap;
|
2021-12-23 16:49:38 -08:00
|
|
|
SmallVector<Symbol *, 0> symVector;
|
2015-09-03 18:56:20 +00:00
|
|
|
|
2016-11-15 18:41:52 +00:00
|
|
|
// A map from demangled symbol names to their symbol objects.
|
|
|
|
// This mapping is 1:N because two symbols with different versions
|
|
|
|
// can have the same name. We use this map to handle "extern C++ {}"
|
|
|
|
// directive in version scripts.
|
2022-11-26 19:19:15 -08:00
|
|
|
std::optional<llvm::StringMap<SmallVector<Symbol *, 0>>> demangledSyms;
|
2015-07-24 21:03:07 +00:00
|
|
|
};
|
|
|
|
|
2022-08-10 15:31:58 -04:00
|
|
|
} // namespace lld::elf
|
2015-07-24 21:03:07 +00:00
|
|
|
|
|
|
|
#endif
|