mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 19:16:05 +00:00

Previously we would ignore all undefined symbols when using `-shared` or `-pie`. All undefined symbols would be treated as imports regardless of whether those symbols were defined in any shared library. With this change we now track symbols in shared libraries and report undefined symbols in the main program by default. The old behavior is still available via the `--unresolved-symbols=import-dynamic` command line flag. The rationale for allowing this type of breaking change is that `-pie` and `-shared` are both still experimental and will warn as such, unless `--experimental-pic` is passed. As part of this change the linker now models shared library symbols via new SharedFunctionSymbol and SharedDataSymbol types. I've also added a new `--no-shlib-sigcheck` option that bypasses the checking of function signatures in shared libraries. This is specifically required by emscripten in the case where the imports/exports of shared libraries have been modified via JS type legalization (this is only needed when targeting old JS engines where bigint is not yet available). See https://github.com/emscripten-core/emscripten/issues/18198
233 lines
7.3 KiB
C++
233 lines
7.3 KiB
C++
//===- MarkLive.cpp -------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements --gc-sections, which is a feature to remove unused
|
|
// chunks from the output. Unused chunks are those that are not reachable from
|
|
// known root symbols or chunks. This feature is implemented as a mark-sweep
|
|
// garbage collector.
|
|
//
|
|
// Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
|
|
// default. Starting with the GC-roots, visit all reachable chunks and set their
|
|
// Live bits. The Writer will then ignore chunks whose Live bits are off, so
|
|
// that such chunks do not appear in the output.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "MarkLive.h"
|
|
#include "Config.h"
|
|
#include "InputChunks.h"
|
|
#include "InputElement.h"
|
|
#include "SymbolTable.h"
|
|
#include "Symbols.h"
|
|
|
|
#define DEBUG_TYPE "lld"
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::wasm;
|
|
|
|
namespace lld::wasm {
|
|
|
|
namespace {
|
|
|
|
class MarkLive {
|
|
public:
|
|
void run();
|
|
|
|
private:
|
|
void enqueue(Symbol *sym);
|
|
void enqueue(InputChunk *chunk);
|
|
void enqueueInitFunctions(const ObjFile *sym);
|
|
void enqueueRetainedSegments(const ObjFile *file);
|
|
void mark();
|
|
bool isCallCtorsLive();
|
|
|
|
// A list of chunks to visit.
|
|
SmallVector<InputChunk *, 256> queue;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
// Marks `sym` live and schedules its chunk for relocation scanning. Null and
// already-live symbols are ignored, which also terminates the recursion that
// happens through enqueueInitFunctions().
void MarkLive::enqueue(Symbol *sym) {
  if (!sym)
    return;
  if (sym->isLive())
    return;
  LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");

  // Sample the defining file's liveness *before* calling markLive() on the
  // symbol. NOTE(review): presumably markLive() also flips the file's live
  // state, which is why the order matters -- confirm against Symbols.cpp.
  InputFile *definingFile = sym->getFile();
  const bool pullsInImplicitDeps =
      definingFile && !definingFile->isLive() && sym->isDefined();

  sym->markLive();

  if (pullsInImplicitDeps) {
    if (auto *object = dyn_cast<ObjFile>(definingFile)) {
      // The synthetic callCtors function references every ctor of the object
      // defining this symbol, but it carries no relocations, so the ctors
      // would never be reached by mark(). Enqueue them by hand.
      enqueueInitFunctions(object);
      // Likewise keep the retained segments of that object.
      enqueueRetainedSegments(object);
    }
  }

  InputChunk *definingChunk = sym->getChunk();
  if (definingChunk)
    queue.push_back(definingChunk);
}
|
|
|
|
// Sets the live bit on `chunk` directly and schedules it for relocation
// scanning. Unlike the Symbol overload, this performs no null or already-live
// check, so callers must pass a valid chunk.
void MarkLive::enqueue(InputChunk *chunk) {
  LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");

  chunk->live = true;
  queue.push_back(chunk);
}
|
|
|
|
// The ctor functions are all referenced by the synthetic callCtors
|
|
// function. However, this function does not contain relocations so we
|
|
// have to manually mark the ctors as live.
|
|
void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
|
|
const WasmLinkingData &l = obj->getWasmObj()->linkingData();
|
|
for (const WasmInitFunc &f : l.InitFunctions) {
|
|
auto *initSym = obj->getFunctionSymbol(f.Symbol);
|
|
if (!initSym->isDiscarded())
|
|
enqueue(initSym);
|
|
}
|
|
}
|
|
|
|
// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
|
|
// usually used to retain segments without having symbol table entry.
|
|
void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
|
|
for (InputChunk *chunk : file->segments)
|
|
if (chunk->isRetained())
|
|
enqueue(chunk);
|
|
}
|
|
|
|
void MarkLive::run() {
|
|
// Add GC root symbols.
|
|
if (!config->entry.empty())
|
|
enqueue(symtab->find(config->entry));
|
|
|
|
// We need to preserve any no-strip or exported symbol
|
|
for (Symbol *sym : symtab->symbols())
|
|
if (sym->isNoStrip() || sym->isExported())
|
|
enqueue(sym);
|
|
|
|
if (WasmSym::callDtors)
|
|
enqueue(WasmSym::callDtors);
|
|
|
|
for (const ObjFile *obj : ctx.objectFiles)
|
|
if (obj->isLive()) {
|
|
// Enqueue constructors in objects explicitly live from the command-line.
|
|
enqueueInitFunctions(obj);
|
|
// Enqueue retained segments in objects explicitly live from the
|
|
// command-line.
|
|
enqueueRetainedSegments(obj);
|
|
}
|
|
|
|
mark();
|
|
|
|
// If we have any non-discarded init functions, mark `__wasm_call_ctors` as
|
|
// live so that we assign it an index and call it.
|
|
if (isCallCtorsLive())
|
|
WasmSym::callCtors->markLive();
|
|
}
|
|
|
|
void MarkLive::mark() {
|
|
// Follow relocations to mark all reachable chunks.
|
|
while (!queue.empty()) {
|
|
InputChunk *c = queue.pop_back_val();
|
|
|
|
for (const WasmRelocation reloc : c->getRelocations()) {
|
|
if (reloc.Type == R_WASM_TYPE_INDEX_LEB)
|
|
continue;
|
|
Symbol *sym = c->file->getSymbol(reloc.Index);
|
|
|
|
// If the function has been assigned the special index zero in the table,
|
|
// the relocation doesn't pull in the function body, since the function
|
|
// won't actually go in the table (the runtime will trap attempts to call
|
|
// that index, since we don't use it). A function with a table index of
|
|
// zero is only reachable via "call", not via "call_indirect". The stub
|
|
// functions used for weak-undefined symbols have this behaviour (compare
|
|
// equal to null pointer, only reachable via direct call).
|
|
if (reloc.Type == R_WASM_TABLE_INDEX_SLEB ||
|
|
reloc.Type == R_WASM_TABLE_INDEX_SLEB64 ||
|
|
reloc.Type == R_WASM_TABLE_INDEX_I32 ||
|
|
reloc.Type == R_WASM_TABLE_INDEX_I64) {
|
|
auto *funcSym = cast<FunctionSymbol>(sym);
|
|
if (funcSym->isStub)
|
|
continue;
|
|
}
|
|
|
|
enqueue(sym);
|
|
}
|
|
}
|
|
}
|
|
|
|
void markLive() {
|
|
if (!config->gcSections)
|
|
return;
|
|
|
|
LLVM_DEBUG(dbgs() << "markLive\n");
|
|
|
|
MarkLive marker;
|
|
marker.run();
|
|
|
|
// Report garbage-collected sections.
|
|
if (config->printGcSections) {
|
|
for (const ObjFile *obj : ctx.objectFiles) {
|
|
for (InputChunk *c : obj->functions)
|
|
if (!c->live)
|
|
message("removing unused section " + toString(c));
|
|
for (InputChunk *c : obj->segments)
|
|
if (!c->live)
|
|
message("removing unused section " + toString(c));
|
|
for (InputGlobal *g : obj->globals)
|
|
if (!g->live)
|
|
message("removing unused section " + toString(g));
|
|
for (InputTag *t : obj->tags)
|
|
if (!t->live)
|
|
message("removing unused section " + toString(t));
|
|
for (InputTable *t : obj->tables)
|
|
if (!t->live)
|
|
message("removing unused section " + toString(t));
|
|
}
|
|
for (InputChunk *c : ctx.syntheticFunctions)
|
|
if (!c->live)
|
|
message("removing unused section " + toString(c));
|
|
for (InputGlobal *g : ctx.syntheticGlobals)
|
|
if (!g->live)
|
|
message("removing unused section " + toString(g));
|
|
for (InputTable *t : ctx.syntheticTables)
|
|
if (!t->live)
|
|
message("removing unused section " + toString(t));
|
|
}
|
|
}
|
|
|
|
bool MarkLive::isCallCtorsLive() {
|
|
// In a reloctable link, we don't call `__wasm_call_ctors`.
|
|
if (config->relocatable)
|
|
return false;
|
|
|
|
// In Emscripten-style PIC, we call `__wasm_call_ctors` which calls
|
|
// `__wasm_apply_data_relocs`.
|
|
if (ctx.isPic)
|
|
return true;
|
|
|
|
// If there are any init functions, mark `__wasm_call_ctors` live so that
|
|
// it can call them.
|
|
for (const ObjFile *file : ctx.objectFiles) {
|
|
const WasmLinkingData &l = file->getWasmObj()->linkingData();
|
|
for (const WasmInitFunc &f : l.InitFunctions) {
|
|
auto *sym = file->getFunctionSymbol(f.Symbol);
|
|
if (!sym->isDiscarded() && sym->isLive())
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
} // namespace lld::wasm
|