[lld-macho] Remove symbols to __mod_init_func with -init_offsets (#97156)

When `-fixup_chains`/`-init_offsets` is used, a different section,
`__init_offsets`, is synthesized from `__mod_init_func`. If there are any
symbols defined inside `__mod_init_func`, they are added to the symbol
table unconditionally while processing the input files. Later, when
querying these symbols' addresses (when constructing the symtab or
exports trie), we crash with a null deref, as there is no output section
assigned to them.

Just making the symbols point to `__init_offsets` is a bad idea, as the
new section stores 32-bit integers instead of 64-bit pointers; accessing
the symbols would not do what the programmer intended. We should
entirely omit them from the output. This is what ld64 and ld-prime do.

This patch uses the same mechanism as dead-stripping to mark these
symbols as not needed in the output. There might be nicer fixes than this
workaround; this is discussed in #97155.

Fixes https://github.com/llvm/llvm-project/pull/79894#issuecomment-1944092892
Fixes #94716
This commit is contained in:
Daniel Bertalan 2024-07-06 15:41:40 +02:00 committed by GitHub
parent 2da0055924
commit d64efe42eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 43 additions and 2 deletions

View File

@ -1393,6 +1393,12 @@ static void handleExplicitExports() {
}
}
static void eraseInitializerSymbols() {
for (ConcatInputSection *isec : in.initOffsets->inputs())
for (Defined *sym : isec->symbols)
sym->used = false;
}
namespace lld {
namespace macho {
bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
@ -1971,6 +1977,11 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
if (config->deadStrip)
markLive();
// Ensure that no symbols point inside __mod_init_func sections if they are
// removed due to -init_offsets. This must run after dead stripping.
if (config->emitInitOffsets)
eraseInitializerSymbols();
// Categories are not subject to dead-strip. The __objc_catlist section is
// marked as NO_DEAD_STRIP and that propagates into all category data.
if (args.hasArg(OPT_check_category_conflicts))

View File

@ -640,7 +640,17 @@ void Writer::treatSpecialUndefineds() {
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
const lld::macho::Reloc &r) {
assert(sym->isLive());
if (!sym->isLive()) {
if (Defined *defined = dyn_cast<Defined>(sym)) {
if (config->emitInitOffsets &&
defined->isec()->getName() == section_names::moduleInitFunc)
fatal(isec->getLocation(r.offset) + ": cannot reference " +
sym->getName() +
" defined in __mod_init_func when -init_offsets is used");
}
assert(false && "referenced symbol must be live");
}
const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {

View File

@ -12,7 +12,7 @@
# RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out
# RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt
# RUN: od -An -txI %t/section.bin >> %t/dump.txt
# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt
# RUN: FileCheck --check-prefix=CONTENT --implicit-check-not=_init_ptr %s < %t/dump.txt
## This test checks that:
## - __mod_init_func is replaced by __init_offsets.
@ -21,6 +21,7 @@
## command line, and in the order they show up within __mod_init_func.
## - for undefined and dylib symbols, stubs are created, and the offsets point to those.
## - offsets are relative to __TEXT's address, they aren't an absolute virtual address.
## - symbols defined within __mod_init_func are ignored.
# FLAGS: sectname __init_offsets
# FLAGS-NEXT: segname __TEXT
@ -48,6 +49,7 @@
#--- first.s
.globl _first_init, ___isnan, _main
.globl _init_ptr_1
.text
_first_init:
ret
@ -55,6 +57,7 @@ _main:
ret
.section __DATA,__mod_init_func,mod_init_funcs
_init_ptr_1:
.quad _first_init
.quad ___isnan
@ -68,6 +71,7 @@ _second_init:
.section __DATA,__mod_init_func,mod_init_funcs
.quad _undefined
_init_ptr_2:
.quad _second_init
.subsections_via_symbols

View File

@ -0,0 +1,16 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not %lld -lSystem -init_offsets %t.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: {{.*}}init-offsets.s.tmp.o:(symbol _main+0x3): cannot reference _init_slot defined in __mod_init_func when -init_offsets is used
.globl _main
.text
_main:
leaq _init_slot(%rip), %rax
.section __DATA,__mod_init_func,mod_init_funcs
_init_slot:
.quad _main