[lld/mac] Implement support for section$start and section$end symbols

With this, libclang_rt.profile_osx.a can be linked, that is coverage
and PGO-instrumented builds should now work with lld.

section$start and section$end symbols can create non-existing sections.
They're also undefined symbols that are only magic if there isn't a
regular symbol with their name, which means they need to be handled
in treatUndefined() instead of just looping over all existing
sections and adding start and end symbols like the ELF port does.

To represent the actual symbols, this uses absolute symbols that
get their value updated once an output section is laid out.

segment$start and segment$end are still missing for now, but they produce a
nicer error message after this patch.

Main part of PR50760.

Differential Revision: https://reviews.llvm.org/D106629
This commit is contained in:
Nico Weber 2021-07-15 12:54:42 -04:00
parent a085c23aa3
commit 04e8d0b62d
5 changed files with 363 additions and 1 deletions

View File

@ -16,3 +16,10 @@ using namespace lld::macho;
uint64_t OutputSection::getSegmentOffset() const {
return addr - parent->addr;
}
void OutputSection::assignAddressesToStartEndSymbols() {
for (Defined *d : sectionStartSymbols)
d->value = addr;
for (Defined *d : sectionEndSymbols)
d->value = addr + getSize();
}

View File

@ -9,14 +9,17 @@
#ifndef LLD_MACHO_OUTPUT_SECTION_H
#define LLD_MACHO_OUTPUT_SECTION_H
#include "Symbols.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include <limits>
namespace lld {
namespace macho {
class Defined;
class InputSection;
class OutputSegment;
@ -62,7 +65,11 @@ public:
virtual void writeTo(uint8_t *buf) const = 0;
void assignAddressesToStartEndSymbols();
StringRef name;
llvm::TinyPtrVector<Defined *> sectionStartSymbols;
llvm::TinyPtrVector<Defined *> sectionEndSymbols;
OutputSegment *parent = nullptr;
// For output sections that don't have explicit ordering requirements, their
// output order should be based on the order of the input sections they

View File

@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
@ -196,7 +198,73 @@ Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
return s;
}
// Selects which edge a magic boundary symbol refers to: the beginning
// (`Start`) or the end (`End`) of the named section or segment.
enum class Boundary { Start, End };
static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect,
Boundary which) {
StringRef segName, sectName;
std::tie(segName, sectName) = segSect.split('$');
// Attach the symbol to any InputSection that will end up in the right
// OutputSection -- it doesn't matter which one we pick.
// Don't bother looking through inputSections for a matching
// ConcatInputSection -- we need to create ConcatInputSection for
// non-existing sections anyways, and that codepath works even if we should
// already have a ConcatInputSection with the right name.
OutputSection *osec = nullptr;
// This looks for __TEXT,__cstring etc.
for (SyntheticSection *ssec : syntheticSections)
if (ssec->segname == segName && ssec->name == sectName) {
osec = ssec->isec->parent;
break;
}
if (!osec) {
ConcatInputSection *isec = make<ConcatInputSection>(segName, sectName);
// This runs after markLive() and is only called for Undefineds that are
// live. Marking the isec live ensures an OutputSection is created that the
// start/end symbol can refer to.
assert(sym.isLive());
isec->live = true;
// This runs after gatherInputSections(), so need to explicitly set parent
// and add to inputSections.
osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec);
inputSections.push_back(isec);
}
Defined *boundarySym = symtab->addSynthetic(
sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true,
/*includeInSymtab=*/false, /*referencedDynamically=*/false);
if (which == Boundary::Start)
osec->sectionStartSymbols.push_back(boundarySym);
else
osec->sectionEndSymbols.push_back(boundarySym);
}
// Placeholder handler for segment$start$<seg> / segment$end$<seg> symbols.
// These are not supported yet, so every such symbol is reported as an error.
static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName,
                                        Boundary which) {
  // FIXME: Implement. Until then none of the arguments are used.
  (void)sym;
  (void)segName;
  (void)which;
  error("segment$start$ and segment$end$ symbols are not yet implemented");
}
void lld::macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) {
// Handle start/end symbols.
StringRef name = sym.getName();
if (name.consume_front("section$start$"))
return handleSectionBoundarySymbol(sym, name, Boundary::Start);
if (name.consume_front("section$end$"))
return handleSectionBoundarySymbol(sym, name, Boundary::End);
if (name.consume_front("segment$start$"))
return handleSegmentBoundarySymbol(sym, name, Boundary::Start);
if (name.consume_front("segment$end$"))
return handleSegmentBoundarySymbol(sym, name, Boundary::End);
// Handle -U.
if (config->explicitDynamicLookups.count(sym.getName())) {
symtab->addDynamicLookup(sym.getName());

View File

@ -630,7 +630,12 @@ static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
void Writer::scanRelocations() {
TimeTraceScope timeScope("Scan relocations");
for (ConcatInputSection *isec : inputSections) {
// This can't use a for-each loop: It calls treatUndefinedSymbol(), which can
// add to inputSections, which invalidates inputSections's iterators.
for (size_t i = 0; i < inputSections.size(); ++i) {
ConcatInputSection *isec = inputSections[i];
if (isec->shouldOmitFromOutput())
continue;
@ -1029,6 +1034,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
osec->addr = addr;
osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff;
osec->finalize();
osec->assignAddressesToStartEndSymbols();
addr += osec->getSize();
fileOff += osec->getFileSize();

274
lld/test/MachO/start-end.s Normal file
View File

@ -0,0 +1,274 @@
# REQUIRES: x86
## FIXME: Add tests for segment$start$foo, segment$end$foo once implemented.
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
# RUN: %lld -lSystem %t/main.o %t/foo.o -o %t.out \
# RUN: -rename_section __FOO __bar __BAZ __quux \
# RUN: -rename_section __WHAT __ever __FOO __bar \
# RUN: -u 'section$start$__UFLAG_SEG$__uflag_sect' \
# RUN: -U 'section$start$__DYNAMIC$__lookup' \
# RUN: -U 'section$start$__DYNAMIC$__unref' \
# RUN: -e 'section$start$__TEXT$__text'
# RUN: llvm-objdump --macho --syms --section-headers %t.out > %t-dump.txt
# RUN: llvm-objdump --macho -d --no-symbolic-operands --no-show-raw-insn %t.out >> %t-dump.txt
# RUN: llvm-objdump --macho --function-starts %t.out >> %t-dump.txt
# RUN: FileCheck %s < %t-dump.txt
## Setting the entry point to the start of the __text section should
## set it to _main, since that's the first function in that section.
# RUN: llvm-objdump --macho --syms --all-headers %t.out \
# RUN: | FileCheck --check-prefix=MAINENTRY %s
# MAINENTRY: [[#%x, MAINADDR:]] g F __TEXT,__text _main
# MAINENTRY: LC_MAIN
# MAINENTRY-NEXT: cmdsize
# MAINENTRY-NEXT: entryoff [[#%d, MAINADDR - 0x100000000]]
## Nothing should change if we reorder two functions in the text segment.
## (Reorder some section$start/end symbols too for good measure.)
# RUN: %lld -lSystem %t/main.o %t/foo.o -o %t.ordered.out \
# RUN: -order_file %t/order.txt \
# RUN: -rename_section __FOO __bar __BAZ __quux \
# RUN: -rename_section __WHAT __ever __FOO __bar \
# RUN: -u 'section$start$__UFLAG_SEG$__uflag_sect' \
# RUN: -U 'section$start$__DYNAMIC$__lookup' \
# RUN: -U 'section$start$__DYNAMIC$__unref' \
# RUN: -e 'section$start$__TEXT$__text'
# RUN: llvm-objdump --macho --syms --section-headers %t.ordered.out > %t-ordered-dump.txt
# RUN: llvm-objdump --macho -d --no-symbolic-operands --no-show-raw-insn %t.ordered.out >> %t-ordered-dump.txt
## Dump the function starts of the reordered binary itself, so that every part
## of the dump checked below describes the same output file.
# RUN: llvm-objdump --macho --function-starts %t.ordered.out >> %t-ordered-dump.txt
# RUN: FileCheck %s < %t-ordered-dump.txt
## `-undefined dynamic_lookup` also shouldn't change anything.
# RUN: %lld -lSystem %t/main.o %t/foo.o -o %t.dl.out -undefined dynamic_lookup \
# RUN: -rename_section __FOO __bar __BAZ __quux \
# RUN: -rename_section __WHAT __ever __FOO __bar \
# RUN: -u 'section$start$__UFLAG_SEG$__uflag_sect' \
# RUN: -U 'section$start$__DYNAMIC$__lookup' \
# RUN: -U 'section$start$__DYNAMIC$__unref' \
# RUN: -e 'section$start$__TEXT$__text'
# RUN: llvm-objdump --macho --syms --section-headers %t.dl.out > %t-dump.dl.txt
# RUN: llvm-objdump --macho -d --no-symbolic-operands --no-show-raw-insn %t.dl.out >> %t-dump.dl.txt
## Dump the function starts of the dynamic_lookup binary itself, so that every
## part of the dump checked below describes the same output file.
# RUN: llvm-objdump --macho --function-starts %t.dl.out >> %t-dump.dl.txt
# RUN: FileCheck %s < %t-dump.dl.txt
## In the -order_file link above, however, the entry point is _otherfun
## instead of _main, since _otherfun is now at the start of the __text section.
# RUN: llvm-objdump --macho --syms --all-headers %t.ordered.out \
# RUN: | FileCheck --check-prefix=OTHERENTRY %s
# OTHERENTRY: [[#%x, OTHERADDR:]] g F __TEXT,__text _otherfun
# OTHERENTRY: LC_MAIN
# OTHERENTRY-NEXT: cmdsize
# OTHERENTRY-NEXT: entryoff [[#%d, OTHERADDR - 0x100000000]]
## Test that the link succeeds with dead-stripping enabled too.
# RUN: %lld -dead_strip -lSystem %t/main.o -o %t/stripped.out
## (Fun fact: `-e 'section$start$__TEXT$__text' -dead_strip` strips
## everything in the text section because markLive runs well before
## section$start symbols are replaced, so the entry point is just
## an undefined symbol that keeps nothing alive, and then later it
## sets the entry point to the start of the now-empty text section
## and the output program crashes when running. This matches ld64's
## behavior.)
# CHECK-LABEL: Sections:
# CHECK-NEXT: Idx Name Size VMA Type
# CHECK: 0 __text {{[0-9a-f]*}} [[#%x, TEXTSTART:]] TEXT
# CHECK: 1 __aftertext {{[0-9a-f]*}} [[#%x, TEXTEND:]]
# CHECK: 2 __cstring {{[0-9a-f]*}} [[#%x, CSTRINGSTART:]] DATA
# CHECK: 3 __aftercstring {{[0-9a-f]*}} [[#%x, CSTRINGEND:]]
# CHECK: 4 __data 00000008 [[#%x, DATASTART:]] DATA
# CHECK: 5 __llvm_orderfile 00000000 [[#%x, LLVMORDERFILESTART:]] DATA
# CHECK: 6 __mybss 00008000 [[#%x, MYBSSSTART:]] BSS
# CHECK: 7 __quux 0000002a [[#%x, QUUXSTART:]]
# CHECK: 8 __bar 00000059 [[#%x, BARSTART:]]
# CHECK: 9 __uflag_sect 00000000
# CHECK: 10 __lookup 00000000
# CHECK-NOT: symbol
# CHECK-NOT: __unref
# CHECK-LABEL: SYMBOL TABLE:
# CHECK-NOT: section$start$__TEXT$__text
# CHECK-NOT: section$end$__TEXT$__text
# CHECK-NOT: section$start$__TEXT$__cstring
# CHECK-NOT: section$end$__TEXT$__cstring
# CHECK-NOT: section$start$__DATA$__data
# CHECK-NOT: section$end$__DATA$__data
# CHECK-NOT: section$start$__DATA$__llvm_orderfile
# CHECK-NOT: section$end$__DATA$__llvm_orderfile
# CHECK-NOT: section$start$__DYNAMIC$__lookup
# CHECK-NOT: section$start$__DYNAMIC$__unref
# CHECK: section$end$ACTUAL$symbol
# CHECK: section$start$ACTUAL$symbol
# CHECK-LABEL: _main:
## The CHECK-SAMEs work around FileCheck's
## "error: numeric variable 'PC2' defined earlier in the same CHECK directive"
## limitation.
## The 7s are the length of a leaq instruction.
## section$start$__TEXT$__text / section$end$__TEXT$__text
# CHECK: [[#%x, PC1:]]:
# CHECK-SAME: leaq [[#%d, TEXTSTART - PC1 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC2:]]:
# CHECK-SAME: leaq [[#%d, TEXTEND - PC2 - 7]](%rip), %rbx
## section$start$__TEXT$__cstring / section$end$__TEXT$__cstring
# CHECK: [[#%x, PC3:]]:
# CHECK-SAME: leaq [[#%d, CSTRINGSTART - PC3 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC4:]]:
# CHECK-SAME: leaq [[#%d, CSTRINGEND - PC4 - 7]](%rip), %rbx
## section$start$__DATA$__data / section$end$__DATA$__data
# CHECK: [[#%x, PC5:]]:
# CHECK-SAME: leaq [[#%d, DATASTART - PC5 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC6:]]:
# CHECK-SAME: leaq [[#%d, DATASTART + 8 - PC6 - 7]](%rip), %rbx
## section$start$__MYBSS$__mybss / section$end$__MYBSS$__mybss
# CHECK: [[#%x, PC7:]]:
# CHECK-SAME: leaq [[#%d, MYBSSSTART - PC7 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC8:]]:
# CHECK-SAME: leaq [[#%d, MYBSSSTART + 0x8000 - PC8 - 7]](%rip), %rbx
## section$start$__DATA$__llvm_orderfile / section$end$__DATA$__llvm_orderfile
## This section has size 0.
# CHECK: [[#%x, PC9:]]:
# CHECK-SAME: leaq [[#%d, LLVMORDERFILESTART - PC9 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC10:]]:
# CHECK-SAME: leaq [[#%d, LLVMORDERFILESTART - PC10 - 7]](%rip), %rbx
## Section-rename tests.
## Input section __FOO/__bar is renamed to output section
## __BAZ/__quux by a -rename_section flag.
## section$start$__FOO$__bar ends up referring to the __BAZ/__quux section.
# CHECK: [[#%x, PC11:]]:
# CHECK-SAME: leaq [[#%d, QUUXSTART - PC11 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC12:]]:
# CHECK-SAME: leaq [[#%d, QUUXSTART + 42 - PC12 - 7]](%rip), %rbx
## section$start$__BAZ$__quux also refers to the __BAZ/__quux section.
# CHECK: [[#%x, PC13:]]:
# CHECK-SAME: leaq [[#%d, QUUXSTART - PC13 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC14:]]:
# CHECK-SAME: leaq [[#%d, QUUXSTART + 42 - PC14 - 7]](%rip), %rbx
## Input section __WHAT/__ever is renamed to output section
## __FOO/__bar by a -rename_section flag.
## section$start$__WHAT$__ever ends up referring to the __FOO/__bar section.
# CHECK: [[#%x, PC15:]]:
# CHECK-SAME: leaq [[#%d, BARSTART - PC15 - 7]](%rip), %rax
# CHECK-NEXT: [[#%x, PC16:]]:
# CHECK-SAME: leaq [[#%d, BARSTART + 89 - PC16 - 7]](%rip), %rbx
## The function_starts section should not have an entry for the
## section$end$__TEXT$__text symbol.
# CHECK: [[#%.16x, TEXTSTART]]
# CHECK-NOT: [[#%.16x, TEXTEND]]
#--- order.txt
_otherfun
_main
section$end$__TEXT$__text
section$start$__TEXT$__text
#--- main.s
.zerofill __MYBSS,__mybss,_zero_foo,0x8000
.globl section$start$ACTUAL$symbol
.globl section$end$ACTUAL$symbol
## Renamed to __BAZ,__quux by -rename_section
.section __FOO,__bar
.space 42
## Renamed to __FOO,__bar by -rename_section
.section __WHAT,__ever
.space 89
.text
.globl _main
_main:
# Basics: start/end of existing, normal sections.
# For __TEXT/__text, these magic symbols shouldn't be
# included in __function_starts
movq section$start$__TEXT$__text@GOTPCREL(%rip), %rax
movq section$end$__TEXT$__text@GOTPCREL(%rip), %rbx
# __TEXT/__cstring are interesting because they're not ConcatInputSections.
movq section$start$__TEXT$__cstring@GOTPCREL(%rip), %rax
movq section$end$__TEXT$__cstring@GOTPCREL(%rip), %rbx
# Vanilla __DATA/__data
movq section$start$__DATA$__data@GOTPCREL(%rip), %rax
movq section$end$__DATA$__data@GOTPCREL(%rip), %rbx
# Vanilla zerofill.
movq section$start$__MYBSS$__mybss@GOTPCREL(%rip), %rax
movq section$end$__MYBSS$__mybss@GOTPCREL(%rip), %rbx
# Referring to a non-existent section wills it into existence.
# This is needed for e.g. __DATA/__llvm_orderfile in libclang_rt.profile.
# This means `-u` can be used as a janky `-sectcreate`.
movq section$start$__DATA$__llvm_orderfile@GOTPCREL(%rip), %rax
movq section$end$__DATA$__llvm_orderfile@GOTPCREL(%rip), %rbx
# Section-rename tests.
movq section$start$__FOO$__bar@GOTPCREL(%rip), %rax
movq section$end$__FOO$__bar@GOTPCREL(%rip), %rbx
movq section$start$__BAZ$__quux@GOTPCREL(%rip), %rax
movq section$end$__BAZ$__quux@GOTPCREL(%rip), %rbx
movq section$start$__WHAT$__ever@GOTPCREL(%rip), %rax
movq section$end$__WHAT$__ever@GOTPCREL(%rip), %rbx
# If there are actual symbols with the magic names, the magic
# names lose their magic and just refer to those symbols (and
# no section is implicitly created for them).
movq section$start$ACTUAL$symbol@GOTPCREL(%rip), %rax
movq section$end$ACTUAL$symbol@GOTPCREL(%rip), %rbx
# -U section$start is not exported as dynamic_lookup, it just
# creates a section like -u.
movq section$start$__DYNAMIC$__lookup@GOTPCREL(%rip), %rax
movq section$end$__DYNAMIC$__lookup@GOTPCREL(%rip), %rbx
ret
.globl _otherfun
_otherfun:
ret
.section __TEXT,__aftertext
.fill 1
.cstring
.asciz "foo"
.asciz "barbaz"
.section __TEXT,__aftercstring
.fill 1
.data
.quad 0x1234
.subsections_via_symbols
#--- foo.s
.text
.globl section$start$ACTUAL$symbol
section$start$ACTUAL$symbol:
.fill 1
.globl section$end$ACTUAL$symbol
section$end$ACTUAL$symbol:
.fill 1
.subsections_via_symbols