ELF: Introduce --randomize-section-padding option.

The --randomize-section-padding option randomly inserts padding between
input sections and at the start of each segment, using the given seed.
It is intended to be used in A/B
experiments to determine the average effect of a change on program
performance, while controlling for effects such as false sharing in
the cache which may introduce measurement bias. For more details,
see the RFC:

https://discourse.llvm.org/t/rfc-lld-feature-for-controlling-for-code-size-dependent-measurement-bias/83334

Reviewers: smithp35, MaskRay

Reviewed By: MaskRay, smithp35

Pull Request: https://github.com/llvm/llvm-project/pull/117653
This commit is contained in:
Peter Collingbourne 2024-12-13 11:52:09 -08:00 committed by GitHub
parent 2135babe28
commit 64da33a589
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 208 additions and 2 deletions

View File

@ -320,6 +320,7 @@ struct Config {
bool printGcSections;
bool printIcfSections;
bool printMemoryUsage;
std::optional<uint64_t> randomizeSectionPadding;
bool rejectMismatch;
bool relax;
bool relaxGP;

View File

@ -1410,6 +1410,9 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.searchPaths = args::getStrings(args, OPT_library_path);
ctx.arg.sectionStartMap = getSectionStartMap(ctx, args);
ctx.arg.shared = args.hasArg(OPT_shared);
if (args.hasArg(OPT_randomize_section_padding))
ctx.arg.randomizeSectionPadding =
args::getInteger(args, OPT_randomize_section_padding, 0);
ctx.arg.singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true);
ctx.arg.soName = args.getLastArgValue(OPT_soname);
ctx.arg.sortSection = getSortSection(ctx, args);

View File

@ -434,6 +434,9 @@ defm section_start: Eq<"section-start", "Set address of section">,
def shared: F<"shared">, HelpText<"Build a shared object">;
def randomize_section_padding: JJ<"randomize-section-padding=">,
HelpText<"Randomly insert padding between input sections and at the start of each segment using given seed">;
defm soname: Eq<"soname", "Set DT_SONAME">;
defm sort_section:

View File

@ -124,14 +124,14 @@ public:
void sortInitFini();
void sortCtorsDtors();
std::array<uint8_t, 4> getFiller(Ctx &);
// Used for implementation of --compress-debug-sections and
// --compress-sections.
CompressedData compressed;
private:
SmallVector<InputSection *, 0> storage;
std::array<uint8_t, 4> getFiller(Ctx &);
};
struct OutputDesc final : SectionCommand {

View File

@ -2753,6 +2753,21 @@ RelroPaddingSection::RelroPaddingSection(Ctx &ctx)
: SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE,
1) {}
// Constructs a synthetic padding section of `size` bytes that belongs to the
// output section `parent`. The section is named ".randomize_padding" and is
// created as SHT_PROGBITS with SHF_ALLOC; the trailing 1 passed to the base
// constructor is presumably the section alignment (confirm against
// SyntheticSection's constructor).
RandomizePaddingSection::RandomizePaddingSection(Ctx &ctx, uint64_t size,
OutputSection *parent)
: SyntheticSection(ctx, ".randomize_padding", SHT_PROGBITS, SHF_ALLOC, 1),
size(size) {
// The parent is assigned directly here, so writeTo() can query it for the
// filler pattern via getParent().
this->parent = parent;
}
// Fills the `size` bytes starting at `buf` with the parent output section's
// 4-byte filler pattern, repeated back to back. If `size` is not a multiple
// of four, the final partial chunk receives the leading bytes of the pattern.
void RandomizePaddingSection::writeTo(uint8_t *buf) {
  const std::array<uint8_t, 4> pattern = getParent()->getFiller(ctx);
  uint64_t remaining = size;
  // Emit whole copies of the pattern while at least four bytes are left.
  while (remaining >= 4) {
    memcpy(buf, pattern.data(), 4);
    buf += 4;
    remaining -= 4;
  }
  // Trailing 0..3 bytes: copy only as much of the pattern as still fits.
  memcpy(buf, pattern.data(), remaining);
}
// The string hash function for .gdb_index.
static uint32_t computeGdbHash(StringRef s) {
uint32_t h = 0;

View File

@ -796,6 +796,15 @@ public:
void writeTo(uint8_t *buf) override {}
};
// Synthetic section that holds the padding bytes inserted by
// --randomize-section-padding. Its size is fixed at construction time and its
// contents are produced by writeTo().
class RandomizePaddingSection final : public SyntheticSection {
public:
  // Creates a padding section of `size` bytes owned by output section
  // `parent`.
  RandomizePaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent);

  // Returns the number of padding bytes this section occupies.
  size_t getSize() const override { return size; }

  // Writes the padding bytes into `buf`.
  void writeTo(uint8_t *buf) override;

private:
  // Number of padding bytes, as passed to the constructor.
  uint64_t size;
};
// Used by the merged DWARF32 .debug_names (a per-module index). If we
// move to DWARF64, most of this data will need to be re-sized.
class DebugNamesBaseSection : public SyntheticSection {

View File

@ -1449,6 +1449,40 @@ static void finalizeSynthetic(Ctx &ctx, SyntheticSection *sec) {
}
}
// Returns true if random padding may be inserted into output section `sec`.
// Eligibility is decided purely by name: any section whose name starts with
// ".text", or whose name exactly matches one of the entries listed below.
static bool canInsertPadding(OutputSection *sec) {
  StringRef name = sec->name;
  if (name.starts_with(".text"))
    return true;
  // Names that must match exactly (no prefix matching).
  static const char *const exactNames[] = {
      ".bss",   ".data",    ".data.rel.ro", ".lbss",
      ".ldata", ".lrodata", ".ltext",       ".rodata"};
  for (const char *candidate : exactNames)
    if (name == candidate)
      return true;
  return false;
}
// Implements --randomize-section-padding. Walks every output section accepted
// by canInsertPadding() and rebuilds the input-section list of each of its
// InputSectionDescriptions with RandomizePaddingSections mixed in:
//  * the first description visited after the output section's PT_LOAD
//    differs from the previously seen one gets a leading padding section of
//    random size in [0, maxPageSize);
//  * each input section is preceded, with probability 1/16, by a padding
//    section whose size equals that input section's alignment.
// All random draws come from one generator seeded with the option's value.
static void randomizeSectionPadding(Ctx &ctx) {
  std::mt19937 rng(*ctx.arg.randomizeSectionPadding);
  PhdrEntry *lastPtLoad = nullptr;
  for (OutputSection *osec : ctx.outputSections) {
    if (!canInsertPadding(osec))
      continue;
    for (SectionCommand *cmd : osec->commands) {
      auto *desc = dyn_cast<InputSectionDescription>(cmd);
      if (!desc)
        continue;

      SmallVector<InputSection *, 0> padded;
      // Entering a different PT_LOAD: shift its contents by a random offset.
      if (osec->ptLoad != lastPtLoad) {
        lastPtLoad = osec->ptLoad;
        padded.push_back(make<RandomizePaddingSection>(
            ctx, rng() % ctx.arg.maxPageSize, osec));
      }

      for (InputSection *sec : desc->sections) {
        // One draw per input section; padding is inserted 1 time in 16.
        const bool addPadding = rng() % 16 == 0;
        if (addPadding)
          padded.push_back(
              make<RandomizePaddingSection>(ctx, sec->addralign, osec));
        padded.push_back(sec);
      }
      desc->sections = std::move(padded);
    }
  }
}
// We need to generate and finalize the content that depends on the address of
// InputSections. As the generation of the content may also alter InputSection
// addresses we must converge to a fixed point. We do that here. See the comment
@ -1475,6 +1509,9 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
if (ctx.arg.emachine == EM_HEXAGON)
hexagonTLSSymbolUpdate(ctx);
if (ctx.arg.randomizeSectionPadding)
randomizeSectionPadding(ctx);
uint32_t pass = 0, assignPasses = 0;
for (;;) {
bool changed = ctx.target->needsThunks

View File

@ -529,6 +529,19 @@ and
.It Fl -pop-state
Restore the states saved by
.Fl -push-state.
.It Fl -randomize-section-padding Ns = Ns Ar seed
Randomly insert padding between input sections and at the start of each segment using the given seed.
Padding is inserted into output sections with names matching the following patterns:
.Cm .bss ,
.Cm .data ,
.Cm .data.rel.ro ,
.Cm .lbss ,
.Cm .ldata ,
.Cm .lrodata ,
.Cm .ltext ,
.Cm .rodata
and
.Cm .text* .
.It Fl -relax-gp
Enable global pointer relaxation for RISC-V.
.It Fl -relocatable , Fl r

View File

@ -0,0 +1,125 @@
# REQUIRES: x86
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o
## --randomize-section-padding= inserts segment offset padding and pre-section
## padding, and does not affect flags. Segment offset padding is only inserted
## when PT_LOAD changes, as shown by .bss size (.data and .bss share a PT_LOAD).
# RUN: ld.lld --randomize-section-padding=6 %t/a.o -o %t/a.out
# RUN: llvm-readelf -sS -x .rodata -x .text -x .data %t/a.out | FileCheck --check-prefix=PAD6 %s
# PAD6: .rodata PROGBITS 0000000000200158 000158 000b8d 00 A 0 0 1
# PAD6-NEXT: .text PROGBITS 0000000000201ce8 000ce8 000270 00 AX 0 0 4
# PAD6-NEXT: .data PROGBITS 0000000000202f58 000f58 000941 00 WA 0 0 1
# PAD6-NEXT: .bss NOBITS 0000000000203899 001899 000003 00 WA 0 0 1
# PAD6: 0000000000203899 0 NOTYPE LOCAL DEFAULT 4 a
# PAD6: 000000000020389a 0 NOTYPE LOCAL DEFAULT 4 b
# PAD6: 000000000020389b 0 NOTYPE LOCAL DEFAULT 4 c
# PAD6: Hex dump of section '.rodata':
# PAD6: 0x00200cd8 00000000 00000000 00000102 03
# PAD6: Hex dump of section '.text':
# PAD6: 0x00201f48 cccccccc cccccccc cccccccc 0405cc06
# PAD6: Hex dump of section '.data':
# PAD6: 0x00203888 00000000 00000000 00000000 00000708
# PAD6: 0x00203898 09
## Size of segment offset padding and location of pre-section padding is
## dependent on the seed.
# RUN: ld.lld --randomize-section-padding=46 %t/a.o -o %t/a.out
# RUN: llvm-readelf -sS -x .rodata -x .text -x .data %t/a.out | FileCheck --check-prefix=PAD46 %s
# PAD46: .rodata PROGBITS 0000000000200158 000158 000cc0 00 A 0 0 1
# PAD46-NEXT: .text PROGBITS 0000000000201e18 000e18 0009bf 00 AX 0 0 4
# PAD46-NEXT: .data PROGBITS 00000000002037d7 0017d7 000540 00 WA 0 0 1
# PAD46-NEXT: .bss NOBITS 0000000000203d17 001d17 000004 00 WA 0 0 1
# PAD46: 0000000000203d17 0 NOTYPE LOCAL DEFAULT 4 a
# PAD46: 0000000000203d18 0 NOTYPE LOCAL DEFAULT 4 b
# PAD46: 0000000000203d1a 0 NOTYPE LOCAL DEFAULT 4 c
# PAD46: Hex dump of section '.rodata':
# PAD46: 0x00200e08 00000000 00000000 00000000 00010203
# PAD46: Hex dump of section '.text':
# PAD46: 0x002027c8 cccccccc cccccccc cccccccc 040506
# PAD46: Hex dump of section '.data':
# PAD46: 0x00203d07 00000000 00000000 00000000 07000809
## When there are multiple InputSectionDescriptions for an output section,
## segment offset padding is inserted in the first InputSectionDescription.
# RUN: ld.lld --randomize-section-padding=46 %t/a.o %t/a.lds -o %t/a.out
# RUN: llvm-readelf -sS -x .rodata -x .text -x .data %t/a.out | FileCheck --check-prefix=PAD46-LDS %s
# PAD46-LDS: .rodata PROGBITS 0000000000000158 000158 000cc0 00 A 0 0 1
# PAD46-LDS-NEXT: .text PROGBITS 0000000000001000 001000 0009c0 00 AX 0 0 4
# PAD46-LDS-NEXT: .data PROGBITS 0000000000002000 002000 000540 00 WA 0 0 1
# PAD46-LDS-NEXT: .bss NOBITS 0000000000002540 002540 000004 00 WA 0 0 1
# PAD46-LDS: 0000000000002543 0 NOTYPE LOCAL DEFAULT 4 a
# PAD46-LDS: 0000000000002541 0 NOTYPE LOCAL DEFAULT 4 b
# PAD46-LDS: 0000000000002540 0 NOTYPE LOCAL DEFAULT 4 c
# PAD46-LDS: Hex dump of section '.rodata':
# PAD46-LDS: 0x00000e08 00000000 00000000 00000000 00030201 ................
# PAD46-LDS: Hex dump of section '.text':
# PAD46-LDS: 0x000019b0 cccccccc cccccccc cccc0605 04cccccc ................
# PAD46-LDS: Hex dump of section '.data':
# PAD46-LDS: 0x00002530 00000000 00000000 00000000 09000807 ................
#--- a.s
.section .rodata.a,"a",@progbits
.byte 1
.section .rodata.b,"a",@progbits
.byte 2
.section .rodata.c,"a",@progbits
.byte 3
.section .text.a,"ax",@progbits
.byte 4
.section .text.b,"ax",@progbits
.byte 5
.section .text.c,"ax",@progbits
.byte 6
.section .data.a,"aw",@progbits
.byte 7
.section .data.b,"aw",@progbits
.byte 8
.section .data.c,"aw",@progbits
.byte 9
.section .bss.a,"a",@nobits
a:
.zero 1
.section .bss.b,"a",@nobits
b:
.zero 1
.section .bss.c,"a",@nobits
c:
.zero 1
#--- a.lds
SECTIONS {
. = SIZEOF_HEADERS;
.rodata : { *(.rodata.c) *(.rodata.b) *(.rodata.a) }
. = ALIGN(CONSTANT(MAXPAGESIZE));
.text : { *(.text.c) *(.text.b) *(.text.a) }
. = ALIGN(CONSTANT(MAXPAGESIZE));
.data : { *(.data.c) *(.data.b) *(.data.a) }
.bss : { *(.bss.c) *(.bss.b) *(.bss.a) }
}