diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 59460105f231..c2ff2ce3df17 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -680,6 +680,15 @@ public: /// the execution of the binary is completed. std::optional FiniFunctionAddress; + /// DT_FINI. + std::optional FiniAddress; + + /// DT_FINI_ARRAY. Only used when DT_FINI is not set. + std::optional FiniArrayAddress; + + /// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set. + std::optional FiniArraySize; + /// Page alignment used for code layout. uint64_t PageAlign{HugePageSize}; diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index 326d088d1f04..92ab6ea0d38e 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -375,8 +375,12 @@ public: /// Add a dynamic relocation at the given /p Offset. void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type, uint64_t Addend, uint64_t Value = 0) { - assert(Offset < getSize() && "offset not within section bounds"); - DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value}); + addDynamicRelocation(Relocation{Offset, Symbol, Type, Addend, Value}); + } + + void addDynamicRelocation(const Relocation &Reloc) { + assert(Reloc.Offset < getSize() && "offset not within section bounds"); + DynamicRelocations.emplace(Reloc); } /// Add relocation against the original contents of this section. @@ -410,6 +414,18 @@ public: return Itr != DynamicRelocations.end() ? &*Itr : nullptr; } + std::optional takeDynamicRelocationAt(uint64_t Offset) { + Relocation Key{Offset, 0, 0, 0, 0}; + auto Itr = DynamicRelocations.find(Key); + + if (Itr == DynamicRelocations.end()) + return std::nullopt; + + Relocation Reloc = *Itr; + DynamicRelocations.erase(Itr); + return Reloc; + } + uint64_t hash(const BinaryData &BD) const { std::map Cache; return hash(BD, Cache); diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 2a421c5cfaa4..7cb80d52c770 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -95,6 +95,15 @@ private: /// from meta data in the file. void discoverFileObjects(); + /// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation. + /// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was + /// found. + Error discoverRtFiniAddress(); + + /// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its + /// first entry to point to the instrumentation library's fini address. + void updateRtFiniReloc(); + /// Create and initialize metadata rewriters for this instance. void initializeMetadataManager(); diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index e4d0f26c305f..c9165d973c71 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -365,7 +365,9 @@ static uint64_t encodeValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) { switch (Type) { default: llvm_unreachable("unsupported relocation"); + case ELF::R_AARCH64_ABS16: case ELF::R_AARCH64_ABS32: + case ELF::R_AARCH64_ABS64: break; case ELF::R_AARCH64_PREL16: case ELF::R_AARCH64_PREL32: diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index abdbb79e8eb6..6412b1b9095b 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -704,6 +704,10 @@ Error RewriteInstance::run() { adjustCommandLineOptions(); discoverFileObjects(); + if (opts::Instrument && !BC->IsStaticExecutable) + if (Error E = discoverRtFiniAddress()) + return E; + preprocessProfileData(); // Skip disassembling if we have a translation table and we are running an @@ -740,6 +744,9 @@ Error RewriteInstance::run() { updateMetadata(); + if (opts::Instrument && !BC->IsStaticExecutable) + updateRtFiniReloc(); + if (opts::LinuxKernelMode) { errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; return Error::success(); @@ -1280,6 +1287,77 @@ void RewriteInstance::discoverFileObjects() { registerFragments(); } +Error RewriteInstance::discoverRtFiniAddress() { + // Use DT_FINI if it's available. + if (BC->FiniAddress) { + BC->FiniFunctionAddress = BC->FiniAddress; + return Error::success(); + } + + if (!BC->FiniArrayAddress || !BC->FiniArraySize) { + return createStringError( + std::errc::not_supported, + "Instrumentation needs either DT_FINI or DT_FINI_ARRAY"); + } + + if (*BC->FiniArraySize < BC->AsmInfo->getCodePointerSize()) { + return createStringError(std::errc::not_supported, + "Need at least 1 DT_FINI_ARRAY slot"); + } + + ErrorOr FiniArraySection = + BC->getSectionForAddress(*BC->FiniArrayAddress); + if (auto EC = FiniArraySection.getError()) + return errorCodeToError(EC); + + if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) { + BC->FiniFunctionAddress = Reloc->Addend; + return Error::success(); + } + + if (const Relocation *Reloc = FiniArraySection->getRelocationAt(0)) { + BC->FiniFunctionAddress = Reloc->Value; + return Error::success(); + } + + return createStringError(std::errc::not_supported, + "No relocation for first DT_FINI_ARRAY slot"); +} + +void RewriteInstance::updateRtFiniReloc() { + // Updating DT_FINI is handled by patchELFDynamic. + if (BC->FiniAddress) + return; + + const RuntimeLibrary *RT = BC->getRuntimeLibrary(); + if (!RT || !RT->getRuntimeFiniAddress()) + return; + + assert(BC->FiniArrayAddress && BC->FiniArraySize && + "inconsistent .fini_array state"); + + ErrorOr FiniArraySection = + BC->getSectionForAddress(*BC->FiniArrayAddress); + assert(FiniArraySection && ".fini_array removed"); + + if (std::optional Reloc = + FiniArraySection->takeDynamicRelocationAt(0)) { + assert(Reloc->Addend == BC->FiniFunctionAddress && + "inconsistent .fini_array dynamic relocation"); + Reloc->Addend = RT->getRuntimeFiniAddress(); + FiniArraySection->addDynamicRelocation(*Reloc); + } + + // Update the static relocation by adding a pending relocation which will get + // patched when flushPendingRelocations is called in rewriteFile. Note that + // flushPendingRelocations will calculate the value to patch as + // "Symbol + Addend". Since we don't have a symbol, just set the addend to the + // desired value. + FiniArraySection->addPendingRelocation(Relocation{ + /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(), + /*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0}); +} + void RewriteInstance::registerFragments() { if (!BC->HasSplitFunctions) return; @@ -5135,7 +5213,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile *File) { } break; case ELF::DT_FINI: - BC->FiniFunctionAddress = Dyn.getPtr(); + BC->FiniAddress = Dyn.getPtr(); + break; + case ELF::DT_FINI_ARRAY: + BC->FiniArrayAddress = Dyn.getPtr(); + break; + case ELF::DT_FINI_ARRAYSZ: + BC->FiniArraySize = Dyn.getPtr(); break; case ELF::DT_RELA: DynamicRelocationsAddress = Dyn.getPtr(); diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index c6c284a3f784..cd1b975be7b9 100644 --- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -65,13 +65,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions( exit(1); } - if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) { - errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic " - "section but instrumentation currently relies on patching " - "DT_FINI to write the profile\n"; - exit(1); - } - if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) && opts::InstrumentationFileAppendPID) { errs() diff --git a/bolt/test/AArch64/hook-fini.s b/bolt/test/AArch64/hook-fini.s new file mode 100644 index 000000000000..a07187c2ef89 --- /dev/null +++ b/bolt/test/AArch64/hook-fini.s @@ -0,0 +1,103 @@ +## Test the different ways of hooking the fini function for instrumentation (via +## DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE +## binaries because of the different ways of handling relocations (static or +## dynamic). +## All tests perform the following steps: +## - Compile and link for the case to be tested +## - Some sanity-checks on the dynamic section and relocations in the binary to +## verify it has the shape we want for testing: +## - DT_FINI or DT_FINI_ARRAY in dynamic section +## - No relative relocations for non-PIE +## - Instrument +## - Verify generated binary +# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}} + +# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe +# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s +# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s +# RUN: llvm-bolt %t.exe -o %t --instrument +# RUN: llvm-readelf -drs %t | FileCheck --check-prefix=CHECK-FINI %s + +# RUN: %clang %cflags -pie %s -Wl,-q,-fini=0 -o %t-no-fini.exe +# RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s +# RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s +# RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument +# RUN: llvm-readelf -drs %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI %s +# RUN: llvm-readelf -ds -x .fini_array %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI-RELOC %s + +## Create a dummy shared library to link against to force creation of the dynamic section. +# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stubs.so +# RUN: %clang %cflags %s -no-pie -Wl,-q,-fini=0 %t-stub.so -o %t-no-pie-no-fini.exe +# RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s +# RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument +# RUN: llvm-readelf -ds -x .fini_array %t-no-pie-no-fini | FileCheck --check-prefix=CHECK-NO-PIE-NO-FINI %s + +## With fini: dynamic section should contain DT_FINI +# DYN-FINI: (FINI) + +## Without fini: dynamic section should only contain DT_FINI_ARRAY +# DYN-NO-FINI-NOT: (FINI) +# DYN-NO-FINI: (FINI_ARRAY) +# DYN-NO-FINI: (FINI_ARRAYSZ) + +## With PIE: binary should have relative relocations +# RELOC-PIE: R_AARCH64_RELATIVE + +## Without PIE: binary should not have relative relocations +# RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE + +## Check that DT_FINI is set to __bolt_runtime_fini +# CHECK-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries: +# CHECK-FINI-DAG: (FINI) 0x[[FINI:[[:xdigit:]]+]] +# CHECK-FINI-DAG: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]] +## Check that the dynamic relocation at .fini_array was not patched +# CHECK-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries +# CHECK-FINI-NOT: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI]] +# CHECK-FINI: Symbol table '.symtab' contains {{.*}} entries: +# CHECK-FINI: {{0+}}[[FINI]] {{.*}} __bolt_runtime_fini + +## Check that DT_FINI_ARRAY has a dynamic relocation for __bolt_runtime_fini +# CHECK-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries: +# CHECK-NO-FINI-NOT: (FINI) +# CHECK-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]] +# CHECK-NO-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries +# CHECK-NO-FINI: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI_ADDR:[[:xdigit:]]+]] +# CHECK-NO-FINI: Symbol table '.symtab' contains {{.*}} entries: +# CHECK-NO-FINI: {{0+}}[[FINI_ADDR]] {{.*}} __bolt_runtime_fini + +## Check that the static relocation in .fini_array is patched even for PIE +# CHECK-NO-FINI-RELOC: Dynamic section at offset {{.*}} contains {{.*}} entries: +# CHECK-NO-FINI-RELOC: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]] +# CHECK-NO-FINI-RELOC: Symbol table '.symtab' contains {{.*}} entries: +## Read bytes separately so we can reverse them later +# CHECK-NO-FINI-RELOC: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini +# CHECK-NO-FINI-RELOC: Hex dump of section '.fini_array': +# CHECK-NO-FINI-RELOC: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000 + +## Check that DT_FINI_ARRAY has static relocation applied for __bolt_runtime_fini +# CHECK-NO-PIE-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries: +# CHECK-NO-PIE-NO-FINI-NOT: (FINI) +# CHECK-NO-PIE-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[a-f0-9]+]] +# CHECK-NO-PIE-NO-FINI: Symbol table '.symtab' contains {{.*}} entries: +## Read address bytes separately so we can reverse them later +# CHECK-NO-PIE-NO-FINI: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini +# CHECK-NO-PIE-NO-FINI: Hex dump of section '.fini_array': +# CHECK-NO-PIE-NO-FINI: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000 + + .globl _start + .type _start, %function +_start: + # Dummy relocation to force relocation mode. + .reloc 0, R_AARCH64_NONE + ret +.size _start, .-_start + + .globl _fini + .type _fini, %function +_fini: + ret + .size _fini, .-_fini + + .section .fini_array,"aw" + .align 3 + .dword _fini diff --git a/bolt/test/runtime/AArch64/hook-fini.test b/bolt/test/runtime/AArch64/hook-fini.test new file mode 100644 index 000000000000..8d23b21b6d61 --- /dev/null +++ b/bolt/test/runtime/AArch64/hook-fini.test @@ -0,0 +1,61 @@ +# Test the different ways of hooking the fini function for instrumentation (via +# DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE +# binaries because of the different ways of handling relocations (static or +# dynamic). +# All tests perform the following steps: +# - Compile and link for the case to be tested +# - Some sanity-checks on the dynamic section and relocations in the binary to +# verify it has the shape we want for testing: +# - DT_FINI or DT_FINI_ARRAY in dynamic section +# - No relative relocations for non-PIE +# - Instrument +# - Run instrumented binary +# - Verify generated profile +REQUIRES: system-linux,bolt-runtime + +RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe +RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s +RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s +RUN: llvm-bolt %t.exe -o %t --instrument \ +RUN: --instrumentation-file=%t \ +RUN: --instrumentation-file-append-pid +RUN: rm -f %t.*.fdata +RUN: %t +RUN: cat %t.*.fdata | FileCheck %s + +RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-fini.exe +RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s +RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s +RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument \ +RUN: --instrumentation-file=%t-no-fini \ +RUN: --instrumentation-file-append-pid +RUN: rm -f %t-no-fini.*.fdata +RUN: %t-no-fini +RUN: cat %t-no-fini.*.fdata | FileCheck %s + +RUN: %clang %cflags -no-pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-pie-no-fini.exe +RUN: llvm-readelf -d %t-no-pie-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s +RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s +RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument \ +RUN: --instrumentation-file=%t-no-pie-no-fini \ +RUN: --instrumentation-file-append-pid +RUN: rm -f %t-no-pie-no-fini.*.fdata +RUN: %t-no-pie-no-fini +RUN: cat %t-no-pie-no-fini.*.fdata | FileCheck %s + +# With fini: dynamic section should contain DT_FINI +DYN-FINI: (FINI) + +# Without fini: dynamic section should only contain DT_FINI_ARRAY +DYN-NO-FINI-NOT: (FINI) +DYN-NO-FINI: (FINI_ARRAY) +DYN-NO-FINI: (FINI_ARRAYSZ) + +# With PIE: binary should have relative relocations +RELOC-PIE: R_AARCH64_RELATIVE + +# Without PIE: binary should not have relative relocations +RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE + +# The instrumented profile should at least say main was called once +CHECK: main 0 0 1{{$}}