//===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the InstrumentationRuntimeLibrary class. // //===----------------------------------------------------------------------===// #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/JumpTable.h" #include "bolt/Core/Linker.h" #include "bolt/Utils/CommandLineOpts.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/CommandLine.h" using namespace llvm; using namespace bolt; namespace opts { cl::opt RuntimeInstrumentationLib( "runtime-instrumentation-lib", cl::desc("specify path of the runtime instrumentation library"), cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory)); extern cl::opt InstrumentationFileAppendPID; extern cl::opt ConservativeInstrumentation; extern cl::opt InstrumentationFilename; extern cl::opt InstrumentationBinpath; extern cl::opt InstrumentationSleepTime; extern cl::opt InstrumentationNoCountersClear; extern cl::opt InstrumentationWaitForks; extern cl::opt JumpTables; } // namespace opts void InstrumentationRuntimeLibrary::adjustCommandLineOptions( const BinaryContext &BC) const { if (!BC.HasRelocations) { errs() << "BOLT-ERROR: instrumentation runtime libraries require " "relocations\n"; exit(1); } if (opts::JumpTables != JTS_MOVE) { opts::JumpTables = JTS_MOVE; outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n"; } if (!BC.StartFunctionAddress) { errs() << "BOLT-ERROR: instrumentation runtime libraries require a known " "entry point of " "the input binary\n"; exit(1); } if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) { errs() << "BOLT-ERROR: instrumentation of static binary currently does not " "support profile output on binary finalization, so it " "requires -instrumentation-sleep-time=N (N>0) usage\n"; exit(1); } if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) && opts::InstrumentationFileAppendPID) { errs() << "BOLT-ERROR: instrumentation-file-append-pid is not compatible with " "instrumentation-sleep-time and instrumentation-wait-forks. If you " "want a separate profile for each fork, it can only be dumped in " "the end of process when instrumentation-file-append-pid is used.\n"; exit(1); } } void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) { MCSection *Section = BC.isELF() ? static_cast(BC.Ctx->getELFSection( ".bolt.instr.counters", ELF::SHT_PROGBITS, BinarySection::getFlags(/*IsReadOnly=*/false, /*IsText=*/false, /*IsAllocatable=*/true) )) : static_cast(BC.Ctx->getMachOSection( "__BOLT", "__counters", MachO::S_REGULAR, SectionKind::getData())); Section->setAlignment(llvm::Align(BC.RegularPageSize)); Streamer.switchSection(Section); // EmitOffset is used to determine padding size for data alignment uint64_t EmitOffset = 0; auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) { Streamer.emitLabel(Symbol); if (IsGlobal) Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global); }; auto emitLabelByName = [&BC, emitLabel](StringRef Name, bool IsGlobal = true) { MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name); emitLabel(Symbol, IsGlobal); }; auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) { const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset; if (Padding) { Streamer.emitFill(Padding, 0); EmitOffset += Padding; } }; auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; }; auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) { emitPadding(Size); emitDataSize(Size); }; auto emitFill = [&Streamer, emitDataSize, emitLabel](unsigned Size, MCSymbol *Symbol = nullptr, uint8_t Byte = 0) { emitDataSize(Size); if (Symbol) emitLabel(Symbol, /*IsGlobal*/ false); Streamer.emitFill(Size, Byte); }; auto emitValue = [&BC, &Streamer, emitDataPadding, emitLabel](MCSymbol *Symbol, const MCExpr *Value) { const unsigned Psize = BC.AsmInfo->getCodePointerSize(); emitDataPadding(Psize); emitLabel(Symbol); if (Value) Streamer.emitValue(Value, Psize); else Streamer.emitFill(Psize, 0); }; auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName]( StringRef Name, uint64_t Value, unsigned Size = 4) { emitDataPadding(Size); emitLabelByName(Name); Streamer.emitIntValue(Value, Size); }; auto emitString = [&Streamer, emitDataSize, emitLabelByName, emitFill](StringRef Name, StringRef Contents) { emitDataSize(Contents.size()); emitLabelByName(Name); Streamer.emitBytes(Contents); emitFill(1); }; // All of the following symbols will be exported as globals to be used by the // instrumentation runtime library to dump the instrumentation data to disk. // Label marking start of the memory region containing instrumentation // counters, total vector size is Counters.size() 8-byte counters emitLabelByName("__bolt_instr_locations"); for (MCSymbol *const &Label : Summary->Counters) emitFill(sizeof(uint64_t), Label); emitPadding(BC.RegularPageSize); emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime); emitIntValue("__bolt_instr_no_counters_clear", !!opts::InstrumentationNoCountersClear, 1); emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation, 1); emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1); emitIntValue("__bolt_num_counters", Summary->Counters.size()); emitValue(Summary->IndCallCounterFuncPtr, nullptr); emitValue(Summary->IndTailCallCounterFuncPtr, nullptr); emitIntValue("__bolt_instr_num_ind_calls", Summary->IndCallDescriptions.size()); emitIntValue("__bolt_instr_num_ind_targets", Summary->IndCallTargetDescriptions.size()); emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size()); emitString("__bolt_instr_filename", opts::InstrumentationFilename); emitString("__bolt_instr_binpath", opts::InstrumentationBinpath); emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1); if (BC.isMachO()) { MCSection *TablesSection = BC.Ctx->getMachOSection( "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData()); TablesSection->setAlignment(llvm::Align(BC.RegularPageSize)); Streamer.switchSection(TablesSection); emitString("__bolt_instr_tables", buildTables(BC)); } } void InstrumentationRuntimeLibrary::link( BinaryContext &BC, StringRef ToolPath, BOLTLinker &Linker, BOLTLinker::SectionsMapper MapSections) { std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib); loadLibrary(LibPath, Linker, MapSections); if (BC.isMachO()) return; std::optional FiniSymInfo = Linker.lookupSymbolInfo("__bolt_instr_fini"); if (!FiniSymInfo) { errs() << "BOLT-ERROR: instrumentation library does not define " "__bolt_instr_fini: " << LibPath << "\n"; exit(1); } RuntimeFiniAddress = FiniSymInfo->Address; std::optional StartSymInfo = Linker.lookupSymbolInfo("__bolt_instr_start"); if (!StartSymInfo) { errs() << "BOLT-ERROR: instrumentation library does not define " "__bolt_instr_start: " << LibPath << "\n"; exit(1); } RuntimeStartAddress = StartSymInfo->Address; outs() << "BOLT-INFO: output linked against instrumentation runtime " "library, lib entry point is 0x" << Twine::utohexstr(RuntimeStartAddress) << "\n"; std::optional ClearSymInfo = Linker.lookupSymbolInfo("__bolt_instr_clear_counters"); const uint64_t ClearSymAddress = ClearSymInfo ? ClearSymInfo->Address : 0; outs() << "BOLT-INFO: clear procedure is 0x" << Twine::utohexstr(ClearSymAddress) << "\n"; emitTablesAsELFNote(BC); } std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) { std::string TablesStr; raw_string_ostream OS(TablesStr); // This is sync'ed with runtime/instr.cpp:readDescriptions() auto getOutputAddress = [](const BinaryFunction &Func, uint64_t Offset) -> uint64_t { return Offset == 0 ? Func.getOutputAddress() : Func.translateInputToOutputAddress(Func.getAddress() + Offset); }; // Indirect targets need to be sorted for fast lookup during runtime llvm::sort(Summary->IndCallTargetDescriptions, [&](const IndCallTargetDescription &A, const IndCallTargetDescription &B) { return getOutputAddress(*A.Target, A.ToLoc.Offset) < getOutputAddress(*B.Target, B.ToLoc.Offset); }); // Start of the vector with descriptions (one CounterDescription for each // counter), vector size is Counters.size() CounterDescription-sized elmts const size_t IDSize = Summary->IndCallDescriptions.size() * sizeof(IndCallDescription); OS.write(reinterpret_cast(&IDSize), 4); for (const IndCallDescription &Desc : Summary->IndCallDescriptions) { OS.write(reinterpret_cast(&Desc.FromLoc.FuncString), 4); OS.write(reinterpret_cast(&Desc.FromLoc.Offset), 4); } const size_t ITDSize = Summary->IndCallTargetDescriptions.size() * sizeof(IndCallTargetDescription); OS.write(reinterpret_cast(&ITDSize), 4); for (const IndCallTargetDescription &Desc : Summary->IndCallTargetDescriptions) { OS.write(reinterpret_cast(&Desc.ToLoc.FuncString), 4); OS.write(reinterpret_cast(&Desc.ToLoc.Offset), 4); uint64_t TargetFuncAddress = getOutputAddress(*Desc.Target, Desc.ToLoc.Offset); OS.write(reinterpret_cast(&TargetFuncAddress), 8); } uint32_t FuncDescSize = Summary->getFDSize(); OS.write(reinterpret_cast(&FuncDescSize), 4); for (const FunctionDescription &Desc : Summary->FunctionDescriptions) { const size_t LeafNum = Desc.LeafNodes.size(); OS.write(reinterpret_cast(&LeafNum), 4); for (const InstrumentedNode &LeafNode : Desc.LeafNodes) { OS.write(reinterpret_cast(&LeafNode.Node), 4); OS.write(reinterpret_cast(&LeafNode.Counter), 4); } const size_t EdgesNum = Desc.Edges.size(); OS.write(reinterpret_cast(&EdgesNum), 4); for (const EdgeDescription &Edge : Desc.Edges) { OS.write(reinterpret_cast(&Edge.FromLoc.FuncString), 4); OS.write(reinterpret_cast(&Edge.FromLoc.Offset), 4); OS.write(reinterpret_cast(&Edge.FromNode), 4); OS.write(reinterpret_cast(&Edge.ToLoc.FuncString), 4); OS.write(reinterpret_cast(&Edge.ToLoc.Offset), 4); OS.write(reinterpret_cast(&Edge.ToNode), 4); OS.write(reinterpret_cast(&Edge.Counter), 4); } const size_t CallsNum = Desc.Calls.size(); OS.write(reinterpret_cast(&CallsNum), 4); for (const CallDescription &Call : Desc.Calls) { OS.write(reinterpret_cast(&Call.FromLoc.FuncString), 4); OS.write(reinterpret_cast(&Call.FromLoc.Offset), 4); OS.write(reinterpret_cast(&Call.FromNode), 4); OS.write(reinterpret_cast(&Call.ToLoc.FuncString), 4); OS.write(reinterpret_cast(&Call.ToLoc.Offset), 4); OS.write(reinterpret_cast(&Call.Counter), 4); uint64_t TargetFuncAddress = getOutputAddress(*Call.Target, Call.ToLoc.Offset); OS.write(reinterpret_cast(&TargetFuncAddress), 8); } const size_t EntryNum = Desc.EntryNodes.size(); OS.write(reinterpret_cast(&EntryNum), 4); for (const EntryNode &EntryNode : Desc.EntryNodes) { OS.write(reinterpret_cast(&EntryNode.Node), 8); uint64_t TargetFuncAddress = getOutputAddress(*Desc.Function, EntryNode.Address); OS.write(reinterpret_cast(&TargetFuncAddress), 8); } } // Our string table lives immediately after descriptions vector OS << Summary->StringTable; return TablesStr; } void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) { std::string TablesStr = buildTables(BC); const std::string BoltInfo = BinarySection::encodeELFNote( "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES); BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo), BoltInfo.size(), /*Alignment=*/1, /*IsReadOnly=*/true, ELF::SHT_NOTE); }