//===- bolt/Core/BinaryEmitter.cpp - Emit code and data -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the collection of functions and classes used for // emission of code and data into object/binary file. // //===----------------------------------------------------------------------===// #include "bolt/Core/BinaryEmitter.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/DebugData.h" #include "bolt/Utils/CommandLineOpts.h" #include "bolt/Utils/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/SMLoc.h" #define DEBUG_TYPE "bolt" using namespace llvm; using namespace bolt; namespace opts { extern cl::opt JumpTables; extern cl::opt PreserveBlocksAlignment; cl::opt AlignBlocks("align-blocks", cl::desc("align basic blocks"), cl::cat(BoltOptCategory)); cl::opt AlignMacroOpFusion("align-macro-fusion", cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"), cl::init(MFT_HOT), cl::values(clEnumValN(MFT_NONE, "none", "do not insert alignment no-ops for macro-fusion"), clEnumValN(MFT_HOT, "hot", "only insert alignment no-ops on hot execution paths (default)"), clEnumValN(MFT_ALL, "all", "always align instructions to allow macro-fusion")), cl::ZeroOrMore, cl::cat(BoltRelocCategory)); static cl::list BreakFunctionNames("break-funcs", cl::CommaSeparated, cl::desc("list of functions to core dump on (debugging)"), cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); static cl::list FunctionPadSpec("pad-funcs", cl::CommaSeparated, cl::desc("list of functions to pad with amount of bytes"), cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."), cl::Hidden, cl::cat(BoltCategory)); static cl::opt MarkFuncs( "mark-funcs", cl::desc("mark function boundaries with break instruction to make " "sure we accidentally don't cross them"), cl::ReallyHidden, cl::cat(BoltCategory)); static cl::opt PrintJumpTables("print-jump-tables", cl::desc("print jump tables"), cl::Hidden, cl::cat(BoltCategory)); static cl::opt X86AlignBranchBoundaryHotOnly("x86-align-branch-boundary-hot-only", cl::desc("only apply branch boundary alignment in hot code"), cl::init(true), cl::cat(BoltOptCategory)); size_t padFunction(const BinaryFunction &Function) { static std::map FunctionPadding; if (FunctionPadding.empty() && !FunctionPadSpec.empty()) { for (std::string &Spec : FunctionPadSpec) { size_t N = Spec.find(':'); if (N == std::string::npos) continue; std::string Name = Spec.substr(0, N); size_t Padding = std::stoull(Spec.substr(N + 1)); FunctionPadding[Name] = Padding; } } for (auto &FPI : FunctionPadding) { std::string Name = FPI.first; size_t Padding = FPI.second; if (Function.hasNameRegex(Name)) return Padding; } return 0; } } // namespace opts namespace { using JumpTable = bolt::JumpTable; class BinaryEmitter { private: BinaryEmitter(const BinaryEmitter &) = delete; BinaryEmitter &operator=(const BinaryEmitter &) = delete; MCStreamer &Streamer; BinaryContext &BC; public: BinaryEmitter(MCStreamer &Streamer, BinaryContext &BC) : Streamer(Streamer), BC(BC) {} /// Emit all code and data. void emitAll(StringRef OrgSecPrefix); /// Emit function code. The caller is responsible for emitting function /// symbol(s) and setting the section to emit the code to. void emitFunctionBody(BinaryFunction &BF, bool EmitColdPart, bool EmitCodeOnly = false); private: /// Emit function code. void emitFunctions(); /// Emit a single function. bool emitFunction(BinaryFunction &BF, bool EmitColdPart); /// Helper for emitFunctionBody to write data inside a function /// (used for AArch64) void emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, BinaryFunction *OnBehalfOf = nullptr); /// Emit jump tables for the function. void emitJumpTables(const BinaryFunction &BF); /// Emit jump table data. Callee supplies sections for the data. void emitJumpTable(const JumpTable &JT, MCSection *HotSection, MCSection *ColdSection); void emitCFIInstruction(const MCCFIInstruction &Inst) const; /// Emit exception handling ranges for the function. void emitLSDA(BinaryFunction &BF, bool EmitColdPart); /// Emit line number information corresponding to \p NewLoc. \p PrevLoc /// provides a context for de-duplication of line number info. /// \p FirstInstr indicates if \p NewLoc represents the first instruction /// in a sequence, such as a function fragment. /// /// Return new current location which is either \p NewLoc or \p PrevLoc. SMLoc emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr); /// Use \p FunctionEndSymbol to mark the end of the line info sequence. /// Note that it does not automatically result in the insertion of the EOS /// marker in the line table program, but provides one to the DWARF generator /// when it needs it. void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol); /// Emit debug line info for unprocessed functions from CUs that include /// emitted functions. void emitDebugLineInfoForOriginalFunctions(); /// Emit debug line for CUs that were not modified. void emitDebugLineInfoForUnprocessedCUs(); /// Emit data sections that have code references in them. void emitDataSections(StringRef OrgSecPrefix); }; } // anonymous namespace void BinaryEmitter::emitAll(StringRef OrgSecPrefix) { Streamer.initSections(false, *BC.STI); if (opts::UpdateDebugSections && BC.isELF()) { // Force the emission of debug line info into allocatable section to ensure // RuntimeDyld will process it without ProcessAllSections flag. // // NB: on MachO all sections are required for execution, hence no need // to change flags/attributes. MCSectionELF *ELFDwarfLineSection = static_cast(BC.MOFI->getDwarfLineSection()); ELFDwarfLineSection->setFlags(ELF::SHF_ALLOC); } if (RuntimeLibrary *RtLibrary = BC.getRuntimeLibrary()) RtLibrary->emitBinary(BC, Streamer); BC.getTextSection()->setAlignment(Align(opts::AlignText)); emitFunctions(); if (opts::UpdateDebugSections) { emitDebugLineInfoForOriginalFunctions(); DwarfLineTable::emit(BC, Streamer); } emitDataSections(OrgSecPrefix); Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("_end")); } void BinaryEmitter::emitFunctions() { auto emit = [&](const std::vector &Functions) { const bool HasProfile = BC.NumProfiledFuncs > 0; const bool OriginalAllowAutoPadding = Streamer.getAllowAutoPadding(); for (BinaryFunction *Function : Functions) { if (!BC.shouldEmit(*Function)) continue; LLVM_DEBUG(dbgs() << "BOLT: generating code for function \"" << *Function << "\" : " << Function->getFunctionNumber() << '\n'); // Was any part of the function emitted. bool Emitted = false; // Turn off Intel JCC Erratum mitigation for cold code if requested if (HasProfile && opts::X86AlignBranchBoundaryHotOnly && !Function->hasValidProfile()) Streamer.setAllowAutoPadding(false); Emitted |= emitFunction(*Function, /*EmitColdPart=*/false); if (Function->isSplit()) { if (opts::X86AlignBranchBoundaryHotOnly) Streamer.setAllowAutoPadding(false); Emitted |= emitFunction(*Function, /*EmitColdPart=*/true); } Streamer.setAllowAutoPadding(OriginalAllowAutoPadding); if (Emitted) Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics); } }; // Mark the start of hot text. if (opts::HotText) { Streamer.switchSection(BC.getTextSection()); Streamer.emitLabel(BC.getHotTextStartSymbol()); } // Emit functions in sorted order. std::vector SortedFunctions = BC.getSortedFunctions(); emit(SortedFunctions); // Emit functions added by BOLT. emit(BC.getInjectedBinaryFunctions()); // Mark the end of hot text. if (opts::HotText) { Streamer.switchSection(BC.getTextSection()); Streamer.emitLabel(BC.getHotTextEndSymbol()); } } bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) { if (Function.size() == 0 && !Function.hasIslandsInfo()) return false; if (Function.getState() == BinaryFunction::State::Empty) return false; MCSection *Section = BC.getCodeSection(EmitColdPart ? Function.getColdCodeSectionName() : Function.getCodeSectionName()); Streamer.switchSection(Section); Section->setHasInstructions(true); BC.Ctx->addGenDwarfSection(Section); if (BC.HasRelocations) { // Set section alignment to at least maximum possible object alignment. // We need this to support LongJmp and other passes that calculates // tentative layout. if (Section->getAlignment() < opts::AlignFunctions) Section->setAlignment(Align(opts::AlignFunctions)); Streamer.emitCodeAlignment(BinaryFunction::MinAlign, &*BC.STI); uint16_t MaxAlignBytes = EmitColdPart ? Function.getMaxColdAlignmentBytes() : Function.getMaxAlignmentBytes(); if (MaxAlignBytes > 0) Streamer.emitCodeAlignment(Function.getAlignment(), &*BC.STI, MaxAlignBytes); } else { Streamer.emitCodeAlignment(Function.getAlignment(), &*BC.STI); } MCContext &Context = Streamer.getContext(); const MCAsmInfo *MAI = Context.getAsmInfo(); MCSymbol *StartSymbol = nullptr; // Emit all symbols associated with the main function entry. if (!EmitColdPart) { StartSymbol = Function.getSymbol(); for (MCSymbol *Symbol : Function.getSymbols()) { Streamer.emitSymbolAttribute(Symbol, MCSA_ELF_TypeFunction); Streamer.emitLabel(Symbol); } } else { StartSymbol = Function.getColdSymbol(); Streamer.emitSymbolAttribute(StartSymbol, MCSA_ELF_TypeFunction); Streamer.emitLabel(StartSymbol); } // Emit CFI start if (Function.hasCFI()) { Streamer.emitCFIStartProc(/*IsSimple=*/false); if (Function.getPersonalityFunction() != nullptr) Streamer.emitCFIPersonality(Function.getPersonalityFunction(), Function.getPersonalityEncoding()); MCSymbol *LSDASymbol = EmitColdPart ? Function.getColdLSDASymbol() : Function.getLSDASymbol(); if (LSDASymbol) Streamer.emitCFILsda(LSDASymbol, BC.LSDAEncoding); else Streamer.emitCFILsda(0, dwarf::DW_EH_PE_omit); // Emit CFI instructions relative to the CIE for (const MCCFIInstruction &CFIInstr : Function.cie()) { // Only write CIE CFI insns that LLVM will not already emit const std::vector &FrameInstrs = MAI->getInitialFrameState(); if (!llvm::is_contained(FrameInstrs, CFIInstr)) emitCFIInstruction(CFIInstr); } } assert((Function.empty() || !(*Function.begin()).isCold()) && "first basic block should never be cold"); // Emit UD2 at the beginning if requested by user. if (!opts::BreakFunctionNames.empty()) { for (std::string &Name : opts::BreakFunctionNames) { if (Function.hasNameRegex(Name)) { Streamer.emitIntValue(0x0B0F, 2); // UD2: 0F 0B break; } } } // Emit code. emitFunctionBody(Function, EmitColdPart, /*EmitCodeOnly=*/false); // Emit padding if requested. if (size_t Padding = opts::padFunction(Function)) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: padding function " << Function << " with " << Padding << " bytes\n"); Streamer.emitFill(Padding, MAI->getTextAlignFillValue()); } if (opts::MarkFuncs) Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1); // Emit CFI end if (Function.hasCFI()) Streamer.emitCFIEndProc(); MCSymbol *EndSymbol = EmitColdPart ? Function.getFunctionColdEndLabel() : Function.getFunctionEndLabel(); Streamer.emitLabel(EndSymbol); if (MAI->hasDotTypeDotSizeDirective()) { const MCExpr *SizeExpr = MCBinaryExpr::createSub( MCSymbolRefExpr::create(EndSymbol, Context), MCSymbolRefExpr::create(StartSymbol, Context), Context); Streamer.emitELFSize(StartSymbol, SizeExpr); } if (opts::UpdateDebugSections && Function.getDWARFUnit()) emitLineInfoEnd(Function, EndSymbol); // Exception handling info for the function. emitLSDA(Function, EmitColdPart); if (!EmitColdPart && opts::JumpTables > JTS_NONE) emitJumpTables(Function); return true; } void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, bool EmitColdPart, bool EmitCodeOnly) { if (!EmitCodeOnly && EmitColdPart && BF.hasConstantIsland()) BF.duplicateConstantIslands(); // Track the first emitted instruction with debug info. bool FirstInstr = true; for (BinaryBasicBlock *BB : BF.layout()) { if (EmitColdPart != BB->isCold()) continue; if ((opts::AlignBlocks || opts::PreserveBlocksAlignment) && BB->getAlignment() > 1) Streamer.emitCodeAlignment(BB->getAlignment(), &*BC.STI, BB->getAlignmentMaxBytes()); Streamer.emitLabel(BB->getLabel()); if (!EmitCodeOnly) { if (MCSymbol *EntrySymbol = BF.getSecondaryEntryPointSymbol(*BB)) Streamer.emitLabel(EntrySymbol); } // Check if special alignment for macro-fusion is needed. bool MayNeedMacroFusionAlignment = (opts::AlignMacroOpFusion == MFT_ALL) || (opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount()); BinaryBasicBlock::const_iterator MacroFusionPair; if (MayNeedMacroFusionAlignment) { MacroFusionPair = BB->getMacroOpFusionPair(); if (MacroFusionPair == BB->end()) MayNeedMacroFusionAlignment = false; } SMLoc LastLocSeen; // Remember if the last instruction emitted was a prefix. bool LastIsPrefix = false; for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { MCInst &Instr = *I; if (EmitCodeOnly && BC.MIB->isPseudo(Instr)) continue; // Handle pseudo instructions. if (BC.MIB->isEHLabel(Instr)) { const MCSymbol *Label = BC.MIB->getTargetSymbol(Instr); assert(Instr.getNumOperands() >= 1 && Label && "bad EH_LABEL instruction"); Streamer.emitLabel(const_cast(Label)); continue; } if (BC.MIB->isCFI(Instr)) { emitCFIInstruction(*BF.getCFIFor(Instr)); continue; } // Handle macro-fusion alignment. If we emitted a prefix as // the last instruction, we should've already emitted the associated // alignment hint, so don't emit it twice. if (MayNeedMacroFusionAlignment && !LastIsPrefix && I == MacroFusionPair) { // This assumes the second instruction in the macro-op pair will get // assigned to its own MCRelaxableFragment. Since all JCC instructions // are relaxable, we should be safe. } if (!EmitCodeOnly && opts::UpdateDebugSections && BF.getDWARFUnit()) { LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, FirstInstr); FirstInstr = false; } // Prepare to tag this location with a label if we need to keep track of // the location of calls/returns for BOLT address translation maps if (!EmitCodeOnly && BF.requiresAddressTranslation() && BC.MIB->getOffset(Instr)) { const uint32_t Offset = *BC.MIB->getOffset(Instr); MCSymbol *LocSym = BC.Ctx->createTempSymbol(); Streamer.emitLabel(LocSym); BB->getLocSyms().emplace_back(Offset, LocSym); } Streamer.emitInstruction(Instr, *BC.STI); LastIsPrefix = BC.MIB->isPrefix(Instr); } } if (!EmitCodeOnly) emitConstantIslands(BF, EmitColdPart); } void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, BinaryFunction *OnBehalfOf) { if (!BF.hasIslandsInfo()) return; BinaryFunction::IslandInfo &Islands = BF.getIslandInfo(); if (Islands.DataOffsets.empty() && Islands.Dependency.empty()) return; // AArch64 requires CI to be aligned to 8 bytes due to access instructions // restrictions. E.g. the ldr with imm, where imm must be aligned to 8 bytes. const uint16_t Alignment = OnBehalfOf ? OnBehalfOf->getConstantIslandAlignment() : BF.getConstantIslandAlignment(); Streamer.emitCodeAlignment(Alignment, &*BC.STI); if (!OnBehalfOf) { if (!EmitColdPart) Streamer.emitLabel(BF.getFunctionConstantIslandLabel()); else Streamer.emitLabel(BF.getFunctionColdConstantIslandLabel()); } assert((!OnBehalfOf || Islands.Proxies[OnBehalfOf].size() > 0) && "spurious OnBehalfOf constant island emission"); assert(!BF.isInjected() && "injected functions should not have constant islands"); // Raw contents of the function. StringRef SectionContents = BF.getOriginSection()->getContents(); // Raw contents of the function. StringRef FunctionContents = SectionContents.substr( BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize()); if (opts::Verbosity && !OnBehalfOf) outs() << "BOLT-INFO: emitting constant island for function " << BF << "\n"; // We split the island into smaller blocks and output labels between them. auto IS = Islands.Offsets.begin(); for (auto DataIter = Islands.DataOffsets.begin(); DataIter != Islands.DataOffsets.end(); ++DataIter) { uint64_t FunctionOffset = *DataIter; uint64_t EndOffset = 0ULL; // Determine size of this data chunk auto NextData = std::next(DataIter); auto CodeIter = Islands.CodeOffsets.lower_bound(*DataIter); if (CodeIter == Islands.CodeOffsets.end() && NextData == Islands.DataOffsets.end()) EndOffset = BF.getMaxSize(); else if (CodeIter == Islands.CodeOffsets.end()) EndOffset = *NextData; else if (NextData == Islands.DataOffsets.end()) EndOffset = *CodeIter; else EndOffset = (*CodeIter > *NextData) ? *NextData : *CodeIter; if (FunctionOffset == EndOffset) continue; // Size is zero, nothing to emit auto emitCI = [&](uint64_t &FunctionOffset, uint64_t EndOffset) { if (FunctionOffset >= EndOffset) return; for (auto It = Islands.Relocations.lower_bound(FunctionOffset); It != Islands.Relocations.end(); ++It) { if (It->first >= EndOffset) break; const Relocation &Relocation = It->second; if (FunctionOffset < Relocation.Offset) { Streamer.emitBytes( FunctionContents.slice(FunctionOffset, Relocation.Offset)); FunctionOffset = Relocation.Offset; } LLVM_DEBUG( dbgs() << "BOLT-DEBUG: emitting constant island relocation" << " for " << BF << " at offset 0x" << Twine::utohexstr(Relocation.Offset) << " with size " << Relocation::getSizeForType(Relocation.Type) << '\n'); FunctionOffset += Relocation.emit(&Streamer); } assert(FunctionOffset <= EndOffset && "overflow error"); if (FunctionOffset < EndOffset) { Streamer.emitBytes(FunctionContents.slice(FunctionOffset, EndOffset)); FunctionOffset = EndOffset; } }; // Emit labels, relocs and data while (IS != Islands.Offsets.end() && IS->first < EndOffset) { auto NextLabelOffset = IS == Islands.Offsets.end() ? EndOffset : IS->first; auto NextStop = std::min(NextLabelOffset, EndOffset); assert(NextStop <= EndOffset && "internal overflow error"); emitCI(FunctionOffset, NextStop); if (IS != Islands.Offsets.end() && FunctionOffset == IS->first) { // This is a slightly complex code to decide which label to emit. We // have 4 cases to handle: regular symbol, cold symbol, regular or cold // symbol being emitted on behalf of an external function. if (!OnBehalfOf) { if (!EmitColdPart) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << IS->second->getName() << " at offset 0x" << Twine::utohexstr(IS->first) << '\n'); if (IS->second->isUndefined()) Streamer.emitLabel(IS->second); else assert(BF.hasName(std::string(IS->second->getName()))); } else if (Islands.ColdSymbols.count(IS->second) != 0) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << Islands.ColdSymbols[IS->second]->getName() << '\n'); if (Islands.ColdSymbols[IS->second]->isUndefined()) Streamer.emitLabel(Islands.ColdSymbols[IS->second]); } } else { if (!EmitColdPart) { if (MCSymbol *Sym = Islands.Proxies[OnBehalfOf][IS->second]) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << Sym->getName() << '\n'); Streamer.emitLabel(Sym); } } else if (MCSymbol *Sym = Islands.ColdProxies[OnBehalfOf][IS->second]) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << Sym->getName() << '\n'); Streamer.emitLabel(Sym); } } ++IS; } } assert(FunctionOffset <= EndOffset && "overflow error"); emitCI(FunctionOffset, EndOffset); } assert(IS == Islands.Offsets.end() && "some symbols were not emitted!"); if (OnBehalfOf) return; // Now emit constant islands from other functions that we may have used in // this function. for (BinaryFunction *ExternalFunc : Islands.Dependency) emitConstantIslands(*ExternalFunc, EmitColdPart, &BF); } SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr) { DWARFUnit *FunctionCU = BF.getDWARFUnit(); const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable(); assert(FunctionCU && "cannot emit line info for function without CU"); DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc); // Check if no new line info needs to be emitted. if (RowReference == DebugLineTableRowRef::NULL_ROW || NewLoc.getPointer() == PrevLoc.getPointer()) return PrevLoc; unsigned CurrentFilenum = 0; const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable; // If the CU id from the current instruction location does not // match the CU id from the current function, it means that we // have come across some inlined code. We must look up the CU // for the instruction's original function and get the line table // from that. const uint64_t FunctionUnitIndex = FunctionCU->getOffset(); const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; if (CurrentUnitIndex != FunctionUnitIndex) { CurrentLineTable = BC.DwCtx->getLineTableForUnit( BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex)); // Add filename from the inlined function to the current CU. CurrentFilenum = BC.addDebugFilenameToUnit( FunctionUnitIndex, CurrentUnitIndex, CurrentLineTable->Rows[RowReference.RowIndex - 1].File); } const DWARFDebugLine::Row &CurrentRow = CurrentLineTable->Rows[RowReference.RowIndex - 1]; if (!CurrentFilenum) CurrentFilenum = CurrentRow.File; unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) | (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); // Always emit is_stmt at the beginning of function fragment. if (FirstInstr) Flags |= DWARF2_FLAG_IS_STMT; BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column, Flags, CurrentRow.Isa, CurrentRow.Discriminator); const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); BC.Ctx->clearDwarfLocSeen(); MCSymbol *LineSym = BC.Ctx->createTempSymbol(); Streamer.emitLabel(LineSym); BC.getDwarfLineTable(FunctionUnitIndex) .getMCLineSections() .addLineEntry(MCDwarfLineEntry(LineSym, DwarfLoc), Streamer.getCurrentSectionOnly()); return NewLoc; } void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndLabel) { DWARFUnit *FunctionCU = BF.getDWARFUnit(); assert(FunctionCU && "DWARF unit expected"); BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0); const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); BC.Ctx->clearDwarfLocSeen(); BC.getDwarfLineTable(FunctionCU->getOffset()) .getMCLineSections() .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc), Streamer.getCurrentSectionOnly()); } void BinaryEmitter::emitJumpTables(const BinaryFunction &BF) { MCSection *ReadOnlySection = BC.MOFI->getReadOnlySection(); MCSection *ReadOnlyColdSection = BC.MOFI->getContext().getELFSection( ".rodata.cold", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); if (!BF.hasJumpTables()) return; if (opts::PrintJumpTables) outs() << "BOLT-INFO: jump tables for function " << BF << ":\n"; for (auto &JTI : BF.jumpTables()) { JumpTable &JT = *JTI.second; if (opts::PrintJumpTables) JT.print(outs()); if ((opts::JumpTables == JTS_BASIC || !BF.isSimple()) && BC.HasRelocations) { JT.updateOriginal(); } else { MCSection *HotSection, *ColdSection; if (opts::JumpTables == JTS_BASIC) { // In non-relocation mode we have to emit jump tables in local sections. // This way we only overwrite them when the corresponding function is // overwritten. std::string Name = ".local." + JT.Labels[0]->getName().str(); std::replace(Name.begin(), Name.end(), '/', '.'); BinarySection &Section = BC.registerOrUpdateSection(Name, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); Section.setAnonymous(true); JT.setOutputSection(Section); HotSection = BC.getDataSection(Name); ColdSection = HotSection; } else { if (BF.isSimple()) { HotSection = ReadOnlySection; ColdSection = ReadOnlyColdSection; } else { HotSection = BF.hasProfile() ? ReadOnlySection : ReadOnlyColdSection; ColdSection = HotSection; } } emitJumpTable(JT, HotSection, ColdSection); } } } void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection, MCSection *ColdSection) { // Pre-process entries for aggressive splitting. // Each label represents a separate switch table and gets its own count // determining its destination. std::map LabelCounts; if (opts::JumpTables > JTS_SPLIT && !JT.Counts.empty()) { MCSymbol *CurrentLabel = JT.Labels.at(0); uint64_t CurrentLabelCount = 0; for (unsigned Index = 0; Index < JT.Entries.size(); ++Index) { auto LI = JT.Labels.find(Index * JT.EntrySize); if (LI != JT.Labels.end()) { LabelCounts[CurrentLabel] = CurrentLabelCount; CurrentLabel = LI->second; CurrentLabelCount = 0; } CurrentLabelCount += JT.Counts[Index].Count; } LabelCounts[CurrentLabel] = CurrentLabelCount; } else { Streamer.switchSection(JT.Count > 0 ? HotSection : ColdSection); Streamer.emitValueToAlignment(JT.EntrySize); } MCSymbol *LastLabel = nullptr; uint64_t Offset = 0; for (MCSymbol *Entry : JT.Entries) { auto LI = JT.Labels.find(Offset); if (LI != JT.Labels.end()) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table " << LI->second->getName() << " (originally was at address 0x" << Twine::utohexstr(JT.getAddress() + Offset) << (Offset ? "as part of larger jump table\n" : "\n")); if (!LabelCounts.empty()) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: " << LabelCounts[LI->second] << '\n'); if (LabelCounts[LI->second] > 0) Streamer.switchSection(HotSection); else Streamer.switchSection(ColdSection); Streamer.emitValueToAlignment(JT.EntrySize); } Streamer.emitLabel(LI->second); LastLabel = LI->second; } if (JT.Type == JumpTable::JTT_NORMAL) { Streamer.emitSymbolValue(Entry, JT.OutputEntrySize); } else { // JTT_PIC const MCSymbolRefExpr *JTExpr = MCSymbolRefExpr::create(LastLabel, Streamer.getContext()); const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Entry, Streamer.getContext()); const MCBinaryExpr *Value = MCBinaryExpr::createSub(E, JTExpr, Streamer.getContext()); Streamer.emitValue(Value, JT.EntrySize); } Offset += JT.EntrySize; } } void BinaryEmitter::emitCFIInstruction(const MCCFIInstruction &Inst) const { switch (Inst.getOperation()) { default: llvm_unreachable("Unexpected instruction"); case MCCFIInstruction::OpDefCfaOffset: Streamer.emitCFIDefCfaOffset(Inst.getOffset()); break; case MCCFIInstruction::OpAdjustCfaOffset: Streamer.emitCFIAdjustCfaOffset(Inst.getOffset()); break; case MCCFIInstruction::OpDefCfa: Streamer.emitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpDefCfaRegister: Streamer.emitCFIDefCfaRegister(Inst.getRegister()); break; case MCCFIInstruction::OpOffset: Streamer.emitCFIOffset(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpRegister: Streamer.emitCFIRegister(Inst.getRegister(), Inst.getRegister2()); break; case MCCFIInstruction::OpWindowSave: Streamer.emitCFIWindowSave(); break; case MCCFIInstruction::OpNegateRAState: Streamer.emitCFINegateRAState(); break; case MCCFIInstruction::OpSameValue: Streamer.emitCFISameValue(Inst.getRegister()); break; case MCCFIInstruction::OpGnuArgsSize: Streamer.emitCFIGnuArgsSize(Inst.getOffset()); break; case MCCFIInstruction::OpEscape: Streamer.AddComment(Inst.getComment()); Streamer.emitCFIEscape(Inst.getValues()); break; case MCCFIInstruction::OpRestore: Streamer.emitCFIRestore(Inst.getRegister()); break; case MCCFIInstruction::OpUndefined: Streamer.emitCFIUndefined(Inst.getRegister()); break; } } // The code is based on EHStreamer::emitExceptionTable(). void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) { const BinaryFunction::CallSitesType *Sites = EmitColdPart ? &BF.getColdCallSites() : &BF.getCallSites(); if (Sites->empty()) return; // Calculate callsite table size. Size of each callsite entry is: // // sizeof(start) + sizeof(length) + sizeof(LP) + sizeof(uleb128(action)) // // or // // sizeof(dwarf::DW_EH_PE_data4) * 3 + sizeof(uleb128(action)) uint64_t CallSiteTableLength = Sites->size() * 4 * 3; for (const BinaryFunction::CallSite &CallSite : *Sites) CallSiteTableLength += getULEB128Size(CallSite.Action); Streamer.switchSection(BC.MOFI->getLSDASection()); const unsigned TTypeEncoding = BC.TTypeEncoding; const unsigned TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); const uint16_t TTypeAlignment = 4; // Type tables have to be aligned at 4 bytes. Streamer.emitValueToAlignment(TTypeAlignment); // Emit the LSDA label. MCSymbol *LSDASymbol = EmitColdPart ? BF.getColdLSDASymbol() : BF.getLSDASymbol(); assert(LSDASymbol && "no LSDA symbol set"); Streamer.emitLabel(LSDASymbol); // Corresponding FDE start. const MCSymbol *StartSymbol = EmitColdPart ? BF.getColdSymbol() : BF.getSymbol(); // Emit the LSDA header. // If LPStart is omitted, then the start of the FDE is used as a base for // landing pad displacements. Then if a cold fragment starts with // a landing pad, this means that the first landing pad offset will be 0. // As a result, the exception handling runtime will ignore this landing pad // because zero offset denotes the absence of a landing pad. // For this reason, when the binary has fixed starting address we emit LPStart // as 0 and output the absolute value of the landing pad in the table. // // If the base address can change, we cannot use absolute addresses for // landing pads (at least not without runtime relocations). Hence, we fall // back to emitting landing pads relative to the FDE start. // As we are emitting label differences, we have to guarantee both labels are // defined in the same section and hence cannot place the landing pad into a // cold fragment when the corresponding call site is in the hot fragment. // Because of this issue and the previously described issue of possible // zero-offset landing pad we have to place landing pads in the same section // as the corresponding invokes for shared objects. std::function emitLandingPad; if (BC.HasFixedLoadAddress) { Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format Streamer.emitIntValue(0, 4); // LPStart emitLandingPad = [&](const MCSymbol *LPSymbol) { if (!LPSymbol) Streamer.emitIntValue(0, 4); else Streamer.emitSymbolValue(LPSymbol, 4); }; } else { Streamer.emitIntValue(dwarf::DW_EH_PE_omit, 1); // LPStart format emitLandingPad = [&](const MCSymbol *LPSymbol) { if (!LPSymbol) Streamer.emitIntValue(0, 4); else Streamer.emitAbsoluteSymbolDiff(LPSymbol, StartSymbol, 4); }; } Streamer.emitIntValue(TTypeEncoding, 1); // TType format // See the comment in EHStreamer::emitExceptionTable() on to use // uleb128 encoding (which can use variable number of bytes to encode the same // value) to ensure type info table is properly aligned at 4 bytes without // iteratively fixing sizes of the tables. unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); unsigned TTypeBaseOffset = sizeof(int8_t) + // Call site format CallSiteTableLengthSize + // Call site table length size CallSiteTableLength + // Call site table length BF.getLSDAActionTable().size() + // Actions table size BF.getLSDATypeTable().size() * TTypeEncodingSize; // Types table size unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); unsigned TotalSize = sizeof(int8_t) + // LPStart format sizeof(int8_t) + // TType format TTypeBaseOffsetSize + // TType base offset size TTypeBaseOffset; // TType base offset unsigned SizeAlign = (4 - TotalSize) & 3; // Account for any extra padding that will be added to the call site table // length. Streamer.emitULEB128IntValue(TTypeBaseOffset, /*PadTo=*/TTypeBaseOffsetSize + SizeAlign); // Emit the landing pad call site table. We use signed data4 since we can emit // a landing pad in a different part of the split function that could appear // earlier in the address space than LPStart. Streamer.emitIntValue(dwarf::DW_EH_PE_sdata4, 1); Streamer.emitULEB128IntValue(CallSiteTableLength); for (const BinaryFunction::CallSite &CallSite : *Sites) { const MCSymbol *BeginLabel = CallSite.Start; const MCSymbol *EndLabel = CallSite.End; assert(BeginLabel && "start EH label expected"); assert(EndLabel && "end EH label expected"); // Start of the range is emitted relative to the start of current // function split part. Streamer.emitAbsoluteSymbolDiff(BeginLabel, StartSymbol, 4); Streamer.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4); emitLandingPad(CallSite.LP); Streamer.emitULEB128IntValue(CallSite.Action); } // Write out action, type, and type index tables at the end. // // For action and type index tables there's no need to change the original // table format unless we are doing function splitting, in which case we can // split and optimize the tables. // // For type table we (re-)encode the table using TTypeEncoding matching // the current assembler mode. for (uint8_t const &Byte : BF.getLSDAActionTable()) Streamer.emitIntValue(Byte, 1); const BinaryFunction::LSDATypeTableTy &TypeTable = (TTypeEncoding & dwarf::DW_EH_PE_indirect) ? BF.getLSDATypeAddressTable() : BF.getLSDATypeTable(); assert(TypeTable.size() == BF.getLSDATypeTable().size() && "indirect type table size mismatch"); for (int Index = TypeTable.size() - 1; Index >= 0; --Index) { const uint64_t TypeAddress = TypeTable[Index]; switch (TTypeEncoding & 0x70) { default: llvm_unreachable("unsupported TTypeEncoding"); case dwarf::DW_EH_PE_absptr: Streamer.emitIntValue(TypeAddress, TTypeEncodingSize); break; case dwarf::DW_EH_PE_pcrel: { if (TypeAddress) { const MCSymbol *TypeSymbol = BC.getOrCreateGlobalSymbol(TypeAddress, "TI", 0, TTypeAlignment); MCSymbol *DotSymbol = BC.Ctx->createNamedTempSymbol(); Streamer.emitLabel(DotSymbol); const MCBinaryExpr *SubDotExpr = MCBinaryExpr::createSub( MCSymbolRefExpr::create(TypeSymbol, *BC.Ctx), MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx); Streamer.emitValue(SubDotExpr, TTypeEncodingSize); } else { Streamer.emitIntValue(0, TTypeEncodingSize); } break; } } } for (uint8_t const &Byte : BF.getLSDATypeIndexTable()) Streamer.emitIntValue(Byte, 1); } void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() { // If a function is in a CU containing at least one processed function, we // have to rewrite the whole line table for that CU. For unprocessed functions // we use data from the input line table. for (auto &It : BC.getBinaryFunctions()) { const BinaryFunction &Function = It.second; // If the function was emitted, its line info was emitted with it. if (Function.isEmitted()) continue; const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable(); if (!LineTable) continue; // nothing to update for this function const uint64_t Address = Function.getAddress(); std::vector Results; if (!LineTable->lookupAddressRange( {Address, object::SectionedAddress::UndefSection}, Function.getSize(), Results)) continue; if (Results.empty()) continue; // The first row returned could be the last row matching the start address. // Find the first row with the same address that is not the end of the // sequence. uint64_t FirstRow = Results.front(); while (FirstRow > 0) { const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; if (PrevRow.Address.Address != Address || PrevRow.EndSequence) break; --FirstRow; } const uint64_t EndOfSequenceAddress = Function.getAddress() + Function.getMaxSize(); BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset()) .addLineTableSequence(LineTable, FirstRow, Results.back(), EndOfSequenceAddress); } // For units that are completely unprocessed, use original debug line contents // eliminating the need to regenerate line info program. emitDebugLineInfoForUnprocessedCUs(); } void BinaryEmitter::emitDebugLineInfoForUnprocessedCUs() { // Sorted list of section offsets provides boundaries for section fragments, // where each fragment is the unit's contribution to debug line section. std::vector StmtListOffsets; StmtListOffsets.reserve(BC.DwCtx->getNumCompileUnits()); for (const std::unique_ptr &CU : BC.DwCtx->compile_units()) { DWARFDie CUDie = CU->getUnitDIE(); auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list)); if (!StmtList) continue; StmtListOffsets.push_back(*StmtList); } llvm::sort(StmtListOffsets); // For each CU that was not processed, emit its line info as a binary blob. for (const std::unique_ptr &CU : BC.DwCtx->compile_units()) { if (BC.ProcessedCUs.count(CU.get())) continue; DWARFDie CUDie = CU->getUnitDIE(); auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list)); if (!StmtList) continue; StringRef DebugLineContents = CU->getLineSection().Data; const uint64_t Begin = *StmtList; // Statement list ends where the next unit contribution begins, or at the // end of the section. auto It = llvm::upper_bound(StmtListOffsets, Begin); const uint64_t End = It == StmtListOffsets.end() ? DebugLineContents.size() : *It; BC.getDwarfLineTable(CU->getOffset()) .addRawContents(DebugLineContents.slice(Begin, End)); } } void BinaryEmitter::emitDataSections(StringRef OrgSecPrefix) { for (BinarySection &Section : BC.sections()) { if (!Section.hasRelocations() || !Section.hasSectionRef()) continue; StringRef SectionName = Section.getName(); std::string EmitName = Section.isReordered() ? std::string(Section.getOutputName()) : OrgSecPrefix.str() + std::string(SectionName); Section.emitAsData(Streamer, EmitName); Section.clearRelocations(); } } namespace llvm { namespace bolt { void emitBinaryContext(MCStreamer &Streamer, BinaryContext &BC, StringRef OrgSecPrefix) { BinaryEmitter(Streamer, BC).emitAll(OrgSecPrefix); } void emitFunctionBody(MCStreamer &Streamer, BinaryFunction &BF, bool EmitColdPart, bool EmitCodeOnly) { BinaryEmitter(Streamer, BF.getBinaryContext()) .emitFunctionBody(BF, EmitColdPart, EmitCodeOnly); } } // namespace bolt } // namespace llvm