llvm-project/bolt/lib/Core/BinaryEmitter.cpp
Rafael Auler 6d0528636a Rebase: [Facebook] [MC] Introduce NeverAlign fragment type
Summary:
Introduce NeverAlign fragment type.

The intended usage of this fragment is to insert it before a pair of
macro-op fusion eligible instructions. NeverAlign fragment ensures that
the next fragment (first instruction in the pair) does not end at a
given alignment boundary by emitting a minimal size nop if necessary.

In effect, it ensures that a pair of macro-fusible instructions is not
split by a given alignment boundary, which is a precondition for
macro-op fusion in modern Intel Cores (64B = cache line size, see Intel
Architecture Optimization Reference Manual, 2.3.2.1 Legacy Decode
Pipeline: Macro-Fusion).

This patch introduces functionality used by BOLT when emitting code with
MacroFusion alignment already in place.

The use case is different from BoundaryAlign and instruction bundling:
- BoundaryAlign can be extended to perform the desired alignment for the
first instruction in the macro-op fusion pair (D101817). However, this
approach has higher overhead due to reliance on relaxation as
BoundaryAlign requires in the general case - see
https://reviews.llvm.org/D97982#2710638.
- Instruction bundling: the intent of NeverAlign fragment is to prevent
the first instruction in a pair ending at a given alignment boundary, by
inserting at most one minimum size nop. It's OK if either instruction
crosses the cache line. Padding both instructions using bundles to not
cross the alignment boundary would result in excessive padding. There's
no straightforward way to request instruction bundling to avoid a given
end alignment for the first instruction in the bundle.

LLVM: https://reviews.llvm.org/D97982

Manual rebase conflict history:
https://phabricator.intern.facebook.com/D30142613

Test Plan: sandcastle

Reviewers: #llvm-bolt

Subscribers: phabricatorlinter

Differential Revision: https://phabricator.intern.facebook.com/D31361547
2022-07-11 09:31:52 -07:00

1147 lines
42 KiB
C++

//===- bolt/Core/BinaryEmitter.cpp - Emit code and data -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the collection of functions and classes used for
// emission of code and data into object/binary file.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/BinaryEmitter.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/SMLoc.h"
#define DEBUG_TYPE "bolt"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<bool> PreserveBlocksAlignment;
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
cl::cat(BoltOptCategory));
cl::opt<MacroFusionType>
AlignMacroOpFusion("align-macro-fusion",
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
cl::init(MFT_HOT),
cl::values(clEnumValN(MFT_NONE, "none",
"do not insert alignment no-ops for macro-fusion"),
clEnumValN(MFT_HOT, "hot",
"only insert alignment no-ops on hot execution paths (default)"),
clEnumValN(MFT_ALL, "all",
"always align instructions to allow macro-fusion")),
cl::ZeroOrMore,
cl::cat(BoltRelocCategory));
static cl::list<std::string>
BreakFunctionNames("break-funcs",
cl::CommaSeparated,
cl::desc("list of functions to core dump on (debugging)"),
cl::value_desc("func1,func2,func3,..."),
cl::Hidden,
cl::cat(BoltCategory));
static cl::list<std::string>
FunctionPadSpec("pad-funcs",
cl::CommaSeparated,
cl::desc("list of functions to pad with amount of bytes"),
cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."),
cl::Hidden,
cl::cat(BoltCategory));
static cl::opt<bool> MarkFuncs(
"mark-funcs",
cl::desc("mark function boundaries with break instruction to make "
"sure we accidentally don't cross them"),
cl::ReallyHidden, cl::cat(BoltCategory));
static cl::opt<bool> PrintJumpTables("print-jump-tables",
cl::desc("print jump tables"), cl::Hidden,
cl::cat(BoltCategory));
static cl::opt<bool>
X86AlignBranchBoundaryHotOnly("x86-align-branch-boundary-hot-only",
cl::desc("only apply branch boundary alignment in hot code"),
cl::init(true),
cl::cat(BoltOptCategory));
size_t padFunction(const BinaryFunction &Function) {
static std::map<std::string, size_t> FunctionPadding;
if (FunctionPadding.empty() && !FunctionPadSpec.empty()) {
for (std::string &Spec : FunctionPadSpec) {
size_t N = Spec.find(':');
if (N == std::string::npos)
continue;
std::string Name = Spec.substr(0, N);
size_t Padding = std::stoull(Spec.substr(N + 1));
FunctionPadding[Name] = Padding;
}
}
for (auto &FPI : FunctionPadding) {
std::string Name = FPI.first;
size_t Padding = FPI.second;
if (Function.hasNameRegex(Name))
return Padding;
}
return 0;
}
} // namespace opts
namespace {
using JumpTable = bolt::JumpTable;
class BinaryEmitter {
private:
BinaryEmitter(const BinaryEmitter &) = delete;
BinaryEmitter &operator=(const BinaryEmitter &) = delete;
MCStreamer &Streamer;
BinaryContext &BC;
public:
BinaryEmitter(MCStreamer &Streamer, BinaryContext &BC)
: Streamer(Streamer), BC(BC) {}
/// Emit all code and data.
void emitAll(StringRef OrgSecPrefix);
/// Emit function code. The caller is responsible for emitting function
/// symbol(s) and setting the section to emit the code to.
void emitFunctionBody(BinaryFunction &BF, bool EmitColdPart,
bool EmitCodeOnly = false);
private:
/// Emit function code.
void emitFunctions();
/// Emit a single function.
bool emitFunction(BinaryFunction &BF, bool EmitColdPart);
/// Helper for emitFunctionBody to write data inside a function
/// (used for AArch64)
void emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
BinaryFunction *OnBehalfOf = nullptr);
/// Emit jump tables for the function.
void emitJumpTables(const BinaryFunction &BF);
/// Emit jump table data. Callee supplies sections for the data.
void emitJumpTable(const JumpTable &JT, MCSection *HotSection,
MCSection *ColdSection);
void emitCFIInstruction(const MCCFIInstruction &Inst) const;
/// Emit exception handling ranges for the function.
void emitLSDA(BinaryFunction &BF, bool EmitColdPart);
/// Emit line number information corresponding to \p NewLoc. \p PrevLoc
/// provides a context for de-duplication of line number info.
/// \p FirstInstr indicates if \p NewLoc represents the first instruction
/// in a sequence, such as a function fragment.
///
/// Return new current location which is either \p NewLoc or \p PrevLoc.
SMLoc emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc,
bool FirstInstr);
/// Use \p FunctionEndSymbol to mark the end of the line info sequence.
/// Note that it does not automatically result in the insertion of the EOS
/// marker in the line table program, but provides one to the DWARF generator
/// when it needs it.
void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol);
/// Emit debug line info for unprocessed functions from CUs that include
/// emitted functions.
void emitDebugLineInfoForOriginalFunctions();
/// Emit debug line for CUs that were not modified.
void emitDebugLineInfoForUnprocessedCUs();
/// Emit data sections that have code references in them.
void emitDataSections(StringRef OrgSecPrefix);
};
} // anonymous namespace
void BinaryEmitter::emitAll(StringRef OrgSecPrefix) {
Streamer.initSections(false, *BC.STI);
if (opts::UpdateDebugSections && BC.isELF()) {
// Force the emission of debug line info into allocatable section to ensure
// RuntimeDyld will process it without ProcessAllSections flag.
//
// NB: on MachO all sections are required for execution, hence no need
// to change flags/attributes.
MCSectionELF *ELFDwarfLineSection =
static_cast<MCSectionELF *>(BC.MOFI->getDwarfLineSection());
ELFDwarfLineSection->setFlags(ELF::SHF_ALLOC);
}
if (RuntimeLibrary *RtLibrary = BC.getRuntimeLibrary())
RtLibrary->emitBinary(BC, Streamer);
BC.getTextSection()->setAlignment(Align(opts::AlignText));
emitFunctions();
if (opts::UpdateDebugSections) {
emitDebugLineInfoForOriginalFunctions();
DwarfLineTable::emit(BC, Streamer);
}
emitDataSections(OrgSecPrefix);
Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("_end"));
}
void BinaryEmitter::emitFunctions() {
auto emit = [&](const std::vector<BinaryFunction *> &Functions) {
const bool HasProfile = BC.NumProfiledFuncs > 0;
const bool OriginalAllowAutoPadding = Streamer.getAllowAutoPadding();
for (BinaryFunction *Function : Functions) {
if (!BC.shouldEmit(*Function))
continue;
LLVM_DEBUG(dbgs() << "BOLT: generating code for function \"" << *Function
<< "\" : " << Function->getFunctionNumber() << '\n');
// Was any part of the function emitted.
bool Emitted = false;
// Turn off Intel JCC Erratum mitigation for cold code if requested
if (HasProfile && opts::X86AlignBranchBoundaryHotOnly &&
!Function->hasValidProfile())
Streamer.setAllowAutoPadding(false);
Emitted |= emitFunction(*Function, /*EmitColdPart=*/false);
if (Function->isSplit()) {
if (opts::X86AlignBranchBoundaryHotOnly)
Streamer.setAllowAutoPadding(false);
Emitted |= emitFunction(*Function, /*EmitColdPart=*/true);
}
Streamer.setAllowAutoPadding(OriginalAllowAutoPadding);
if (Emitted)
Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
}
};
// Mark the start of hot text.
if (opts::HotText) {
Streamer.switchSection(BC.getTextSection());
Streamer.emitLabel(BC.getHotTextStartSymbol());
}
// Emit functions in sorted order.
std::vector<BinaryFunction *> SortedFunctions = BC.getSortedFunctions();
emit(SortedFunctions);
// Emit functions added by BOLT.
emit(BC.getInjectedBinaryFunctions());
// Mark the end of hot text.
if (opts::HotText) {
Streamer.switchSection(BC.getTextSection());
Streamer.emitLabel(BC.getHotTextEndSymbol());
}
}
bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
if (Function.size() == 0 && !Function.hasIslandsInfo())
return false;
if (Function.getState() == BinaryFunction::State::Empty)
return false;
MCSection *Section =
BC.getCodeSection(EmitColdPart ? Function.getColdCodeSectionName()
: Function.getCodeSectionName());
Streamer.switchSection(Section);
Section->setHasInstructions(true);
BC.Ctx->addGenDwarfSection(Section);
if (BC.HasRelocations) {
// Set section alignment to at least maximum possible object alignment.
// We need this to support LongJmp and other passes that calculates
// tentative layout.
if (Section->getAlignment() < opts::AlignFunctions)
Section->setAlignment(Align(opts::AlignFunctions));
Streamer.emitCodeAlignment(BinaryFunction::MinAlign, &*BC.STI);
uint16_t MaxAlignBytes = EmitColdPart ? Function.getMaxColdAlignmentBytes()
: Function.getMaxAlignmentBytes();
if (MaxAlignBytes > 0)
Streamer.emitCodeAlignment(Function.getAlignment(), &*BC.STI,
MaxAlignBytes);
} else {
Streamer.emitCodeAlignment(Function.getAlignment(), &*BC.STI);
}
MCContext &Context = Streamer.getContext();
const MCAsmInfo *MAI = Context.getAsmInfo();
MCSymbol *StartSymbol = nullptr;
// Emit all symbols associated with the main function entry.
if (!EmitColdPart) {
StartSymbol = Function.getSymbol();
for (MCSymbol *Symbol : Function.getSymbols()) {
Streamer.emitSymbolAttribute(Symbol, MCSA_ELF_TypeFunction);
Streamer.emitLabel(Symbol);
}
} else {
StartSymbol = Function.getColdSymbol();
Streamer.emitSymbolAttribute(StartSymbol, MCSA_ELF_TypeFunction);
Streamer.emitLabel(StartSymbol);
}
// Emit CFI start
if (Function.hasCFI()) {
Streamer.emitCFIStartProc(/*IsSimple=*/false);
if (Function.getPersonalityFunction() != nullptr)
Streamer.emitCFIPersonality(Function.getPersonalityFunction(),
Function.getPersonalityEncoding());
MCSymbol *LSDASymbol =
EmitColdPart ? Function.getColdLSDASymbol() : Function.getLSDASymbol();
if (LSDASymbol)
Streamer.emitCFILsda(LSDASymbol, BC.LSDAEncoding);
else
Streamer.emitCFILsda(0, dwarf::DW_EH_PE_omit);
// Emit CFI instructions relative to the CIE
for (const MCCFIInstruction &CFIInstr : Function.cie()) {
// Only write CIE CFI insns that LLVM will not already emit
const std::vector<MCCFIInstruction> &FrameInstrs =
MAI->getInitialFrameState();
if (!llvm::is_contained(FrameInstrs, CFIInstr))
emitCFIInstruction(CFIInstr);
}
}
assert((Function.empty() || !(*Function.begin()).isCold()) &&
"first basic block should never be cold");
// Emit UD2 at the beginning if requested by user.
if (!opts::BreakFunctionNames.empty()) {
for (std::string &Name : opts::BreakFunctionNames) {
if (Function.hasNameRegex(Name)) {
Streamer.emitIntValue(0x0B0F, 2); // UD2: 0F 0B
break;
}
}
}
// Emit code.
emitFunctionBody(Function, EmitColdPart, /*EmitCodeOnly=*/false);
// Emit padding if requested.
if (size_t Padding = opts::padFunction(Function)) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: padding function " << Function << " with "
<< Padding << " bytes\n");
Streamer.emitFill(Padding, MAI->getTextAlignFillValue());
}
if (opts::MarkFuncs)
Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1);
// Emit CFI end
if (Function.hasCFI())
Streamer.emitCFIEndProc();
MCSymbol *EndSymbol = EmitColdPart ? Function.getFunctionColdEndLabel()
: Function.getFunctionEndLabel();
Streamer.emitLabel(EndSymbol);
if (MAI->hasDotTypeDotSizeDirective()) {
const MCExpr *SizeExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(EndSymbol, Context),
MCSymbolRefExpr::create(StartSymbol, Context), Context);
Streamer.emitELFSize(StartSymbol, SizeExpr);
}
if (opts::UpdateDebugSections && Function.getDWARFUnit())
emitLineInfoEnd(Function, EndSymbol);
// Exception handling info for the function.
emitLSDA(Function, EmitColdPart);
if (!EmitColdPart && opts::JumpTables > JTS_NONE)
emitJumpTables(Function);
return true;
}
void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, bool EmitColdPart,
bool EmitCodeOnly) {
if (!EmitCodeOnly && EmitColdPart && BF.hasConstantIsland())
BF.duplicateConstantIslands();
// Track the first emitted instruction with debug info.
bool FirstInstr = true;
for (BinaryBasicBlock *BB : BF.layout()) {
if (EmitColdPart != BB->isCold())
continue;
if ((opts::AlignBlocks || opts::PreserveBlocksAlignment) &&
BB->getAlignment() > 1)
Streamer.emitCodeAlignment(BB->getAlignment(), &*BC.STI,
BB->getAlignmentMaxBytes());
Streamer.emitLabel(BB->getLabel());
if (!EmitCodeOnly) {
if (MCSymbol *EntrySymbol = BF.getSecondaryEntryPointSymbol(*BB))
Streamer.emitLabel(EntrySymbol);
}
// Check if special alignment for macro-fusion is needed.
bool MayNeedMacroFusionAlignment =
(opts::AlignMacroOpFusion == MFT_ALL) ||
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
BinaryBasicBlock::const_iterator MacroFusionPair;
if (MayNeedMacroFusionAlignment) {
MacroFusionPair = BB->getMacroOpFusionPair();
if (MacroFusionPair == BB->end())
MayNeedMacroFusionAlignment = false;
}
SMLoc LastLocSeen;
// Remember if the last instruction emitted was a prefix.
bool LastIsPrefix = false;
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
MCInst &Instr = *I;
if (EmitCodeOnly && BC.MIB->isPseudo(Instr))
continue;
// Handle pseudo instructions.
if (BC.MIB->isEHLabel(Instr)) {
const MCSymbol *Label = BC.MIB->getTargetSymbol(Instr);
assert(Instr.getNumOperands() >= 1 && Label &&
"bad EH_LABEL instruction");
Streamer.emitLabel(const_cast<MCSymbol *>(Label));
continue;
}
if (BC.MIB->isCFI(Instr)) {
emitCFIInstruction(*BF.getCFIFor(Instr));
continue;
}
// Handle macro-fusion alignment. If we emitted a prefix as
// the last instruction, we should've already emitted the associated
// alignment hint, so don't emit it twice.
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
I == MacroFusionPair) {
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
Streamer.emitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64, *BC.STI);
}
if (!EmitCodeOnly && opts::UpdateDebugSections && BF.getDWARFUnit()) {
LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, FirstInstr);
FirstInstr = false;
}
// Prepare to tag this location with a label if we need to keep track of
// the location of calls/returns for BOLT address translation maps
if (!EmitCodeOnly && BF.requiresAddressTranslation() &&
BC.MIB->getOffset(Instr)) {
const uint32_t Offset = *BC.MIB->getOffset(Instr);
MCSymbol *LocSym = BC.Ctx->createTempSymbol();
Streamer.emitLabel(LocSym);
BB->getLocSyms().emplace_back(Offset, LocSym);
}
Streamer.emitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
}
if (!EmitCodeOnly)
emitConstantIslands(BF, EmitColdPart);
}
void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
BinaryFunction *OnBehalfOf) {
if (!BF.hasIslandsInfo())
return;
BinaryFunction::IslandInfo &Islands = BF.getIslandInfo();
if (Islands.DataOffsets.empty() && Islands.Dependency.empty())
return;
// AArch64 requires CI to be aligned to 8 bytes due to access instructions
// restrictions. E.g. the ldr with imm, where imm must be aligned to 8 bytes.
const uint16_t Alignment = OnBehalfOf
? OnBehalfOf->getConstantIslandAlignment()
: BF.getConstantIslandAlignment();
Streamer.emitCodeAlignment(Alignment, &*BC.STI);
if (!OnBehalfOf) {
if (!EmitColdPart)
Streamer.emitLabel(BF.getFunctionConstantIslandLabel());
else
Streamer.emitLabel(BF.getFunctionColdConstantIslandLabel());
}
assert((!OnBehalfOf || Islands.Proxies[OnBehalfOf].size() > 0) &&
"spurious OnBehalfOf constant island emission");
assert(!BF.isInjected() &&
"injected functions should not have constant islands");
// Raw contents of the function.
StringRef SectionContents = BF.getOriginSection()->getContents();
// Raw contents of the function.
StringRef FunctionContents = SectionContents.substr(
BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize());
if (opts::Verbosity && !OnBehalfOf)
outs() << "BOLT-INFO: emitting constant island for function " << BF << "\n";
// We split the island into smaller blocks and output labels between them.
auto IS = Islands.Offsets.begin();
for (auto DataIter = Islands.DataOffsets.begin();
DataIter != Islands.DataOffsets.end(); ++DataIter) {
uint64_t FunctionOffset = *DataIter;
uint64_t EndOffset = 0ULL;
// Determine size of this data chunk
auto NextData = std::next(DataIter);
auto CodeIter = Islands.CodeOffsets.lower_bound(*DataIter);
if (CodeIter == Islands.CodeOffsets.end() &&
NextData == Islands.DataOffsets.end())
EndOffset = BF.getMaxSize();
else if (CodeIter == Islands.CodeOffsets.end())
EndOffset = *NextData;
else if (NextData == Islands.DataOffsets.end())
EndOffset = *CodeIter;
else
EndOffset = (*CodeIter > *NextData) ? *NextData : *CodeIter;
if (FunctionOffset == EndOffset)
continue; // Size is zero, nothing to emit
auto emitCI = [&](uint64_t &FunctionOffset, uint64_t EndOffset) {
if (FunctionOffset >= EndOffset)
return;
for (auto It = Islands.Relocations.lower_bound(FunctionOffset);
It != Islands.Relocations.end(); ++It) {
if (It->first >= EndOffset)
break;
const Relocation &Relocation = It->second;
if (FunctionOffset < Relocation.Offset) {
Streamer.emitBytes(
FunctionContents.slice(FunctionOffset, Relocation.Offset));
FunctionOffset = Relocation.Offset;
}
LLVM_DEBUG(
dbgs() << "BOLT-DEBUG: emitting constant island relocation"
<< " for " << BF << " at offset 0x"
<< Twine::utohexstr(Relocation.Offset) << " with size "
<< Relocation::getSizeForType(Relocation.Type) << '\n');
FunctionOffset += Relocation.emit(&Streamer);
}
assert(FunctionOffset <= EndOffset && "overflow error");
if (FunctionOffset < EndOffset) {
Streamer.emitBytes(FunctionContents.slice(FunctionOffset, EndOffset));
FunctionOffset = EndOffset;
}
};
// Emit labels, relocs and data
while (IS != Islands.Offsets.end() && IS->first < EndOffset) {
auto NextLabelOffset =
IS == Islands.Offsets.end() ? EndOffset : IS->first;
auto NextStop = std::min(NextLabelOffset, EndOffset);
assert(NextStop <= EndOffset && "internal overflow error");
emitCI(FunctionOffset, NextStop);
if (IS != Islands.Offsets.end() && FunctionOffset == IS->first) {
// This is a slightly complex code to decide which label to emit. We
// have 4 cases to handle: regular symbol, cold symbol, regular or cold
// symbol being emitted on behalf of an external function.
if (!OnBehalfOf) {
if (!EmitColdPart) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label "
<< IS->second->getName() << " at offset 0x"
<< Twine::utohexstr(IS->first) << '\n');
if (IS->second->isUndefined())
Streamer.emitLabel(IS->second);
else
assert(BF.hasName(std::string(IS->second->getName())));
} else if (Islands.ColdSymbols.count(IS->second) != 0) {
LLVM_DEBUG(dbgs()
<< "BOLT-DEBUG: emitted label "
<< Islands.ColdSymbols[IS->second]->getName() << '\n');
if (Islands.ColdSymbols[IS->second]->isUndefined())
Streamer.emitLabel(Islands.ColdSymbols[IS->second]);
}
} else {
if (!EmitColdPart) {
if (MCSymbol *Sym = Islands.Proxies[OnBehalfOf][IS->second]) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label "
<< Sym->getName() << '\n');
Streamer.emitLabel(Sym);
}
} else if (MCSymbol *Sym =
Islands.ColdProxies[OnBehalfOf][IS->second]) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << Sym->getName()
<< '\n');
Streamer.emitLabel(Sym);
}
}
++IS;
}
}
assert(FunctionOffset <= EndOffset && "overflow error");
emitCI(FunctionOffset, EndOffset);
}
assert(IS == Islands.Offsets.end() && "some symbols were not emitted!");
if (OnBehalfOf)
return;
// Now emit constant islands from other functions that we may have used in
// this function.
for (BinaryFunction *ExternalFunc : Islands.Dependency)
emitConstantIslands(*ExternalFunc, EmitColdPart, &BF);
}
SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
SMLoc PrevLoc, bool FirstInstr) {
DWARFUnit *FunctionCU = BF.getDWARFUnit();
const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable();
assert(FunctionCU && "cannot emit line info for function without CU");
DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);
// Check if no new line info needs to be emitted.
if (RowReference == DebugLineTableRowRef::NULL_ROW ||
NewLoc.getPointer() == PrevLoc.getPointer())
return PrevLoc;
unsigned CurrentFilenum = 0;
const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable;
// If the CU id from the current instruction location does not
// match the CU id from the current function, it means that we
// have come across some inlined code. We must look up the CU
// for the instruction's original function and get the line table
// from that.
const uint64_t FunctionUnitIndex = FunctionCU->getOffset();
const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex;
if (CurrentUnitIndex != FunctionUnitIndex) {
CurrentLineTable = BC.DwCtx->getLineTableForUnit(
BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex));
// Add filename from the inlined function to the current CU.
CurrentFilenum = BC.addDebugFilenameToUnit(
FunctionUnitIndex, CurrentUnitIndex,
CurrentLineTable->Rows[RowReference.RowIndex - 1].File);
}
const DWARFDebugLine::Row &CurrentRow =
CurrentLineTable->Rows[RowReference.RowIndex - 1];
if (!CurrentFilenum)
CurrentFilenum = CurrentRow.File;
unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) |
(DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) |
(DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) |
(DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin);
// Always emit is_stmt at the beginning of function fragment.
if (FirstInstr)
Flags |= DWARF2_FLAG_IS_STMT;
BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column,
Flags, CurrentRow.Isa, CurrentRow.Discriminator);
const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
BC.Ctx->clearDwarfLocSeen();
MCSymbol *LineSym = BC.Ctx->createTempSymbol();
Streamer.emitLabel(LineSym);
BC.getDwarfLineTable(FunctionUnitIndex)
.getMCLineSections()
.addLineEntry(MCDwarfLineEntry(LineSym, DwarfLoc),
Streamer.getCurrentSectionOnly());
return NewLoc;
}
void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF,
MCSymbol *FunctionEndLabel) {
DWARFUnit *FunctionCU = BF.getDWARFUnit();
assert(FunctionCU && "DWARF unit expected");
BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0);
const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
BC.Ctx->clearDwarfLocSeen();
BC.getDwarfLineTable(FunctionCU->getOffset())
.getMCLineSections()
.addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc),
Streamer.getCurrentSectionOnly());
}
void BinaryEmitter::emitJumpTables(const BinaryFunction &BF) {
MCSection *ReadOnlySection = BC.MOFI->getReadOnlySection();
MCSection *ReadOnlyColdSection = BC.MOFI->getContext().getELFSection(
".rodata.cold", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
if (!BF.hasJumpTables())
return;
if (opts::PrintJumpTables)
outs() << "BOLT-INFO: jump tables for function " << BF << ":\n";
for (auto &JTI : BF.jumpTables()) {
JumpTable &JT = *JTI.second;
if (opts::PrintJumpTables)
JT.print(outs());
if ((opts::JumpTables == JTS_BASIC || !BF.isSimple()) &&
BC.HasRelocations) {
JT.updateOriginal();
} else {
MCSection *HotSection, *ColdSection;
if (opts::JumpTables == JTS_BASIC) {
// In non-relocation mode we have to emit jump tables in local sections.
// This way we only overwrite them when the corresponding function is
// overwritten.
std::string Name = ".local." + JT.Labels[0]->getName().str();
std::replace(Name.begin(), Name.end(), '/', '.');
BinarySection &Section =
BC.registerOrUpdateSection(Name, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
Section.setAnonymous(true);
JT.setOutputSection(Section);
HotSection = BC.getDataSection(Name);
ColdSection = HotSection;
} else {
if (BF.isSimple()) {
HotSection = ReadOnlySection;
ColdSection = ReadOnlyColdSection;
} else {
HotSection = BF.hasProfile() ? ReadOnlySection : ReadOnlyColdSection;
ColdSection = HotSection;
}
}
emitJumpTable(JT, HotSection, ColdSection);
}
}
}
void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection,
MCSection *ColdSection) {
// Pre-process entries for aggressive splitting.
// Each label represents a separate switch table and gets its own count
// determining its destination.
std::map<MCSymbol *, uint64_t> LabelCounts;
if (opts::JumpTables > JTS_SPLIT && !JT.Counts.empty()) {
MCSymbol *CurrentLabel = JT.Labels.at(0);
uint64_t CurrentLabelCount = 0;
for (unsigned Index = 0; Index < JT.Entries.size(); ++Index) {
auto LI = JT.Labels.find(Index * JT.EntrySize);
if (LI != JT.Labels.end()) {
LabelCounts[CurrentLabel] = CurrentLabelCount;
CurrentLabel = LI->second;
CurrentLabelCount = 0;
}
CurrentLabelCount += JT.Counts[Index].Count;
}
LabelCounts[CurrentLabel] = CurrentLabelCount;
} else {
Streamer.switchSection(JT.Count > 0 ? HotSection : ColdSection);
Streamer.emitValueToAlignment(JT.EntrySize);
}
MCSymbol *LastLabel = nullptr;
uint64_t Offset = 0;
for (MCSymbol *Entry : JT.Entries) {
auto LI = JT.Labels.find(Offset);
if (LI != JT.Labels.end()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
<< LI->second->getName()
<< " (originally was at address 0x"
<< Twine::utohexstr(JT.getAddress() + Offset)
<< (Offset ? "as part of larger jump table\n" : "\n"));
if (!LabelCounts.empty()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
<< LabelCounts[LI->second] << '\n');
if (LabelCounts[LI->second] > 0)
Streamer.switchSection(HotSection);
else
Streamer.switchSection(ColdSection);
Streamer.emitValueToAlignment(JT.EntrySize);
}
Streamer.emitLabel(LI->second);
LastLabel = LI->second;
}
if (JT.Type == JumpTable::JTT_NORMAL) {
Streamer.emitSymbolValue(Entry, JT.OutputEntrySize);
} else { // JTT_PIC
const MCSymbolRefExpr *JTExpr =
MCSymbolRefExpr::create(LastLabel, Streamer.getContext());
const MCSymbolRefExpr *E =
MCSymbolRefExpr::create(Entry, Streamer.getContext());
const MCBinaryExpr *Value =
MCBinaryExpr::createSub(E, JTExpr, Streamer.getContext());
Streamer.emitValue(Value, JT.EntrySize);
}
Offset += JT.EntrySize;
}
}
void BinaryEmitter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
switch (Inst.getOperation()) {
default:
llvm_unreachable("Unexpected instruction");
case MCCFIInstruction::OpDefCfaOffset:
Streamer.emitCFIDefCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpAdjustCfaOffset:
Streamer.emitCFIAdjustCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfa:
Streamer.emitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfaRegister:
Streamer.emitCFIDefCfaRegister(Inst.getRegister());
break;
case MCCFIInstruction::OpOffset:
Streamer.emitCFIOffset(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpRegister:
Streamer.emitCFIRegister(Inst.getRegister(), Inst.getRegister2());
break;
case MCCFIInstruction::OpWindowSave:
Streamer.emitCFIWindowSave();
break;
case MCCFIInstruction::OpNegateRAState:
Streamer.emitCFINegateRAState();
break;
case MCCFIInstruction::OpSameValue:
Streamer.emitCFISameValue(Inst.getRegister());
break;
case MCCFIInstruction::OpGnuArgsSize:
Streamer.emitCFIGnuArgsSize(Inst.getOffset());
break;
case MCCFIInstruction::OpEscape:
Streamer.AddComment(Inst.getComment());
Streamer.emitCFIEscape(Inst.getValues());
break;
case MCCFIInstruction::OpRestore:
Streamer.emitCFIRestore(Inst.getRegister());
break;
case MCCFIInstruction::OpUndefined:
Streamer.emitCFIUndefined(Inst.getRegister());
break;
}
}
// The code is based on EHStreamer::emitExceptionTable().
void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) {
const BinaryFunction::CallSitesType *Sites =
EmitColdPart ? &BF.getColdCallSites() : &BF.getCallSites();
if (Sites->empty())
return;
// Calculate callsite table size. Size of each callsite entry is:
//
// sizeof(start) + sizeof(length) + sizeof(LP) + sizeof(uleb128(action))
//
// or
//
// sizeof(dwarf::DW_EH_PE_data4) * 3 + sizeof(uleb128(action))
uint64_t CallSiteTableLength = Sites->size() * 4 * 3;
for (const BinaryFunction::CallSite &CallSite : *Sites)
CallSiteTableLength += getULEB128Size(CallSite.Action);
Streamer.switchSection(BC.MOFI->getLSDASection());
const unsigned TTypeEncoding = BC.TTypeEncoding;
const unsigned TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
const uint16_t TTypeAlignment = 4;
// Type tables have to be aligned at 4 bytes.
Streamer.emitValueToAlignment(TTypeAlignment);
// Emit the LSDA label.
MCSymbol *LSDASymbol =
EmitColdPart ? BF.getColdLSDASymbol() : BF.getLSDASymbol();
assert(LSDASymbol && "no LSDA symbol set");
Streamer.emitLabel(LSDASymbol);
// Corresponding FDE start.
const MCSymbol *StartSymbol =
EmitColdPart ? BF.getColdSymbol() : BF.getSymbol();
// Emit the LSDA header.
// If LPStart is omitted, then the start of the FDE is used as a base for
// landing pad displacements. Then if a cold fragment starts with
// a landing pad, this means that the first landing pad offset will be 0.
// As a result, the exception handling runtime will ignore this landing pad
// because zero offset denotes the absence of a landing pad.
// For this reason, when the binary has fixed starting address we emit LPStart
// as 0 and output the absolute value of the landing pad in the table.
//
// If the base address can change, we cannot use absolute addresses for
// landing pads (at least not without runtime relocations). Hence, we fall
// back to emitting landing pads relative to the FDE start.
// As we are emitting label differences, we have to guarantee both labels are
// defined in the same section and hence cannot place the landing pad into a
// cold fragment when the corresponding call site is in the hot fragment.
// Because of this issue and the previously described issue of possible
// zero-offset landing pad we have to place landing pads in the same section
// as the corresponding invokes for shared objects.
std::function<void(const MCSymbol *)> emitLandingPad;
if (BC.HasFixedLoadAddress) {
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
Streamer.emitIntValue(0, 4); // LPStart
emitLandingPad = [&](const MCSymbol *LPSymbol) {
if (!LPSymbol)
Streamer.emitIntValue(0, 4);
else
Streamer.emitSymbolValue(LPSymbol, 4);
};
} else {
Streamer.emitIntValue(dwarf::DW_EH_PE_omit, 1); // LPStart format
emitLandingPad = [&](const MCSymbol *LPSymbol) {
if (!LPSymbol)
Streamer.emitIntValue(0, 4);
else
Streamer.emitAbsoluteSymbolDiff(LPSymbol, StartSymbol, 4);
};
}
Streamer.emitIntValue(TTypeEncoding, 1); // TType format
// See the comment in EHStreamer::emitExceptionTable() on to use
// uleb128 encoding (which can use variable number of bytes to encode the same
// value) to ensure type info table is properly aligned at 4 bytes without
// iteratively fixing sizes of the tables.
unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
unsigned TTypeBaseOffset =
sizeof(int8_t) + // Call site format
CallSiteTableLengthSize + // Call site table length size
CallSiteTableLength + // Call site table length
BF.getLSDAActionTable().size() + // Actions table size
BF.getLSDATypeTable().size() * TTypeEncodingSize; // Types table size
unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
unsigned TotalSize = sizeof(int8_t) + // LPStart format
sizeof(int8_t) + // TType format
TTypeBaseOffsetSize + // TType base offset size
TTypeBaseOffset; // TType base offset
unsigned SizeAlign = (4 - TotalSize) & 3;
// Account for any extra padding that will be added to the call site table
// length.
Streamer.emitULEB128IntValue(TTypeBaseOffset,
/*PadTo=*/TTypeBaseOffsetSize + SizeAlign);
// Emit the landing pad call site table. We use signed data4 since we can emit
// a landing pad in a different part of the split function that could appear
// earlier in the address space than LPStart.
Streamer.emitIntValue(dwarf::DW_EH_PE_sdata4, 1);
Streamer.emitULEB128IntValue(CallSiteTableLength);
for (const BinaryFunction::CallSite &CallSite : *Sites) {
const MCSymbol *BeginLabel = CallSite.Start;
const MCSymbol *EndLabel = CallSite.End;
assert(BeginLabel && "start EH label expected");
assert(EndLabel && "end EH label expected");
// Start of the range is emitted relative to the start of current
// function split part.
Streamer.emitAbsoluteSymbolDiff(BeginLabel, StartSymbol, 4);
Streamer.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
emitLandingPad(CallSite.LP);
Streamer.emitULEB128IntValue(CallSite.Action);
}
// Write out action, type, and type index tables at the end.
//
// For action and type index tables there's no need to change the original
// table format unless we are doing function splitting, in which case we can
// split and optimize the tables.
//
// For type table we (re-)encode the table using TTypeEncoding matching
// the current assembler mode.
for (uint8_t const &Byte : BF.getLSDAActionTable())
Streamer.emitIntValue(Byte, 1);
const BinaryFunction::LSDATypeTableTy &TypeTable =
(TTypeEncoding & dwarf::DW_EH_PE_indirect) ? BF.getLSDATypeAddressTable()
: BF.getLSDATypeTable();
assert(TypeTable.size() == BF.getLSDATypeTable().size() &&
"indirect type table size mismatch");
for (int Index = TypeTable.size() - 1; Index >= 0; --Index) {
const uint64_t TypeAddress = TypeTable[Index];
switch (TTypeEncoding & 0x70) {
default:
llvm_unreachable("unsupported TTypeEncoding");
case dwarf::DW_EH_PE_absptr:
Streamer.emitIntValue(TypeAddress, TTypeEncodingSize);
break;
case dwarf::DW_EH_PE_pcrel: {
if (TypeAddress) {
const MCSymbol *TypeSymbol =
BC.getOrCreateGlobalSymbol(TypeAddress, "TI", 0, TTypeAlignment);
MCSymbol *DotSymbol = BC.Ctx->createNamedTempSymbol();
Streamer.emitLabel(DotSymbol);
const MCBinaryExpr *SubDotExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(TypeSymbol, *BC.Ctx),
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx);
Streamer.emitValue(SubDotExpr, TTypeEncodingSize);
} else {
Streamer.emitIntValue(0, TTypeEncodingSize);
}
break;
}
}
}
for (uint8_t const &Byte : BF.getLSDATypeIndexTable())
Streamer.emitIntValue(Byte, 1);
}
void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() {
// If a function is in a CU containing at least one processed function, we
// have to rewrite the whole line table for that CU. For unprocessed functions
// we use data from the input line table.
for (auto &It : BC.getBinaryFunctions()) {
const BinaryFunction &Function = It.second;
// If the function was emitted, its line info was emitted with it.
if (Function.isEmitted())
continue;
const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable();
if (!LineTable)
continue; // nothing to update for this function
const uint64_t Address = Function.getAddress();
std::vector<uint32_t> Results;
if (!LineTable->lookupAddressRange(
{Address, object::SectionedAddress::UndefSection},
Function.getSize(), Results))
continue;
if (Results.empty())
continue;
// The first row returned could be the last row matching the start address.
// Find the first row with the same address that is not the end of the
// sequence.
uint64_t FirstRow = Results.front();
while (FirstRow > 0) {
const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1];
if (PrevRow.Address.Address != Address || PrevRow.EndSequence)
break;
--FirstRow;
}
const uint64_t EndOfSequenceAddress =
Function.getAddress() + Function.getMaxSize();
BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset())
.addLineTableSequence(LineTable, FirstRow, Results.back(),
EndOfSequenceAddress);
}
// For units that are completely unprocessed, use original debug line contents
// eliminating the need to regenerate line info program.
emitDebugLineInfoForUnprocessedCUs();
}
void BinaryEmitter::emitDebugLineInfoForUnprocessedCUs() {
// Sorted list of section offsets provides boundaries for section fragments,
// where each fragment is the unit's contribution to debug line section.
std::vector<uint64_t> StmtListOffsets;
StmtListOffsets.reserve(BC.DwCtx->getNumCompileUnits());
for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
DWARFDie CUDie = CU->getUnitDIE();
auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
if (!StmtList)
continue;
StmtListOffsets.push_back(*StmtList);
}
llvm::sort(StmtListOffsets);
// For each CU that was not processed, emit its line info as a binary blob.
for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
if (BC.ProcessedCUs.count(CU.get()))
continue;
DWARFDie CUDie = CU->getUnitDIE();
auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
if (!StmtList)
continue;
StringRef DebugLineContents = CU->getLineSection().Data;
const uint64_t Begin = *StmtList;
// Statement list ends where the next unit contribution begins, or at the
// end of the section.
auto It = llvm::upper_bound(StmtListOffsets, Begin);
const uint64_t End =
It == StmtListOffsets.end() ? DebugLineContents.size() : *It;
BC.getDwarfLineTable(CU->getOffset())
.addRawContents(DebugLineContents.slice(Begin, End));
}
}
void BinaryEmitter::emitDataSections(StringRef OrgSecPrefix) {
for (BinarySection &Section : BC.sections()) {
if (!Section.hasRelocations() || !Section.hasSectionRef())
continue;
StringRef SectionName = Section.getName();
std::string EmitName = Section.isReordered()
? std::string(Section.getOutputName())
: OrgSecPrefix.str() + std::string(SectionName);
Section.emitAsData(Streamer, EmitName);
Section.clearRelocations();
}
}
namespace llvm {
namespace bolt {
void emitBinaryContext(MCStreamer &Streamer, BinaryContext &BC,
StringRef OrgSecPrefix) {
BinaryEmitter(Streamer, BC).emitAll(OrgSecPrefix);
}
void emitFunctionBody(MCStreamer &Streamer, BinaryFunction &BF,
bool EmitColdPart, bool EmitCodeOnly) {
BinaryEmitter(Streamer, BF.getBinaryContext())
.emitFunctionBody(BF, EmitColdPart, EmitCodeOnly);
}
} // namespace bolt
} // namespace llvm