mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-02 23:16:05 +00:00

Summary: Perform indirect call promotion optimization in BOLT. The code scans the instructions during CFG creation for all indirect calls. Right now indirect tail calls are not handled since the functions are marked not simple. The offsets of the indirect calls are stored for later use by the ICP pass. The indirect call promotion pass visits each indirect call and examines the BranchData for each. If the most frequent targets from that callsite exceed the specified threshold (default 90%), the call is promoted. Otherwise, it is ignored. By default, only one target is considered at each callsite. When a candidate callsite is processed, we modify the callsite to test for the most common call targets before calling through the original generic call mechanism. The CFG and layout are modified by ICP. A few new command line options have been added: -indirect-call-promotion -indirect-call-promotion-threshold=<percentage> -indirect-call-promotion-topn=<int> The threshold is the minimum frequency of a call target needed before ICP is triggered. The topn option controls the number of targets to consider for each callsite, e.g. ICP is triggered if topn=2 and the total frequency of the top two call targets exceeds the threshold. Example of ICP: C++ code: int B_count = 0; int C_count = 0; struct A { virtual void foo() = 0; }; struct B : public A { virtual void foo() { ++B_count; }; }; struct C : public A { virtual void foo() { ++C_count; }; }; A* a = ... a->foo(); ... original: 400863: 49 8b 07 mov (%r15),%rax 400866: 4c 89 ff mov %r15,%rdi 400869: ff 10 callq *(%rax) 40086b: 41 83 e6 01 and $0x1,%r14d 40086f: 4d 89 e6 mov %r12,%r14 400872: 4c 0f 44 f5 cmove %rbp,%r14 400876: 4c 89 f7 mov %r14,%rdi ... 
after ICP: 40085e: 49 8b 07 mov (%r15),%rax 400861: 4c 89 ff mov %r15,%rdi 400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10 40086b: 00 00 00 40086e: 4c 3b 10 cmp (%rax),%r10 400871: 75 29 jne 40089c <main+0x9c> 400873: 41 ff d2 callq *%r10 400876: 41 83 e6 01 and $0x1,%r14d 40087a: 4d 89 e6 mov %r12,%r14 40087d: 4c 0f 44 f5 cmove %rbp,%r14 400881: 4c 89 f7 mov %r14,%rdi ... 40089c: ff 10 callq *(%rax) 40089e: eb d6 jmp 400876 <main+0x76> (cherry picked from FBD3612218)
429 lines
16 KiB
C++
429 lines
16 KiB
C++
//===--- BinaryContext.cpp - Interface for machine-level context ---------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BinaryContext.h"
|
|
#include "BinaryFunction.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
|
|
#include "llvm/MC/MCContext.h"
|
|
#include "llvm/MC/MCSymbol.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
|
|
using namespace llvm;
|
|
using namespace bolt;
|
|
|
|
namespace opts {
|
|
|
|
extern cl::opt<bool> Relocs;
|
|
|
|
static cl::opt<bool>
|
|
PrintDebugInfo("print-debug-info",
|
|
cl::desc("print debug info when printing functions"),
|
|
cl::Hidden);
|
|
|
|
} // namespace opts
|
|
|
|
// Nothing to release by hand; the destructor is still defined out of line in
// this translation unit, exactly as before.
BinaryContext::~BinaryContext() = default;
|
|
|
|
/// Returns the MCSymbol associated with \p Address, creating it if needed.
///
/// If a name is already registered for \p Address in GlobalAddresses, that
/// name is used. Otherwise a new name is built as Prefix + "0x" + hex(Address)
/// and registered in GlobalAddresses. When a symbol is newly created in the
/// MCContext, its name -> address mapping is recorded in GlobalSymbols.
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
                                                 Twine Prefix) {
  std::string SymbolName;

  const auto Registered = GlobalAddresses.find(Address);
  if (Registered == GlobalAddresses.end()) {
    // No name known for this address yet: synthesize and register one.
    SymbolName = (Prefix + "0x" + Twine::utohexstr(Address)).str();
    assert(GlobalSymbols.find(SymbolName) == GlobalSymbols.end() &&
           "created name is not unique");
    GlobalAddresses.emplace(std::make_pair(Address, SymbolName));
  } else {
    // Several names may be registered at the address; only the one returned
    // by find() is used.
    SymbolName = Registered->second;
  }

  // Reuse a symbol the MCContext already knows about.
  if (MCSymbol *Existing = Ctx->lookupSymbol(SymbolName))
    return Existing;

  MCSymbol *Created = Ctx->getOrCreateSymbol(SymbolName);
  GlobalSymbols[SymbolName] = Address;
  return Created;
}
|
|
|
|
/// Returns the symbol for the name registered at \p Address in
/// GlobalAddresses, or nullptr when no name is registered there. A registered
/// name is expected to already have a symbol in the MCContext (asserted).
MCSymbol *BinaryContext::getGlobalSymbolAtAddress(uint64_t Address) const {
  const auto Entry = GlobalAddresses.find(Address);
  if (Entry != GlobalAddresses.end()) {
    MCSymbol *Found = Ctx->lookupSymbol(Entry->second);
    assert(Found && "symbol cannot be NULL at this point");
    return Found;
  }

  return nullptr;
}
|
|
|
|
/// Folds \p ChildBF into \p ParentBF.
///
/// ParentBF receives all names of ChildBF, every symbol named after ChildBF is
/// redirected to ParentBF in SymbolToFunctionMap, and ChildBF's profile data
/// is merged into ParentBF. With opts::Relocs set, ChildBF is then erased from
/// \p BFs (so the ChildBF reference must not be used afterwards); otherwise it
/// is kept, renamed with an "__ICF_" prefix and marked as folded.
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
                                 BinaryFunction &ParentBF,
                                 std::map<uint64_t, BinaryFunction> &BFs) {
  // The parent answers to every name the child had.
  ParentBF.addNewNames(ChildBF.getNames());

  // Calls to functions go through symbols, so the symbol -> function lookup
  // table has to point at the parent from now on.
  // NB: GlobalAddresses and GlobalSymbols need no update.
  for (const auto &ChildName : ChildBF.getNames()) {
    auto *ChildSymbol = Ctx->lookupSymbol(ChildName);
    assert(ChildSymbol && "symbol cannot be NULL at this point");
    SymbolToFunctionMap[ChildSymbol] = &ParentBF;
  }

  // Merge execution counts of ChildBF into those of ParentBF.
  ChildBF.mergeProfileDataInto(ParentBF);

  if (!opts::Relocs) {
    // In non-relocation mode we keep the function, but rename it.
    const std::string FoldedName = "__ICF_" + ChildBF.Names.back();
    ChildBF.Names.clear();
    ChildBF.Names.push_back(FoldedName);
    ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(FoldedName);
    ChildBF.setFolded();
    return;
  }

  // Relocation mode: drop ChildBF from the global set of functions.
  auto ChildEntry = BFs.find(ChildBF.getAddress());
  assert(ChildEntry != BFs.end() && "function not found");
  assert(&ChildBF == &ChildEntry->second && "function mismatch");
  BFs.erase(ChildEntry);
}
|
|
|
|
/// Dumps every GlobalSymbols entry to \p OS, one "(name -> address)" per line.
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
  for (const auto &NameAndAddress : GlobalSymbols)
    OS << "(" << NameAndAddress.first << " -> " << NameAndAddress.second
       << ")\n";
}
|
|
|
|
namespace {
|
|
|
|
/// Returns a binary function that contains a given address in the input
|
|
/// binary, or nullptr if none does.
|
|
BinaryFunction *getBinaryFunctionContainingAddress(
|
|
uint64_t Address,
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
auto It = BinaryFunctions.upper_bound(Address);
|
|
if (It != BinaryFunctions.begin()) {
|
|
--It;
|
|
if (It->first + It->second.getSize() > Address) {
|
|
return &It->second;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Traverses the DIE tree in a recursive depth-first search and finds lexical
|
|
// blocks and instances of inlined subroutines, saving them in
|
|
// AddressRangesObjects.
|
|
void findAddressRangesObjects(
|
|
const DWARFCompileUnit *Unit,
|
|
const DWARFDebugInfoEntryMinimal *DIE,
|
|
std::map<uint64_t, BinaryFunction> &Functions,
|
|
std::vector<llvm::bolt::AddressRangesDWARFObject> &AddressRangesObjects) {
|
|
auto Tag = DIE->getTag();
|
|
if (Tag == dwarf::DW_TAG_lexical_block ||
|
|
Tag == dwarf::DW_TAG_inlined_subroutine ||
|
|
Tag == dwarf::DW_TAG_try_block ||
|
|
Tag == dwarf::DW_TAG_catch_block) {
|
|
auto const &Ranges = DIE->getAddressRanges(Unit);
|
|
if (!Ranges.empty()) {
|
|
// We have to process all ranges, even for functions that we are not
|
|
// updating. The primary reason is that abbrev entries are shared
|
|
// and if we convert one DIE, it may affect the rest. Thus
|
|
// the conservative approach that does not involve expanding
|
|
// .debug_abbrev, is to switch all DIEs to use .debug_ranges, even if
|
|
// they use a single [a,b) range. The secondary reason is that it allows
|
|
// us to get rid of the original portion of .debug_ranges to save
|
|
// space in the binary.
|
|
auto Function = getBinaryFunctionContainingAddress(Ranges.front().first,
|
|
Functions);
|
|
AddressRangesObjects.emplace_back(Unit, DIE);
|
|
auto &Object = AddressRangesObjects.back();
|
|
for (const auto &Range : Ranges) {
|
|
if (Function && Function->isSimple()) {
|
|
Object.addAddressRange(*Function, Range.first, Range.second);
|
|
} else {
|
|
Object.addAbsoluteRange(Range.first, Range.second);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Recursively visit each child.
|
|
for (auto Child = DIE->getFirstChild(); Child; Child = Child->getSibling()) {
|
|
findAddressRangesObjects(Unit, Child, Functions, AddressRangesObjects);
|
|
}
|
|
}
|
|
|
|
/// Recursively finds DWARF DW_TAG_subprogram DIEs and match them with
|
|
/// BinaryFunctions. Record DIEs for unknown subprograms (mostly functions that
|
|
/// are never called and removed from the binary) in Unknown.
|
|
void findSubprograms(DWARFCompileUnit *Unit,
|
|
const DWARFDebugInfoEntryMinimal *DIE,
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions,
|
|
BinaryContext::DIECompileUnitVector &Unknown) {
|
|
if (DIE->isSubprogramDIE()) {
|
|
// TODO: handle DW_AT_ranges.
|
|
uint64_t LowPC, HighPC;
|
|
if (DIE->getLowAndHighPC(Unit, LowPC, HighPC)) {
|
|
auto It = BinaryFunctions.find(LowPC);
|
|
if (It != BinaryFunctions.end()) {
|
|
It->second.addSubprogramDIE(Unit, DIE);
|
|
} else {
|
|
Unknown.emplace_back(DIE, Unit);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (auto ChildDIE = DIE->getFirstChild();
|
|
ChildDIE != nullptr && !ChildDIE->isNULL();
|
|
ChildDIE = ChildDIE->getSibling()) {
|
|
findSubprograms(Unit, ChildDIE, BinaryFunctions, Unknown);
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
|
|
const uint32_t SrcCUID,
|
|
unsigned FileIndex) {
|
|
auto SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
|
|
auto LineTable = DwCtx->getLineTableForUnit(SrcUnit);
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
// means empty dir.
|
|
assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
|
|
"FileIndex out of range for the compilation unit.");
|
|
const char *Dir = FileNames[FileIndex - 1].DirIdx ?
|
|
LineTable->Prologue.IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1] :
|
|
"";
|
|
return Ctx->getDwarfFile(Dir, FileNames[FileIndex - 1].Name, 0, DestCUID);
|
|
}
|
|
|
|
void BinaryContext::preprocessDebugInfo(
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
// Populate MCContext with DWARF files.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
const auto CUID = CU->getOffset();
|
|
auto LineTable = DwCtx->getLineTableForUnit(CU.get());
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
|
for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
// means empty dir.
|
|
const char *Dir = FileNames[I].DirIdx ?
|
|
LineTable->Prologue.IncludeDirectories[FileNames[I].DirIdx - 1] :
|
|
"";
|
|
Ctx->getDwarfFile(Dir, FileNames[I].Name, 0, CUID);
|
|
}
|
|
}
|
|
|
|
// For each CU, iterate over its children DIEs and match subprogram DIEs to
|
|
// BinaryFunctions.
|
|
for (auto &CU : DwCtx->compile_units()) {
|
|
findSubprograms(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
|
|
UnknownFunctions);
|
|
}
|
|
|
|
// Some functions may not have a corresponding subprogram DIE
|
|
// yet they will be included in some CU and will have line number information.
|
|
// Hence we need to associate them with the CU and include in CU ranges.
|
|
for (auto &AddrFunctionPair : BinaryFunctions) {
|
|
auto FunctionAddress = AddrFunctionPair.first;
|
|
auto &Function = AddrFunctionPair.second;
|
|
if (!Function.getSubprogramDIEs().empty())
|
|
continue;
|
|
if (auto DebugAranges = DwCtx->getDebugAranges()) {
|
|
auto CUOffset = DebugAranges->findAddress(FunctionAddress);
|
|
if (CUOffset != -1U) {
|
|
Function.addSubprogramDIE(DwCtx->getCompileUnitForOffset(CUOffset),
|
|
nullptr);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
#ifdef DWARF_LOOKUP_ALL_RANGES
|
|
// Last resort - iterate over all compile units. This should not happen
|
|
// very often. If it does, we need to create a separate lookup table
|
|
// similar to .debug_aranges internally. This slows down processing
|
|
// considerably.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
const auto *CUDie = CU->getUnitDIE();
|
|
for (const auto &Range : CUDie->getAddressRanges(CU.get())) {
|
|
if (FunctionAddress >= Range.first &&
|
|
FunctionAddress < Range.second) {
|
|
Function.addSubprogramDIE(CU.get(), nullptr);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void BinaryContext::preprocessFunctionDebugInfo(
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
// Iterate over DIE trees finding objects that contain address ranges.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
findAddressRangesObjects(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
|
|
AddressRangesObjects);
|
|
}
|
|
|
|
// Iterate over location lists and save them in LocationLists.
|
|
auto DebugLoc = DwCtx->getDebugLoc();
|
|
for (const auto &DebugLocEntry : DebugLoc->getLocationLists()) {
|
|
if (DebugLocEntry.Entries.empty())
|
|
continue;
|
|
auto StartAddress = DebugLocEntry.Entries.front().Begin;
|
|
auto *Function = getBinaryFunctionContainingAddress(StartAddress,
|
|
BinaryFunctions);
|
|
if (!Function || !Function->isSimple())
|
|
continue;
|
|
LocationLists.emplace_back(DebugLocEntry.Offset);
|
|
auto &LocationList = LocationLists.back();
|
|
for (const auto &Location : DebugLocEntry.Entries) {
|
|
LocationList.addLocation(&Location.Loc, *Function, Location.Begin,
|
|
Location.End);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Writes a readable name for the CFI opcode \p Operation to \p OS.
///
/// Each MCCFIInstruction opcode listed below is printed as "Op<Name>",
/// mirroring the enumerator's spelling; any other value is printed as
/// "Op#" followed by the numeric value.
void BinaryContext::printCFI(raw_ostream &OS, uint32_t Operation) {
  switch(Operation) {
  case MCCFIInstruction::OpSameValue:       OS << "OpSameValue"; break;
  case MCCFIInstruction::OpRememberState:   OS << "OpRememberState"; break;
  case MCCFIInstruction::OpRestoreState:    OS << "OpRestoreState"; break;
  case MCCFIInstruction::OpOffset:          OS << "OpOffset"; break;
  case MCCFIInstruction::OpDefCfaRegister:  OS << "OpDefCfaRegister"; break;
  case MCCFIInstruction::OpDefCfaOffset:    OS << "OpDefCfaOffset"; break;
  case MCCFIInstruction::OpDefCfa:          OS << "OpDefCfa"; break;
  case MCCFIInstruction::OpRelOffset:       OS << "OpRelOffset"; break;
  // Previously printed as "OfAdjustCfaOffset" (typo), which did not match the
  // enumerator name used for every other case.
  case MCCFIInstruction::OpAdjustCfaOffset: OS << "OpAdjustCfaOffset"; break;
  case MCCFIInstruction::OpEscape:          OS << "OpEscape"; break;
  case MCCFIInstruction::OpRestore:         OS << "OpRestore"; break;
  case MCCFIInstruction::OpUndefined:       OS << "OpUndefined"; break;
  case MCCFIInstruction::OpRegister:        OS << "OpRegister"; break;
  case MCCFIInstruction::OpWindowSave:      OS << "OpWindowSave"; break;
  case MCCFIInstruction::OpGnuArgsSize:     OS << "OpGnuArgsSize"; break;
  default:                                  OS << "Op#" << Operation; break;
  }
}
|
|
|
|
/// Prints one line describing \p Instruction to \p OS.
///
/// EH labels and CFI pseudo-instructions get a dedicated short form. Any
/// other instruction is printed via InstPrinter, prefixed with \p Offset in
/// hex, and followed by "# ..." notes for: tail calls, invoke handler/action
/// (and GNU_args_size when non-negative), jump tables of indirect branches,
/// all annotations, and - when \p Function is given and
/// opts::PrintDebugInfo is set - the debug line of the instruction.
/// If \p printMCInst is true, the MCInst dump follows on its own line.
void BinaryContext::printInstruction(raw_ostream &OS,
                                     const MCInst &Instruction,
                                     uint64_t Offset,
                                     const BinaryFunction* Function,
                                     bool printMCInst) const {
  // EH labels carry no offset; print the target symbol only.
  if (MIA->isEHLabel(Instruction)) {
    OS << " EH_LABEL: " << *MIA->getTargetSymbol(Instruction) << '\n';
    return;
  }

  OS << format(" %08" PRIx64 ": ", Offset);

  // CFI pseudo-instruction: print its immediate operand and, if the owning
  // function is known, the name of the CFI operation it refers to.
  if (MIA->isCFI(Instruction)) {
    const uint32_t CFIOperand = Instruction.getOperand(0).getImm();
    OS << "\t!CFI\t$" << CFIOperand << "\t; ";
    if (Function)
      printCFI(OS, Function->getCFIFor(Instruction)->getOperation());
    OS << "\n";
    return;
  }

  InstPrinter->printInst(&Instruction, OS, "", *STI);

  if (MIA->isCall(Instruction)) {
    if (MIA->isTailCall(Instruction))
      OS << " # TAILCALL ";

    if (MIA->isInvoke(Instruction)) {
      const MCSymbol *LandingPad;
      uint64_t Action;
      std::tie(LandingPad, Action) = MIA->getEHInfo(Instruction);

      OS << " # handler: ";
      if (!LandingPad)
        OS << '0';
      else
        OS << *LandingPad;
      OS << "; action: " << Action;

      auto GnuArgsSize = MIA->getGnuArgsSize(Instruction);
      if (GnuArgsSize >= 0)
        OS << "; GNU_args_size = " << GnuArgsSize;
    }
  }

  if (MIA->isIndirectBranch(Instruction)) {
    if (auto JTAddress = MIA->getJumpTable(Instruction))
      OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
  }

  MIA->forEachAnnotation(
      Instruction,
      [&OS](const MCAnnotation *Annotation) {
        OS << " # " << Annotation->getName() << ": ";
        Annotation->print(OS);
      });

  // Source location is only looked up when the owning function is known and
  // the user asked for it.
  const DWARFDebugLine::LineTable *LineTable = nullptr;
  if (Function && opts::PrintDebugInfo)
    LineTable = Function->getDWARFUnitLineTable().second;

  if (LineTable) {
    auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
    if (RowRef != DebugLineTableRowRef::NULL_ROW) {
      // Row and file indexes are used 1-based here, hence the "- 1".
      const auto &Row = LineTable->Rows[RowRef.RowIndex - 1];
      const auto &FileEntry = LineTable->Prologue.FileNames[Row.File - 1];
      OS << " # debug line " << FileEntry.Name << ":" << Row.Line;
      if (Row.Column)
        OS << ":" << Row.Column;
    }
  }

  OS << "\n";

  if (printMCInst) {
    Instruction.dump_pretty(OS, InstPrinter.get());
    OS << "\n";
  }
}
|
|
|
|
/// Returns the entry of AllocatableSections whose interval
/// [start, start + getSize()) covers \p Address, or the error code
/// std::errc::bad_address when no such section exists.
ErrorOr<SectionRef> BinaryContext::getSectionForAddress(uint64_t Address) const{
  // The first section starting strictly after Address cannot contain it, so
  // the only candidate is its predecessor.
  auto Candidate = AllocatableSections.upper_bound(Address);
  if (Candidate == AllocatableSections.begin())
    return std::make_error_code(std::errc::bad_address);
  --Candidate;

  if (Candidate->first + Candidate->second.getSize() <= Address)
    return std::make_error_code(std::errc::bad_address);

  return Candidate->second;
}
|
|
|
|
/// Appends a relocation {Address, Symbol, Type, Addend} to the list kept for
/// \p Section in SectionRelocations, creating that list on first use.
void BinaryContext::addSectionRelocation(SectionRef Section, uint64_t Address,
                                         MCSymbol *Symbol, uint64_t Type,
                                         uint64_t Addend) {
  // emplace() leaves an existing entry untouched and returns an iterator to
  // it, so a single call covers both the "new section" and "known section"
  // cases.
  auto Slot =
      SectionRelocations.emplace(Section, std::vector<Relocation>()).first;
  Slot->second.emplace_back(Relocation{Address, Symbol, Type, Addend});
}
|