2015-10-14 15:35:14 -07:00
|
|
|
//===--- BinaryContext.cpp - Interface for machine-level context ---------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "BinaryContext.h"
|
Update subroutine address ranges in binary.
Summary:
[WIP] Update DWARF info for function address ranges.
This diff currently does not work for unknown reasons,
but I'm describing here what's the current state.
According to both llvm-dwarf and readelf our output seems correct,
but GDB does not interpret it as expected. All details go below in
hope I missed something.
I couldn't actually track the whole change that introduced support for
what we need in gdb yet, but I think I can get to it
(2007-12-04: Support lexical blocks and function bodies that occupy
non-contiguous address ranges). I have reasons to believe gdb at least at some
[the remainder of this sentence is missing from this copy of the message].
The set of introduced changes was basically this:
- After disassembly, iterate over the DIEs in .debug_info and find the
ones that correspond to each BinaryFunction.
- Refactor DebugArangesWriter to also write addresses of functions to
.debug_ranges and track the offsets of function address ranges there
- Add some infrastructure to facilitate patching the binary in
simple ways (BinaryPatcher.h)
- In RewriteInstance, after writing .debug_ranges already with
function address ranges, for each function do:
-- Find the abbreviation corresponding to the function
-- Patch .debug_abbrev to replace DW_AT_low_pc with DW_AT_ranges and
DW_AT_high_pc with DW_AT_producer (I'll explain this hack below).
Also patch the corresponding forms to DW_FORM_sec_offset and
DW_FORM_string (null-terminated in-place string).
-- Patch debug_info with the .debug_ranges offset in place of
the first 4 bytes of DW_AT_low_pc (DW_AT_ranges only occupies 4
bytes whereas low_pc occupies 8), and write an arbitrary string
in-place in the other 12 bytes that were the 4 MSB of low_pc
and the 8 bytes of high_pc before the patch. This depends on
low_pc and high_pc being put consecutively by the compiler, but
it serves to validate the idea. I tried another way of doing it
that does not rely on this but it didn't work either and I believe
the reason for either not working is the same (and still unknown,
but unrelated to them. I might be wrong though, and if I find yet
another way of doing it I may try it). The other way was to
use a form of DW_FORM_data8 for the section offset. This is
disallowed by the specification, but I doubt gdb validates this,
as it's just easier to store it as 64-bit anyway as this is even
necessary to support 64-bit DWARF (which is not what gcc generates
by default apparently).
I still need to make changes to the diff to make it production-ready,
but first I want to figure out why it doesn't work as expected.
By looking at the output of llvm-dwarfdump or readelf, all of
.debug_ranges, .debug_abbrev and .debug_info seem to have been
correctly updated. However, gdb seems to have serious problems with
what we write.
(In fact, readelf --debug-dump=Ranges shows some funny warning messages
of the form ("Warning: There is a hole [0x100 - 0x120] in .debug_ranges"),
but I played around with this and it seems it's just because no
compile unit was using these ranges. Changing .debug_info apparently
changes these warnings, so they seem to be unrelated to the section
itself. Also looking at the hex dump of the section doesn't help,
as everything seems fine. llvm-dwarfdump doesn't say anything.
So I think .debug_ranges is fine.)
The result is that gdb not only doesn't show the function name as we
wanted, but it also stops showing line number information.
Apparently it's not reading/interpreting the address ranges at all,
and so the functions now have no associated address ranges, only the
symbol value which allows one to put a breakpoint in the function,
but not to show source code.
As this left me without more ideas of what to try to feed gdb with,
I believe the most promising next trial is to try to debug gdb itself,
unless someone spots anything I missed.
I found where the interesting part of the code lies for this
case (gdb/dwarf2read.c and some other related files, but mainly that one).
It seems in some parts gdb uses DW_AT_ranges for only getting
its lowest and highest addresses and setting that as low_pc and
high_pc (see dwarf2_get_pc_bounds in gdb's code and where it's called).
I really hope this is not actually the case for
function address ranges. I'll investigate this further. Otherwise
I don't think any changes we make will make it work as initially
intended, as we'll simply need gdb to support it and in that case it
doesn't.
(cherry picked from FBD3073641)
2016-03-16 18:08:29 -07:00
|
|
|
#include "BinaryFunction.h"
|
2017-11-14 20:05:11 -08:00
|
|
|
#include "DataReader.h"
|
2015-10-14 15:35:14 -07:00
|
|
|
#include "llvm/ADT/Twine.h"
|
2017-05-16 09:27:34 -07:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
|
2016-03-28 17:45:22 -07:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
|
2018-11-15 16:02:16 -08:00
|
|
|
#include "llvm/MC/MCAssembler.h"
|
|
|
|
#include "llvm/MC/MCAsmLayout.h"
|
2015-10-14 15:35:14 -07:00
|
|
|
#include "llvm/MC/MCContext.h"
|
2018-11-15 16:02:16 -08:00
|
|
|
#include "llvm/MC/MCELFStreamer.h"
|
|
|
|
#include "llvm/MC/MCObjectStreamer.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/MC/MCObjectWriter.h"
|
2018-11-15 16:02:16 -08:00
|
|
|
#include "llvm/MC/MCSectionELF.h"
|
2017-02-21 16:15:15 -08:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
2015-10-14 15:35:14 -07:00
|
|
|
#include "llvm/MC/MCSymbol.h"
|
2016-07-23 08:01:53 -07:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2017-11-14 20:05:11 -08:00
|
|
|
#include <iterator>
|
2015-10-14 15:35:14 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
using namespace llvm;
|
|
|
|
using namespace bolt;
|
2015-10-14 15:35:14 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
#undef DEBUG_TYPE
|
|
|
|
#define DEBUG_TYPE "bolt"
|
|
|
|
|
2016-07-23 08:01:53 -07:00
|
|
|
namespace opts {
|
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
extern cl::OptionCategory BoltCategory;
|
|
|
|
|
2018-06-11 17:17:25 -07:00
|
|
|
extern cl::opt<unsigned> Verbosity;
|
|
|
|
|
2018-09-24 20:58:31 -07:00
|
|
|
cl::opt<bool>
|
|
|
|
NoHugePages("no-huge-pages",
|
|
|
|
cl::desc("use regular size pages for code alignment"),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::Hidden,
|
|
|
|
cl::cat(BoltCategory));
|
|
|
|
|
2016-07-23 08:01:53 -07:00
|
|
|
static cl::opt<bool>
|
|
|
|
PrintDebugInfo("print-debug-info",
|
2017-03-28 14:40:20 -07:00
|
|
|
cl::desc("print debug info when printing functions"),
|
|
|
|
cl::Hidden,
|
2018-02-01 16:33:43 -08:00
|
|
|
cl::ZeroOrMore,
|
2017-03-28 14:40:20 -07:00
|
|
|
cl::cat(BoltCategory));
|
2016-07-23 08:01:53 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
cl::opt<bool>
|
2017-10-20 12:11:34 -07:00
|
|
|
PrintRelocations("print-relocations",
|
2018-02-01 16:33:43 -08:00
|
|
|
cl::desc("print relocations when printing functions/objects"),
|
2017-10-20 12:11:34 -07:00
|
|
|
cl::Hidden,
|
2018-02-01 16:33:43 -08:00
|
|
|
cl::ZeroOrMore,
|
2017-10-20 12:11:34 -07:00
|
|
|
cl::cat(BoltCategory));
|
|
|
|
|
|
|
|
static cl::opt<bool>
|
|
|
|
PrintMemData("print-mem-data",
|
|
|
|
cl::desc("print memory data annotations when printing functions"),
|
|
|
|
cl::Hidden,
|
2018-02-01 16:33:43 -08:00
|
|
|
cl::ZeroOrMore,
|
2017-10-20 12:11:34 -07:00
|
|
|
cl::cat(BoltCategory));
|
|
|
|
|
2016-07-23 08:01:53 -07:00
|
|
|
} // namespace opts
|
|
|
|
|
2018-09-24 20:58:31 -07:00
|
|
|
/// Builds a BinaryContext from the MC/DWARF objects supplied by the caller.
///
/// Every std::unique_ptr argument is moved into the member of the same name.
/// \p TheTarget is stored as a plain pointer and \p DR as a reference.
/// \p TripleName is taken by value and moved into the member, avoiding a
/// second string copy.
///
/// Side effects: sets the static Relocation::Arch from the stored triple and
/// selects PageAlign according to the -no-huge-pages option.
BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
                             std::unique_ptr<DWARFContext> DwCtx,
                             std::unique_ptr<Triple> TheTriple,
                             const Target *TheTarget,
                             std::string TripleName,
                             std::unique_ptr<MCCodeEmitter> MCE,
                             std::unique_ptr<MCObjectFileInfo> MOFI,
                             std::unique_ptr<const MCAsmInfo> AsmInfo,
                             std::unique_ptr<const MCInstrInfo> MII,
                             std::unique_ptr<const MCSubtargetInfo> STI,
                             std::unique_ptr<MCInstPrinter> InstPrinter,
                             std::unique_ptr<const MCInstrAnalysis> MIA,
                             std::unique_ptr<MCPlusBuilder> MIB,
                             std::unique_ptr<const MCRegisterInfo> MRI,
                             std::unique_ptr<MCDisassembler> DisAsm,
                             DataReader &DR)
    : Ctx(std::move(Ctx)),
      DwCtx(std::move(DwCtx)),
      TheTriple(std::move(TheTriple)),
      TheTarget(TheTarget),
      TripleName(std::move(TripleName)),
      MCE(std::move(MCE)),
      MOFI(std::move(MOFI)),
      AsmInfo(std::move(AsmInfo)),
      MII(std::move(MII)),
      STI(std::move(STI)),
      InstPrinter(std::move(InstPrinter)),
      MIA(std::move(MIA)),
      MIB(std::move(MIB)),
      MRI(std::move(MRI)),
      DisAsm(std::move(DisAsm)),
      DR(DR) {
  // The parameter TheTriple has been moved from; go through the member.
  Relocation::Arch = this->TheTriple->getArch();
  PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
}
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
/// Releases the objects this context holds through raw pointers: every entry
/// of Sections, every entry of InjectedBinaryFunctions, and finally whatever
/// clearBinaryData() disposes of.
BinaryContext::~BinaryContext() {
  for (auto *OwnedSection : Sections)
    delete OwnedSection;

  for (auto *Injected : InjectedBinaryFunctions)
    delete Injected;

  clearBinaryData();
}
|
2016-03-28 17:45:22 -07:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
std::unique_ptr<MCObjectWriter>
|
|
|
|
BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
|
2017-05-16 09:27:34 -07:00
|
|
|
if (!MAB) {
|
|
|
|
MAB = std::unique_ptr<MCAsmBackend>(
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()));
|
2017-05-16 09:27:34 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
return MAB->createObjectWriter(OS);
|
|
|
|
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
bool BinaryContext::validateObjectNesting() const {
|
|
|
|
auto Itr = BinaryDataMap.begin();
|
|
|
|
auto End = BinaryDataMap.end();
|
|
|
|
bool Valid = true;
|
|
|
|
while (Itr != End) {
|
|
|
|
auto Next = std::next(Itr);
|
|
|
|
while (Next != End &&
|
|
|
|
Itr->second->getSection() == Next->second->getSection() &&
|
|
|
|
Itr->second->containsRange(Next->second->getAddress(),
|
|
|
|
Next->second->getSize())) {
|
|
|
|
if (Next->second->Parent != Itr->second) {
|
|
|
|
errs() << "BOLT-WARNING: object nesting incorrect for:\n"
|
|
|
|
<< "BOLT-WARNING: " << *Itr->second << "\n"
|
|
|
|
<< "BOLT-WARNING: " << *Next->second << "\n";
|
|
|
|
Valid = false;
|
|
|
|
}
|
|
|
|
++Next;
|
|
|
|
}
|
|
|
|
Itr = Next;
|
|
|
|
}
|
|
|
|
return Valid;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool BinaryContext::validateHoles() const {
|
|
|
|
bool Valid = true;
|
|
|
|
for (auto &Section : sections()) {
|
|
|
|
for (const auto &Rel : Section.relocations()) {
|
|
|
|
auto RelAddr = Rel.Offset + Section.getAddress();
|
|
|
|
auto *BD = getBinaryDataContainingAddress(RelAddr);
|
|
|
|
if (!BD) {
|
|
|
|
errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
|
|
|
|
<< " 0x" << Twine::utohexstr(RelAddr) << " in "
|
|
|
|
<< Section.getName() << "\n";
|
|
|
|
Valid = false;
|
|
|
|
} else if (!BD->getAtomicRoot()) {
|
|
|
|
errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
|
|
|
|
<< "address 0x" << Twine::utohexstr(RelAddr) << " in "
|
|
|
|
<< Section.getName() << "\n";
|
|
|
|
Valid = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Valid;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Recomputes Parent links around the BinaryDataMap entry \p GAI, which is
/// expected to have just been inserted or replaced.
///
/// Step 1 walks the Parent chain of the entry preceding \p GAI and re-parents
/// \p GAI (plus the run of following entries that shared its old parent).
/// Step 2, for non-empty objects, claims the following entries whose range is
/// contained in \p GAI's parent, or in \p GAI itself if it has no parent.
void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
  const auto Address = GAI->second->getAddress();
  const auto Size = GAI->second->getSize();

  // Give Pos a new parent, then do the same for the consecutive entries after
  // it that still point at Pos's previous (non-null) parent.
  auto reparentRun =
    [&](BinaryDataMapType::iterator Pos, BinaryData *NewParent) {
      auto *FormerParent = Pos->second->Parent;
      Pos->second->Parent = NewParent;
      for (++Pos;
           Pos != BinaryDataMap.end() && FormerParent &&
           Pos->second->Parent == FormerParent;
           ++Pos) {
        Pos->second->Parent = NewParent;
      }
    };

  // Check if the previous symbol (or one of its ancestors) contains the newly
  // added symbol. Every iteration overwrites GAI's Parent, so the outcome of
  // the last ancestor visited is the one that remains.
  if (GAI != BinaryDataMap.begin()) {
    for (auto *Ancestor = std::prev(GAI)->second; Ancestor;
         Ancestor = Ancestor->Parent) {
      const bool Encloses =
          Ancestor->getSection() == GAI->second->getSection() &&
          Ancestor->containsRange(Address, Size);
      reparentRun(GAI, Encloses ? Ancestor : nullptr);
    }
  }

  // Check if the newly added symbol contains any subsequent symbols.
  if (Size != 0) {
    auto *Owner = GAI->second->Parent ? GAI->second->Parent : GAI->second;
    for (auto Follower = std::next(GAI);
         Follower != BinaryDataMap.end() &&
         Owner->containsRange(Follower->second->getAddress(),
                              Follower->second->getSize());
         ++Follower) {
      Follower->second->Parent = Owner;
    }
  }
}
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
/// Returns the range of BinaryDataMap entries that immediately follow \p BD
/// and for which BD->isAncestorOf() holds.
///
/// The range is empty when \p BD has no such entries, and also when no entry
/// is registered at BD's address. The latter case used to advance the end
/// iterator, which is undefined behavior.
iterator_range<BinaryContext::binary_data_iterator>
BinaryContext::getSubBinaryData(BinaryData *BD) {
  auto Self = BinaryDataMap.find(BD->getAddress());
  if (Self == BinaryDataMap.end())
    return make_range(Self, Self);

  auto Start = std::next(Self);
  auto End = Start;
  while (End != BinaryDataMap.end() &&
         BD->isAncestorOf(End->second)) {
    ++End;
  }
  return make_range(Start, End);
}
|
|
|
|
|
2019-06-04 15:30:22 -07:00
|
|
|
std::pair<MCSymbol *, uint64_t>
|
|
|
|
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF) {
|
|
|
|
uint64_t Addend{0};
|
|
|
|
|
|
|
|
if (isAArch64()) {
|
|
|
|
// Check if this is an access to a constant island and create bookkeeping
|
|
|
|
// to keep track of it and emit it later as part of this function.
|
|
|
|
if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) {
|
|
|
|
return std::make_pair(IslandSym, Addend);
|
|
|
|
} else {
|
|
|
|
// Detect custom code written in assembly that refers to arbitrary
|
|
|
|
// constant islands from other functions. Write this reference so we
|
|
|
|
// can pull this constant island and emit it as part of this function
|
|
|
|
// too.
|
|
|
|
auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
|
|
|
|
if (IslandIter != AddressToConstantIslandMap.end()) {
|
|
|
|
if (auto *IslandSym =
|
|
|
|
IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
|
|
|
|
/// Make this function depend on IslandIter->second because we have
|
|
|
|
/// a reference to its constant island. When emitting this function,
|
|
|
|
/// we will also emit IslandIter->second's constants. This only
|
|
|
|
/// happens in custom AArch64 assembly code.
|
|
|
|
BF.IslandDependency.insert(IslandIter->second);
|
|
|
|
BF.ProxyIslandSymbols[IslandSym] = IslandIter->second;
|
|
|
|
return std::make_pair(IslandSym, Addend);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note that the address does not necessarily have to reside inside
|
|
|
|
// a section, it could be an absolute address too.
|
|
|
|
auto Section = getSectionForAddress(Address);
|
|
|
|
if (Section && Section->isText()) {
|
|
|
|
if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) {
|
|
|
|
if (Address != BF.getAddress()) {
|
|
|
|
// The address could potentially escape. Mark it as another entry
|
|
|
|
// point into the function.
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
outs() << "BOLT-INFO: potentially escaped address 0x"
|
|
|
|
<< Twine::utohexstr(Address) << " in function "
|
|
|
|
<< BF << '\n';
|
|
|
|
}
|
|
|
|
return std::make_pair(
|
|
|
|
BF.addEntryPointAtOffset(Address - BF.getAddress()),
|
|
|
|
Addend);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
InterproceduralReferences.insert(std::make_pair(&BF, Address));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (auto *BD = getBinaryDataContainingAddress(Address)) {
|
|
|
|
return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: use DWARF info to get size/alignment here?
|
|
|
|
auto *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
|
|
|
|
DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName());
|
|
|
|
return std::make_pair(TargetSymbol, Addend);
|
|
|
|
}
|
|
|
|
|
2019-06-12 18:21:02 -07:00
|
|
|
/// Heuristically classifies the memory at \p Address as a possible jump table
/// of function \p BF.
///
/// Always returns UNKNOWN on non-x86 targets, when the address has no
/// section, or when its section is virtual. Otherwise the memory is scanned
/// first as a PIC table (4-byte entries relative to the table address) and
/// then as a table of absolute code pointers.
MemoryContentsType
BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) {
  if (!isX86())
    return MemoryContentsType::UNKNOWN;

  auto Section = getSectionForAddress(Address);
  if (!Section) {
    // No section - possibly an absolute address; there is nothing to scan.
    if (opts::Verbosity > 1) {
      errs() << "BOLT-WARNING: no section for address 0x"
             << Twine::utohexstr(Address) << " referenced from function "
             << BF << '\n';
    }
    return MemoryContentsType::UNKNOWN;
  }

  // The contents of a virtual section are filled in at runtime.
  if (Section->isVirtual())
    return MemoryContentsType::UNKNOWN;

  // Scans entries of the given kind starting at TableAddress. The scan is
  // limited by the end of the section, or by the end of the sized data object
  // registered exactly at TableAddress, if there is one.
  auto looksLikeJumpTable = [&](const uint64_t TableAddress,
                                JumpTable::JumpTableType Type) {
    const auto EntrySize =
      Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
    auto Limit = Section->getEndAddress();
    const auto *TableBD = getBinaryDataAtAddress(TableAddress);
    if (TableBD && TableBD->getSize()) {
      Limit = TableBD->getEndAddress();
      assert(Limit <= Section->getEndAddress() &&
             "data object cannot cross a section boundary");
    }

    for (auto EntryAddress = TableAddress; EntryAddress <= Limit - EntrySize;
         EntryAddress += EntrySize) {
      DEBUG(dbgs() << "BOLT-DEBUG: analyzing memory at 0x"
                   << Twine::utohexstr(EntryAddress));
      uint64_t Value;
      if (Type == JumpTable::JTT_PIC) {
        Value = TableAddress + *getSignedValueAtAddress(EntryAddress, EntrySize);
      } else {
        Value = *getPointerAtAddress(EntryAddress);
      }
      DEBUG(dbgs() << ", which contains value 0x"
                   << Twine::utohexstr(Value) << '\n');

      // We assume that a jump table cannot have function start as an entry,
      // so one entry strictly inside the function is enough to accept.
      if (BF.containsAddress(Value) && Value != BF.getAddress())
        return true;

      // A __builtin_unreachable() entry may point right past the function;
      // it decides nothing, so look at the next entry.
      if (Value == BF.getAddress() + BF.getSize())
        continue;

      // Anything else is outside of this function: not a jump table.
      return false;
    }

    return false;
  };

  // Start with checking for PIC jump table. We expect non-PIC jump tables
  // to have high 32 bits set to 0.
  if (looksLikeJumpTable(Address, JumpTable::JTT_PIC))
    return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;

  if (looksLikeJumpTable(Address, JumpTable::JTT_NORMAL))
    return MemoryContentsType::POSSIBLE_JUMP_TABLE;

  return MemoryContentsType::UNKNOWN;
}
|
|
|
|
|
|
|
|
void BinaryContext::populateJumpTables() {
|
|
|
|
for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
|
|
|
|
++JTI) {
|
|
|
|
auto *JT = JTI->second;
|
|
|
|
auto &BF = *JT->Parent;
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: populating jump table "
|
|
|
|
<< JT->getName() << '\n');
|
|
|
|
|
|
|
|
// The upper bound is defined by containing object, section limits, and
|
|
|
|
// the next jump table in memory.
|
|
|
|
auto UpperBound = JT->getSection().getEndAddress();
|
|
|
|
const auto *JumpTableBD = getBinaryDataAtAddress(JT->getAddress());
|
|
|
|
if (JumpTableBD && JumpTableBD->getSize()) {
|
|
|
|
assert(JumpTableBD->getEndAddress() <= UpperBound &&
|
|
|
|
"data object cannot cross a section boundary");
|
|
|
|
UpperBound = JumpTableBD->getEndAddress();
|
|
|
|
}
|
|
|
|
auto NextJTI = std::next(JTI);
|
|
|
|
if (NextJTI != JTE) {
|
|
|
|
assert (UpperBound != JT->getAddress());
|
|
|
|
UpperBound = std::min(NextJTI->second->getAddress(), UpperBound);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto EntryAddress = JT->getAddress();
|
|
|
|
EntryAddress <= UpperBound - JT->EntrySize;
|
|
|
|
EntryAddress += JT->EntrySize) {
|
|
|
|
uint64_t Value;
|
|
|
|
if (JT->Type == JumpTable::JTT_PIC) {
|
|
|
|
Value = JT->getAddress() +
|
|
|
|
*getSignedValueAtAddress(EntryAddress, JT->EntrySize);
|
|
|
|
} else {
|
|
|
|
Value = *getPointerAtAddress(EntryAddress);
|
|
|
|
}
|
|
|
|
|
|
|
|
// __builtin_unreachable() case.
|
|
|
|
if (Value == BF.getAddress() + BF.getSize()) {
|
|
|
|
JT->OffsetEntries.emplace_back(Value - BF.getAddress());
|
|
|
|
BF.IgnoredBranches.emplace_back(Value - BF.getAddress(), BF.getSize());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We assume that a jump table cannot have function start as an entry.
|
2019-06-13 15:31:30 -07:00
|
|
|
if (!BF.containsAddress(Value) || Value == BF.getAddress())
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Check there's an instruction at this offset.
|
|
|
|
if (!BF.getInstructionAtOffset(Value - BF.getAddress()))
|
|
|
|
break;
|
2019-06-12 18:21:02 -07:00
|
|
|
|
2019-06-13 15:31:30 -07:00
|
|
|
JT->OffsetEntries.emplace_back(Value - BF.getAddress());
|
2019-06-12 18:21:02 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
assert(JT->OffsetEntries.size() > 1 &&
|
|
|
|
"expected more than one jump table entry");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-14 15:35:14 -07:00
|
|
|
/// Returns the symbol of the object registered at \p Address. If there is
/// none, registers a new object named "<Prefix>0x<hex address>" with the
/// given \p Size, \p Alignment and \p Flags and returns its symbol.
///
/// For an existing object, \p Size must be zero or equal to the object's size
/// (checked by assertion only).
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
                                                 Twine Prefix,
                                                 uint64_t Size,
                                                 uint16_t Alignment,
                                                 unsigned Flags) {
  const auto Existing = BinaryDataMap.find(Address);
  if (Existing == BinaryDataMap.end()) {
    const std::string Name =
        (Prefix + "0x" + Twine::utohexstr(Address)).str();
    assert(!GlobalSymbols.count(Name) && "created name is not unique");
    return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
  }

  assert(Existing->second->getSize() == Size || !Size);
  return Existing->second->getSymbol();
}
|
2015-10-14 15:35:14 -07:00
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
/// Creates a BinaryFunction at \p Address, stores it in BinaryFunctions,
/// registers \p Name at that address and maps the function's symbol back to
/// the function.
///
/// The name is registered with \p SymbolSize if it is non-zero, otherwise
/// with \p Size. A function must not already exist at \p Address (checked by
/// assertion only).
///
/// \returns a pointer to the function stored inside BinaryFunctions.
BinaryFunction *BinaryContext::createBinaryFunction(
    const std::string &Name, BinarySection &Section, uint64_t Address,
    uint64_t Size, bool IsSimple, uint64_t SymbolSize, uint16_t Alignment) {
  auto Inserted = BinaryFunctions.emplace(
      Address, BinaryFunction(Name, Section, Address, Size, *this, IsSimple));
  assert(Inserted.second == true && "unexpected duplicate function");

  auto *NewFunction = &Inserted.first->second;
  registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
                        Alignment);
  setSymbolToFunctionMap(NewFunction->getSymbol(), NewFunction);
  return NewFunction;
}
|
|
|
|
|
2019-05-02 17:42:06 -07:00
|
|
|
std::pair<JumpTable *, const MCSymbol *>
|
2019-06-12 18:21:02 -07:00
|
|
|
BinaryContext::getOrCreateJumpTable(BinaryFunction &Function,
|
|
|
|
uint64_t Address,
|
|
|
|
JumpTable::JumpTableType Type) {
|
2019-05-02 17:42:06 -07:00
|
|
|
const auto JumpTableName = generateJumpTableName(Function, Address);
|
|
|
|
if (auto *JT = getJumpTableContainingAddress(Address)) {
|
|
|
|
assert(JT->Type == Type && "jump table types have to match");
|
|
|
|
assert(JT->Parent == &Function &&
|
|
|
|
"cannot re-use jump table of a different function");
|
|
|
|
assert((Address == JT->getAddress() || Type != JumpTable::JTT_PIC) &&
|
|
|
|
"cannot re-use part of PIC jump table");
|
|
|
|
// Get or create a new label for the table.
|
|
|
|
const auto JTOffset = Address - JT->getAddress();
|
|
|
|
auto LI = JT->Labels.find(JTOffset);
|
|
|
|
if (LI == JT->Labels.end()) {
|
|
|
|
auto *JTStartLabel = registerNameAtAddress(JumpTableName,
|
|
|
|
Address,
|
|
|
|
0,
|
|
|
|
JT->EntrySize);
|
|
|
|
auto Result = JT->Labels.emplace(JTOffset, JTStartLabel);
|
|
|
|
assert(Result.second && "error adding jump table label");
|
|
|
|
LI = Result.first;
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(JT, LI->second);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto *JTStartLabel = Ctx->getOrCreateSymbol(JumpTableName);
|
|
|
|
const auto EntrySize =
|
|
|
|
Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
|
|
|
|
<< JTStartLabel->getName()
|
2019-06-12 18:21:02 -07:00
|
|
|
<< " in function " << Function << 'n');
|
2019-05-02 17:42:06 -07:00
|
|
|
|
|
|
|
auto *JT = new JumpTable(JumpTableName,
|
|
|
|
Address,
|
|
|
|
EntrySize,
|
|
|
|
Type,
|
2019-06-12 18:21:02 -07:00
|
|
|
{},
|
2019-05-02 17:42:06 -07:00
|
|
|
JumpTable::LabelMapType{{0, JTStartLabel}},
|
|
|
|
Function,
|
|
|
|
*getSectionForAddress(Address));
|
|
|
|
|
|
|
|
const auto *JTLabel = registerNameAtAddress(JumpTableName, Address, JT);
|
|
|
|
assert(JTLabel == JTStartLabel);
|
|
|
|
|
|
|
|
JumpTables.emplace(Address, JT);
|
|
|
|
|
|
|
|
// Duplicate the entry for the parent function for easy access.
|
|
|
|
Function.JumpTables.emplace(Address, JT);
|
|
|
|
|
|
|
|
return std::make_pair(JT, JTLabel);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
|
|
|
|
uint64_t Address) {
|
|
|
|
size_t Id;
|
|
|
|
uint64_t Offset = 0;
|
|
|
|
if (const auto *JT = BF.getJumpTableContainingAddress(Address)) {
|
|
|
|
Offset = Address - JT->getAddress();
|
|
|
|
auto Itr = JT->Labels.find(Offset);
|
|
|
|
if (Itr != JT->Labels.end()) {
|
|
|
|
return Itr->second->getName();
|
|
|
|
}
|
|
|
|
Id = JumpTableIds.at(JT->getAddress());
|
|
|
|
} else {
|
|
|
|
Id = JumpTableIds[Address] = BF.JumpTables.size();
|
|
|
|
}
|
|
|
|
return ("JUMP_TABLE/" + BF.Names[0] + "." + std::to_string(Id) +
|
|
|
|
(Offset ? ("." + std::to_string(Offset)) : ""));
|
|
|
|
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
/// Registers \p Name for the object at \p Address and returns its symbol.
///
/// If no object is registered at \p Address yet, a new BinaryData is
/// allocated with the given \p Size, \p Flags and \p Alignment (an alignment
/// of 0 is stored as 1), placed in the section containing the address or in
/// the absolute section if there is none. Otherwise the existing object is
/// used. The BinaryData overload of this function does the registration.
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
                                               uint64_t Address,
                                               uint64_t Size,
                                               uint16_t Alignment,
                                               unsigned Flags) {
  auto SectionOrErr = getSectionForAddress(Address);
  auto &Section = SectionOrErr ? SectionOrErr.get() : absoluteSection();

  const auto Existing = BinaryDataMap.find(Address);
  BinaryData *Object =
      Existing != BinaryDataMap.end()
          ? Existing->second
          : new BinaryData(Name,
                           Address,
                           Size,
                           Alignment ? Alignment : 1,
                           Section,
                           Flags);

  return registerNameAtAddress(Name, Address, Object);
}
|
2015-10-14 15:35:14 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
/// Associates \p Name and object \p BD with \p Address and returns the
/// MCContext symbol for \p Name.
///
/// - Nothing registered at \p Address: \p BD is inserted, \p Name is mapped
///   to it, nesting is updated, and the symbol is appended to BD->Symbols.
/// - A different object is registered: \p BD merges it and replaces it in the
///   map, the old object is deleted, all of BD's names are re-pointed to
///   \p BD, and nesting is updated. The symbol is not appended in this case.
/// - \p BD is the registered object: \p Name and the symbol are appended only
///   if the object does not have that name yet.
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
                                               uint64_t Address,
                                               BinaryData *BD) {
  // Object that receives the new symbol at the end; null when none should.
  BinaryData *SymbolOwner = BD;

  auto Slot = BinaryDataMap.find(Address);
  if (Slot == BinaryDataMap.end()) {
    Slot = BinaryDataMap.emplace(Address, BD).first;
    GlobalSymbols[Name] = BD;
    updateObjectNesting(Slot);
  } else if (BD != Slot->second) {
    // Note: this could be a source of bugs if client code holds
    // on to BinaryData*'s in data structures for any length of time.
    auto *Replaced = Slot->second;
    BD->merge(Replaced);
    delete Replaced;
    Slot->second = BD;
    for (auto &Alias : BD->names()) {
      GlobalSymbols[Alias] = BD;
    }
    updateObjectNesting(Slot);
    SymbolOwner = nullptr;
  } else if (BD->hasName(Name)) {
    // The name is already registered for this object.
    SymbolOwner = nullptr;
  } else {
    BD->Names.push_back(Name);
    GlobalSymbols[Name] = BD;
  }

  // Register the name with MCContext.
  auto *Symbol = Ctx->getOrCreateSymbol(Name);
  if (SymbolOwner) {
    SymbolOwner->Symbols.push_back(Symbol);
    assert(SymbolOwner->Symbols.size() == SymbolOwner->Names.size() &&
           "there should be a 1:1 mapping between names and symbols");
  }
  return Symbol;
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
/// Find an object in BinaryDataMap that covers \p Address.
///
/// An object covers the address when containsAddress() is true for it or,
/// if \p IncludeEnd is set, when its end address equals \p Address.
///
/// The search starts at the entry keyed exactly by \p Address when there is
/// one and \p IncludeEnd is false; otherwise it starts at the entry
/// immediately preceding lower_bound(Address). If that entry covers the
/// address it is returned; with \p BestFit the result is first advanced over
/// following entries for as long as they cover the address too. If the
/// starting entry does not cover the address, its chain of parents is
/// searched instead.
///
/// \returns the matching object, or nullptr when there is none (including
/// the case where there is no preceding entry to start from).
const BinaryData *
BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address,
                                                  bool IncludeEnd,
                                                  bool BestFit) const {
  auto NI = BinaryDataMap.lower_bound(Address);
  const auto End = BinaryDataMap.end();

  auto covers = [&](const BinaryData *Object) {
    return Object->containsAddress(Address) ||
           (IncludeEnd && Object->getEndAddress() == Address);
  };

  const bool StartAtExactMatch =
      NI != End && Address == NI->first && !IncludeEnd;
  if (!StartAtExactMatch) {
    // Step back to the preceding entry. The begin() check must come before
    // the decrement: decrementing the begin iterator of a std::map is
    // undefined behaviour.
    if (NI == BinaryDataMap.begin())
      return nullptr;
    --NI;
  }

  if (covers(NI->second)) {
    while (BestFit &&
           std::next(NI) != End &&
           covers(std::next(NI)->second)) {
      ++NI;
    }
    return NI->second;
  }

  // If this is a sub-symbol, see if a parent data contains the address.
  // The end-address test is applied to the parent itself; testing the child
  // again here could never succeed because the child was rejected above.
  for (auto *BD = NI->second->getParent(); BD; BD = BD->getParent()) {
    if (covers(BD))
      return BD;
  }
  return nullptr;
}
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
|
|
|
|
auto NI = BinaryDataMap.find(Address);
|
|
|
|
assert(NI != BinaryDataMap.end());
|
|
|
|
if (NI == BinaryDataMap.end())
|
|
|
|
return false;
|
2018-03-13 18:59:22 -07:00
|
|
|
// TODO: it's possible that a jump table starts at the same address
|
|
|
|
// as a larger blob of private data. When we set the size of the
|
|
|
|
// jump table, it might be smaller than the total blob size. In this
|
|
|
|
// case we just leave the original size since (currently) it won't really
|
|
|
|
// affect anything. See T26915981.
|
|
|
|
assert((!NI->second->Size || NI->second->Size == Size ||
|
|
|
|
(NI->second->isJumpTable() && NI->second->Size > Size)) &&
|
|
|
|
"can't change the size of a symbol that has already had its "
|
|
|
|
"size set");
|
|
|
|
if (!NI->second->Size) {
|
|
|
|
NI->second->Size = Size;
|
|
|
|
updateObjectNesting(NI);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
2018-06-06 03:17:32 -07:00
|
|
|
void BinaryContext::generateSymbolHashes() {
|
|
|
|
auto isNonAnonymousName = [](StringRef Name) {
|
|
|
|
return !(Name.startswith("SYMBOLat") ||
|
|
|
|
Name.startswith("DATAat") ||
|
|
|
|
Name.startswith("HOLEat"));
|
|
|
|
};
|
|
|
|
|
|
|
|
auto isPadding = [](const BinaryData &BD) {
|
|
|
|
auto Contents = BD.getSection().getContents();
|
|
|
|
auto SymData = Contents.substr(BD.getOffset(), BD.getSize());
|
|
|
|
return (BD.getName().startswith("HOLEat") ||
|
|
|
|
SymData.find_first_not_of(0) == StringRef::npos);
|
|
|
|
};
|
|
|
|
|
2018-06-11 17:17:25 -07:00
|
|
|
uint64_t NumCollisions = 0;
|
2018-06-06 03:17:32 -07:00
|
|
|
for (auto &Entry : BinaryDataMap) {
|
|
|
|
auto &BD = *Entry.second;
|
|
|
|
auto Name = BD.getName();
|
|
|
|
|
|
|
|
if (isNonAnonymousName(Name))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// First check if a non-anonymous alias exists and move it to the front.
|
|
|
|
if (BD.getNames().size() > 1) {
|
|
|
|
auto Itr = std::find_if(BD.Names.begin(),
|
|
|
|
BD.Names.end(),
|
|
|
|
isNonAnonymousName);
|
|
|
|
if (Itr != BD.Names.end()) {
|
|
|
|
assert(BD.Names.size() == BD.Symbols.size() &&
|
|
|
|
"there should be a 1:1 mapping between names and symbols");
|
|
|
|
auto Idx = std::distance(BD.Names.begin(), Itr);
|
|
|
|
std::swap(BD.Names[0], *Itr);
|
|
|
|
std::swap(BD.Symbols[0], BD.Symbols[Idx]);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We have to skip 0 size symbols since they will all collide.
|
|
|
|
if (BD.getSize() == 0) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
const auto Hash = BD.getSection().hash(BD);
|
|
|
|
const auto Idx = Name.find("0x");
|
|
|
|
std::string NewName = (Twine(Name.substr(0, Idx)) +
|
|
|
|
"_" + Twine::utohexstr(Hash)).str();
|
|
|
|
if (getBinaryDataByName(NewName)) {
|
|
|
|
// Ignore collisions for symbols that appear to be padding
|
|
|
|
// (i.e. all zeros or a "hole")
|
|
|
|
if (!isPadding(BD)) {
|
2018-06-11 17:17:25 -07:00
|
|
|
if (opts::Verbosity) {
|
|
|
|
errs() << "BOLT-WARNING: collision detected when hashing " << BD
|
|
|
|
<< " with new name (" << NewName << "), skipping.\n";
|
|
|
|
}
|
|
|
|
++NumCollisions;
|
2018-06-06 03:17:32 -07:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
BD.Names.insert(BD.Names.begin(), NewName);
|
|
|
|
BD.Symbols.insert(BD.Symbols.begin(),
|
|
|
|
Ctx->getOrCreateSymbol(NewName));
|
|
|
|
assert(BD.Names.size() == BD.Symbols.size() &&
|
|
|
|
"there should be a 1:1 mapping between names and symbols");
|
|
|
|
GlobalSymbols[NewName] = &BD;
|
|
|
|
}
|
2018-06-11 17:17:25 -07:00
|
|
|
if (NumCollisions) {
|
|
|
|
errs() << "BOLT-WARNING: " << NumCollisions
|
|
|
|
<< " collisions detected while hashing binary objects";
|
|
|
|
if (!opts::Verbosity)
|
|
|
|
errs() << ". Use -v=1 to see the list.";
|
|
|
|
errs() << '\n';
|
|
|
|
}
|
2018-06-06 03:17:32 -07:00
|
|
|
}
|
|
|
|
|
2019-05-22 11:26:58 -07:00
|
|
|
void BinaryContext::processInterproceduralReferences() {
|
|
|
|
for (auto &Pair : InterproceduralReferences) {
|
|
|
|
auto *FromBF = Pair.first;
|
|
|
|
auto Addr = Pair.second;
|
|
|
|
auto *ContainingFunction = getBinaryFunctionContainingAddress(Addr);
|
|
|
|
if (FromBF == ContainingFunction)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ContainingFunction) {
|
|
|
|
// Only a parent function (or a sibling) can reach its fragment.
|
|
|
|
if (ContainingFunction->IsFragment) {
|
|
|
|
assert(!FromBF->IsFragment &&
|
|
|
|
"only one cold fragment is supported at this time");
|
|
|
|
ContainingFunction->setParentFunction(FromBF);
|
|
|
|
FromBF->addFragment(ContainingFunction);
|
|
|
|
if (!HasRelocations) {
|
|
|
|
ContainingFunction->setSimple(false);
|
|
|
|
FromBF->setSimple(false);
|
|
|
|
}
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
outs() << "BOLT-INFO: marking " << *ContainingFunction
|
|
|
|
<< " as a fragment of " << *FromBF << '\n';
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ContainingFunction->getAddress() != Addr) {
|
|
|
|
ContainingFunction->addEntryPoint(Addr);
|
|
|
|
if (!HasRelocations) {
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
errs() << "BOLT-WARNING: Function " << *ContainingFunction
|
|
|
|
<< " has internal BBs that are target of a reference "
|
|
|
|
<< "located in another function. Skipping the function.\n";
|
|
|
|
}
|
|
|
|
ContainingFunction->setSimple(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (Addr) {
|
|
|
|
// Check if address falls in function padding space - this could be
|
|
|
|
// unmarked data in code. In this case adjust the padding space size.
|
|
|
|
auto Section = getSectionForAddress(Addr);
|
|
|
|
assert(Section && "cannot get section for referenced address");
|
|
|
|
|
|
|
|
if (!Section->isText())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// PLT requires special handling and could be ignored in this context.
|
|
|
|
StringRef SectionName = Section->getName();
|
|
|
|
if (SectionName == ".plt" || SectionName == ".plt.got")
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (HasRelocations) {
|
|
|
|
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
|
|
|
|
<< "object in code at address 0x"
|
|
|
|
<< Twine::utohexstr(Addr) << " belonging to section "
|
|
|
|
<< SectionName << " in relocation mode.\n";
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
ContainingFunction =
|
|
|
|
getBinaryFunctionContainingAddress(Addr,
|
|
|
|
/*CheckPastEnd=*/false,
|
|
|
|
/*UseMaxSize=*/true);
|
|
|
|
// We are not going to overwrite non-simple functions, but for simple
|
|
|
|
// ones - adjust the padding size.
|
|
|
|
if (ContainingFunction && ContainingFunction->isSimple()) {
|
|
|
|
errs() << "BOLT-WARNING: function " << *ContainingFunction
|
|
|
|
<< " has an object detected in a padding region at address 0x"
|
|
|
|
<< Twine::utohexstr(Addr) << '\n';
|
|
|
|
ContainingFunction->setMaxSize(Addr -
|
|
|
|
ContainingFunction->getAddress());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
InterproceduralReferences.clear();
|
|
|
|
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
void BinaryContext::postProcessSymbolTable() {
|
|
|
|
fixBinaryDataHoles();
|
|
|
|
bool Valid = true;
|
|
|
|
for (auto &Entry : BinaryDataMap) {
|
|
|
|
auto *BD = Entry.second;
|
|
|
|
if ((BD->getName().startswith("SYMBOLat") ||
|
|
|
|
BD->getName().startswith("DATAat")) &&
|
|
|
|
!BD->getParent() &&
|
|
|
|
!BD->getSize() &&
|
|
|
|
!BD->isAbsolute() &&
|
|
|
|
BD->getSection()) {
|
2018-07-30 16:30:18 -07:00
|
|
|
errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
|
2017-11-14 20:05:11 -08:00
|
|
|
Valid = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(Valid);
|
|
|
|
assignMemData();
|
2018-06-06 03:17:32 -07:00
|
|
|
generateSymbolHashes();
|
2017-06-09 13:17:36 -07:00
|
|
|
}
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
|
2019-05-31 16:45:31 -07:00
|
|
|
BinaryFunction &ParentBF) {
|
|
|
|
std::shared_lock<std::shared_timed_mutex> ReadCtxLock(CtxMutex,
|
|
|
|
std::defer_lock);
|
|
|
|
std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
|
|
|
|
std::defer_lock);
|
|
|
|
std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
|
|
|
|
SymbolToFunctionMapMutex, std::defer_lock);
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Copy name list.
|
|
|
|
ParentBF.addNewNames(ChildBF.getNames());
|
|
|
|
|
|
|
|
// Update internal bookkeeping info.
|
|
|
|
for (auto &Name : ChildBF.getNames()) {
|
2019-05-31 16:45:31 -07:00
|
|
|
ReadCtxLock.lock();
|
2016-12-21 17:13:56 -08:00
|
|
|
// Calls to functions are handled via symbols, and we keep the lookup table
|
|
|
|
// that we need to update.
|
|
|
|
auto *Symbol = Ctx->lookupSymbol(Name);
|
2019-05-31 16:45:31 -07:00
|
|
|
ReadCtxLock.unlock();
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
assert(Symbol && "symbol cannot be NULL at this point");
|
|
|
|
|
2019-05-31 16:45:31 -07:00
|
|
|
WriteSymbolMapLock.lock();
|
|
|
|
SymbolToFunctionMap[Symbol] = &ParentBF;
|
|
|
|
WriteSymbolMapLock.unlock();
|
2017-11-14 20:05:11 -08:00
|
|
|
// NB: there's no need to update BinaryDataMap and GlobalSymbols.
|
2016-12-21 17:13:56 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Merge execution counts of ChildBF into those of ParentBF.
|
|
|
|
ChildBF.mergeProfileDataInto(ParentBF);
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (HasRelocations) {
|
2019-05-31 16:45:31 -07:00
|
|
|
std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
|
|
|
|
std::defer_lock);
|
|
|
|
std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
|
|
|
|
std::defer_lock);
|
2016-12-21 17:13:56 -08:00
|
|
|
// Remove ChildBF from the global set of functions in relocs mode.
|
2019-05-31 16:45:31 -07:00
|
|
|
ReadBfsLock.lock();
|
2019-04-03 15:52:01 -07:00
|
|
|
auto FI = BinaryFunctions.find(ChildBF.getAddress());
|
2019-05-31 16:45:31 -07:00
|
|
|
ReadBfsLock.unlock();
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
assert(FI != BinaryFunctions.end() && "function not found");
|
2016-12-21 17:13:56 -08:00
|
|
|
assert(&ChildBF == &FI->second && "function mismatch");
|
2019-05-31 16:45:31 -07:00
|
|
|
|
|
|
|
WriteBfsLock.lock();
|
2019-04-03 15:52:01 -07:00
|
|
|
FI = BinaryFunctions.erase(FI);
|
2019-05-31 16:45:31 -07:00
|
|
|
WriteBfsLock.unlock();
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
} else {
|
|
|
|
// In non-relocation mode we keep the function, but rename it.
|
2019-05-31 16:45:31 -07:00
|
|
|
std::string NewName = "__ICF_" + ChildBF.getSymbol()->getName().str();
|
2016-12-21 17:13:56 -08:00
|
|
|
ChildBF.Names.clear();
|
|
|
|
ChildBF.Names.push_back(NewName);
|
2019-05-31 16:45:31 -07:00
|
|
|
|
|
|
|
WriteCtxLock.lock();
|
2016-12-21 17:13:56 -08:00
|
|
|
ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(NewName);
|
2019-05-31 16:45:31 -07:00
|
|
|
WriteCtxLock.unlock();
|
|
|
|
|
2017-01-10 11:20:56 -08:00
|
|
|
ChildBF.setFolded();
|
2016-12-21 17:13:56 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
void BinaryContext::fixBinaryDataHoles() {
|
|
|
|
assert(validateObjectNesting() && "object nesting inconsitency detected");
|
|
|
|
|
|
|
|
for (auto &Section : allocatableSections()) {
|
|
|
|
std::vector<std::pair<uint64_t, uint64_t>> Holes;
|
|
|
|
|
|
|
|
auto isNotHole = [&Section](const binary_data_iterator &Itr) {
|
|
|
|
auto *BD = Itr->second;
|
|
|
|
bool isHole = (!BD->getParent() &&
|
|
|
|
!BD->getSize() &&
|
|
|
|
BD->isObject() &&
|
|
|
|
(BD->getName().startswith("SYMBOLat0x") ||
|
|
|
|
BD->getName().startswith("DATAat0x") ||
|
|
|
|
BD->getName().startswith("ANONYMOUS")));
|
|
|
|
return !isHole && BD->getSection() == Section && !BD->getParent();
|
|
|
|
};
|
|
|
|
|
|
|
|
auto BDStart = BinaryDataMap.begin();
|
|
|
|
auto BDEnd = BinaryDataMap.end();
|
|
|
|
auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
|
|
|
|
auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
|
|
|
|
|
|
|
|
uint64_t EndAddress = Section.getAddress();
|
|
|
|
|
|
|
|
while (Itr != End) {
|
2018-03-16 09:03:12 -07:00
|
|
|
if (Itr->second->getAddress() > EndAddress) {
|
2019-04-09 11:31:45 -07:00
|
|
|
auto Gap = Itr->second->getAddress() - EndAddress;
|
2017-11-14 20:05:11 -08:00
|
|
|
Holes.push_back(std::make_pair(EndAddress, Gap));
|
|
|
|
}
|
|
|
|
EndAddress = Itr->second->getEndAddress();
|
|
|
|
++Itr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (EndAddress < Section.getEndAddress()) {
|
|
|
|
Holes.push_back(std::make_pair(EndAddress,
|
|
|
|
Section.getEndAddress() - EndAddress));
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is already a symbol at the start of the hole, grow that symbol
|
|
|
|
// to cover the rest. Otherwise, create a new symbol to cover the hole.
|
|
|
|
for (auto &Hole : Holes) {
|
|
|
|
auto *BD = getBinaryDataAtAddress(Hole.first);
|
|
|
|
if (BD) {
|
|
|
|
// BD->getSection() can be != Section if there are sections that
|
|
|
|
// overlap. In this case it is probably safe to just skip the holes
|
|
|
|
// since the overlapping section will not(?) have any symbols in it.
|
|
|
|
if (BD->getSection() == Section)
|
|
|
|
setBinaryDataSize(Hole.first, Hole.second);
|
|
|
|
} else {
|
2018-09-21 12:00:20 -07:00
|
|
|
getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(validateObjectNesting() && "object nesting inconsitency detected");
|
|
|
|
assert(validateHoles() && "top level hole detected in object map");
|
|
|
|
}
|
|
|
|
|
2016-07-23 08:01:53 -07:00
|
|
|
/// Print every object in BinaryDataMap to \p OS, one per line, in map order.
///
/// Whenever the section of the current object differs from that of the
/// previous one, a header line with the section name, address range and size
/// is printed first. When the section evaluates to false, the object's own
/// address and size are used in the header instead. Each object line is
/// indented by one space per parent level.
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
  // Section of the previously printed object; null until the first one.
  const BinarySection* PreviousSection = nullptr;

  for (auto &Entry : BinaryDataMap) {
    const auto *BD = Entry.second;
    const auto &Section = BD->getSection();

    if (!PreviousSection || Section != *PreviousSection) {
      StringRef Name = Section.getName();
      const uint64_t Address =
          Section ? Section.getAddress() : BD->getAddress();
      const uint64_t Size = Section ? Section.getSize() : BD->getSize();
      OS << "BOLT-INFO: Section " << Name << ", "
         << "0x" + Twine::utohexstr(Address) << ":"
         << "0x" + Twine::utohexstr(Address + Size) << "/"
         << Size << "\n";
      PreviousSection = &Section;
    }

    OS << "BOLT-INFO: ";
    for (auto *P = BD->getParent(); P; P = P->getParent())
      OS << " ";
    OS << *BD << "\n";
  }
}
|
|
|
|
|
|
|
|
void BinaryContext::assignMemData() {
|
2019-04-22 11:27:50 -04:00
|
|
|
auto getAddress = [&](const MemInfo &MI) -> uint64_t {
|
2017-11-14 20:05:11 -08:00
|
|
|
if (!MI.Addr.IsSymbol)
|
|
|
|
return MI.Addr.Offset;
|
|
|
|
|
|
|
|
if (auto *BD = getBinaryDataByName(MI.Addr.Name))
|
|
|
|
return BD->getAddress() + MI.Addr.Offset;
|
|
|
|
|
2019-04-22 11:27:50 -04:00
|
|
|
return 0;
|
2017-11-14 20:05:11 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Map of sections (or heap/stack) to count/size.
|
|
|
|
std::map<StringRef, uint64_t> Counts;
|
2018-04-20 20:03:31 -07:00
|
|
|
std::map<StringRef, uint64_t> JumpTableCounts;
|
2017-11-14 20:05:11 -08:00
|
|
|
|
|
|
|
uint64_t TotalCount = 0;
|
|
|
|
for (auto &Entry : DR.getAllFuncsMemData()) {
|
|
|
|
for (auto &MI : Entry.second.Data) {
|
|
|
|
const auto Addr = getAddress(MI);
|
|
|
|
auto *BD = getBinaryDataContainingAddress(Addr);
|
|
|
|
if (BD) {
|
|
|
|
BD->getAtomicRoot()->addMemData(MI);
|
|
|
|
Counts[BD->getSectionName()] += MI.Count;
|
2018-04-20 20:03:31 -07:00
|
|
|
if (BD->getAtomicRoot()->isJumpTable()) {
|
|
|
|
JumpTableCounts[BD->getSectionName()] += MI.Count;
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
} else {
|
|
|
|
Counts["Heap/stack"] += MI.Count;
|
|
|
|
}
|
|
|
|
TotalCount += MI.Count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Counts.empty()) {
|
|
|
|
outs() << "BOLT-INFO: Memory stats breakdown:\n";
|
|
|
|
for (auto &Entry : Counts) {
|
|
|
|
const auto Section = Entry.first;
|
|
|
|
const auto Count = Entry.second;
|
|
|
|
outs() << "BOLT-INFO: " << Section << " = " << Count
|
|
|
|
<< format(" (%.1f%%)\n", 100.0*Count/TotalCount);
|
2018-04-20 20:03:31 -07:00
|
|
|
if (JumpTableCounts.count(Section) != 0) {
|
|
|
|
const auto JTCount = JumpTableCounts[Section];
|
|
|
|
outs() << "BOLT-INFO: jump tables = " << JTCount
|
|
|
|
<< format(" (%.1f%%)\n", 100.0*JTCount/Count);
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n";
|
2016-07-23 08:01:53 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-28 17:45:22 -07:00
|
|
|
namespace {
|
|
|
|
|
2016-05-23 19:36:38 -07:00
|
|
|
/// Recursively finds DWARF DW_TAG_subprogram DIEs and match them with
|
|
|
|
/// BinaryFunctions. Record DIEs for unknown subprograms (mostly functions that
|
|
|
|
/// are never called and removed from the binary) in Unknown.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
void findSubprograms(const DWARFDie DIE,
|
2017-05-16 09:27:34 -07:00
|
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (DIE.isSubprogramDIE()) {
|
|
|
|
uint64_t LowPC, HighPC, SectionIndex;
|
|
|
|
if (DIE.getLowAndHighPC(LowPC, HighPC, SectionIndex)) {
|
2016-04-08 16:24:38 -07:00
|
|
|
auto It = BinaryFunctions.find(LowPC);
|
|
|
|
if (It != BinaryFunctions.end()) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
It->second.addSubprogramDIE(DIE);
|
2016-04-08 16:24:38 -07:00
|
|
|
} else {
|
2017-05-16 09:27:34 -07:00
|
|
|
// The function must have been optimized away by GC.
|
2016-04-08 16:24:38 -07:00
|
|
|
}
|
2017-05-08 22:51:36 -07:00
|
|
|
} else {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto RangesVector = DIE.getAddressRanges();
|
[BOLT] Basic support for split functions
Summary:
This adds very basic and limited support for split functions.
In non-relocation mode, split functions are ignored, while their debug
info is properly updated. No support in the relocation mode yet.
Split functions consist of a main body and one or more fragments.
For fragments, the main part is called their parent. Any fragment
could only be entered via its parent or another fragment.
The short-term goal is to correctly update debug information for split
functions, while the long-term goal is to have a complete support
including full optimization. Note that if we don't detect split
bodies, we would have to add multiple entry points via tail calls,
which we would rather avoid.
Parent functions and fragments are represented by a `BinaryFunction`
and are marked accordingly. For now they are marked as non-simple, and
thus only supported in non-relocation mode. Once we start building a
CFG, it should be a common graph (i.e. the one that includes all
fragments) in the parent function.
The function discovery is unchanged, except for the detection of
`\.cold\.` pattern in the function name, which automatically marks the
function as a fragment of another function.
Because of the local function name ambiguity, we cannot rely on the
function name to establish child fragment and parent relationship.
Instead we rely on disassembly processing.
`BinaryContext::getBinaryFunctionContainingAddress()` now returns a
parent function if an address from its fragment is passed.
There's no jump table support at the moment. Jump tables can have
source and destinations in both fragment and parent.
Parent functions that enter their fragments via C++ exception handling
mechanism are not yet supported.
(cherry picked from FBD14970569)
2019-04-16 10:24:34 -07:00
|
|
|
for (const auto Range : DIE.getAddressRanges()) {
|
|
|
|
auto It = BinaryFunctions.find(Range.LowPC);
|
|
|
|
if (It != BinaryFunctions.end()) {
|
|
|
|
It->second.addSubprogramDIE(DIE);
|
|
|
|
}
|
2017-05-08 22:51:36 -07:00
|
|
|
}
|
2016-04-08 16:24:38 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto ChildDIE = DIE.getFirstChild(); ChildDIE && !ChildDIE.isNULL();
|
|
|
|
ChildDIE = ChildDIE.getSibling()) {
|
|
|
|
findSubprograms(ChildDIE, BinaryFunctions);
|
2016-04-08 16:24:38 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-28 17:45:22 -07:00
|
|
|
} // namespace
|
|
|
|
|
2016-09-02 11:58:53 -07:00
|
|
|
unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
|
|
|
|
const uint32_t SrcCUID,
|
|
|
|
unsigned FileIndex) {
|
|
|
|
auto SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
|
|
|
|
auto LineTable = DwCtx->getLineTableForUnit(SrcUnit);
|
|
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
|
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
|
|
// means empty dir.
|
|
|
|
assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
|
|
|
|
"FileIndex out of range for the compilation unit.");
|
2018-05-04 10:10:41 -07:00
|
|
|
StringRef Dir = "";
|
|
|
|
if (FileNames[FileIndex - 1].DirIdx != 0) {
|
|
|
|
if (auto DirName =
|
|
|
|
LineTable->Prologue
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
.IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1]
|
2018-05-04 10:10:41 -07:00
|
|
|
.getAsCString()) {
|
|
|
|
Dir = *DirName;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
StringRef FileName = "";
|
|
|
|
if (auto FName = FileNames[FileIndex - 1].Name.getAsCString())
|
|
|
|
FileName = *FName;
|
|
|
|
assert(FileName != "");
|
|
|
|
return cantFail(Ctx->getDwarfFile(Dir, FileName, 0, nullptr, None, DestCUID));
|
2016-09-02 11:58:53 -07:00
|
|
|
}
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
|
2017-08-31 11:45:37 -07:00
|
|
|
std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
|
|
|
|
std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
|
|
|
|
SortedFunctions.begin(),
|
|
|
|
[](std::pair<const uint64_t, BinaryFunction> &BFI) {
|
|
|
|
return &BFI.second;
|
|
|
|
});
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
|
2019-03-14 18:51:05 -07:00
|
|
|
[] (const BinaryFunction *A, const BinaryFunction *B) {
|
|
|
|
if (A->hasValidIndex() && B->hasValidIndex()) {
|
|
|
|
return A->getIndex() < B->getIndex();
|
|
|
|
}
|
|
|
|
return A->hasValidIndex();
|
|
|
|
});
|
2017-08-31 11:45:37 -07:00
|
|
|
return SortedFunctions;
|
|
|
|
}
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
void BinaryContext::preprocessDebugInfo() {
|
2016-03-14 18:48:05 -07:00
|
|
|
// Populate MCContext with DWARF files.
|
|
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
|
|
const auto CUID = CU->getOffset();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto *LineTable = DwCtx->getLineTableForUnit(CU.get());
|
2016-03-14 18:48:05 -07:00
|
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
2018-08-27 20:12:59 -07:00
|
|
|
// Make sure empty debug line tables are registered too.
|
|
|
|
if (FileNames.empty()) {
|
|
|
|
cantFail(Ctx->getDwarfFile("", "<unknown>", 0, nullptr, None, CUID));
|
|
|
|
continue;
|
|
|
|
}
|
2016-03-14 18:48:05 -07:00
|
|
|
for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
|
|
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
|
|
// means empty dir.
|
2018-05-04 10:10:41 -07:00
|
|
|
StringRef Dir = "";
|
|
|
|
if (FileNames[I].DirIdx != 0)
|
|
|
|
if (auto DirName =
|
|
|
|
LineTable->Prologue.IncludeDirectories[FileNames[I].DirIdx - 1]
|
|
|
|
.getAsCString())
|
|
|
|
Dir = *DirName;
|
|
|
|
StringRef FileName = "";
|
|
|
|
if (auto FName = FileNames[I].Name.getAsCString())
|
|
|
|
FileName = *FName;
|
|
|
|
assert(FileName != "");
|
|
|
|
cantFail(Ctx->getDwarfFile(Dir, FileName, 0, nullptr, None, CUID));
|
2016-03-14 18:48:05 -07:00
|
|
|
}
|
|
|
|
}
|
2016-02-25 16:57:07 -08:00
|
|
|
|
2016-04-08 16:24:38 -07:00
|
|
|
// For each CU, iterate over its children DIEs and match subprogram DIEs to
|
Update subroutine address ranges in binary.
Summary:
[WIP] Update DWARF info for function address ranges.
This diff currently does not work for unknown reasons,
but I'm describing here what's the current state.
According to both llvm-dwarf and readelf our output seems correct,
but GDB does not interpret it as expected. All details go below in
hope I missed something.
I couldn't actually track the whole change that introduced support for
what we need in gdb yet, but I think I can get to it
(2007-12-04: Support
lexical bocks and function bodies that occupy non-contiguous address ranges). I have reasons to believe gdb at least at some
nges).
The set of introduced changes was basically this:
- After disassembly, iterate over the DIEs in .debug_info and find the
ones that correspond to each BinaryFunction.
- Refactor DebugArangesWriter to also write addresses of functions to
.debug_ranges and track the offsets of function address ranges there
- Add some infrastructure to facilitate patching the binary in
simple ways (BinaryPatcher.h)
- In RewriteInstance, after writing .debug_ranges already with
function address ranges, for each function do:
-- Find the abbreviation corresponding to the function
-- Patch .debug_abbrev to replace DW_AT_low_pc with DW_AT_ranges and
DW_AT_high_pc with DW_AT_producer (I'll explain this hack below).
Also patch the corresponding forms to DW_FORM_sec_offset and
DW_FORM_string (null-terminated in-place string).
-- Patch debug_info with the .debug_ranges offset in place of
the first 4 bytes of DW_AT_low_pc (DW_AT_ranges only occupies 4
bytes whereas low_pc occupies 8), and write an arbitrary string
in-place in the other 12 bytes that were the 4 MSB of low_pc
and the 8 bytes of high_pc before the patch. This depends on
low_pc and high_pc being put consecutively by the compiler, but
it serves to validate the idea. I tried another way of doing it
that does not rely on this but it didn't work either and I believe
the reason for either not working is the same (and still unknown,
but unrelated to them. I might be wrong though, and if I find yet
another way of doing it I may try it). The other way was to
use a form of DW_FORM_data8 for the section offset. This is
disallowed by the specification, but I doubt gdb validates this,
as it's just easier to store it as 64-bit anyway as this is even
necessary to support 64-bit DWARF (which is not what gcc generates
by default apparently).
I still need to make changes to the diff to make it production-ready,
but first I want to figure out why it doesn't work as expected.
By looking at the output of llvm-dwarfdump or readelf, all of
.debug_ranges, .debug_abbrev and .debug_info seem to have been
correctly updated. However, gdb seems to have serious problems with
what we write.
(In fact, readelf --debug-dump=Ranges shows some funny warning messages
of the form ("Warning: There is a hole [0x100 - 0x120] in .debug_ranges"),
but I played around with this and it seems it's just because no
compile unit was using these ranges. Changing .debug_info apparently
changes these warnings, so they seem to be unrelated to the section
itself. Also looking at the hex dump of the section doesn't help,
as everything seems fine. llvm-dwarfdump doesn't say anything.
So I think .debug_ranges is fine.)
The result is that gdb not only doesn't show the function name as we
wanted, but it also stops showing line number information.
Apparently it's not reading/interpreting the address ranges at all,
and so the functions now have no associated address ranges, only the
symbol value which allows one to put a breakpoint in the function,
but not to show source code.
As this left me without more ideas of what to try to feed gdb with,
I believe the most promising next trial is to try to debug gdb itself,
unless someone spots anything I missed.
I found where the interesting part of the code lies for this
case (gdb/dwarf2read.c and some other related files, but mainly that one).
It seems in some parts gdb uses DW_AT_ranges for only getting
its lowest and highest addresses and setting that as low_pc and
high_pc (see dwarf2_get_pc_bounds in gdb's code and where it's called).
I really hope this is not actually the case for
function address ranges. I'll investigate this further. Otherwise
I don't think any changes we make will make it work as initially
intended, as we'll simply need gdb to support it and in that case it
doesn't.
(cherry picked from FBD3073641)
2016-03-16 18:08:29 -07:00
|
|
|
// BinaryFunctions.
|
2016-05-27 20:19:19 -07:00
|
|
|
for (auto &CU : DwCtx->compile_units()) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
findSubprograms(CU->getUnitDIE(false), BinaryFunctions);
|
Update subroutine address ranges in binary.
Summary:
[WIP] Update DWARF info for function address ranges.
This diff currently does not work for unknown reasons,
but I'm describing here what's the current state.
According to both llvm-dwarf and readelf our output seems correct,
but GDB does not interpret it as expected. All details go below in
hope I missed something.
I couldn't actually track the whole change that introduced support for
what we need in gdb yet, but I think I can get to it
(2007-12-04: Support
lexical bocks and function bodies that occupy non-contiguous address ranges). I have reasons to believe gdb at least at some
nges).
The set of introduced changes was basically this:
- After disassembly, iterate over the DIEs in .debug_info and find the
ones that correspond to each BinaryFunction.
- Refactor DebugArangesWriter to also write addresses of functions to
.debug_ranges and track the offsets of function address ranges there
- Add some infrastructure to facilitate patching the binary in
simple ways (BinaryPatcher.h)
- In RewriteInstance, after writing .debug_ranges already with
function address ranges, for each function do:
-- Find the abbreviation corresponding to the function
-- Patch .debug_abbrev to replace DW_AT_low_pc with DW_AT_ranges and
DW_AT_high_pc with DW_AT_producer (I'll explain this hack below).
Also patch the corresponding forms to DW_FORM_sec_offset and
DW_FORM_string (null-terminated in-place string).
-- Patch debug_info with the .debug_ranges offset in place of
the first 4 bytes of DW_AT_low_pc (DW_AT_ranges only occupies 4
bytes whereas low_pc occupies 8), and write an arbitrary string
in-place in the other 12 bytes that were the 4 MSB of low_pc
and the 8 bytes of high_pc before the patch. This depends on
low_pc and high_pc being put consecutively by the compiler, but
it serves to validate the idea. I tried another way of doing it
that does not rely on this but it didn't work either and I believe
the reason for either not working is the same (and still unknown,
but unrelated to them. I might be wrong though, and if I find yet
another way of doing it I may try it). The other way was to
use a form of DW_FORM_data8 for the section offset. This is
disallowed by the specification, but I doubt gdb validates this,
as it's just easier to store it as 64-bit anyway as this is even
necessary to support 64-bit DWARF (which is not what gcc generates
by default apparently).
I still need to make changes to the diff to make it production-ready,
but first I want to figure out why it doesn't work as expected.
By looking at the output of llvm-dwarfdump or readelf, all of
.debug_ranges, .debug_abbrev and .debug_info seem to have been
correctly updated. However, gdb seems to have serious problems with
what we write.
(In fact, readelf --debug-dump=Ranges shows some funny warning messages
of the form ("Warning: There is a hole [0x100 - 0x120] in .debug_ranges"),
but I played around with this and it seems it's just because no
compile unit was using these ranges. Changing .debug_info apparently
changes these warnings, so they seem to be unrelated to the section
itself. Also looking at the hex dump of the section doesn't help,
as everything seems fine. llvm-dwarfdump doesn't say anything.
So I think .debug_ranges is fine.)
The result is that gdb not only doesn't show the function name as we
wanted, but it also stops showing line number information.
Apparently it's not reading/interpreting the address ranges at all,
and so the functions now have no associated address ranges, only the
symbol value which allows one to put a breakpoint in the function,
but not to show source code.
As this left me without more ideas of what to try to feed gdb with,
I believe the most promising next trial is to try to debug gdb itself,
unless someone spots anything I missed.
I found where the interesting part of the code lies for this
case (gdb/dwarf2read.c and some other related files, but mainly that one).
It seems in some parts gdb uses DW_AT_ranges for only getting
its lowest and highest addresses and setting that as low_pc and
high_pc (see dwarf2_get_pc_bounds in gdb's code and where it's called).
I really hope this is not actually the case for
function address ranges. I'll investigate this further. Otherwise
I don't think any changes we make will make it work as initially
intended, as we'll simply need gdb to support it and in that case it
doesn't.
(cherry picked from FBD3073641)
2016-03-16 18:08:29 -07:00
|
|
|
}
|
2016-03-28 17:45:22 -07:00
|
|
|
|
2016-05-27 20:19:19 -07:00
|
|
|
// Some functions may not have a corresponding subprogram DIE
|
|
|
|
// yet they will be included in some CU and will have line number information.
|
|
|
|
// Hence we need to associate them with the CU and include in CU ranges.
|
|
|
|
for (auto &AddrFunctionPair : BinaryFunctions) {
|
|
|
|
auto FunctionAddress = AddrFunctionPair.first;
|
|
|
|
auto &Function = AddrFunctionPair.second;
|
|
|
|
if (!Function.getSubprogramDIEs().empty())
|
|
|
|
continue;
|
|
|
|
if (auto DebugAranges = DwCtx->getDebugAranges()) {
|
|
|
|
auto CUOffset = DebugAranges->findAddress(FunctionAddress);
|
|
|
|
if (CUOffset != -1U) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Function.addSubprogramDIE(
|
|
|
|
DWARFDie(DwCtx->getCompileUnitForOffset(CUOffset), nullptr));
|
2016-05-27 20:19:19 -07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef DWARF_LOOKUP_ALL_RANGES
|
|
|
|
// Last resort - iterate over all compile units. This should not happen
|
|
|
|
// very often. If it does, we need to create a separate lookup table
|
|
|
|
// similar to .debug_aranges internally. This slows down processing
|
|
|
|
// considerably.
|
|
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
|
|
const auto *CUDie = CU->getUnitDIE();
|
|
|
|
for (const auto &Range : CUDie->getAddressRanges(CU.get())) {
|
|
|
|
if (FunctionAddress >= Range.first &&
|
|
|
|
FunctionAddress < Range.second) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Function.addSubprogramDIE(DWARFDie(CU.get(), nullptr));
|
2016-05-27 20:19:19 -07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-01 16:52:54 -07:00
|
|
|
/// Print a human-readable description of the CFI instruction \p Inst to
/// \p OS.
///
/// The output is the operation name (prefixed with "Op"), followed by the
/// register(s) and/or offset for the operations that print them. An
/// operation without a dedicated case is printed as "Op#" followed by its
/// numeric value. No trailing newline is emitted.
void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
  uint32_t Operation = Inst.getOperation();
  switch (Operation) {
  case MCCFIInstruction::OpSameValue:
    OS << "OpSameValue Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpRememberState:
    OS << "OpRememberState";
    break;
  case MCCFIInstruction::OpRestoreState:
    OS << "OpRestoreState";
    break;
  case MCCFIInstruction::OpOffset:
    OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    OS << "OpDefCfaRegister Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpDefCfaOffset:
    OS << "OpDefCfaOffset " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfa:
    OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpRelOffset:
    OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    // Label follows the same "Op<Name>" scheme as every other operation.
    OS << "OpAdjustCfaOffset " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpEscape:
    OS << "OpEscape";
    break;
  case MCCFIInstruction::OpRestore:
    OS << "OpRestore Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpUndefined:
    OS << "OpUndefined Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpRegister:
    OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
       << Inst.getRegister2();
    break;
  case MCCFIInstruction::OpWindowSave:
    OS << "OpWindowSave";
    break;
  case MCCFIInstruction::OpGnuArgsSize:
    OS << "OpGnuArgsSize";
    break;
  default:
    // Unknown operation: fall back to its numeric value.
    OS << "Op#" << Operation;
    break;
  }
}
|
|
|
|
|
|
|
|
/// Print \p Instruction to \p OS as a single annotated line.
///
/// EH labels and CFI pseudo-instructions get a dedicated short form. Every
/// other instruction is printed through InstPrinter and followed by optional
/// "#"-prefixed annotations: tail call / invoke details, jump table address,
/// generic annotations, debug line, load data and relocations.
///
/// \param Offset           value printed in the leading hex column and
///                         passed to printRelocations.
/// \param Function         function used to look up CFI, line table, memory
///                         data and relocations; may be null, in which case
///                         those parts are omitted.
/// \param PrintMCInst      additionally dump the MCInst on a following line.
/// \param PrintMemData     print load data even if opts::PrintMemData is off.
/// \param PrintRelocations print relocations even if opts::PrintRelocations
///                         is off.
void BinaryContext::printInstruction(raw_ostream &OS,
                                     const MCInst &Instruction,
                                     uint64_t Offset,
                                     const BinaryFunction* Function,
                                     bool PrintMCInst,
                                     bool PrintMemData,
                                     bool PrintRelocations) const {
  // EH labels are printed on their own, without an offset column.
  if (MIB->isEHLabel(Instruction)) {
    OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
    return;
  }

  OS << format(" %08" PRIx64 ": ", Offset);

  // CFI pseudo-instruction: operand 0 is printed after '$', and the CFI
  // itself is described only when the owning function is known.
  if (MIB->isCFI(Instruction)) {
    uint32_t CFIIndex = Instruction.getOperand(0).getImm();
    OS << "\t!CFI\t$" << CFIIndex << "\t; ";
    if (Function)
      printCFI(OS, *Function->getCFIFor(Instruction));
    OS << "\n";
    return;
  }

  InstPrinter->printInst(&Instruction, OS, "", *STI);

  // Call-specific annotations.
  if (MIB->isCall(Instruction)) {
    if (MIB->isTailCall(Instruction))
      OS << " # TAILCALL ";

    if (MIB->isInvoke(Instruction)) {
      const auto EHInfo = MIB->getEHInfo(Instruction);
      OS << " # handler: ";
      if (EHInfo->first)
        OS << *EHInfo->first;
      else
        OS << '0';
      OS << "; action: " << EHInfo->second;

      const auto GnuArgsSize = MIB->getGnuArgsSize(Instruction);
      if (GnuArgsSize >= 0)
        OS << "; GNU_args_size = " << GnuArgsSize;
    }
  }

  // Jump table reference for indirect branches.
  if (MIB->isIndirectBranch(Instruction)) {
    auto JTAddress = MIB->getJumpTable(Instruction);
    if (JTAddress)
      OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
  }

  MIB->printAnnotations(Instruction, OS);

  // Source location, only when requested and a function is available.
  const DWARFDebugLine::LineTable *LineTable = nullptr;
  if (Function && opts::PrintDebugInfo)
    LineTable = Function->getDWARFUnitLineTable().second;

  if (LineTable) {
    auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());

    if (RowRef != DebugLineTableRowRef::NULL_ROW) {
      // Row and file indices are used 1-based here.
      const auto &Row = LineTable->Rows[RowRef.RowIndex - 1];
      const auto &FileEntry = LineTable->Prologue.FileNames[Row.File - 1];

      StringRef FileName = "";
      auto FName = FileEntry.Name.getAsCString();
      if (FName)
        FileName = *FName;

      OS << " # debug line " << FileName << ":" << Row.Line;
      if (Row.Column)
        OS << ":" << Row.Column;
    }
  }

  // Load data attached through the "MemDataOffset" annotation.
  if (Function && (opts::PrintMemData || PrintMemData)) {
    const auto *MD = Function->getMemData();
    const auto MemDataOffset =
      MIB->tryGetAnnotationAs<uint64_t>(Instruction, "MemDataOffset");
    if (MD && MemDataOffset) {
      // The first entry is introduced by the header, later ones by a comma.
      const char *Separator = " # Loads: ";
      for (auto &MI : MD->getMemInfoRange(MemDataOffset.get())) {
        OS << Separator;
        OS << MI.Addr << "/" << MI.Count;
        Separator = ", ";
      }
    }
  }

  // Relocations covering the bytes of this instruction.
  if (Function && (opts::PrintRelocations || PrintRelocations)) {
    const auto Size = computeCodeSize(&Instruction, &Instruction + 1);
    Function->printRelocations(OS, Offset, Size);
  }

  OS << "\n";

  if (PrintMCInst) {
    Instruction.dump_pretty(OS, InstPrinter.get());
    OS << "\n";
  }
}
|
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
/// Return a view of the bytes of \p Function inside the contents of its
/// section.
///
/// Yields std::errc::bad_address when the section is not a text section, is
/// virtual, or has zero size.
ErrorOr<ArrayRef<uint8_t>>
BinaryContext::getFunctionData(const BinaryFunction &Function) const {
  auto &Section = Function.getSection();
  assert(Section.containsRange(Function.getAddress(), Function.getSize()) &&
         "wrong section for function");

  // Only non-virtual, non-empty text sections carry function bytes.
  const bool HasCode =
      Section.isText() && !Section.isVirtual() && Section.getSize();
  if (!HasCode)
    return std::make_error_code(std::errc::bad_address);

  StringRef Contents = Section.getContents();
  assert(Contents.size() == Section.getSize() &&
         "section size mismatch");

  // Function offset from the section start.
  auto OffsetInSection = Function.getAddress() - Section.getAddress();
  auto *SectionBytes = reinterpret_cast<const uint8_t *>(Contents.data());
  auto *FunctionBytes = SectionBytes + OffsetInSection;
  return ArrayRef<uint8_t>(FunctionBytes, Function.getSize());
}
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
/// Look up the registered section whose address range contains \p Address.
///
/// Returns std::errc::bad_address when no entry of AddressToSection covers
/// the address.
ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
  // The only possible match is the entry immediately preceding the first
  // one keyed strictly above Address.
  const auto FirstAbove = AddressToSection.upper_bound(Address);
  if (FirstAbove == AddressToSection.begin())
    return std::make_error_code(std::errc::bad_address);

  const auto Candidate = std::prev(FirstAbove);
  const auto CandidateEnd = Candidate->first + Candidate->second->getSize();
  if (CandidateEnd > Address)
    return *Candidate->second;

  return std::make_error_code(std::errc::bad_address);
}
|
|
|
|
|
|
|
|
/// Const overload: look up the registered section whose address range
/// contains \p Address.
///
/// Returns std::errc::bad_address when no entry of AddressToSection covers
/// the address.
ErrorOr<const BinarySection &>
BinaryContext::getSectionForAddress(uint64_t Address) const {
  // The only possible match is the entry immediately preceding the first
  // one keyed strictly above Address.
  const auto FirstAbove = AddressToSection.upper_bound(Address);
  if (FirstAbove == AddressToSection.begin())
    return std::make_error_code(std::errc::bad_address);

  const auto Candidate = std::prev(FirstAbove);
  const auto CandidateEnd = Candidate->first + Candidate->second->getSize();
  if (CandidateEnd > Address)
    return *Candidate->second;

  return std::make_error_code(std::errc::bad_address);
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
/// Return the name of the section containing \p Address, or
/// std::errc::bad_address when getSectionForAddress() finds no section.
ErrorOr<StringRef>
BinaryContext::getSectionNameForAddress(uint64_t Address) const {
  const auto Containing = getSectionForAddress(Address);
  if (!Containing)
    return std::make_error_code(std::errc::bad_address);
  return Containing->getName();
}
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
/// Register \p Section with this context and return a reference to it.
///
/// The section is inserted into Sections and NameToSection; it is added to
/// AddressToSection only when its address is non-zero. The section must be
/// non-null, named, and not already registered (checked by assertions).
BinarySection &BinaryContext::registerSection(BinarySection *Section) {
  assert(Section && "can't register a null section");
  assert(!Section->getName().empty() &&
         "can't register sections without a name");
  auto Res = Sections.insert(Section);
  assert(Res.second && "can't register the same section twice.");
  // Res is only inspected by the assertion above; keep NDEBUG builds free of
  // unused-variable warnings.
  (void)Res;
  // Only register sections with addresses in the AddressToSection map.
  if (Section->getAddress())
    AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
  NameToSection.insert(std::make_pair(Section->getName(), Section));
  DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
  return *Section;
}
|
|
|
|
|
|
|
|
/// Create a BinarySection for the object-file section \p Section and
/// register it with this context.
BinarySection &BinaryContext::registerSection(SectionRef Section) {
  auto *NewSection = new BinarySection(*this, Section);
  return registerSection(NewSection);
}
|
|
|
|
|
|
|
|
/// Create a BinarySection named \p SectionName from \p OriginalSection and
/// register it with this context.
BinarySection &
BinaryContext::registerSection(StringRef SectionName,
                               const BinarySection &OriginalSection) {
  auto *NewSection = new BinarySection(*this, SectionName, OriginalSection);
  return registerSection(NewSection);
}
|
|
|
|
|
|
|
|
/// Update the section called \p Name if one is registered, otherwise create
/// and register a new one.
///
/// \p ELFType, \p ELFFlags, \p Data, \p Size, \p Alignment and \p IsLocal are
/// forwarded to BinarySection::update() for an existing section, or to the
/// BinarySection constructor for a new one. Updating requires the name to be
/// unique and must not change whether the section is allocatable (both
/// checked by assertions).
BinarySection &BinaryContext::registerOrUpdateSection(StringRef Name,
                                                      unsigned ELFType,
                                                      unsigned ELFFlags,
                                                      uint8_t *Data,
                                                      uint64_t Size,
                                                      unsigned Alignment,
                                                      bool IsLocal) {
  auto NamedSections = getSectionByName(Name);
  if (NamedSections.begin() != NamedSections.end()) {
    assert(std::next(NamedSections.begin()) == NamedSections.end() &&
           "can only update unique sections");
    auto *Section = NamedSections.begin()->second;

    DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
    const auto Flag = Section->isAllocatable();
    Section->update(Data, Size, Alignment, ELFType, ELFFlags, IsLocal);
    DEBUG(dbgs() << *Section << "\n");
    assert(Flag == Section->isAllocatable() &&
           "can't change section allocation status");
    // Flag is only read by the assertion above; keep NDEBUG builds free of
    // unused-variable warnings.
    (void)Flag;
    return *Section;
  }

  return registerSection(new BinarySection(*this, Name, Data, Size, Alignment,
                                           ELFType, ELFFlags, IsLocal));
}
|
|
|
|
|
|
|
|
/// Remove \p Section from this context and delete it.
///
/// Returns false, leaving everything untouched, when the section is not
/// present in Sections. Otherwise the first matching entry is erased from
/// AddressToSection and from NameToSection, the section is erased from
/// Sections, the object is deleted, and true is returned.
bool BinaryContext::deregisterSection(BinarySection &Section) {
  auto *Target = &Section;
  auto Registered = Sections.find(Target);
  if (Registered == Sections.end())
    return false;

  // Several entries may share the same address; erase the one that points
  // at this section.
  auto ByAddress = AddressToSection.equal_range(Target->getAddress());
  for (auto It = ByAddress.first; It != ByAddress.second; ++It) {
    if (It->second == Target) {
      AddressToSection.erase(It);
      break;
    }
  }

  // Same for the name map.
  auto ByName = NameToSection.equal_range(Target->getName());
  for (auto It = ByName.first; It != ByName.second; ++It) {
    if (It->second == Target) {
      NameToSection.erase(It);
      break;
    }
  }

  Sections.erase(Registered);
  delete Target;
  return true;
}
|
|
|
|
|
|
|
|
/// Print every registered section to \p OS, one "BOLT-INFO: " line each.
void BinaryContext::printSections(raw_ostream &OS) const {
  for (auto *Section : Sections)
    OS << "BOLT-INFO: " << *Section << "\n";
}
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
/// Return the section named "<absolute>", registering it first (with type
/// ELF::SHT_NULL and flags 0) when no unique section of that name exists.
BinarySection &BinaryContext::absoluteSection() {
  auto Existing = getUniqueSectionByName("<absolute>");
  if (!Existing)
    return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
  return *Existing;
}
|
|
|
|
|
2017-08-27 17:04:06 -07:00
|
|
|
/// Read an unsigned value of \p Size bytes located at \p Address.
///
/// Returns std::errc::bad_address when no section contains the address and
/// 0 when the containing section is virtual. Otherwise the value is decoded
/// from the section contents with a DataExtractor configured from AsmInfo.
ErrorOr<uint64_t>
BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
                                         size_t Size) const {
  const auto Containing = getSectionForAddress(Address);
  if (!Containing)
    return std::make_error_code(std::errc::bad_address);

  // Virtual sections yield 0 without looking at any contents.
  if (Containing->isVirtual())
    return 0;

  // The extractor takes a 32-bit offset relative to the section start.
  auto OffsetInSection =
      static_cast<uint32_t>(Address - Containing->getAddress());
  const DataExtractor Extractor(Containing->getContents(),
                                AsmInfo->isLittleEndian(),
                                AsmInfo->getCodePointerSize());
  return Extractor.getUnsigned(&OffsetInSection, Size);
}
|
|
|
|
|
|
|
|
/// Read a signed value of \p Size bytes located at \p Address.
///
/// Returns std::errc::bad_address when no section contains the address and
/// 0 when the containing section is virtual. Otherwise the value is decoded
/// from the section contents with a DataExtractor configured from AsmInfo.
///
/// NOTE(review): the result of getSigned() is handed back through
/// ErrorOr<uint64_t>; callers presumably convert it back to a signed type --
/// confirm against the call sites.
ErrorOr<uint64_t>
BinaryContext::getSignedValueAtAddress(uint64_t Address,
                                       size_t Size) const {
  const auto Containing = getSectionForAddress(Address);
  if (!Containing)
    return std::make_error_code(std::errc::bad_address);

  // Virtual sections yield 0 without looking at any contents.
  if (Containing->isVirtual())
    return 0;

  // The extractor takes a 32-bit offset relative to the section start.
  auto OffsetInSection =
      static_cast<uint32_t>(Address - Containing->getAddress());
  const DataExtractor Extractor(Containing->getContents(),
                                AsmInfo->isLittleEndian(),
                                AsmInfo->getCodePointerSize());
  return Extractor.getSigned(&OffsetInSection, Size);
}
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
void BinaryContext::addRelocation(uint64_t Address,
|
|
|
|
MCSymbol *Symbol,
|
|
|
|
uint64_t Type,
|
2018-02-01 16:33:43 -08:00
|
|
|
uint64_t Addend,
|
|
|
|
uint64_t Value) {
|
2018-01-23 15:10:24 -08:00
|
|
|
auto Section = getSectionForAddress(Address);
|
|
|
|
assert(Section && "cannot find section for address");
|
2018-02-01 16:33:43 -08:00
|
|
|
Section->addRelocation(Address - Section->getAddress(),
|
|
|
|
Symbol,
|
|
|
|
Type,
|
|
|
|
Addend,
|
|
|
|
Value);
|
2017-02-21 16:15:15 -08:00
|
|
|
}
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
bool BinaryContext::removeRelocationAt(uint64_t Address) {
|
2018-01-23 15:10:24 -08:00
|
|
|
auto Section = getSectionForAddress(Address);
|
|
|
|
assert(Section && "cannot find section for address");
|
2018-02-01 16:33:43 -08:00
|
|
|
return Section->removeRelocationAt(Address - Section->getAddress());
|
2017-02-21 16:15:15 -08:00
|
|
|
}
|
|
|
|
|
2017-12-11 17:07:56 -08:00
|
|
|
/// Return whatever the containing section's getRelocationAt() reports for
/// the section-relative offset of \p Address.
///
/// A section containing the address must exist (checked by assertion).
const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
  auto Containing = getSectionForAddress(Address);
  assert(Containing && "cannot find section for address");

  const auto OffsetInSection = Address - Containing->getAddress();
  return Containing->getRelocationAt(OffsetInSection);
}
|
2018-06-20 12:03:24 -07:00
|
|
|
|
|
|
|
/// Print a bug report for \p Function to errs() and terminate the process
/// with exit code 1.
///
/// The report contains a banner, the function's print name, a hex dump of
/// its bytes (when they can be retrieved), the function dump, and
/// \p Message.
void BinaryContext::exitWithBugReport(StringRef Message,
                                      const BinaryFunction &Function) const {
  errs() << "=======================================\n";
  errs() << "BOLT is unable to proceed because it couldn't properly understand "
            "this function.\n";
  errs() << "If you are running the most recent version of BOLT, you may "
            "want to "
            "report this and paste this dump.\nPlease check that there is no "
            "sensitive contents being shared in this dump.\n";
  errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";

  // getFunctionData() fails for non-text, virtual or empty sections. Check
  // the result before dereferencing so the report itself cannot crash.
  if (auto FunctionData = getFunctionData(Function)) {
    ScopedPrinter SP(errs());
    SP.printBinaryBlock("Function contents", *FunctionData);
  } else {
    errs() << "Function contents: <unavailable: "
           << FunctionData.getError().message() << ">\n";
  }
  errs() << "\n";
  Function.dump();
  errs() << "ERROR: " << Message;
  errs() << "\n=======================================\n";
  exit(1);
}
|
2018-07-08 12:14:08 -07:00
|
|
|
|
|
|
|
/// Allocate a new BinaryFunction called \p Name, append it to
/// InjectedBinaryFunctions, map its symbol to it, and return it.
BinaryFunction *
BinaryContext::createInjectedBinaryFunction(const std::string &Name,
                                            bool IsSimple) {
  auto *Injected = new BinaryFunction(Name, *this, IsSimple);
  InjectedBinaryFunctions.push_back(Injected);
  setSymbolToFunctionMap(Injected->getSymbol(), Injected);
  return Injected;
}
|
2018-11-15 16:02:16 -08:00
|
|
|
|
|
|
|
std::pair<size_t, size_t>
|
|
|
|
BinaryContext::calculateEmittedSize(BinaryFunction &BF) {
|
|
|
|
// Adjust branch instruction to match the current layout.
|
|
|
|
BF.fixBranches();
|
|
|
|
|
|
|
|
// Create local MC context to isolate the effect of ephemeral code emission.
|
|
|
|
std::unique_ptr<MCObjectFileInfo> LocalMOFI =
|
|
|
|
llvm::make_unique<MCObjectFileInfo>();
|
|
|
|
std::unique_ptr<MCContext> LocalCtx =
|
|
|
|
llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), LocalMOFI.get());
|
|
|
|
LocalMOFI->InitMCObjectFileInfo(*TheTriple, /*PIC=*/false, *LocalCtx);
|
|
|
|
auto *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
|
|
|
|
auto *MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *LocalCtx);
|
|
|
|
SmallString<256> Code;
|
|
|
|
raw_svector_ostream VecOS(Code);
|
|
|
|
|
|
|
|
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
|
|
|
|
*TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), VecOS,
|
|
|
|
std::unique_ptr<MCCodeEmitter>(MCE), *STI,
|
|
|
|
/* RelaxAll */ false,
|
|
|
|
/* IncrementalLinkerCompatible */ false,
|
|
|
|
/* DWARFMustBeAtTheEnd */ false));
|
|
|
|
|
|
|
|
Streamer->InitSections(false);
|
|
|
|
|
|
|
|
auto *Section = LocalMOFI->getTextSection();
|
|
|
|
Section->setHasInstructions(true);
|
|
|
|
|
|
|
|
auto *StartLabel = LocalCtx->getOrCreateSymbol("__hstart");
|
|
|
|
auto *EndLabel = LocalCtx->getOrCreateSymbol("__hend");
|
|
|
|
auto *ColdStartLabel = LocalCtx->getOrCreateSymbol("__cstart");
|
|
|
|
auto *ColdEndLabel = LocalCtx->getOrCreateSymbol("__cend");
|
|
|
|
|
|
|
|
Streamer->SwitchSection(Section);
|
|
|
|
Streamer->EmitLabel(StartLabel);
|
|
|
|
BF.emitBody(*Streamer, /*EmitColdPart = */false, /*EmitCodeOnly = */true);
|
|
|
|
Streamer->EmitLabel(EndLabel);
|
|
|
|
|
|
|
|
if (BF.isSplit()) {
|
|
|
|
auto *ColdSection =
|
|
|
|
LocalCtx->getELFSection(BF.getColdCodeSectionName(),
|
|
|
|
ELF::SHT_PROGBITS,
|
|
|
|
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
|
|
|
|
ColdSection->setHasInstructions(true);
|
|
|
|
|
|
|
|
Streamer->SwitchSection(ColdSection);
|
|
|
|
Streamer->EmitLabel(ColdStartLabel);
|
|
|
|
BF.emitBody(*Streamer, /*EmitColdPart = */true, /*EmitCodeOnly = */true);
|
|
|
|
Streamer->EmitLabel(ColdEndLabel);
|
|
|
|
}
|
|
|
|
|
|
|
|
// To avoid calling MCObjectStreamer::flushPendingLabels() which is private.
|
|
|
|
Streamer->EmitBytes(StringRef(""));
|
|
|
|
|
|
|
|
auto &Assembler =
|
|
|
|
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
|
|
|
|
MCAsmLayout Layout(Assembler);
|
|
|
|
Assembler.layout(Layout);
|
|
|
|
|
|
|
|
const auto HotSize = Layout.getSymbolOffset(*EndLabel) -
|
|
|
|
Layout.getSymbolOffset(*StartLabel);
|
|
|
|
const auto ColdSize = BF.isSplit() ? Layout.getSymbolOffset(*ColdEndLabel) -
|
|
|
|
Layout.getSymbolOffset(*ColdStartLabel)
|
|
|
|
: 0ULL;
|
|
|
|
|
|
|
|
// Clean-up the effect of the code emission.
|
|
|
|
for (const auto &Symbol : Assembler.symbols()) {
|
|
|
|
auto *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
|
|
|
|
MutableSymbol->setUndefined();
|
|
|
|
MutableSymbol->setIsRegistered(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(HotSize, ColdSize);
|
|
|
|
}
|
2019-04-03 15:52:01 -07:00
|
|
|
|
|
|
|
/// Find the function whose address range contains \p Address.
///
/// The candidate is the last entry of BinaryFunctions keyed at or below
/// \p Address. Its extent is getMaxSize() when \p UseMaxSize is set and
/// getSize() otherwise; \p CheckPastEnd extends the accepted range by one
/// byte. Unless \p Shallow is set, the chain of parent functions is followed
/// and the top-most parent is returned. Returns nullptr when no function
/// matches.
BinaryFunction *
BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
                                                  bool CheckPastEnd,
                                                  bool UseMaxSize,
                                                  bool Shallow) {
  const auto FirstAbove = BinaryFunctions.upper_bound(Address);
  if (FirstAbove == BinaryFunctions.begin())
    return nullptr;

  const auto Candidate = std::prev(FirstAbove);
  auto &Function = Candidate->second;

  const auto UsedSize =
      UseMaxSize ? Function.getMaxSize() : Function.getSize();
  // First address that is no longer attributed to the candidate.
  const auto Limit = Candidate->first + UsedSize + (CheckPastEnd ? 1 : 0);
  if (Address >= Limit)
    return nullptr;

  auto *BF = &Function;
  if (!Shallow) {
    // Walk up to the outermost parent function.
    while (auto *Parent = BF->getParentFunction())
      BF = Parent;
  }

  return BF;
}
|
|
|
|
|
|
|
|
/// Return the function associated with the symbol of the binary data found
/// at \p Address, or nullptr when there is no such data or function.
///
/// Unless \p Shallow is set, the chain of parent functions is followed and
/// the top-most parent is returned.
BinaryFunction *
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
  const auto *BD = getBinaryDataAtAddress(Address);
  if (!BD)
    return nullptr;

  auto *BF = getFunctionForSymbol(BD->getSymbol());
  if (!BF)
    return nullptr;

  if (Shallow)
    return BF;

  // Walk up to the outermost parent function.
  while (auto *Parent = BF->getParentFunction())
    BF = Parent;

  return BF;
}
|
|
|
|
|
|
|
|
/// Collect the output address ranges of the functions selected by
/// \p InputRanges.
///
/// For every input range, functions are visited starting at the first one
/// keyed at or above Range.LowPC and stopping at the first one whose address
/// is at or above Range.HighPC; each visited function contributes all of its
/// getOutputAddressRanges() entries, in order.
DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  DebugAddressRangesVector OutputRanges;

  for (const auto &Range : InputRanges) {
    for (auto BFI = BinaryFunctions.lower_bound(Range.LowPC),
              BFE = BinaryFunctions.end();
         BFI != BFE; ++BFI) {
      const auto &Function = BFI->second;
      if (Function.getAddress() >= Range.HighPC)
        break;

      const auto FunctionRanges = Function.getOutputAddressRanges();
      std::copy(std::begin(FunctionRanges), std::end(FunctionRanges),
                std::back_inserter(OutputRanges));
    }
  }

  return OutputRanges;
}
|