//===--- BinaryContext.cpp - Interface for machine-level context ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// #include "BinaryContext.h" #include "BinaryFunction.h" #include "DataReader.h" #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include using namespace llvm; using namespace bolt; #undef DEBUG_TYPE #define DEBUG_TYPE "bolt" namespace opts { extern cl::OptionCategory BoltCategory; extern cl::opt AggregateOnly; extern cl::opt StrictMode; extern cl::opt Verbosity; cl::opt NoHugePages("no-huge-pages", cl::desc("use regular size pages for code alignment"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); static cl::opt PrintDebugInfo("print-debug-info", cl::desc("print debug info when printing functions"), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory)); cl::opt PrintRelocations("print-relocations", cl::desc("print relocations when printing functions/objects"), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory)); static cl::opt PrintMemData("print-mem-data", cl::desc("print memory data annotations when printing functions"), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory)); } // namespace opts BinaryContext::BinaryContext(std::unique_ptr Ctx, std::unique_ptr DwCtx, std::unique_ptr TheTriple, const Target *TheTarget, std::string TripleName, std::unique_ptr MCE, 
std::unique_ptr MOFI, std::unique_ptr AsmInfo, std::unique_ptr MII, std::unique_ptr STI, std::unique_ptr InstPrinter, std::unique_ptr MIA, std::unique_ptr MIB, std::unique_ptr MRI, std::unique_ptr DisAsm, DataReader &DR) : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), TheTriple(std::move(TheTriple)), TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), DR(DR) { Relocation::Arch = this->TheTriple->getArch(); PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; } BinaryContext::~BinaryContext() { for (auto *Section : Sections) { delete Section; } for (auto *InjectedFunction : InjectedBinaryFunctions) { delete InjectedFunction; } for (auto JTI : JumpTables) { delete JTI.second; } clearBinaryData(); } std::unique_ptr BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { if (!MAB) { MAB = std::unique_ptr( TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions())); } return MAB->createObjectWriter(OS); } bool BinaryContext::validateObjectNesting() const { auto Itr = BinaryDataMap.begin(); auto End = BinaryDataMap.end(); bool Valid = true; while (Itr != End) { auto Next = std::next(Itr); while (Next != End && Itr->second->getSection() == Next->second->getSection() && Itr->second->containsRange(Next->second->getAddress(), Next->second->getSize())) { if (Next->second->Parent != Itr->second) { errs() << "BOLT-WARNING: object nesting incorrect for:\n" << "BOLT-WARNING: " << *Itr->second << "\n" << "BOLT-WARNING: " << *Next->second << "\n"; Valid = false; } ++Next; } Itr = Next; } return Valid; } bool BinaryContext::validateHoles() const { bool Valid = true; for (auto &Section : sections()) { for (const auto &Rel : Section.relocations()) { auto RelAddr = Rel.Offset + Section.getAddress(); auto *BD = 
getBinaryDataContainingAddress(RelAddr);
      // Every relocation must land inside some BinaryData object ...
      if (!BD) {
        errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
               << " 0x" << Twine::utohexstr(RelAddr) << " in "
               << Section.getName() << "\n";
        Valid = false;
      // ... and that object must have an atomic root.
      } else if (!BD->getAtomicRoot()) {
        errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
               << "address 0x" << Twine::utohexstr(RelAddr) << " in "
               << Section.getName() << "\n";
        Valid = false;
      }
    }
  }
  return Valid;
}

/// Recompute Parent links around the BinaryDataMap entry \p GAI after it has
/// been inserted or had its size changed.
void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
  const auto Address = GAI->second->getAddress();
  const auto Size = GAI->second->getSize();

  // Re-parent Itr to NewParent, then do the same for the run of following
  // entries that shared Itr's previous (non-null) parent.
  auto fixParents =
    [&](BinaryDataMapType::iterator Itr, BinaryData *NewParent) {
      auto *OldParent = Itr->second->Parent;
      Itr->second->Parent = NewParent;
      ++Itr;
      while (Itr != BinaryDataMap.end() && OldParent &&
             Itr->second->Parent == OldParent) {
        Itr->second->Parent = NewParent;
        ++Itr;
      }
    };

  // Check if the previous symbol contains the newly added symbol.
  if (GAI != BinaryDataMap.begin()) {
    auto *Prev = std::prev(GAI)->second;
    // Walk from the previous entry up through its chain of parents; each
    // step overwrites the parent chosen by the step before it.
    while (Prev) {
      if (Prev->getSection() == GAI->second->getSection() &&
          Prev->containsRange(Address, Size)) {
        fixParents(GAI, Prev);
      } else {
        fixParents(GAI, nullptr);
      }
      Prev = Prev->Parent;
    }
  }

  // Check if the newly added symbol contains any subsequent symbols.
  if (Size != 0) {
    // Subsequent symbols are attached to GAI's parent when it has one,
    // otherwise to GAI itself.
    auto *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
    auto Itr = std::next(GAI);
    while (Itr != BinaryDataMap.end() &&
           BD->containsRange(Itr->second->getAddress(),
                             Itr->second->getSize())) {
      Itr->second->Parent = BD;
      ++Itr;
    }
  }
}

/// Return the range of BinaryDataMap entries that follow \p BD and have
/// \p BD as an ancestor. \p BD itself is not part of the range.
iterator_range
BinaryContext::getSubBinaryData(BinaryData *BD) {
  auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
  auto End = Start;
  while (End != BinaryDataMap.end() &&
         BD->isAncestorOf(End->second)) {
    ++End;
  }
  return make_range(Start, End);
}

/// Resolve a reference from \p BF to \p Address into a (symbol, addend) pair.
std::pair
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
                                bool IsPCRel) {
  uint64_t Addend{0};

  if (isAArch64()) {
    // Check if this is an access to a constant island and create bookkeeping
    // to keep track of it and emit it later as part of this function.
    if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
      return std::make_pair(IslandSym, Addend);

    // Detect custom code written in assembly that refers to arbitrary
    // constant islands from other functions. Write this reference so we
    // can pull this constant island and emit it as part of this function
    // too.
    auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
    if (IslandIter != AddressToConstantIslandMap.end()) {
      if (auto *IslandSym =
              IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
        /// Make this function depend on IslandIter->second because we have
        /// a reference to its constant island. When emitting this function,
        /// we will also emit IslandIter->second's constants. This only
        /// happens in custom AArch64 assembly code.
        BF.IslandDependency.insert(IslandIter->second);
        BF.ProxyIslandSymbols[IslandSym] = IslandIter->second;
        return std::make_pair(IslandSym, Addend);
      }
    }
  }

  // Note that the address does not necessarily have to reside inside
  // a section, it could be an absolute address too.
auto Section = getSectionForAddress(Address); if (Section && Section->isText()) { if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) { if (Address != BF.getAddress()) { // The address could potentially escape. Mark it as another entry // point into the function. if (opts::Verbosity >= 1) { outs() << "BOLT-INFO: potentially escaped address 0x" << Twine::utohexstr(Address) << " in function " << BF << '\n'; } BF.HasInternalLabelReference = true; return std::make_pair( BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); } } else { InterproceduralReferences.insert(std::make_pair(&BF, Address)); } } const auto MemType = analyzeMemoryAt(Address, BF); if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { JumpTable *JT; const MCSymbol *Symbol; std::tie(JT, Symbol) = getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); return std::make_pair(Symbol, Addend); } if (auto *BD = getBinaryDataContainingAddress(Address)) { return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); } // TODO: use DWARF info to get size/alignment here? auto *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName()); return std::make_pair(TargetSymbol, Addend); } MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) { if (!isX86()) return MemoryContentsType::UNKNOWN; auto Section = getSectionForAddress(Address); if (!Section) { // No section - possibly an absolute address. Since we don't allow // internal function addresses to escape the function scope - we // consider it a tail call. if (opts::Verbosity > 1) { errs() << "BOLT-WARNING: no section for address 0x" << Twine::utohexstr(Address) << " referenced from function " << BF << '\n'; } return MemoryContentsType::UNKNOWN; } if (Section->isVirtual()) { // The contents are filled at runtime. return MemoryContentsType::UNKNOWN; } // No support for jump tables in code yet. 
if (Section->isText())
    return MemoryContentsType::UNKNOWN;

  // Returns true if the memory starting at JTAddress, read as entries of the
  // given jump table type, contains an entry pointing inside BF (other than
  // BF's start address).
  auto couldBeJumpTable = [&](const uint64_t JTAddress,
                              JumpTable::JumpTableType Type) {
    // PIC entries are 4 bytes wide; other entries are code-pointer sized.
    const auto EntrySize =
      Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
    auto ValueAddress = JTAddress;
    // Scan until the end of the section, or until the end of the data object
    // starting at JTAddress when that object has a known size.
    auto UpperBound = Section->getEndAddress();
    const auto *JumpTableBD = getBinaryDataAtAddress(JTAddress);
    if (JumpTableBD && JumpTableBD->getSize()) {
      UpperBound = JumpTableBD->getEndAddress();
      assert(UpperBound <= Section->getEndAddress() &&
             "data object cannot cross a section boundary");
    }
    while (ValueAddress <= UpperBound - EntrySize) {
      DEBUG(dbgs() << "BOLT-DEBUG: analyzing memory at 0x"
                   << Twine::utohexstr(ValueAddress));
      uint64_t Value;
      if (Type == JumpTable::JTT_PIC) {
        // PIC entries are signed offsets added to the table address.
        Value = JTAddress + *getSignedValueAtAddress(ValueAddress, EntrySize);
      } else {
        Value = *getPointerAtAddress(ValueAddress);
      }
      DEBUG(dbgs() << ", which contains value 0x"
                   << Twine::utohexstr(Value) << '\n');
      ValueAddress += EntrySize;

      // We assume that a jump table cannot have function start as an entry.
      if (BF.containsAddress(Value) && Value != BF.getAddress())
        return true;

      // Potentially a jump table can contain __builtin_unreachable() entry
      // pointing just right after the function. In this case we have to check
      // another entry. Otherwise the entry is outside of this function scope
      // and it's not a jump table.
      if (Value == BF.getAddress() + BF.getSize())
        continue;

      return false;
    }

    return false;
  };

  // Start with checking for PIC jump table. We expect non-PIC jump tables
  // to have high 32 bits set to 0.
  if (couldBeJumpTable(Address, JumpTable::JTT_PIC))
    return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;

  if (couldBeJumpTable(Address, JumpTable::JTT_NORMAL))
    return MemoryContentsType::POSSIBLE_JUMP_TABLE;

  return MemoryContentsType::UNKNOWN;
}

/// Fill in OffsetEntries of every registered jump table by reading the table
/// contents from memory.
void BinaryContext::populateJumpTables() {
  for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
       ++JTI) {
    auto *JT = JTI->second;
    auto &BF = *JT->Parent;

    DEBUG(dbgs() << "BOLT-DEBUG: populating jump table "
                 << JT->getName() << '\n');

    // The upper bound is defined by containing object, section limits, and
    // the next jump table in memory.
    auto UpperBound = JT->getSection().getEndAddress();
    const auto *JumpTableBD = getBinaryDataAtAddress(JT->getAddress());
    if (JumpTableBD && JumpTableBD->getSize()) {
      assert(JumpTableBD->getEndAddress() <= UpperBound &&
             "data object cannot cross a section boundary");
      UpperBound = JumpTableBD->getEndAddress();
    }
    auto NextJTI = std::next(JTI);
    if (NextJTI != JTE) {
      assert (UpperBound != JT->getAddress());
      UpperBound = std::min(NextJTI->second->getAddress(), UpperBound);
    }

    for (auto EntryAddress = JT->getAddress();
         EntryAddress <= UpperBound - JT->EntrySize;
         EntryAddress += JT->EntrySize) {
      uint64_t Value;
      if (JT->Type == JumpTable::JTT_PIC) {
        // PIC entries are signed offsets added to the table address.
        Value = JT->getAddress() +
          *getSignedValueAtAddress(EntryAddress, JT->EntrySize);
      } else {
        Value = *getPointerAtAddress(EntryAddress);
      }

      // __builtin_unreachable() case.
      if (Value == BF.getAddress() + BF.getSize()) {
        JT->OffsetEntries.emplace_back(Value - BF.getAddress());
        BF.IgnoredBranches.emplace_back(Value - BF.getAddress(), BF.getSize());
        continue;
      }

      // We assume that a jump table cannot have function start as an entry.
      if (!BF.containsAddress(Value) || Value == BF.getAddress())
        break;

      // Check there's an instruction at this offset.
if (!BF.getInstructionAtOffset(Value - BF.getAddress())) break; BF.registerReferencedOffset(Value - BF.getAddress()); JT->OffsetEntries.emplace_back(Value - BF.getAddress()); } if (JT->OffsetEntries.size() <= 1) { dbgs() << "JT with size " << JT->OffsetEntries.size() << " detected in " << BF << '\n'; JT->print(dbgs()); if (NextJTI != JTE) { dbgs() << "next jump table at 0x" << Twine::utohexstr(NextJTI->second->getAddress()) << " belongs to function " << *NextJTI->second->Parent << '\n'; NextJTI->second->print(dbgs()); } } assert(JT->OffsetEntries.size() > 1 && "expected more than one jump table entry"); // Check there are relocations against JT entries. if (opts::StrictMode) { for (auto Address = JT->getAddress(); Address < JT->getAddress() + JT->getSize(); Address += JT->EntrySize) { if (JT->Type == JumpTable::JTT_PIC) { assert(PCRelocation.count(Address) && "no matching relocation"); PCRelocation.erase(PCRelocation.find(Address)); } else { assert(getRelocationAt(Address) && "missing relocation"); } } } } assert((!opts::StrictMode || !PCRelocation.size()) && "unclaimed PC-relative relocations left in data\n"); clearList(PCRelocation); } MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, uint64_t Size, uint16_t Alignment, unsigned Flags) { auto Itr = BinaryDataMap.find(Address); if (Itr != BinaryDataMap.end()) { assert(Itr->second->getSize() == Size || !Size); return Itr->second->getSymbol(); } std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); assert(!GlobalSymbols.count(Name) && "created name is not unique"); return registerNameAtAddress(Name, Address, Size, Alignment, Flags); } BinaryFunction *BinaryContext::createBinaryFunction( const std::string &Name, BinarySection &Section, uint64_t Address, uint64_t Size, bool IsSimple, uint64_t SymbolSize, uint16_t Alignment) { auto Result = BinaryFunctions.emplace( Address, BinaryFunction(Name, Section, Address, Size, *this, IsSimple)); assert(Result.second == true && 
"unexpected duplicate function"); auto *BF = &Result.first->second; registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, Alignment); setSymbolToFunctionMap(BF->getSymbol(), BF); return BF; } std::pair BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, JumpTable::JumpTableType Type) { if (auto *JT = getJumpTableContainingAddress(Address)) { assert(JT->Type == Type && "jump table types have to match"); assert(JT->Parent == &Function && "cannot re-use jump table of a different function"); assert(Address == JT->getAddress() && "unexpected non-empty jump table"); return std::make_pair(JT, JT->getFirstLabel()); } const auto EntrySize = Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize(); // Re-use the existing symbol if possible. MCSymbol *JTLabel{nullptr}; if (auto *Object = getBinaryDataAtAddress(Address)) { if (!isInternalSymbolName(Object->getSymbol()->getName())) JTLabel = Object->getSymbol(); } if (!JTLabel) { const auto JumpTableName = generateJumpTableName(Function, Address); JTLabel = Ctx->getOrCreateSymbol(JumpTableName); registerNameAtAddress(JTLabel->getName(), Address, 0, EntrySize); } DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() << " in function " << Function << 'n'); auto *JT = new JumpTable(JTLabel->getName(), Address, EntrySize, Type, {}, JumpTable::LabelMapType{{0, JTLabel}}, Function, *getSectionForAddress(Address)); JumpTables.emplace(Address, JT); // Duplicate the entry for the parent function for easy access. 
Function.JumpTables.emplace(Address, JT); return std::make_pair(JT, JTLabel); } std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, uint64_t Address) { size_t Id; uint64_t Offset = 0; if (const auto *JT = BF.getJumpTableContainingAddress(Address)) { Offset = Address - JT->getAddress(); auto Itr = JT->Labels.find(Offset); if (Itr != JT->Labels.end()) { return Itr->second->getName(); } Id = JumpTableIds.at(JT->getAddress()); } else { Id = JumpTableIds[Address] = BF.JumpTables.size(); } return ("JUMP_TABLE/" + BF.Names[0] + "." + std::to_string(Id) + (Offset ? ("." + std::to_string(Offset)) : "")); } MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, uint64_t Size, uint16_t Alignment, unsigned Flags) { auto SectionOrErr = getSectionForAddress(Address); auto &Section = SectionOrErr ? SectionOrErr.get() : absoluteSection(); auto GAI = BinaryDataMap.find(Address); BinaryData *BD; if (GAI == BinaryDataMap.end()) { BD = new BinaryData(Name, Address, Size, Alignment ? Alignment : 1, Section, Flags); } else { BD = GAI->second; } return registerNameAtAddress(Name, Address, BD); } MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, BinaryData *BD) { auto GAI = BinaryDataMap.find(Address); if (GAI != BinaryDataMap.end()) { if (BD != GAI->second) { // Note: this could be a source of bugs if client code holds // on to BinaryData*'s in data structures for any length of time. auto *OldBD = GAI->second; BD->merge(GAI->second); delete OldBD; GAI->second = BD; for (auto &Name : BD->names()) { GlobalSymbols[Name] = BD; } updateObjectNesting(GAI); BD = nullptr; } else if (!GAI->second->hasName(Name)) { GAI->second->Names.push_back(Name); GlobalSymbols[Name] = GAI->second; } else { BD = nullptr; } } else { GAI = BinaryDataMap.emplace(Address, BD).first; GlobalSymbols[Name] = BD; updateObjectNesting(GAI); } // Register the name with MCContext. 
auto *Symbol = Ctx->getOrCreateSymbol(Name); if (BD) { BD->Symbols.push_back(Symbol); assert(BD->Symbols.size() == BD->Names.size() && "there should be a 1:1 mapping between names and symbols"); } return Symbol; } const BinaryData * BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address, bool IncludeEnd, bool BestFit) const { auto NI = BinaryDataMap.lower_bound(Address); auto End = BinaryDataMap.end(); if ((NI != End && Address == NI->first && !IncludeEnd) || (NI-- != BinaryDataMap.begin())) { if (NI->second->containsAddress(Address) || (IncludeEnd && NI->second->getEndAddress() == Address)) { while (BestFit && std::next(NI) != End && (std::next(NI)->second->containsAddress(Address) || (IncludeEnd && std::next(NI)->second->getEndAddress() == Address))) { ++NI; } return NI->second; } // If this is a sub-symbol, see if a parent data contains the address. auto *BD = NI->second->getParent(); while (BD) { if (BD->containsAddress(Address) || (IncludeEnd && NI->second->getEndAddress() == Address)) return BD; BD = BD->getParent(); } } return nullptr; } bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { auto NI = BinaryDataMap.find(Address); assert(NI != BinaryDataMap.end()); if (NI == BinaryDataMap.end()) return false; // TODO: it's possible that a jump table starts at the same address // as a larger blob of private data. When we set the size of the // jump table, it might be smaller than the total blob size. In this // case we just leave the original size since (currently) it won't really // affect anything. See T26915981. 
assert((!NI->second->Size ||
          NI->second->Size == Size ||
          (NI->second->isJumpTable() && NI->second->Size > Size)) &&
         "can't change the size of a symbol that has already had its "
         "size set");
  // Only an object without a size gets one assigned; returns whether the
  // size was actually set.
  if (!NI->second->Size) {
    NI->second->Size = Size;
    updateObjectNesting(NI);
    return true;
  }
  return false;
}

/// Give objects whose primary name is an internal (generated) name a new
/// primary name derived from a hash computed by their section.
void BinaryContext::generateSymbolHashes() {
  // True for objects named "HOLEat..." or whose bytes are all zero.
  auto isPadding = [](const BinaryData &BD) {
    auto Contents = BD.getSection().getContents();
    auto SymData = Contents.substr(BD.getOffset(), BD.getSize());
    return (BD.getName().startswith("HOLEat") ||
            SymData.find_first_not_of(0) == StringRef::npos);
  };

  uint64_t NumCollisions = 0;
  for (auto &Entry : BinaryDataMap) {
    auto &BD = *Entry.second;
    auto Name = BD.getName();

    // Objects that already have a non-internal primary name are left alone.
    if (!isInternalSymbolName(Name))
      continue;

    // First check if a non-anonymous alias exists and move it to the front.
    if (BD.getNames().size() > 1) {
      auto Itr = std::find_if(BD.Names.begin(),
                              BD.Names.end(),
                              [&](const StringRef Name) {
                                return !isInternalSymbolName(Name);
                              });
      if (Itr != BD.Names.end()) {
        assert(BD.Names.size() == BD.Symbols.size() &&
               "there should be a 1:1 mapping between names and symbols");
        // Swap both the name and its symbol so the two lists stay aligned.
        auto Idx = std::distance(BD.Names.begin(), Itr);
        std::swap(BD.Names[0], *Itr);
        std::swap(BD.Symbols[0], BD.Symbols[Idx]);
        continue;
      }
    }

    // We have to skip 0 size symbols since they will all collide.
    if (BD.getSize() == 0) {
      continue;
    }

    // The new name is the part of the old name preceding "0x", followed by
    // "_" and the hash in hex.
    const auto Hash = BD.getSection().hash(BD);
    const auto Idx = Name.find("0x");
    std::string NewName = (Twine(Name.substr(0, Idx)) +
                           "_" + Twine::utohexstr(Hash)).str();
    if (getBinaryDataByName(NewName)) {
      // Ignore collisions for symbols that appear to be padding
      // (i.e. all zeros or a "hole")
      if (!isPadding(BD)) {
        if (opts::Verbosity) {
          errs() << "BOLT-WARNING: collision detected when hashing " << BD
                 << " with new name (" << NewName << "), skipping.\n";
        }
        ++NumCollisions;
      }
      continue;
    }
    // Prepend the hashed name and its symbol so they become the primary ones.
    BD.Names.insert(BD.Names.begin(), NewName);
    BD.Symbols.insert(BD.Symbols.begin(),
                      Ctx->getOrCreateSymbol(NewName));
    assert(BD.Names.size() == BD.Symbols.size() &&
           "there should be a 1:1 mapping between names and symbols");
    GlobalSymbols[NewName] = &BD;
  }
  if (NumCollisions) {
    errs() << "BOLT-WARNING: " << NumCollisions
           << " collisions detected while hashing binary objects";
    if (!opts::Verbosity)
      errs() << ". Use -v=1 to see the list.";
    errs() << '\n';
  }
}

/// Process the (function, address) pairs collected in
/// InterproceduralReferences, then clear the collection.
void BinaryContext::processInterproceduralReferences() {
  for (auto &Pair : InterproceduralReferences) {
    auto *FromBF = Pair.first;
    auto Addr = Pair.second;

    auto *ContainingFunction = getBinaryFunctionContainingAddress(Addr);
    // References that stay within the originating function need no handling.
    if (FromBF == ContainingFunction)
      continue;

    if (ContainingFunction) {
      // Only a parent function (or a sibling) can reach its fragment.
      if (ContainingFunction->IsFragment) {
        assert(!FromBF->IsFragment &&
               "only one cold fragment is supported at this time");
        ContainingFunction->setParentFunction(FromBF);
        FromBF->addFragment(ContainingFunction);
        if (!HasRelocations) {
          ContainingFunction->setSimple(false);
          FromBF->setSimple(false);
        }
        if (opts::Verbosity >= 1) {
          outs() << "BOLT-INFO: marking " << *ContainingFunction
                 << " as a fragment of " << *FromBF << '\n';
        }
        continue;
      }

      // A reference into the middle of another function becomes an
      // additional entry point of that function.
      if (ContainingFunction->getAddress() != Addr) {
        ContainingFunction->addEntryPoint(Addr);
        if (!HasRelocations) {
          if (opts::Verbosity >= 1) {
            errs() << "BOLT-WARNING: Function " << *ContainingFunction
                   << " has internal BBs that are target of a reference "
                   << "located in another function. Skipping the function.\n";
          }
          ContainingFunction->setSimple(false);
        }
      }
    } else if (Addr) {
      // Check if address falls in function padding space - this could be
      // unmarked data in code. In this case adjust the padding space size.
auto Section = getSectionForAddress(Addr); assert(Section && "cannot get section for referenced address"); if (!Section->isText()) continue; // PLT requires special handling and could be ignored in this context. StringRef SectionName = Section->getName(); if (SectionName == ".plt" || SectionName == ".plt.got") continue; if (HasRelocations) { errs() << "BOLT-ERROR: cannot process binaries with unmarked " << "object in code at address 0x" << Twine::utohexstr(Addr) << " belonging to section " << SectionName << " in relocation mode.\n"; exit(1); } ContainingFunction = getBinaryFunctionContainingAddress(Addr, /*CheckPastEnd=*/false, /*UseMaxSize=*/true); // We are not going to overwrite non-simple functions, but for simple // ones - adjust the padding size. if (ContainingFunction && ContainingFunction->isSimple()) { errs() << "BOLT-WARNING: function " << *ContainingFunction << " has an object detected in a padding region at address 0x" << Twine::utohexstr(Addr) << '\n'; ContainingFunction->setMaxSize(Addr - ContainingFunction->getAddress()); } } } InterproceduralReferences.clear(); } void BinaryContext::postProcessSymbolTable() { fixBinaryDataHoles(); bool Valid = true; for (auto &Entry : BinaryDataMap) { auto *BD = Entry.second; if ((BD->getName().startswith("SYMBOLat") || BD->getName().startswith("DATAat")) && !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && BD->getSection()) { errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; Valid = false; } } assert(Valid); assignMemData(); generateSymbolHashes(); } void BinaryContext::foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF) { std::shared_lock ReadCtxLock(CtxMutex, std::defer_lock); std::unique_lock WriteCtxLock(CtxMutex, std::defer_lock); std::unique_lock WriteSymbolMapLock( SymbolToFunctionMapMutex, std::defer_lock); // Copy name list. ParentBF.addNewNames(ChildBF.getNames()); // Update internal bookkeeping info. 
for (auto &Name : ChildBF.getNames()) {
    // The context is only locked for the duration of the symbol lookup.
    ReadCtxLock.lock();
    // Calls to functions are handled via symbols, and we keep the lookup table
    // that we need to update.
    auto *Symbol = Ctx->lookupSymbol(Name);
    ReadCtxLock.unlock();
    assert(Symbol && "symbol cannot be NULL at this point");

    // Redirect the child's symbol to the parent function.
    WriteSymbolMapLock.lock();
    SymbolToFunctionMap[Symbol] = &ParentBF;
    WriteSymbolMapLock.unlock();

    // NB: there's no need to update BinaryDataMap and GlobalSymbols.
  }

  // Merge execution counts of ChildBF into those of ParentBF.
  ChildBF.mergeProfileDataInto(ParentBF);

  if (HasRelocations) {
    std::shared_lock ReadBfsLock(BinaryFunctionsMutex,
                                                      std::defer_lock);
    std::unique_lock WriteBfsLock(BinaryFunctionsMutex,
                                                       std::defer_lock);
    // Remove ChildBF from the global set of functions in relocs mode.
    // NOTE(review): the lookup and the erase happen under separate lock
    // acquisitions - confirm nothing can modify BinaryFunctions in between.
    ReadBfsLock.lock();
    auto FI = BinaryFunctions.find(ChildBF.getAddress());
    ReadBfsLock.unlock();

    assert(FI != BinaryFunctions.end() && "function not found");
    assert(&ChildBF == &FI->second && "function mismatch");

    WriteBfsLock.lock();
    FI = BinaryFunctions.erase(FI);
    WriteBfsLock.unlock();

  } else {
    // In non-relocation mode we keep the function, but rename it.
    std::string NewName = "__ICF_" + ChildBF.getSymbol()->getName().str();
    ChildBF.Names.clear();
    ChildBF.Names.push_back(NewName);

    WriteCtxLock.lock();
    ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(NewName);
    WriteCtxLock.unlock();

    ChildBF.setFolded();
  }
}

/// Cover every address range of each allocatable section that is not covered
/// by a top-level data object, either by growing the object that starts at
/// the gap or by creating a new "HOLEat" object.
void BinaryContext::fixBinaryDataHoles() {
  assert(validateObjectNesting() && "object nesting inconsitency detected");

  for (auto &Section : allocatableSections()) {
    // (start address, size) of every gap found in this section.
    std::vector> Holes;

    // Selects top-level objects of this section, excluding zero-sized
    // generated placeholders ("SYMBOLat0x", "DATAat0x", "ANONYMOUS").
    auto isNotHole = [&Section](const binary_data_iterator &Itr) {
      auto *BD = Itr->second;
      bool isHole = (!BD->getParent() &&
                     !BD->getSize() &&
                     BD->isObject() &&
                     (BD->getName().startswith("SYMBOLat0x") ||
                      BD->getName().startswith("DATAat0x") ||
                      BD->getName().startswith("ANONYMOUS")));
      return !isHole && BD->getSection() == Section && !BD->getParent();
    };

    auto BDStart = BinaryDataMap.begin();
    auto BDEnd = BinaryDataMap.end();
    auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
    auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);

    // EndAddress tracks the end of the last object seen so far.
    uint64_t EndAddress = Section.getAddress();

    while (Itr != End) {
      if (Itr->second->getAddress() > EndAddress) {
        auto Gap = Itr->second->getAddress() - EndAddress;
        Holes.push_back(std::make_pair(EndAddress, Gap));
      }
      EndAddress = Itr->second->getEndAddress();
      ++Itr;
    }

    // Gap between the last object and the end of the section.
    if (EndAddress < Section.getEndAddress()) {
      Holes.push_back(std::make_pair(EndAddress,
                                     Section.getEndAddress() - EndAddress));
    }

    // If there is already a symbol at the start of the hole, grow that symbol
    // to cover the rest. Otherwise, create a new symbol to cover the hole.
    for (auto &Hole : Holes) {
      auto *BD = getBinaryDataAtAddress(Hole.first);
      if (BD) {
        // BD->getSection() can be != Section if there are sections that
        // overlap. In this case it is probably safe to just skip the holes
        // since the overlapping section will not(?) have any symbols in it.
if (BD->getSection() == Section) setBinaryDataSize(Hole.first, Hole.second); } else { getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); } } } assert(validateObjectNesting() && "object nesting inconsitency detected"); assert(validateHoles() && "top level hole detected in object map"); } void BinaryContext::printGlobalSymbols(raw_ostream& OS) const { const BinarySection* CurrentSection = nullptr; bool FirstSection = true; for (auto &Entry : BinaryDataMap) { const auto *BD = Entry.second; const auto &Section = BD->getSection(); if (FirstSection || Section != *CurrentSection) { uint64_t Address, Size; StringRef Name = Section.getName(); if (Section) { Address = Section.getAddress(); Size = Section.getSize(); } else { Address = BD->getAddress(); Size = BD->getSize(); } OS << "BOLT-INFO: Section " << Name << ", " << "0x" + Twine::utohexstr(Address) << ":" << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; CurrentSection = &Section; FirstSection = false; } OS << "BOLT-INFO: "; auto *P = BD->getParent(); while (P) { OS << " "; P = P->getParent(); } OS << *BD << "\n"; } } void BinaryContext::assignMemData() { auto getAddress = [&](const MemInfo &MI) -> uint64_t { if (!MI.Addr.IsSymbol) return MI.Addr.Offset; if (auto *BD = getBinaryDataByName(MI.Addr.Name)) return BD->getAddress() + MI.Addr.Offset; return 0; }; // Map of sections (or heap/stack) to count/size. 
std::map Counts;
  std::map JumpTableCounts;

  uint64_t TotalCount = 0;
  for (auto &Entry : DR.getAllFuncsMemData()) {
    for (auto &MI : Entry.second.Data) {
      const auto Addr = getAddress(MI);
      auto *BD = getBinaryDataContainingAddress(Addr);
      if (BD) {
        // Attribute the sample to the atomic root of the object it hit.
        BD->getAtomicRoot()->addMemData(MI);
        Counts[BD->getSectionName()] += MI.Count;
        if (BD->getAtomicRoot()->isJumpTable()) {
          JumpTableCounts[BD->getSectionName()] += MI.Count;
        }
      } else {
        // Addresses outside of any known object are reported as heap/stack.
        Counts["Heap/stack"] += MI.Count;
      }
      TotalCount += MI.Count;
    }
  }

  // Percentage of Part in Whole; yields 0 for an empty Whole instead of
  // dividing by zero and printing NaN.
  auto percentOf = [](double Part, double Whole) {
    return Whole != 0.0 ? 100.0 * Part / Whole : 0.0;
  };

  if (!Counts.empty()) {
    outs() << "BOLT-INFO: Memory stats breakdown:\n";
    for (auto &Entry : Counts) {
      const auto &Section = Entry.first;
      const auto Count = Entry.second;
      outs() << "BOLT-INFO: " << Section << " = " << Count
             << format(" (%.1f%%)\n", percentOf(Count, TotalCount));

      // A single lookup also avoids inserting an entry through operator[].
      const auto JTIt = JumpTableCounts.find(Section);
      if (JTIt != JumpTableCounts.end()) {
        const auto JTCount = JTIt->second;
        outs() << "BOLT-INFO: jump tables = " << JTCount
               << format(" (%.1f%%)\n", percentOf(JTCount, Count));
      }
    }
    outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n";
  }
}

namespace {

/// Recursively finds DWARF DW_TAG_subprogram DIEs and matches them with
/// BinaryFunctions by start address. Subprograms without a matching function
/// (mostly functions that are never called and removed from the binary) are
/// skipped.
void findSubprograms(const DWARFDie DIE,
                     std::map &BinaryFunctions) {
  if (DIE.isSubprogramDIE()) {
    uint64_t LowPC, HighPC, SectionIndex;
    if (DIE.getLowAndHighPC(LowPC, HighPC, SectionIndex)) {
      auto It = BinaryFunctions.find(LowPC);
      if (It != BinaryFunctions.end()) {
        It->second.addSubprogramDIE(DIE);
      } else {
        // The function must have been optimized away by GC.
} } else { const auto RangesVector = DIE.getAddressRanges(); for (const auto Range : DIE.getAddressRanges()) { auto It = BinaryFunctions.find(Range.LowPC); if (It != BinaryFunctions.end()) { It->second.addSubprogramDIE(DIE); } } } } for (auto ChildDIE = DIE.getFirstChild(); ChildDIE && !ChildDIE.isNULL(); ChildDIE = ChildDIE.getSibling()) { findSubprograms(ChildDIE, BinaryFunctions); } } } // namespace unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, const uint32_t SrcCUID, unsigned FileIndex) { auto SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); auto LineTable = DwCtx->getLineTableForUnit(SrcUnit); const auto &FileNames = LineTable->Prologue.FileNames; // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 // means empty dir. assert(FileIndex > 0 && FileIndex <= FileNames.size() && "FileIndex out of range for the compilation unit."); StringRef Dir = ""; if (FileNames[FileIndex - 1].DirIdx != 0) { if (auto DirName = LineTable->Prologue .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1] .getAsCString()) { Dir = *DirName; } } StringRef FileName = ""; if (auto FName = FileNames[FileIndex - 1].Name.getAsCString()) FileName = *FName; assert(FileName != ""); return cantFail(Ctx->getDwarfFile(Dir, FileName, 0, nullptr, None, DestCUID)); } std::vector BinaryContext::getSortedFunctions() { std::vector SortedFunctions(BinaryFunctions.size()); std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), SortedFunctions.begin(), [](std::pair &BFI) { return &BFI.second; }); std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), [] (const BinaryFunction *A, const BinaryFunction *B) { if (A->hasValidIndex() && B->hasValidIndex()) { return A->getIndex() < B->getIndex(); } return A->hasValidIndex(); }); return SortedFunctions; } void BinaryContext::preprocessDebugInfo() { // Populate MCContext with DWARF files. 
for (const auto &CU : DwCtx->compile_units()) { const auto CUID = CU->getOffset(); auto *LineTable = DwCtx->getLineTableForUnit(CU.get()); const auto &FileNames = LineTable->Prologue.FileNames; // Make sure empty debug line tables are registered too. if (FileNames.empty()) { cantFail(Ctx->getDwarfFile("", "", 0, nullptr, None, CUID)); continue; } for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 // means empty dir. StringRef Dir = ""; if (FileNames[I].DirIdx != 0) if (auto DirName = LineTable->Prologue.IncludeDirectories[FileNames[I].DirIdx - 1] .getAsCString()) Dir = *DirName; StringRef FileName = ""; if (auto FName = FileNames[I].Name.getAsCString()) FileName = *FName; assert(FileName != ""); cantFail(Ctx->getDwarfFile(Dir, FileName, 0, nullptr, None, CUID)); } } // For each CU, iterate over its children DIEs and match subprogram DIEs to // BinaryFunctions. for (auto &CU : DwCtx->compile_units()) { findSubprograms(CU->getUnitDIE(false), BinaryFunctions); } // Some functions may not have a corresponding subprogram DIE // yet they will be included in some CU and will have line number information. // Hence we need to associate them with the CU and include in CU ranges. for (auto &AddrFunctionPair : BinaryFunctions) { auto FunctionAddress = AddrFunctionPair.first; auto &Function = AddrFunctionPair.second; if (!Function.getSubprogramDIEs().empty()) continue; if (auto DebugAranges = DwCtx->getDebugAranges()) { auto CUOffset = DebugAranges->findAddress(FunctionAddress); if (CUOffset != -1U) { Function.addSubprogramDIE( DWARFDie(DwCtx->getCompileUnitForOffset(CUOffset), nullptr)); continue; } } #ifdef DWARF_LOOKUP_ALL_RANGES // Last resort - iterate over all compile units. This should not happen // very often. If it does, we need to create a separate lookup table // similar to .debug_aranges internally. This slows down processing // considerably. 
for (const auto &CU : DwCtx->compile_units()) { const auto *CUDie = CU->getUnitDIE(); for (const auto &Range : CUDie->getAddressRanges(CU.get())) { if (FunctionAddress >= Range.first && FunctionAddress < Range.second) { Function.addSubprogramDIE(DWARFDie(CU.get(), nullptr)); break; } } } #endif } } void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { uint32_t Operation = Inst.getOperation(); switch (Operation) { case MCCFIInstruction::OpSameValue: OS << "OpSameValue Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpRememberState: OS << "OpRememberState"; break; case MCCFIInstruction::OpRestoreState: OS << "OpRestoreState"; break; case MCCFIInstruction::OpOffset: OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: OS << "OpDefCfaRegister Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpDefCfaOffset: OS << "OpDefCfaOffset " << Inst.getOffset(); break; case MCCFIInstruction::OpDefCfa: OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); break; case MCCFIInstruction::OpRelOffset: OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); break; case MCCFIInstruction::OpAdjustCfaOffset: OS << "OfAdjustCfaOffset " << Inst.getOffset(); break; case MCCFIInstruction::OpEscape: OS << "OpEscape"; break; case MCCFIInstruction::OpRestore: OS << "OpRestore Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpUndefined: OS << "OpUndefined Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpRegister: OS << "OpRegister Reg" << Inst.getRegister() << " Reg" << Inst.getRegister2(); break; case MCCFIInstruction::OpWindowSave: OS << "OpWindowSave"; break; case MCCFIInstruction::OpGnuArgsSize: OS << "OpGnuArgsSize"; break; default: OS << "Op#" << Operation; break; } } void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, uint64_t Offset, const BinaryFunction* Function, bool PrintMCInst, bool 
PrintMemData, bool PrintRelocations) const { if (MIB->isEHLabel(Instruction)) { OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; return; } OS << format(" %08" PRIx64 ": ", Offset); if (MIB->isCFI(Instruction)) { uint32_t Offset = Instruction.getOperand(0).getImm(); OS << "\t!CFI\t$" << Offset << "\t; "; if (Function) printCFI(OS, *Function->getCFIFor(Instruction)); OS << "\n"; return; } InstPrinter->printInst(&Instruction, OS, "", *STI); if (MIB->isCall(Instruction)) { if (MIB->isTailCall(Instruction)) OS << " # TAILCALL "; if (MIB->isInvoke(Instruction)) { const auto EHInfo = MIB->getEHInfo(Instruction); OS << " # handler: "; if (EHInfo->first) OS << *EHInfo->first; else OS << '0'; OS << "; action: " << EHInfo->second; const auto GnuArgsSize = MIB->getGnuArgsSize(Instruction); if (GnuArgsSize >= 0) OS << "; GNU_args_size = " << GnuArgsSize; } } else if (MIB->isIndirectBranch(Instruction)) { if (auto JTAddress = MIB->getJumpTable(Instruction)) { OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); } else { OS << " # UNKNOWN CONTROL FLOW"; } } MIB->printAnnotations(Instruction, OS); const DWARFDebugLine::LineTable *LineTable = Function && opts::PrintDebugInfo ? Function->getDWARFUnitLineTable().second : nullptr; if (LineTable) { auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); if (RowRef != DebugLineTableRowRef::NULL_ROW) { const auto &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; if (auto FName = LineTable->Prologue.FileNames[Row.File - 1].Name.getAsCString()) FileName = *FName; OS << " # debug line " << FileName << ":" << Row.Line; if (Row.Column) { OS << ":" << Row.Column; } } } if ((opts::PrintMemData || PrintMemData) && Function) { const auto *MD = Function->getMemData(); const auto MemDataOffset = MIB->tryGetAnnotationAs(Instruction, "MemDataOffset"); if (MD && MemDataOffset) { bool DidPrint = false; for (auto &MI : MD->getMemInfoRange(MemDataOffset.get())) { OS << (DidPrint ? 
", " : " # Loads: "); OS << MI.Addr << "/" << MI.Count; DidPrint = true; } } } if ((opts::PrintRelocations || PrintRelocations) && Function) { const auto Size = computeCodeSize(&Instruction, &Instruction + 1); Function->printRelocations(OS, Offset, Size); } OS << "\n"; if (PrintMCInst) { Instruction.dump_pretty(OS, InstPrinter.get()); OS << "\n"; } } ErrorOr> BinaryContext::getFunctionData(const BinaryFunction &Function) const { auto &Section = Function.getSection(); assert(Section.containsRange(Function.getAddress(), Function.getSize()) && "wrong section for function"); if (!Section.isText() || Section.isVirtual() || !Section.getSize()) { return std::make_error_code(std::errc::bad_address); } StringRef SectionContents = Section.getContents(); assert(SectionContents.size() == Section.getSize() && "section size mismatch"); // Function offset from the section start. auto FunctionOffset = Function.getAddress() - Section.getAddress(); auto *Bytes = reinterpret_cast(SectionContents.data()); return ArrayRef(Bytes + FunctionOffset, Function.getSize()); } ErrorOr BinaryContext::getSectionForAddress(uint64_t Address) { auto SI = AddressToSection.upper_bound(Address); if (SI != AddressToSection.begin()) { --SI; auto UpperBound = SI->first + SI->second->getSize(); if (!SI->second->getSize()) UpperBound += 1; if (UpperBound > Address) return *SI->second; } return std::make_error_code(std::errc::bad_address); } ErrorOr BinaryContext::getSectionNameForAddress(uint64_t Address) const { if (auto Section = getSectionForAddress(Address)) { return Section->getName(); } return std::make_error_code(std::errc::bad_address); } BinarySection &BinaryContext::registerSection(BinarySection *Section) { assert(!Section->getName().empty() && "can't register sections without a name"); auto Res = Sections.insert(Section); assert(Res.second && "can't register the same section twice."); // Only register sections with addresses in the AddressToSection map. 
if (Section->getAddress()) AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); NameToSection.insert(std::make_pair(Section->getName(), Section)); DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); return *Section; } BinarySection &BinaryContext::registerSection(SectionRef Section) { return registerSection(new BinarySection(*this, Section)); } BinarySection & BinaryContext::registerSection(StringRef SectionName, const BinarySection &OriginalSection) { return registerSection(new BinarySection(*this, SectionName, OriginalSection)); } BinarySection &BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, unsigned ELFFlags, uint8_t *Data, uint64_t Size, unsigned Alignment, bool IsLocal) { auto NamedSections = getSectionByName(Name); if (NamedSections.begin() != NamedSections.end()) { assert(std::next(NamedSections.begin()) == NamedSections.end() && "can only update unique sections"); auto *Section = NamedSections.begin()->second; DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); const auto Flag = Section->isAllocatable(); Section->update(Data, Size, Alignment, ELFType, ELFFlags, IsLocal); DEBUG(dbgs() << *Section << "\n"); assert(Flag == Section->isAllocatable() && "can't change section allocation status"); return *Section; } return registerSection(new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags, IsLocal)); } bool BinaryContext::deregisterSection(BinarySection &Section) { auto *SectionPtr = &Section; auto Itr = Sections.find(SectionPtr); if (Itr != Sections.end()) { auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); while (Range.first != Range.second) { if (Range.first->second == SectionPtr) { AddressToSection.erase(Range.first); break; } ++Range.first; } auto NameRange = NameToSection.equal_range(SectionPtr->getName()); while (NameRange.first != NameRange.second) { if (NameRange.first->second == SectionPtr) { NameToSection.erase(NameRange.first); break; } 
++NameRange.first; } Sections.erase(Itr); delete SectionPtr; return true; } return false; } void BinaryContext::printSections(raw_ostream &OS) const { for (auto &Section : Sections) { OS << "BOLT-INFO: " << *Section << "\n"; } } BinarySection &BinaryContext::absoluteSection() { if (auto Section = getUniqueSectionByName("")) return *Section; return registerOrUpdateSection("", ELF::SHT_NULL, 0u); } ErrorOr BinaryContext::getUnsignedValueAtAddress(uint64_t Address, size_t Size) const { const auto Section = getSectionForAddress(Address); if (!Section) return std::make_error_code(std::errc::bad_address); if (Section->isVirtual()) return 0; DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), AsmInfo->getCodePointerSize()); auto ValueOffset = static_cast(Address - Section->getAddress()); return DE.getUnsigned(&ValueOffset, Size); } ErrorOr BinaryContext::getSignedValueAtAddress(uint64_t Address, size_t Size) const { const auto Section = getSectionForAddress(Address); if (!Section) return std::make_error_code(std::errc::bad_address); if (Section->isVirtual()) return 0; DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), AsmInfo->getCodePointerSize()); auto ValueOffset = static_cast(Address - Section->getAddress()); return DE.getSigned(&ValueOffset, Size); } void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type, uint64_t Addend, uint64_t Value) { auto Section = getSectionForAddress(Address); assert(Section && "cannot find section for address"); Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, Value); } bool BinaryContext::removeRelocationAt(uint64_t Address) { auto Section = getSectionForAddress(Address); assert(Section && "cannot find section for address"); return Section->removeRelocationAt(Address - Section->getAddress()); } const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { auto Section = getSectionForAddress(Address); if (!Section) return nullptr; return 
Section->getRelocationAt(Address - Section->getAddress()); } void BinaryContext::exitWithBugReport(StringRef Message, const BinaryFunction &Function) const { errs() << "=======================================\n"; errs() << "BOLT is unable to proceed because it couldn't properly understand " "this function.\n"; errs() << "If you are running the most recent version of BOLT, you may " "want to " "report this and paste this dump.\nPlease check that there is no " "sensitive contents being shared in this dump.\n"; errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; ScopedPrinter SP(errs()); SP.printBinaryBlock("Function contents", *getFunctionData(Function)); errs() << "\n"; Function.dump(); errs() << "ERROR: " << Message; errs() << "\n=======================================\n"; exit(1); } BinaryFunction * BinaryContext::createInjectedBinaryFunction(const std::string &Name, bool IsSimple) { InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); auto *BF = InjectedBinaryFunctions.back(); setSymbolToFunctionMap(BF->getSymbol(), BF); return BF; } std::pair BinaryContext::calculateEmittedSize(BinaryFunction &BF) { // Adjust branch instruction to match the current layout. BF.fixBranches(); // Create local MC context to isolate the effect of ephemeral code emission. 
std::unique_ptr LocalMOFI = llvm::make_unique(); std::unique_ptr LocalCtx = llvm::make_unique(AsmInfo.get(), MRI.get(), LocalMOFI.get()); LocalMOFI->InitMCObjectFileInfo(*TheTriple, /*PIC=*/false, *LocalCtx); auto *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); auto *MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *LocalCtx); SmallString<256> Code; raw_svector_ostream VecOS(Code); std::unique_ptr Streamer(TheTarget->createMCObjectStreamer( *TheTriple, *LocalCtx, std::unique_ptr(MAB), VecOS, std::unique_ptr(MCE), *STI, /* RelaxAll */ false, /* IncrementalLinkerCompatible */ false, /* DWARFMustBeAtTheEnd */ false)); Streamer->InitSections(false); auto *Section = LocalMOFI->getTextSection(); Section->setHasInstructions(true); auto *StartLabel = LocalCtx->getOrCreateSymbol("__hstart"); auto *EndLabel = LocalCtx->getOrCreateSymbol("__hend"); auto *ColdStartLabel = LocalCtx->getOrCreateSymbol("__cstart"); auto *ColdEndLabel = LocalCtx->getOrCreateSymbol("__cend"); Streamer->SwitchSection(Section); Streamer->EmitLabel(StartLabel); BF.emitBody(*Streamer, /*EmitColdPart = */false, /*EmitCodeOnly = */true); Streamer->EmitLabel(EndLabel); if (BF.isSplit()) { auto *ColdSection = LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); ColdSection->setHasInstructions(true); Streamer->SwitchSection(ColdSection); Streamer->EmitLabel(ColdStartLabel); BF.emitBody(*Streamer, /*EmitColdPart = */true, /*EmitCodeOnly = */true); Streamer->EmitLabel(ColdEndLabel); } // To avoid calling MCObjectStreamer::flushPendingLabels() which is private. Streamer->EmitBytes(StringRef("")); auto &Assembler = static_cast(Streamer.get())->getAssembler(); MCAsmLayout Layout(Assembler); Assembler.layout(Layout); const auto HotSize = Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); const auto ColdSize = BF.isSplit() ? 
Layout.getSymbolOffset(*ColdEndLabel) - Layout.getSymbolOffset(*ColdStartLabel) : 0ULL; // Clean-up the effect of the code emission. for (const auto &Symbol : Assembler.symbols()) { auto *MutableSymbol = const_cast(&Symbol); MutableSymbol->setUndefined(); MutableSymbol->setIsRegistered(false); } return std::make_pair(HotSize, ColdSize); } BinaryFunction * BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address, bool CheckPastEnd, bool UseMaxSize, bool Shallow) { auto FI = BinaryFunctions.upper_bound(Address); if (FI == BinaryFunctions.begin()) return nullptr; --FI; const auto UsedSize = UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) return nullptr; auto *BF = &FI->second; if (Shallow) return BF; while (BF->getParentFunction()) BF = BF->getParentFunction(); return BF; } BinaryFunction * BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) { if (const auto *BD = getBinaryDataAtAddress(Address)) { if (auto *BF = getFunctionForSymbol(BD->getSymbol())) { while (BF->getParentFunction() && !Shallow) { BF = BF->getParentFunction(); } return BF; } } return nullptr; } DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( const DWARFAddressRangesVector &InputRanges) const { DebugAddressRangesVector OutputRanges; for (const auto Range : InputRanges) { auto BFI = BinaryFunctions.lower_bound(Range.LowPC); while (BFI != BinaryFunctions.end()) { const auto &Function = BFI->second; if (Function.getAddress() >= Range.HighPC) break; const auto FunctionRanges = Function.getOutputAddressRanges(); std::move(std::begin(FunctionRanges), std::end(FunctionRanges), std::back_inserter(OutputRanges)); std::advance(BFI, 1); } } return OutputRanges; }