llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp
Job Noorman 23c8d38258 [BOLT] Calculate input to output address map using BOLTLinker
BOLT uses MCAsmLayout to calculate the output values of basic blocks.
This means output values are calculated based on a pre-linking state and
any changes to symbol values during linking will cause incorrect values
to be used.

This issue was first addressed in D154604 by adding all basic block
symbols to the symbol table for the linker to resolve them. However, the
runtime overhead of handling this huge symbol table turned out to be
prohibitively large.

This patch solves the issue in a different way. First, a temporary
section containing [input address, output symbol] pairs is emitted to the
intermediary object file. The linker will resolve all these references
so we end up with a section of [input address, output address] pairs.
This section is then parsed and used to:
- Replace BinaryBasicBlock::OffsetTranslationTable
- Replace BinaryFunction::InputOffsetToAddressMap
- Update BinaryBasicBlock::OutputAddressRange

Note that the reason this is more performant than the previous attempt
is that these symbol references do not cause entries to be added to the
symbol table. Instead, section-relative references are used for the
relocations.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D155604
2023-08-21 10:36:20 +02:00

331 lines
12 KiB
C++

//===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#define DEBUG_TYPE "bolt-bat"
namespace llvm {
namespace bolt {
const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const BinaryBasicBlock &BB,
uint64_t FuncAddress) {
const uint64_t BBOutputOffset =
BB.getOutputAddressRange().first - FuncAddress;
const uint32_t BBInputOffset = BB.getInputOffset();
// Every output BB must track back to an input BB for profile collection
// in bolted binaries. If we are missing an offset, it means this block was
// created by a pass. We will skip writing any entries for it, and this means
// any traffic happening in this block will map to the previous block in the
// layout. This covers the case where an input basic block is split into two,
// and the second one lacks any offset.
if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
return;
LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
// NOPs or a jump to the next block (successor). Either way, the successor
// and this deleted block will both share the same output address (the same
// key), and we need to map back. We choose here to privilege the successor by
// allowing it to overwrite the previously inserted key in the map.
Map[BBOutputOffset] = BBInputOffset;
const auto &IOAddressMap =
BB.getFunction()->getBinaryContext().getIOAddressMap();
for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
assert(OutputAddress && "Unknown instruction address");
const auto OutputOffset = *OutputAddress - FuncAddress;
// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
continue;
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
<< Twine::utohexstr(InputOffset) << " (branch)\n");
Map.insert(
std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset | BRANCHENTRY));
}
}
void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
// We don't need a translation table if the body of the function hasn't
// changed
if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
continue;
LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
LLVM_DEBUG(dbgs() << " Address reference: 0x"
<< Twine::utohexstr(Function.getOutputAddress()) << "\n");
MapTy Map;
for (const BinaryBasicBlock *const BB :
Function.getLayout().getMainFragment())
writeEntriesForBB(Map, *BB, Function.getOutputAddress());
Maps.emplace(Function.getOutputAddress(), std::move(Map));
if (!Function.isSplit())
continue;
// Split maps
LLVM_DEBUG(dbgs() << " Cold part\n");
for (const FunctionFragment &FF :
Function.getLayout().getSplitFragments()) {
Map.clear();
for (const BinaryBasicBlock *const BB : FF)
writeEntriesForBB(Map, *BB, FF.getAddress());
Maps.emplace(FF.getAddress(), std::move(Map));
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
}
}
const uint32_t NumFuncs = Maps.size();
OS.write(reinterpret_cast<const char *>(&NumFuncs), 4);
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
for (auto &MapEntry : Maps) {
const uint64_t Address = MapEntry.first;
MapTy &Map = MapEntry.second;
const uint32_t NumEntries = Map.size();
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << ".\n");
OS.write(reinterpret_cast<const char *>(&Address), 8);
OS.write(reinterpret_cast<const char *>(&NumEntries), 4);
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
OS.write(reinterpret_cast<const char *>(&KeyVal.first), 4);
OS.write(reinterpret_cast<const char *>(&KeyVal.second), 4);
}
}
const uint32_t NumColdEntries = ColdPartSource.size();
LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
<< " cold part mappings.\n");
OS.write(reinterpret_cast<const char *>(&NumColdEntries), 4);
for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
OS.write(reinterpret_cast<const char *>(&ColdEntry.first), 8);
OS.write(reinterpret_cast<const char *>(&ColdEntry.second), 8);
LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
<< Twine::utohexstr(ColdEntry.second) << "\n");
}
outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
outs() << "BOLT-INFO: Wrote " << NumColdEntries
<< " BAT cold-to-hot entries\n";
}
std::error_code BoltAddressTranslation::parse(StringRef Buf) {
DataExtractor DE = DataExtractor(Buf, true, 8);
uint64_t Offset = 0;
if (Buf.size() < 12)
return make_error_code(llvm::errc::io_error);
const uint32_t NameSz = DE.getU32(&Offset);
const uint32_t DescSz = DE.getU32(&Offset);
const uint32_t Type = DE.getU32(&Offset);
if (Type != BinarySection::NT_BOLT_BAT ||
Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
return make_error_code(llvm::errc::io_error);
StringRef Name = Buf.slice(Offset, Offset + NameSz);
Offset = alignTo(Offset + NameSz, 4);
if (Name.substr(0, 4) != "BOLT")
return make_error_code(llvm::errc::io_error);
if (Buf.size() - Offset < 4)
return make_error_code(llvm::errc::io_error);
const uint32_t NumFunctions = DE.getU32(&Offset);
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
for (uint32_t I = 0; I < NumFunctions; ++I) {
if (Buf.size() - Offset < 12)
return make_error_code(llvm::errc::io_error);
const uint64_t Address = DE.getU64(&Offset);
const uint32_t NumEntries = DE.getU32(&Offset);
MapTy Map;
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
if (Buf.size() - Offset < 8 * NumEntries)
return make_error_code(llvm::errc::io_error);
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint32_t OutputAddr = DE.getU32(&Offset);
const uint32_t InputAddr = DE.getU32(&Offset);
Map.insert(std::pair<uint32_t, uint32_t>(OutputAddr, InputAddr));
LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr) << " -> "
<< Twine::utohexstr(InputAddr) << "\n");
}
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
}
if (Buf.size() - Offset < 4)
return make_error_code(llvm::errc::io_error);
const uint32_t NumColdEntries = DE.getU32(&Offset);
LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
for (uint32_t I = 0; I < NumColdEntries; ++I) {
if (Buf.size() - Offset < 16)
return make_error_code(llvm::errc::io_error);
const uint32_t ColdAddress = DE.getU64(&Offset);
const uint32_t HotAddress = DE.getU64(&Offset);
ColdPartSource.insert(
std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
<< Twine::utohexstr(HotAddress) << "\n");
}
outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
outs() << "BOLT-INFO: Parsed " << NumColdEntries
<< " BAT cold-to-hot entries\n";
return std::error_code();
}
void BoltAddressTranslation::dump(raw_ostream &OS) {
const size_t NumTables = Maps.size();
OS << "BAT tables for " << NumTables << " functions:\n";
for (const auto &MapEntry : Maps) {
OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
OS << "BB mappings:\n";
for (const auto &Entry : MapEntry.second) {
const bool IsBranch = Entry.second & BRANCHENTRY;
const uint32_t Val = Entry.second & ~BRANCHENTRY;
OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
<< "0x" << Twine::utohexstr(Val);
if (IsBranch)
OS << " (branch)";
OS << "\n";
}
OS << "\n";
}
const size_t NumColdParts = ColdPartSource.size();
if (!NumColdParts)
return;
OS << NumColdParts << " cold mappings:\n";
for (const auto &Entry : ColdPartSource) {
OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
<< Twine::utohexstr(Entry.second) << "\n";
}
OS << "\n";
}
uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
uint64_t Offset,
bool IsBranchSrc) const {
auto Iter = Maps.find(FuncAddress);
if (Iter == Maps.end())
return Offset;
const MapTy &Map = Iter->second;
auto KeyVal = Map.upper_bound(Offset);
if (KeyVal == Map.begin())
return Offset;
--KeyVal;
const uint32_t Val = KeyVal->second & ~BRANCHENTRY;
// Branch source addresses are translated to the first instruction of the
// source BB to avoid accounting for modifications BOLT may have made in the
// BB regarding deletion/addition of instructions.
if (IsBranchSrc)
return Val;
return Offset - KeyVal->first + Val;
}
std::optional<BoltAddressTranslation::FallthroughListTy>
BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
uint64_t From,
uint64_t To) const {
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
// Filter out trivial case
if (From >= To)
return Res;
From -= FuncAddress;
To -= FuncAddress;
auto Iter = Maps.find(FuncAddress);
if (Iter == Maps.end())
return std::nullopt;
const MapTy &Map = Iter->second;
auto FromIter = Map.upper_bound(From);
if (FromIter == Map.begin())
return Res;
// Skip instruction entries, to create fallthroughs we are only interested in
// BB boundaries
do {
if (FromIter == Map.begin())
return Res;
--FromIter;
} while (FromIter->second & BRANCHENTRY);
auto ToIter = Map.upper_bound(To);
if (ToIter == Map.begin())
return Res;
--ToIter;
if (FromIter->first >= ToIter->first)
return Res;
for (auto Iter = FromIter; Iter != ToIter;) {
const uint32_t Src = Iter->first;
if (Iter->second & BRANCHENTRY) {
++Iter;
continue;
}
++Iter;
while (Iter->second & BRANCHENTRY && Iter != ToIter)
++Iter;
if (Iter->second & BRANCHENTRY)
break;
Res.emplace_back(Src, Iter->first);
}
return Res;
}
uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
auto Iter = ColdPartSource.find(Address);
if (Iter == ColdPartSource.end())
return 0;
return Iter->second;
}
bool BoltAddressTranslation::enabledFor(
llvm::object::ELFObjectFileBase *InputFile) const {
for (const SectionRef &Section : InputFile->sections()) {
Expected<StringRef> SectionNameOrErr = Section.getName();
if (Error E = SectionNameOrErr.takeError())
continue;
if (SectionNameOrErr.get() == SECTION_NAME)
return true;
}
return false;
}
} // namespace bolt
} // namespace llvm