Revert "[BOLT] Add BB index to BAT (#86044)"

This reverts commit 3b3de48fd84b8269d5f45ee0a9dc6b7448368424.
This commit is contained in:
Amir Ayupov 2024-03-22 08:38:40 -07:00
parent 6e28ecd799
commit f66d631bf8
12 changed files with 77 additions and 92 deletions

View File

@ -90,12 +90,11 @@ current function.
### Address translation table
Delta encoding means that only the difference with the previous corresponding
entry is encoded. Input offsets implicitly start at zero.
| Entry | Encoding | Description | Branch/BB |
| ------ | ------| ----------- | ------ |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target

View File

@ -122,10 +122,6 @@ public:
/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;
/// Returns BB index by function output address (after BOLT) and basic block
/// input offset.
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;
/// True if a given \p Address is a function with translation table entry.
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
@ -158,8 +154,7 @@ private:
std::map<uint64_t, MapTy> Maps;
/// Map basic block input offset to a basic block index and hash pair.
using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
using BBHashMap = std::unordered_map<uint32_t, size_t>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
/// Links outlined cold blocks to their original function

View File

@ -45,8 +45,6 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
getBBHash(HotFuncAddress, BBInputOffset)));
(void)HotFuncAddress;
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
getBBIndex(HotFuncAddress, BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
@ -219,7 +217,6 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
}
size_t Index = 0;
uint64_t InOffset = 0;
size_t PrevBBIndex = 0;
// Output and Input addresses are delta-encoded
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint64_t OutputAddress = KeyVal.first + Address;
@ -229,15 +226,11 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
unsigned BBIndex;
size_t BBHash;
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
// Basic block hash
size_t BBHash = FuncHashPair.second[InOffset >> 1];
OS.write(reinterpret_cast<char *>(&BBHash), 8);
// Basic block index in the input binary
encodeULEB128(BBIndex - PrevBBIndex, OS);
PrevBBIndex = BBIndex;
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
InOffset >> 1, BBHash, BBIndex));
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
InOffset >> 1, BBHash));
}
}
}
@ -323,7 +316,6 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
uint64_t InputOffset = 0;
size_t BBIndex = 0;
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
const uint64_t OutputAddress = PrevAddress + OutputDelta;
@ -338,25 +330,19 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
}
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
size_t BBHash = 0;
size_t BBIndexDelta = 0;
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
if (!IsBranchEntry) {
BBHash = DE.getU64(&Offset, &Err);
BBIndexDelta = DE.getULEB128(&Offset, &Err);
BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
std::pair(BBIndex, BBHash));
FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
}
LLVM_DEBUG({
dbgs() << formatv(
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
if (!IsBranchEntry) {
dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
getULEB128Size(BBIndexDelta));
}
if (BBHash)
dbgs() << formatv(" {0:x}", BBHash);
dbgs() << '\n';
});
}
@ -508,19 +494,14 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
FuncHashes[BF.getAddress()].first = BF.computeHash();
BF.computeBlockHashes();
for (const BinaryBasicBlock &BB : BF)
FuncHashes[BF.getAddress()].second.emplace(
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
BB.getHash());
}
}
unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
}
size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
}
size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {

View File

@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 376
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 344
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries

View File

@ -37,7 +37,7 @@
# CHECK: BOLT: 3 out of 7 functions were overwritten.
# CHECK: BOLT-INFO: Wrote 6 BAT maps
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
# CHECK: BOLT-INFO: BAT section size (bytes): 920
# CHECK: BOLT-INFO: BAT section size (bytes): 816
#
# usqrt mappings (hot part). We match against any key (left side containing
# the bolted binary offsets) because BOLT may change where it puts instructions

View File

@ -5863,8 +5863,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
} else if (Triple.getArch() == llvm::Triple::x86_64) {
Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
CM);
} else if (Triple.isNVPTX() || Triple.isAMDGPU()) {
// NVPTX/AMDGPU does not care about the code model and will accept
} else if (Triple.isNVPTX() || Triple.isAMDGPU() || Triple.isSPIRV()) {
// NVPTX/AMDGPU/SPIRV does not care about the code model and will accept
// whatever works for the host.
Ok = true;
} else if (Triple.isSPARC64()) {

View File

@ -2,4 +2,5 @@
// DEFINE: %{check} = %clang -### --target=x86_64-linux-gnu -c -mcmodel=medium
// RUN: %{check} -x cuda %s --cuda-path=%S/Inputs/CUDA/usr/local/cuda --offload-arch=sm_60 --no-cuda-version-check -fbasic-block-sections=all
// RUN: %{check} -x hip %s --offload=spirv64 -nogpulib -nogpuinc
// RUN: %{check} -x hip %s --rocm-path=%S/Inputs/rocm -nogpulib -nogpuinc

View File

@ -612,7 +612,7 @@ static void replaceCommonSymbols() {
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
inputSections.push_back(isec);
addInputSection(isec);
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
// and pass them on here.
@ -1220,53 +1220,18 @@ static void createFiles(const InputArgList &args) {
static void gatherInputSections() {
TimeTraceScope timeScope("Gathering input sections");
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const Section *section : file->sections) {
// Compact unwind entries require special handling elsewhere. (In
// contrast, EH frames are handled like regular ConcatInputSections.)
if (section->name == section_names::compactUnwind)
continue;
ConcatOutputSection *osec = nullptr;
for (const Subsection &subsection : section->subsections) {
if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
if (isec->isCoalescedWeak())
continue;
if (config->emitInitOffsets &&
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
in.initOffsets->addInput(isec);
continue;
}
isec->outSecOff = inputOrder++;
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
inputSections.push_back(isec);
} else if (auto *isec =
dyn_cast<CStringInputSection>(subsection.isec)) {
if (isec->getName() == section_names::objcMethname) {
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
in.objcMethnameSection->inputOrder = inputOrder++;
in.objcMethnameSection->addInput(isec);
} else {
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
in.cStringSection->inputOrder = inputOrder++;
in.cStringSection->addInput(isec);
}
} else if (auto *isec =
dyn_cast<WordLiteralInputSection>(subsection.isec)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
in.wordLiteralSection->inputOrder = inputOrder++;
in.wordLiteralSection->addInput(isec);
} else {
llvm_unreachable("unexpected input section kind");
}
}
for (const Subsection &subsection : section->subsections)
addInputSection(subsection.isec);
}
if (!file->objCImageInfo.empty())
in.objCImageInfo->addFile(file);
}
assert(inputOrder <= UnspecifiedInputOrder);
}
static void foldIdenticalLiterals() {
@ -1422,6 +1387,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
concatOutputSections.clear();
inputFiles.clear();
inputSections.clear();
inputSectionsOrder = 0;
loadedArchives.clear();
loadedObjectFrameworks.clear();
missingAutolinkWarnings.clear();

View File

@ -37,6 +37,44 @@ static_assert(sizeof(void *) != 8 ||
"instances of it");
std::vector<ConcatInputSection *> macho::inputSections;
int macho::inputSectionsOrder = 0;
// Single entry point for registering a newly created InputSection. The section
// is dispatched on its dynamic kind: concat sections end up in the global
// 'inputSections' vector (or in the init-offsets synthetic section when
// config->emitInitOffsets is set and they hold S_MOD_INIT_FUNC_POINTERS), while
// C-string and word-literal sections are handed to their synthetic sections.
void lld::macho::addInputSection(InputSection *inputSection) {
  // Hands `piece` to the synthetic section `synthetic`. If that section has no
  // input order yet, it is stamped with the current counter value first.
  auto addToSynthetic = [](auto &synthetic, auto *piece) {
    if (synthetic.inputOrder == UnspecifiedInputOrder)
      synthetic.inputOrder = inputSectionsOrder++;
    synthetic.addInput(piece);
  };

  if (auto *concat = dyn_cast<ConcatInputSection>(inputSection)) {
    // Coalesced weak sections are not registered anywhere.
    if (concat->isCoalescedWeak())
      return;
    if (config->emitInitOffsets &&
        sectionType(concat->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
      in.initOffsets->addInput(concat);
      return;
    }
    // outSecOff receives the current counter value, which is then advanced.
    concat->outSecOff = inputSectionsOrder++;
    concat->parent = ConcatOutputSection::getOrCreateForInput(concat);
    inputSections.push_back(concat);
  } else if (auto *cstring = dyn_cast<CStringInputSection>(inputSection)) {
    // Method-name strings go to their own section; all others share one.
    if (cstring->getName() == section_names::objcMethname)
      addToSynthetic(*in.objcMethnameSection, cstring);
    else
      addToSynthetic(*in.cStringSection, cstring);
  } else if (auto *literal = dyn_cast<WordLiteralInputSection>(inputSection)) {
    addToSynthetic(*in.wordLiteralSection, literal);
  } else {
    llvm_unreachable("unexpected input section kind");
  }
  // Only reached on the non-returning paths above, exactly as before.
  assert(inputSectionsOrder <= UnspecifiedInputOrder);
}
uint64_t InputSection::getFileSize() const {
return isZeroFill(getFlags()) ? 0 : getSize();

View File

@ -302,6 +302,8 @@ bool isEhFrameSection(const InputSection *);
bool isGccExceptTabSection(const InputSection *);
extern std::vector<ConcatInputSection *> inputSections;
// This is used as a counter for specifying input order for input sections
extern int inputSectionsOrder;
namespace section_names {
@ -369,6 +371,7 @@ constexpr const char addrSig[] = "__llvm_addrsig";
} // namespace section_names
void addInputSection(InputSection *inputSection);
} // namespace macho
std::string toString(const macho::InputSection *);

View File

@ -790,7 +790,7 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
infoCategoryWriter.catPtrListInfo.align);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
listSec->live = true;
allInputSections.push_back(listSec);
addInputSection(listSec);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
@ -848,7 +848,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
infoCategoryWriter.catPtrListInfo.align);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
listSec->live = true;
allInputSections.push_back(listSec);
addInputSection(listSec);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
@ -889,7 +889,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
bodyData, infoCategoryWriter.catListInfo.align);
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
newCatList->live = true;
allInputSections.push_back(newCatList);
addInputSection(newCatList);
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
@ -927,7 +927,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
bodyData, infoCategoryWriter.catBodyInfo.align);
newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
newBodySec->live = true;
allInputSections.push_back(newBodySec);
addInputSection(newBodySec);
std::string symName =
objc::symbol_names::category + baseClassName + "_$_(" + name + ")";
@ -1132,7 +1132,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
infoCategoryWriter.catListInfo.align);
listSec->parent = infoCategoryWriter.catListInfo.outputSection;
listSec->live = true;
allInputSections.push_back(listSec);
addInputSection(listSec);
std::string slotSymName = "<__objc_catlist slot for category ";
slotSymName += nonErasedCatBody->getName();
@ -1221,9 +1221,11 @@ void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
StringRef ObjcCategoryMerger::newStringData(const char *str) {
uint32_t len = strlen(str);
auto &data = newSectionData(len + 1);
uint32_t bufSize = len + 1;
auto &data = newSectionData(bufSize);
char *strData = reinterpret_cast<char *>(data.data());
strncpy(strData, str, len);
// Copy the string chars and null-terminator
memcpy(strData, str, bufSize);
return StringRef(strData, len);
}

View File

@ -793,7 +793,7 @@ void StubHelperSection::setUp() {
in.imageLoaderCache->parent =
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
inputSections.push_back(in.imageLoaderCache);
addInputSection(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
// argument doesn't matter.
dyldPrivate =
@ -855,7 +855,7 @@ ConcatInputSection *ObjCSelRefsSection::makeSelRef(StringRef methname) {
/*addend=*/static_cast<int64_t>(methnameOffset),
/*referent=*/in.objcMethnameSection->isec});
objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
inputSections.push_back(objcSelref);
addInputSection(objcSelref);
objcSelref->isFinal = true;
methnameToSelref[CachedHashStringRef(methname)] = objcSelref;
return objcSelref;