mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 18:36:05 +00:00
[memprof] Add Version2 of the indexed MemProf format (#89100)
This patch adds Version2 of the indexed MemProf format. The new format comes with a hash table from CallStackId to actual call stacks llvm::SmallVector<FrameId>. The rest of the format refers to call stacks with CallStackId. This "values + references" model effectively deduplicates call stacks. With this patch, a large indexed memprof file of mine shrinks from 4.4GB to 1.6GB, a 64% reduction. This patch does not make Version2 generally available yet as I am planning to make a few more changes to the format.
This commit is contained in:
parent
7aad1ee70f
commit
172f6ddfa7
@ -560,6 +560,8 @@ using MemProfRecordHashTable =
|
||||
OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
|
||||
using MemProfFrameHashTable =
|
||||
OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
|
||||
using MemProfCallStackHashTable =
|
||||
OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
|
||||
|
||||
template <typename HashTableImpl>
|
||||
class InstrProfReaderItaniumRemapper;
|
||||
@ -666,6 +668,8 @@ private:
|
||||
std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
|
||||
/// MemProf frame profile data on-disk indexed via frame id.
|
||||
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
|
||||
/// MemProf call stack data on-disk indexed via call stack id.
|
||||
std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
|
||||
/// VTableNamePtr points to the beginning of compressed vtable names.
|
||||
/// When a symtab is constructed from profiles by llvm-profdata, the list of
|
||||
/// names could be decompressed based on `VTableNamePtr` and
|
||||
|
@ -60,6 +60,10 @@ private:
|
||||
// inline.
|
||||
llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;
|
||||
|
||||
// A map to hold call stack id to call stacks.
|
||||
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
|
||||
MemProfCallStackData;
|
||||
|
||||
// List of binary ids.
|
||||
std::vector<llvm::object::BuildID> BinaryIds;
|
||||
|
||||
@ -114,6 +118,12 @@ public:
|
||||
bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
|
||||
function_ref<void(Error)> Warn);
|
||||
|
||||
/// Add a call stack identified by the hash of the contents of the call stack
|
||||
/// in \p CallStack.
|
||||
bool addMemProfCallStack(const memprof::CallStackId CSId,
|
||||
const llvm::SmallVector<memprof::FrameId> &CallStack,
|
||||
function_ref<void(Error)> Warn);
|
||||
|
||||
// Add a binary id to the binary ids list.
|
||||
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
|
||||
|
||||
@ -187,9 +197,12 @@ public:
|
||||
return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
|
||||
}
|
||||
|
||||
// Internal interface for testing purpose only.
|
||||
// Internal interfaces for testing purpose only.
|
||||
void setValueProfDataEndianness(llvm::endianness Endianness);
|
||||
void setOutputSparse(bool Sparse);
|
||||
/// Record which indexed MemProf format version should be used when the
/// MemProf section is written out.
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
  MemProfVersionRequested = Version;
}
|
||||
// Compute the overlap b/w this object and Other. Program level result is
|
||||
// stored in Overlap and function level result is stored in FuncLevelOverlap.
|
||||
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
|
||||
|
@ -29,7 +29,7 @@ enum IndexedVersion : uint64_t {
|
||||
};
|
||||
|
||||
constexpr uint64_t MinimumSupportedVersion = Version0;
|
||||
constexpr uint64_t MaximumSupportedVersion = Version1;
|
||||
constexpr uint64_t MaximumSupportedVersion = Version2;
|
||||
|
||||
// Verify that the minimum and maximum satisfy the obvious constraint.
|
||||
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
|
||||
@ -633,6 +633,96 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Trait for writing call stacks to the on-disk hash table.
|
||||
class CallStackWriterTrait {
|
||||
public:
|
||||
using key_type = CallStackId;
|
||||
using key_type_ref = CallStackId;
|
||||
|
||||
using data_type = llvm::SmallVector<FrameId>;
|
||||
using data_type_ref = llvm::SmallVector<FrameId> &;
|
||||
|
||||
using hash_value_type = CallStackId;
|
||||
using offset_type = uint64_t;
|
||||
|
||||
static hash_value_type ComputeHash(key_type_ref K) { return K; }
|
||||
|
||||
static std::pair<offset_type, offset_type>
|
||||
EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
|
||||
using namespace support;
|
||||
endian::Writer LE(Out, llvm::endianness::little);
|
||||
offset_type N = sizeof(K);
|
||||
LE.write<offset_type>(N);
|
||||
offset_type M = sizeof(FrameId) * V.size();
|
||||
LE.write<offset_type>(M);
|
||||
return std::make_pair(N, M);
|
||||
}
|
||||
|
||||
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
|
||||
using namespace support;
|
||||
endian::Writer LE(Out, llvm::endianness::little);
|
||||
LE.write<key_type>(K);
|
||||
}
|
||||
|
||||
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
|
||||
offset_type /*Unused*/) {
|
||||
using namespace support;
|
||||
endian::Writer LE(Out, llvm::endianness::little);
|
||||
// Emit the frames. We do not explicitly emit the length of the vector
|
||||
// because it can be inferred from the data length.
|
||||
for (FrameId F : V)
|
||||
LE.write<FrameId>(F);
|
||||
}
|
||||
};
|
||||
|
||||
// Trait for reading call stack mappings from the on-disk hash table.
|
||||
class CallStackLookupTrait {
|
||||
public:
|
||||
using data_type = const llvm::SmallVector<FrameId>;
|
||||
using internal_key_type = CallStackId;
|
||||
using external_key_type = CallStackId;
|
||||
using hash_value_type = CallStackId;
|
||||
using offset_type = uint64_t;
|
||||
|
||||
static bool EqualKey(internal_key_type A, internal_key_type B) {
|
||||
return A == B;
|
||||
}
|
||||
static uint64_t GetInternalKey(internal_key_type K) { return K; }
|
||||
static uint64_t GetExternalKey(external_key_type K) { return K; }
|
||||
|
||||
hash_value_type ComputeHash(internal_key_type K) { return K; }
|
||||
|
||||
static std::pair<offset_type, offset_type>
|
||||
ReadKeyDataLength(const unsigned char *&D) {
|
||||
using namespace support;
|
||||
|
||||
offset_type KeyLen =
|
||||
endian::readNext<offset_type, llvm::endianness::little>(D);
|
||||
offset_type DataLen =
|
||||
endian::readNext<offset_type, llvm::endianness::little>(D);
|
||||
return std::make_pair(KeyLen, DataLen);
|
||||
}
|
||||
|
||||
uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
|
||||
using namespace support;
|
||||
return endian::readNext<external_key_type, llvm::endianness::little>(D);
|
||||
}
|
||||
|
||||
data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
|
||||
using namespace support;
|
||||
llvm::SmallVector<FrameId> CS;
|
||||
// Derive the number of frames from the data length.
|
||||
uint64_t NumFrames = Length / sizeof(FrameId);
|
||||
assert(Length % sizeof(FrameId) == 0);
|
||||
CS.reserve(NumFrames);
|
||||
for (size_t I = 0; I != NumFrames; ++I) {
|
||||
FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
|
||||
CS.push_back(F);
|
||||
}
|
||||
return CS;
|
||||
}
|
||||
};
|
||||
|
||||
// Compute a CallStackId for a given call stack.
|
||||
CallStackId hashCallStack(ArrayRef<FrameId> CS);
|
||||
|
||||
|
@ -51,6 +51,12 @@ public:
|
||||
return IdToFrame;
|
||||
}
|
||||
|
||||
// Return a const reference to the internal Id to call stacks.
|
||||
const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
|
||||
getCallStacks() const {
|
||||
return CSIdToCallStack;
|
||||
}
|
||||
|
||||
// Return a const reference to the internal function profile data.
|
||||
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
|
||||
getProfileData() const {
|
||||
|
@ -1249,14 +1249,14 @@ Error IndexedInstrProfReader::readHeader() {
|
||||
|
||||
// Read the first 64-bit word, which may be RecordTableOffset in
|
||||
// memprof::MemProfVersion0 or the MemProf version number in
|
||||
// memprof::MemProfVersion1.
|
||||
// memprof::MemProfVersion1 and above.
|
||||
const uint64_t FirstWord =
|
||||
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
|
||||
|
||||
memprof::IndexedVersion Version = memprof::Version0;
|
||||
if (FirstWord == memprof::Version1) {
|
||||
if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
|
||||
// Everything is good. We can proceed to deserialize the rest.
|
||||
Version = memprof::Version1;
|
||||
Version = static_cast<memprof::IndexedVersion>(FirstWord);
|
||||
} else if (FirstWord >= 24) {
|
||||
// This is a heuristic/hack to detect memprof::MemProfVersion0,
|
||||
// which does not have a version field in the header.
|
||||
@ -1286,6 +1286,18 @@ Error IndexedInstrProfReader::readHeader() {
|
||||
const uint64_t FrameTableOffset =
|
||||
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
|
||||
|
||||
// The offset in the stream right before invoking
|
||||
// CallStackTableGenerator.Emit.
|
||||
uint64_t CallStackPayloadOffset = 0;
|
||||
// The value returned from CallStackTableGenerator.Emit.
|
||||
uint64_t CallStackTableOffset = 0;
|
||||
if (Version >= memprof::Version2) {
|
||||
CallStackPayloadOffset =
|
||||
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
|
||||
CallStackTableOffset =
|
||||
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
|
||||
}
|
||||
|
||||
// Read the schema.
|
||||
auto SchemaOr = memprof::readMemProfSchema(Ptr);
|
||||
if (!SchemaOr)
|
||||
@ -1296,7 +1308,7 @@ Error IndexedInstrProfReader::readHeader() {
|
||||
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
|
||||
/*Buckets=*/Start + RecordTableOffset,
|
||||
/*Payload=*/Ptr,
|
||||
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
|
||||
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
|
||||
|
||||
// Initialize the frame table reader with the payload and bucket offsets.
|
||||
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
|
||||
@ -1304,12 +1316,22 @@ Error IndexedInstrProfReader::readHeader() {
|
||||
/*Payload=*/Start + FramePayloadOffset,
|
||||
/*Base=*/Start, memprof::FrameLookupTrait()));
|
||||
|
||||
if (Version >= memprof::Version2)
|
||||
MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
|
||||
/*Buckets=*/Start + CallStackTableOffset,
|
||||
/*Payload=*/Start + CallStackPayloadOffset,
|
||||
/*Base=*/Start, memprof::CallStackLookupTrait()));
|
||||
|
||||
#ifdef EXPENSIVE_CHECKS
|
||||
// Go through all the records and verify that CSId has been correctly
|
||||
// populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we
|
||||
// would defeat the purpose of OnDiskIterableChainedHashTable.
|
||||
for (const auto &Record : MemProfRecordTable->data())
|
||||
verifyIndexedMemProfRecord(Record);
|
||||
// Note that we can compare CSId against actual call stacks only for
|
||||
// Version0 and Version1 because IndexedAllocationInfo::CallStack and
|
||||
// IndexedMemProfRecord::CallSites are not populated in Version2.
|
||||
if (Version <= memprof::Version1)
|
||||
for (const auto &Record : MemProfRecordTable->data())
|
||||
verifyIndexedMemProfRecord(Record);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1502,7 +1524,29 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
|
||||
return *FrIter;
|
||||
};
|
||||
|
||||
memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
|
||||
// Setup a callback to convert call stack ids to call stacks using the on-disk
|
||||
// hash table.
|
||||
std::optional<memprof::CallStackId> LastUnmappedCSId;
|
||||
auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) {
|
||||
llvm::SmallVector<memprof::Frame> Frames;
|
||||
auto CSIter = MemProfCallStackTable->find(CSId);
|
||||
if (CSIter == MemProfCallStackTable->end()) {
|
||||
LastUnmappedCSId = CSId;
|
||||
} else {
|
||||
const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
|
||||
Frames.reserve(CS.size());
|
||||
for (memprof::FrameId Id : CS)
|
||||
Frames.push_back(IdToFrameCallback(Id));
|
||||
}
|
||||
return Frames;
|
||||
};
|
||||
|
||||
const memprof::IndexedMemProfRecord IndexedRecord = *Iter;
|
||||
memprof::MemProfRecord Record;
|
||||
if (MemProfCallStackTable)
|
||||
Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback);
|
||||
else
|
||||
Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback);
|
||||
|
||||
// Check that all frame ids were successfully converted to frames.
|
||||
if (LastUnmappedFrameId) {
|
||||
@ -1510,6 +1554,14 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
|
||||
"memprof frame not found for frame id " +
|
||||
Twine(*LastUnmappedFrameId));
|
||||
}
|
||||
|
||||
// Check that all call stack ids were successfully converted to call stacks.
|
||||
if (LastUnmappedCSId) {
|
||||
return make_error<InstrProfError>(
|
||||
instrprof_error::hash_mismatch,
|
||||
"memprof call stack not found for call stack id " +
|
||||
Twine(*LastUnmappedCSId));
|
||||
}
|
||||
return Record;
|
||||
}
|
||||
|
||||
|
@ -297,6 +297,23 @@ bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool InstrProfWriter::addMemProfCallStack(
|
||||
const memprof::CallStackId CSId,
|
||||
const llvm::SmallVector<memprof::FrameId> &CallStack,
|
||||
function_ref<void(Error)> Warn) {
|
||||
auto [Iter, Inserted] = MemProfCallStackData.insert({CSId, CallStack});
|
||||
// If a mapping already exists for the current call stack id and it does not
|
||||
// match the new mapping provided then reset the existing contents and bail
|
||||
// out. We don't support the merging of memprof data whose CallStack -> Id
|
||||
// mapping across profiles is inconsistent.
|
||||
if (!Inserted && Iter->second != CallStack) {
|
||||
Warn(make_error<InstrProfError>(instrprof_error::malformed,
|
||||
"call stack to id mapping mismatch"));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
|
||||
llvm::append_range(BinaryIds, BIs);
|
||||
}
|
||||
@ -378,6 +395,12 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
|
||||
return;
|
||||
}
|
||||
|
||||
MemProfCallStackData.reserve(IPW.MemProfCallStackData.size());
|
||||
for (auto &[CSId, CallStack] : IPW.MemProfCallStackData) {
|
||||
if (!addMemProfCallStack(CSId, CallStack, Warn))
|
||||
return;
|
||||
}
|
||||
|
||||
MemProfRecordData.reserve(IPW.MemProfRecordData.size());
|
||||
for (auto &I : IPW.MemProfRecordData) {
|
||||
addMemProfRecord(I.first, I.second);
|
||||
@ -427,8 +450,8 @@ static uint64_t writeMemProfRecords(
|
||||
ProfOStream &OS,
|
||||
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
|
||||
&MemProfRecordData,
|
||||
memprof::MemProfSchema *Schema) {
|
||||
memprof::RecordWriterTrait RecordWriter(memprof::Version1);
|
||||
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) {
|
||||
memprof::RecordWriterTrait RecordWriter(Version);
|
||||
RecordWriter.Schema = Schema;
|
||||
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
|
||||
RecordTableGenerator;
|
||||
@ -461,6 +484,20 @@ static uint64_t writeMemProfFrames(
|
||||
return FrameTableGenerator.Emit(OS.OS);
|
||||
}
|
||||
|
||||
// Serializes the call stack table to OS and returns the value produced by the
// hash table generator's Emit. MemProfCallStackData is emptied as a side
// effect.
static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait> Generator;
  for (auto &Entry : MemProfCallStackData)
    Generator.insert(Entry.first, Entry.second);

  // The map is no longer needed once its entries have been handed to the
  // generator, so release its memory before emitting.
  MemProfCallStackData.clear();

  return Generator.Emit(OS.OS);
}
|
||||
|
||||
static Error writeMemProfV0(
|
||||
ProfOStream &OS,
|
||||
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
|
||||
@ -475,7 +512,7 @@ static Error writeMemProfV0(
|
||||
writeMemProfSchema(OS, Schema);
|
||||
|
||||
uint64_t RecordTableOffset =
|
||||
writeMemProfRecords(OS, MemProfRecordData, &Schema);
|
||||
writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version0);
|
||||
|
||||
uint64_t FramePayloadOffset = OS.tell();
|
||||
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
|
||||
@ -501,7 +538,7 @@ static Error writeMemProfV1(
|
||||
writeMemProfSchema(OS, Schema);
|
||||
|
||||
uint64_t RecordTableOffset =
|
||||
writeMemProfRecords(OS, MemProfRecordData, &Schema);
|
||||
writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version1);
|
||||
|
||||
uint64_t FramePayloadOffset = OS.tell();
|
||||
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
|
||||
@ -512,6 +549,43 @@ static Error writeMemProfV1(
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
// Writes the MemProf section in the Version2 layout:
//   version word, five offset slots, schema, record table, frame table,
//   call stack table.
// The offset slots are written as zero first and patched once the tables have
// been emitted and their positions are known.
static Error writeMemProfV2(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OS.write(memprof::Version2);

  // Reserve one 64-bit slot for each of, in order: record table offset, frame
  // payload offset, frame table offset, call stack payload offset, call stack
  // table offset.
  const uint64_t HeaderUpdatePos = OS.tell();
  constexpr size_t NumHeaderOffsets = 5;
  for (size_t Slot = 0; Slot != NumHeaderOffsets; ++Slot)
    OS.write(0ULL);

  auto Schema = memprof::PortableMemInfoBlock::getSchema();
  writeMemProfSchema(OS, Schema);

  const uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version2);

  // Each payload offset is the stream position just before the matching table
  // is emitted.
  const uint64_t FramePayloadOffset = OS.tell();
  const uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);

  const uint64_t CallStackPayloadOffset = OS.tell();
  const uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfCallStackData);

  // Fill in the reserved slots now that every offset is known.
  uint64_t Header[NumHeaderOffsets] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});

  return Error::success();
}
|
||||
|
||||
// The MemProf profile data includes a simple schema
|
||||
// with the format described below followed by the hashtable:
|
||||
// uint64_t Version
|
||||
@ -530,6 +604,8 @@ static Error writeMemProf(
|
||||
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
|
||||
&MemProfRecordData,
|
||||
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
|
||||
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
|
||||
&MemProfCallStackData,
|
||||
memprof::IndexedVersion MemProfVersionRequested) {
|
||||
|
||||
switch (MemProfVersionRequested) {
|
||||
@ -538,8 +614,8 @@ static Error writeMemProf(
|
||||
case memprof::Version1:
|
||||
return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
|
||||
case memprof::Version2:
|
||||
// TODO: Implement. Fall through to the error handling below for now.
|
||||
break;
|
||||
return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
|
||||
MemProfCallStackData);
|
||||
}
|
||||
|
||||
return make_error<InstrProfError>(
|
||||
@ -658,7 +734,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
|
||||
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
|
||||
MemProfSectionStart = OS.tell();
|
||||
if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
|
||||
MemProfVersionRequested))
|
||||
MemProfCallStackData, MemProfVersionRequested))
|
||||
return E;
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,9 @@ RUN: llvm-profdata show %t.prof.v0 | FileCheck %s
|
||||
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=1 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v1
|
||||
RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
|
||||
|
||||
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
|
||||
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
|
||||
|
||||
For now we only check the validity of the instrumented profile since we don't
|
||||
have a way to display the contents of the memprof indexed format yet.
|
||||
|
||||
|
@ -305,7 +305,8 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
|
||||
cl::desc("Specify the version of the memprof format to use"),
|
||||
cl::init(memprof::Version0),
|
||||
cl::values(clEnumValN(memprof::Version0, "0", "version 0"),
|
||||
clEnumValN(memprof::Version1, "1", "version 1")));
|
||||
clEnumValN(memprof::Version1, "1", "version 1"),
|
||||
clEnumValN(memprof::Version2, "2", "version 2")));
|
||||
|
||||
// Options specific to overlap subcommand.
|
||||
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
|
||||
@ -677,6 +678,18 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
|
||||
if (!Succeeded)
|
||||
return;
|
||||
}
|
||||
|
||||
// Add the call stacks into the writer context.
|
||||
const auto &CSIdToCallStacks = Reader->getCallStacks();
|
||||
for (const auto &I : CSIdToCallStacks) {
|
||||
bool Succeeded = WC->Writer.addMemProfCallStack(
|
||||
/*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
|
||||
// If we weren't able to add the call stacks then it doesn't make sense
|
||||
// to try to add the records from this profile.
|
||||
if (!Succeeded)
|
||||
return;
|
||||
}
|
||||
|
||||
const auto &FunctionProfileData = Reader->getProfileData();
|
||||
// Add the memprof records into the writer context.
|
||||
for (const auto &[GUID, Record] : FunctionProfileData) {
|
||||
|
@ -347,6 +347,9 @@ using ::llvm::memprof::IndexedMemProfRecord;
|
||||
using ::llvm::memprof::MemInfoBlock;
|
||||
using FrameIdMapTy =
|
||||
llvm::DenseMap<::llvm::memprof::FrameId, ::llvm::memprof::Frame>;
|
||||
using CallStackIdMapTy =
|
||||
llvm::DenseMap<::llvm::memprof::CallStackId,
|
||||
::llvm::SmallVector<::llvm::memprof::FrameId>>;
|
||||
|
||||
static FrameIdMapTy getFrameMapping() {
|
||||
FrameIdMapTy Mapping;
|
||||
@ -359,6 +362,14 @@ static FrameIdMapTy getFrameMapping() {
|
||||
return Mapping;
|
||||
}
|
||||
|
||||
// Builds the fixed call-stack-id -> frame-id-list fixture used by the tests:
// three call stacks of two frame ids each.
static CallStackIdMapTy getCallStackMapping() {
  CallStackIdMapTy Fixture;
  Fixture[0x111] = {0, 1};
  Fixture[0x222] = {2, 3};
  Fixture[0x333] = {4, 5};
  return Fixture;
}
|
||||
|
||||
IndexedMemProfRecord makeRecord(
|
||||
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
|
||||
AllocFrames,
|
||||
@ -374,6 +385,21 @@ IndexedMemProfRecord makeRecord(
|
||||
return MR;
|
||||
}
|
||||
|
||||
// Builds an IndexedMemProfRecord that refers to call stacks only through
// their ids: one allocation site per id in AllocFrames (each carrying Block)
// and one call site id per entry in CallSiteFrames.
IndexedMemProfRecord
makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
             std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
             const MemInfoBlock &Block = MemInfoBlock()) {
  llvm::memprof::IndexedMemProfRecord Result;
  for (const auto &AllocCSId : AllocFrames) {
    // IndexedAllocationInfo::CallStack is left empty because it is only used
    // in Version0 and Version1.
    Result.AllocSites.emplace_back(::llvm::SmallVector<memprof::FrameId>(),
                                   AllocCSId, Block);
  }
  Result.CallSiteIds.append(CallSiteFrames.begin(), CallSiteFrames.end());
  return Result;
}
|
||||
|
||||
MATCHER_P(EqualsRecord, Want, "") {
|
||||
const memprof::MemProfRecord &Got = arg;
|
||||
|
||||
@ -408,7 +434,7 @@ MATCHER_P(EqualsRecord, Want, "") {
|
||||
return true;
|
||||
}
|
||||
|
||||
TEST_F(InstrProfTest, test_memprof) {
|
||||
TEST_F(InstrProfTest, test_memprof_v0) {
|
||||
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
|
||||
Succeeded());
|
||||
|
||||
@ -450,6 +476,67 @@ TEST_F(InstrProfTest, test_memprof) {
|
||||
EXPECT_THAT(WantRecord, EqualsRecord(Record));
|
||||
}
|
||||
|
||||
// Round-trips one Version2 MemProf record through the writer and reader, then
// checks it against a record resolved from the in-memory fixtures.
TEST_F(InstrProfTest, test_memprof_v2) {
  Writer.setMemProfVersionRequested(memprof::Version2);

  ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                    Succeeded());

  const IndexedMemProfRecord IndexedMR = makeRecordV2(
      /*AllocFrames=*/{0x111, 0x222},
      /*CallSiteFrames=*/{0x333});
  const FrameIdMapTy IdToFrameMap = getFrameMapping();
  const auto CSIdToCallStackMap = getCallStackMapping();

  // Populate the writer: frames first, then call stacks, then the record.
  for (const auto &FrameEntry : IdToFrameMap)
    Writer.addMemProfFrame(FrameEntry.first, FrameEntry.getSecond(), Err);
  for (const auto &StackEntry : CSIdToCallStackMap)
    Writer.addMemProfCallStack(StackEntry.first, StackEntry.getSecond(), Err);
  Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

  auto Profile = Writer.writeBuffer();
  readProfile(std::move(Profile));

  auto RecordOr = Reader->getMemProfRecord(0x9999);
  ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
  const memprof::MemProfRecord &Record = RecordOr.get();

  // Resolve a frame id against the fixture, remembering the last id that
  // could not be resolved.
  std::optional<memprof::FrameId> LastUnmappedFrameId;
  auto IdToFrameCallback = [&](const memprof::FrameId Id) {
    auto Iter = IdToFrameMap.find(Id);
    if (Iter != IdToFrameMap.end())
      return Iter->second;
    LastUnmappedFrameId = Id;
    return memprof::Frame(0, 0, 0, false);
  };

  // Resolve a call stack id into its frames, remembering the last id that
  // could not be resolved. An unresolved id yields an empty frame list.
  std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
  auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
    llvm::SmallVector<memprof::Frame> Frames;
    auto CSIter = CSIdToCallStackMap.find(CSId);
    if (CSIter == CSIdToCallStackMap.end()) {
      LastUnmappedCSId = CSId;
      return Frames;
    }
    const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
        CSIter->getSecond();
    Frames.reserve(CS.size());
    for (::llvm::memprof::FrameId Id : CS)
      Frames.push_back(IdToFrameCallback(Id));
    return Frames;
  };

  const ::llvm::memprof::MemProfRecord WantRecord =
      IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
  ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
      << "could not map frame id: " << *LastUnmappedFrameId;
  ASSERT_EQ(LastUnmappedCSId, std::nullopt)
      << "could not map call stack id: " << *LastUnmappedCSId;
  EXPECT_THAT(WantRecord, EqualsRecord(Record));
}
|
||||
|
||||
TEST_F(InstrProfTest, test_memprof_getrecord_error) {
|
||||
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
|
||||
Succeeded());
|
||||
|
Loading…
x
Reference in New Issue
Block a user