[MemProf] Extend CallSite information to include potential callees. (#130441)

* Added YAML mapping traits for `CallSiteInfo`
* Updated the `MemProfReader` to pass `Frames` instead of the entire
  `CallSiteInfo`
* Updated test cases to use `testing::Field`
* Added YAML sequence traits for `CallSiteInfo` in `MemProfYAML`
* Extended `IndexedMemProfRecord` with an `IndexedCallSiteInfo` that pairs
  each call stack ID with the GUIDs of its potential callees
* XFAILed the MemProfYAML round-trip test until the profile format is
  updated

For now, the additional callee information is only read from and written to
the YAML format; the indexed (V2/V3) serialization is unchanged. The YAML
round-trip test will be re-enabled once the serialized format is updated.
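
Concretely, each `CallSites` entry in the YAML changes from a bare list of frames to a mapping with a required `Frames` key and an optional `CalleeGuids` key. A minimal sketch of the new shape (the GUID values here are hypothetical), embedded as a raw string the way the unit tests below embed their inputs:

const char *YAMLSketch = R"YAML(
HeapProfileRecords:
  - GUID: main
    AllocSites: []
    CallSites:
      - Frames:
        - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
        CalleeGuids: [0x100, 0x200] # optional; left empty when omitted
)YAML";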
commit e1ac57d53a (parent 4518780c3c)
Author: Snehasish Kumar, 2025-03-12 10:55:56 -06:00 (committed by GitHub)
12 changed files with 151 additions and 60 deletions

View File

@@ -342,6 +342,28 @@ using CallStackId = uint64_t;
 // A type representing the index into the call stack array.
 using LinearCallStackId = uint32_t;
 
+// Holds call site information with indexed frame contents.
+struct IndexedCallSiteInfo {
+  // The call stack ID for this call site
+  CallStackId CSId = 0;
+  // The GUIDs of the callees at this call site
+  SmallVector<GlobalValue::GUID, 1> CalleeGuids;
+
+  IndexedCallSiteInfo() = default;
+  IndexedCallSiteInfo(CallStackId CSId) : CSId(CSId) {}
+  IndexedCallSiteInfo(CallStackId CSId,
+                      SmallVector<GlobalValue::GUID, 1> CalleeGuids)
+      : CSId(CSId), CalleeGuids(std::move(CalleeGuids)) {}
+
+  bool operator==(const IndexedCallSiteInfo &Other) const {
+    return CSId == Other.CSId && CalleeGuids == Other.CalleeGuids;
+  }
+
+  bool operator!=(const IndexedCallSiteInfo &Other) const {
+    return !operator==(Other);
+  }
+};
+
 // Holds allocation information in a space efficient format where frames are
 // represented using unique identifiers.
 struct IndexedAllocationInfo {
@@ -410,7 +432,7 @@ struct IndexedMemProfRecord {
   // list of inline locations in bottom-up order i.e. from leaf to root. The
   // inline location list may include additional entries, users should pick
   // the last entry in the list with the same function GUID.
-  llvm::SmallVector<CallStackId> CallSiteIds;
+  llvm::SmallVector<IndexedCallSiteInfo> CallSites;
 
   void clear() { *this = IndexedMemProfRecord(); }
@@ -427,7 +449,7 @@ struct IndexedMemProfRecord {
     if (Other.AllocSites != AllocSites)
       return false;
 
-    if (Other.CallSiteIds != CallSiteIds)
+    if (Other.CallSites != CallSites)
       return false;
     return true;
   }
@@ -455,6 +477,29 @@ struct IndexedMemProfRecord {
   static GlobalValue::GUID getGUID(const StringRef FunctionName);
 };
 
+// Holds call site information with frame contents inline.
+struct CallSiteInfo {
+  // The frames in the call stack
+  std::vector<Frame> Frames;
+
+  // The GUIDs of the callees at this call site
+  SmallVector<GlobalValue::GUID, 1> CalleeGuids;
+
+  CallSiteInfo() = default;
+  CallSiteInfo(std::vector<Frame> Frames) : Frames(std::move(Frames)) {}
+  CallSiteInfo(std::vector<Frame> Frames,
+               SmallVector<GlobalValue::GUID, 1> CalleeGuids)
+      : Frames(std::move(Frames)), CalleeGuids(std::move(CalleeGuids)) {}
+
+  bool operator==(const CallSiteInfo &Other) const {
+    return Frames == Other.Frames && CalleeGuids == Other.CalleeGuids;
+  }
+
+  bool operator!=(const CallSiteInfo &Other) const {
+    return !operator==(Other);
+  }
+};
+
 // Holds the memprof profile information for a function. The internal
 // representation stores frame contents inline. This representation should
 // be used for small amount of temporary, in memory instances.
@@ -462,7 +507,7 @@ struct MemProfRecord {
   // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
   llvm::SmallVector<AllocationInfo> AllocSites;
   // Same as IndexedMemProfRecord::CallSites with frame contents inline.
-  llvm::SmallVector<std::vector<Frame>> CallSites;
+  llvm::SmallVector<CallSiteInfo> CallSites;
 
   MemProfRecord() = default;
@@ -476,8 +521,8 @@ struct MemProfRecord {
     if (!CallSites.empty()) {
       OS << "    CallSites:\n";
-      for (const std::vector<Frame> &Frames : CallSites) {
-        for (const Frame &F : Frames) {
+      for (const CallSiteInfo &CS : CallSites) {
+        for (const Frame &F : CS.Frames) {
           OS << "    -\n";
           F.printYAML(OS);
         }
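
The two structs above mirror each other: `IndexedCallSiteInfo` pairs a `CallStackId` with the callee GUIDs for the space-efficient representation, while `CallSiteInfo` holds the frames inline. A minimal usage sketch (all values hypothetical) illustrating that `CalleeGuids` participates in equality:

#include "llvm/ProfileData/MemProf.h"
#include <cassert>
using namespace llvm;
using namespace llvm::memprof;

void callSiteInfoSketch() {
  // Indexed form: the call stack is referenced by its ID.
  IndexedCallSiteInfo Indexed(/*CSId=*/0x123,
                              SmallVector<GlobalValue::GUID, 1>{0x100});
  // Inline form: the frames are stored directly.
  CallSiteInfo Inline({Frame(/*Hash=*/0x500, /*LineOffset=*/55,
                             /*Column=*/50, /*IsInlineFrame=*/true)},
                      SmallVector<GlobalValue::GUID, 1>{0x100});
  // The same CSId with different CalleeGuids compares unequal.
  assert(Indexed != IndexedCallSiteInfo(0x123));
  (void)Inline;
}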

View File

@@ -155,6 +155,14 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
 // In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
 // treat the GUID and the fields within MemProfRecord at the same level as if
 // the GUID were part of MemProfRecord.
+template <> struct MappingTraits<memprof::CallSiteInfo> {
+  static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
+    Io.mapRequired("Frames", CS.Frames);
+    // Keep this optional to make it easier to write tests.
+    Io.mapOptional("CalleeGuids", CS.CalleeGuids);
+  }
+};
+
 template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
   static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
     Io.mapRequired("GUID", Pair.GUID);
@@ -174,6 +182,7 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
 LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
 
 #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
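
A sketch of consuming the new traits (the helper name is hypothetical): because `CalleeGuids` is mapped with `mapOptional`, a document that omits the key still parses and leaves the vector empty:

#include "llvm/ProfileData/MemProfYAML.h"

llvm::memprof::CallSiteInfo parseCallSite(llvm::StringRef Doc) {
  llvm::memprof::CallSiteInfo CS;
  llvm::yaml::Input Yin(Doc);
  Yin >> CS; // "Frames" is required; "CalleeGuids" defaults to empty.
  return CS; // Real code should also check Yin.error().
}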

View File

@@ -64,7 +64,7 @@ static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
   // The number of callsites we have information for.
   Result += sizeof(uint64_t);
   // The CallStackId
-  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
+  Result += Record.CallSites.size() * sizeof(CallStackId);
   return Result;
 }
@@ -78,7 +78,7 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
   // The number of callsites we have information for.
   Result += sizeof(uint64_t);
   // The linear call stack ID.
-  Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
+  Result += Record.CallSites.size() * sizeof(LinearCallStackId);
   return Result;
 }
@@ -106,9 +106,9 @@ static void serializeV2(const IndexedMemProfRecord &Record,
   }
 
   // Related contexts.
-  LE.write<uint64_t>(Record.CallSiteIds.size());
-  for (const auto &CSId : Record.CallSiteIds)
-    LE.write<CallStackId>(CSId);
+  LE.write<uint64_t>(Record.CallSites.size());
+  for (const auto &CS : Record.CallSites)
+    LE.write<CallStackId>(CS.CSId);
 }
 
 static void serializeV3(
@@ -127,10 +127,10 @@ static void serializeV3(
   }
 
   // Related contexts.
-  LE.write<uint64_t>(Record.CallSiteIds.size());
-  for (const auto &CSId : Record.CallSiteIds) {
-    assert(MemProfCallStackIndexes.contains(CSId));
-    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
+  LE.write<uint64_t>(Record.CallSites.size());
+  for (const auto &CS : Record.CallSites) {
+    assert(MemProfCallStackIndexes.contains(CS.CSId));
+    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
   }
 }
@@ -170,11 +170,11 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
   // Read the callsite information.
   const uint64_t NumCtxs =
       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-  Record.CallSiteIds.reserve(NumCtxs);
+  Record.CallSites.reserve(NumCtxs);
   for (uint64_t J = 0; J < NumCtxs; J++) {
     CallStackId CSId =
         endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
-    Record.CallSiteIds.push_back(CSId);
+    Record.CallSites.emplace_back(CSId);
   }
 
   return Record;
@@ -202,7 +202,7 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
   // Read the callsite information.
   const uint64_t NumCtxs =
       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-  Record.CallSiteIds.reserve(NumCtxs);
+  Record.CallSites.reserve(NumCtxs);
   for (uint64_t J = 0; J < NumCtxs; J++) {
     // We are storing LinearCallStackId in CallSiteIds, which is a vector of
     // CallStackId. Assert that CallStackId is no smaller than
@@ -210,7 +210,7 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
     static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
     LinearCallStackId CSId =
         endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
-    Record.CallSiteIds.push_back(CSId);
+    Record.CallSites.emplace_back(CSId);
   }
 
   return Record;
@@ -241,9 +241,11 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord(
     Record.AllocSites.push_back(std::move(AI));
   }
 
-  Record.CallSites.reserve(CallSiteIds.size());
-  for (CallStackId CSId : CallSiteIds)
-    Record.CallSites.push_back(Callback(CSId));
+  Record.CallSites.reserve(CallSites.size());
+  for (const IndexedCallSiteInfo &CS : CallSites) {
+    std::vector<Frame> Frames = Callback(CS.CSId);
+    Record.CallSites.emplace_back(std::move(Frames), CS.CalleeGuids);
+  }
 
   return Record;
 }
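
Note that the V2/V3 wire format is deliberately untouched: per call site it still stores only the (linear) call stack ID, as the size computations above show, so `CalleeGuids` survives the YAML path but is dropped by binary serialization. That is why the round-trip test below is XFAILed. A restatement of the V3 call-site payload (function name hypothetical):

// Per-record call-site payload in V3: a count followed by one
// LinearCallStackId per site; CalleeGuids contribute no bytes yet.
static size_t callSitePayloadSizeV3(const IndexedMemProfRecord &Record) {
  return sizeof(uint64_t) +
         Record.CallSites.size() * sizeof(LinearCallStackId);
}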

View File

@@ -521,7 +521,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     // we insert a new entry for callsite data if we need to.
     IndexedMemProfRecord &Record = MemProfData.Records[Id];
     for (LocationPtr Loc : Locs)
-      Record.CallSiteIds.push_back(MemProfData.addCallStack(*Loc));
+      Record.CallSites.emplace_back(MemProfData.addCallStack(*Loc));
   }
 
   return Error::success();
@@ -808,10 +808,10 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
       IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
     }
 
-    // Populate CallSiteIds.
+    // Populate CallSites with CalleeGuids.
     for (const auto &CallSite : Record.CallSites) {
-      CallStackId CSId = AddCallStack(CallSite);
-      IndexedRecord.CallSiteIds.push_back(CSId);
+      CallStackId CSId = AddCallStack(CallSite.Frames);
+      IndexedRecord.CallSites.emplace_back(CSId, CallSite.CalleeGuids);
     }
 
     MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
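
A sketch of driving the updated reader end to end (`takeMemProfData()` is assumed here, following the existing reader interface): after `parse`, each indexed record's call sites carry both the interned `CSId` and the `CalleeGuids` read from YAML:

using namespace llvm::memprof;

void readSketch(llvm::StringRef YAMLData) {
  YAMLMemProfReader Reader;
  Reader.parse(YAMLData);
  IndexedMemProfData Data = Reader.takeMemProfData(); // assumed accessor
  for (const auto &[GUID, Record] : Data.Records)
    for (const IndexedCallSiteInfo &CS : Record.CallSites)
      (void)CS.CalleeGuids; // populated from the optional YAML key
}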

View File

@@ -954,7 +954,7 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
     UndriftCallStack(AS.CallStack);
 
   for (auto &CS : MemProfRec.CallSites)
-    UndriftCallStack(CS);
+    UndriftCallStack(CS.Frames);
 }
 
 static void
@@ -1048,15 +1048,16 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
     // Need to record all frames from leaf up to and including this function,
     // as any of these may or may not have been inlined at this point.
     unsigned Idx = 0;
-    for (auto &StackFrame : CS) {
+    for (auto &StackFrame : CS.Frames) {
       uint64_t StackId = computeStackId(StackFrame);
-      LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
+      LocHashToCallSites[StackId].insert(
+          ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
       ProfileHasColumns |= StackFrame.Column;
       // Once we find this function, we can stop recording.
       if (StackFrame.Function == FuncGUID)
         break;
     }
-    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
+    assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
   }
 
   auto GetOffset = [](const DILocation *DIL) {

View File

@@ -20,7 +20,8 @@ HeapProfileRecords:
           TotalLifetime: 1000000
           TotalLifetimeAccessDensity: 1
     CallSites:
-      - - { Function: _Z3foov, LineOffset: 6, Column: 12, IsInlineFrame: false }
+      - Frames:
+        - { Function: _Z3foov, LineOffset: 6, Column: 12, IsInlineFrame: false }
 ...
 ;--- memprof-call-site-at-alloc-site.ll

View File

@@ -42,16 +42,19 @@ HeapProfileRecords:
   - GUID: main
     AllocSites: []
     CallSites:
-      - - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+      - Frames:
+        - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
   - GUID: _ZL2f1v
     AllocSites: []
     CallSites:
-      - - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+      - Frames:
+        - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
         - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
   - GUID: _ZL2f2v
     AllocSites: []
     CallSites:
-      - - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+      - Frames:
+        - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
         - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
   - GUID: _Z2f3v
     AllocSites:

View File

@@ -35,11 +35,13 @@ HeapProfileRecords:
   - GUID: _Z3aaav
     AllocSites: []
     CallSites:
-      - - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
+      - Frames:
+        - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
   - GUID: _Z6middlev
     AllocSites: []
     CallSites:
-      - - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
+      - Frames:
+        - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
   - GUID: _Z3foov
     AllocSites:
       - Callstack:
@@ -77,7 +79,8 @@ HeapProfileRecords:
   - GUID: _Z3bbbv
     AllocSites: []
     CallSites:
-      - - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
+      - Frames:
+        - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
 ...
 ;--- memprof_undrift.ll
 define dso_local ptr @_Z3foov() !dbg !5 {

View File

@@ -1,9 +1,11 @@
 ; REQUIRES: x86_64-linux
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
 ; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
 ; RUN: cmp %t/memprof-in.yaml %t/memprof-out.yaml
+; This test is expected to fail until the profile format is updated to handle CalleeGuids.
+; XFAIL: *
 
 ; Verify that the YAML output is identical to the YAML input.
 ;--- memprof-in.yaml
 ---
@@ -27,8 +29,12 @@ HeapProfileRecords:
           TotalLifetime: 777
           TotalLifetimeAccessDensity: 888
     CallSites:
-      - - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+      - Frames:
+        - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
         - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
-      - - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
+        CalleeGuids: [0x100, 0x200]
+      - Frames:
+        - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
         - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
+        CalleeGuids: [0x300]
 ...

View File

@@ -397,7 +397,7 @@ makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
   for (const auto &CSId : AllocFrames)
     MR.AllocSites.emplace_back(CSId, Block, Schema);
   for (const auto &CSId : CallSiteFrames)
-    MR.CallSiteIds.push_back(CSId);
+    MR.CallSites.push_back(llvm::memprof::IndexedCallSiteInfo(CSId));
   return MR;
 }

View File

@@ -210,8 +210,10 @@ TEST(MemProf, FillsValue) {
                           FrameContains("abc", 5U, 30U, false));
 
   EXPECT_THAT(Bar.CallSites,
-              ElementsAre(ElementsAre(FrameContains("foo", 5U, 30U, true),
-                                      FrameContains("bar", 51U, 20U, false))));
+              ElementsAre(testing::Field(
+                  &CallSiteInfo::Frames,
+                  ElementsAre(FrameContains("foo", 5U, 30U, true),
+                              FrameContains("bar", 51U, 20U, false)))));
 
   // Check the memprof record for xyz.
   const llvm::GlobalValue::GUID XyzId = IndexedMemProfRecord::getGUID("xyz");
@@ -220,8 +222,10 @@ TEST(MemProf, FillsValue) {
   // Expect the entire frame even though in practice we only need the first
   // entry here.
   EXPECT_THAT(Xyz.CallSites,
-              ElementsAre(ElementsAre(FrameContains("xyz", 5U, 30U, true),
-                                      FrameContains("abc", 5U, 30U, false))));
+              ElementsAre(testing::Field(
+                  &CallSiteInfo::Frames,
+                  ElementsAre(FrameContains("xyz", 5U, 30U, true),
+                              FrameContains("abc", 5U, 30U, false)))));
 
   // Check the memprof record for abc.
   const llvm::GlobalValue::GUID AbcId = IndexedMemProfRecord::getGUID("abc");
@@ -229,8 +233,10 @@ TEST(MemProf, FillsValue) {
   const MemProfRecord &Abc = Records[AbcId];
   EXPECT_TRUE(Abc.AllocSites.empty());
   EXPECT_THAT(Abc.CallSites,
-              ElementsAre(ElementsAre(FrameContains("xyz", 5U, 30U, true),
-                                      FrameContains("abc", 5U, 30U, false))));
+              ElementsAre(testing::Field(
+                  &CallSiteInfo::Frames,
+                  ElementsAre(FrameContains("xyz", 5U, 30U, true),
+                              FrameContains("abc", 5U, 30U, false)))));
 }
 
 TEST(MemProf, PortableWrapper) {
@@ -273,7 +279,8 @@ TEST(MemProf, RecordSerializationRoundTripVerion2) {
     // Use the same info block for both allocation sites.
     Record.AllocSites.emplace_back(CSId, Info);
   }
-  Record.CallSiteIds.assign(CallSiteIds);
+  for (auto CSId : CallSiteIds)
+    Record.CallSites.push_back(IndexedCallSiteInfo(CSId));
 
   std::string Buffer;
   llvm::raw_string_ostream OS(Buffer);
@@ -303,7 +310,8 @@ TEST(MemProf, RecordSerializationRoundTripVersion2HotColdSchema) {
     // Use the same info block for both allocation sites.
     Record.AllocSites.emplace_back(CSId, Info, Schema);
   }
-  Record.CallSiteIds.assign(CallSiteIds);
+  for (auto CSId : CallSiteIds)
+    Record.CallSites.push_back(IndexedCallSiteInfo(CSId));
 
   std::bitset<llvm::to_underlying(Meta::Size)> SchemaBitSet;
   for (auto Id : Schema)
@@ -498,8 +506,8 @@ TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
   IndexedRecord.AllocSites.push_back(AI);
   AI.CSId = CS2Id;
   IndexedRecord.AllocSites.push_back(AI);
-  IndexedRecord.CallSiteIds.push_back(CS3Id);
-  IndexedRecord.CallSiteIds.push_back(CS4Id);
+  IndexedRecord.CallSites.push_back(IndexedCallSiteInfo(CS3Id));
+  IndexedRecord.CallSites.push_back(IndexedCallSiteInfo(CS4Id));
 
   IndexedCallstackIdConveter CSIdConv(MemProfData);
@@ -513,8 +521,9 @@ TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
   ASSERT_THAT(Record.AllocSites, SizeIs(2));
   EXPECT_THAT(Record.AllocSites[0].CallStack, ElementsAre(F1, F2));
   EXPECT_THAT(Record.AllocSites[1].CallStack, ElementsAre(F1, F3));
-  EXPECT_THAT(Record.CallSites,
-              ElementsAre(ElementsAre(F2, F3), ElementsAre(F2, F4)));
+  ASSERT_THAT(Record.CallSites, SizeIs(2));
+  EXPECT_THAT(Record.CallSites[0].Frames, ElementsAre(F2, F3));
+  EXPECT_THAT(Record.CallSites[1].Frames, ElementsAre(F2, F4));
 }
 
 // Populate those fields returned by getHotColdSchema.
@@ -690,10 +699,14 @@ HeapProfileRecords:
           AllocCount: 666
           TotalSize: 555
     CallSites:
-      - - {Function: 0x500, LineOffset: 55, Column: 50, IsInlineFrame: true}
+      - Frames:
+        - {Function: 0x500, LineOffset: 55, Column: 50, IsInlineFrame: true}
         - {Function: 0x600, LineOffset: 66, Column: 60, IsInlineFrame: false}
-      - - {Function: 0x700, LineOffset: 77, Column: 70, IsInlineFrame: true}
+        CalleeGuids: [0x1000, 0x2000]
+      - Frames:
+        - {Function: 0x700, LineOffset: 77, Column: 70, IsInlineFrame: true}
         - {Function: 0x800, LineOffset: 88, Column: 80, IsInlineFrame: false}
+        CalleeGuids: [0x3000]
 )YAML";
 
   YAMLMemProfReader YAMLReader;
@@ -719,11 +732,19 @@ HeapProfileRecords:
       ElementsAre(Frame(0x300, 33, 30, false), Frame(0x400, 44, 40, true)));
   EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
   EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
-  EXPECT_THAT(Record.CallSites,
-              ElementsAre(ElementsAre(Frame(0x500, 55, 50, true),
-                                      Frame(0x600, 66, 60, false)),
-                          ElementsAre(Frame(0x700, 77, 70, true),
-                                      Frame(0x800, 88, 80, false))));
+  EXPECT_THAT(
+      Record.CallSites,
+      ElementsAre(
+          AllOf(testing::Field(&CallSiteInfo::Frames,
+                               ElementsAre(Frame(0x500, 55, 50, true),
+                                           Frame(0x600, 66, 60, false))),
+                testing::Field(&CallSiteInfo::CalleeGuids,
+                               ElementsAre(0x1000, 0x2000))),
+          AllOf(testing::Field(&CallSiteInfo::Frames,
+                               ElementsAre(Frame(0x700, 77, 70, true),
+                                           Frame(0x800, 88, 80, false))),
+                testing::Field(&CallSiteInfo::CalleeGuids,
+                               ElementsAre(0x3000)))));
 }
 
 // Verify that the YAML parser accepts a GUID expressed as a function name.
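
For reference, the matcher idiom introduced above in isolation: `testing::Field` projects a single member out of `CallSiteInfo`, and `AllOf` combines the per-member expectations. A sketch (test name hypothetical; assumes the test file's usual `ElementsAre`/`SizeIs` usings):

TEST(MemProfExample, CallSiteMatcherIdiom) {
  CallSiteInfo CS({Frame(0x700, 77, 70, true), Frame(0x800, 88, 80, false)},
                  {0x3000});
  EXPECT_THAT(CS, testing::AllOf(
                      testing::Field(&CallSiteInfo::Frames, SizeIs(2)),
                      testing::Field(&CallSiteInfo::CalleeGuids,
                                     ElementsAre(0x3000))));
}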

View File

@@ -320,7 +320,7 @@ makeRecordV2(std::initializer_list<CallStackId> AllocFrames,
   for (const auto &CSId : AllocFrames)
     MR.AllocSites.emplace_back(CSId, Block, Schema);
   for (const auto &CSId : CallSiteFrames)
-    MR.CallSiteIds.push_back(CSId);
+    MR.CallSites.push_back(IndexedCallSiteInfo(CSId));
   return MR;
 }