mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-29 13:16:06 +00:00

This is the follow-up patch to https://reviews.llvm.org/D125246 for the `SampleContextTracker` part. Previously, the promotion and merging of contexts was based on the SampleContext (the array of frames), which costs a lot of memory. This patch detaches the tracker from the array ref and makes it use the context trie itself instead. This can save a lot of memory and benefits both the compiler's CS inliner and llvm-profgen's pre-inliner. One structure that needs special treatment is `FuncToCtxtProfiles`, which is used to get all the FunctionSamples for one function in order to do the merging and promoting. Previously, it searched each function's context and traversed the trie to get the node of the context. Now that we don't have the context inside the profile, we instead directly use an auxiliary map, `ProfileToNodeMap`, for the profile; it is initialized with the FunctionSamples-to-TrieNode relations and is kept up to date while promoting and merging nodes. Moreover, I was expecting the results before and after to remain the same, but I found that the order of `FuncToCtxtProfiles` matters and affects the results. This can happen in the recursive-context case, but the difference should be small. Now that we don't have the context, I just used a vector for the order; the result is still deterministic. Measured on one huge (12GB) profile from one of our internal services: the profile similarity between the results is 99.999%, the running time is improved by 3X (debug mode), and the memory is reduced from 170GB to 90GB. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D127031
91 lines
3.3 KiB
C++
91 lines
3.3 KiB
C++
//===-- CSPreInliner.h - Profile guided preinliner ---------------- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
|
|
#define LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
|
|
|
|
#include "ProfiledBinary.h"
|
|
#include "llvm/ADT/PriorityQueue.h"
|
|
#include "llvm/ProfileData/ProfileCommon.h"
|
|
#include "llvm/ProfileData/SampleProf.h"
|
|
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
|
|
#include "llvm/Transforms/IPO/SampleContextTracker.h"
|
|
|
|
using namespace llvm;
|
|
using namespace sampleprof;
|
|
|
|
namespace llvm {
|
|
namespace sampleprof {
|
|
|
|
// Inline candidate seen from profile
|
|
struct ProfiledInlineCandidate {
|
|
ProfiledInlineCandidate(const FunctionSamples *Samples, uint64_t Count,
|
|
uint32_t Size)
|
|
: CalleeSamples(Samples), CallsiteCount(Count), SizeCost(Size) {}
|
|
// Context-sensitive function profile for inline candidate
|
|
const FunctionSamples *CalleeSamples;
|
|
// Call site count for an inline candidate
|
|
// TODO: make sure entry count for context profile and call site
|
|
// target count for corresponding call are consistent.
|
|
uint64_t CallsiteCount;
|
|
// Size proxy for function under particular call context.
|
|
uint64_t SizeCost;
|
|
};
|
|
|
|
// Inline candidate comparer using call site weight
|
|
struct ProfiledCandidateComparer {
|
|
bool operator()(const ProfiledInlineCandidate &LHS,
|
|
const ProfiledInlineCandidate &RHS) {
|
|
if (LHS.CallsiteCount != RHS.CallsiteCount)
|
|
return LHS.CallsiteCount < RHS.CallsiteCount;
|
|
|
|
if (LHS.SizeCost != RHS.SizeCost)
|
|
return LHS.SizeCost > RHS.SizeCost;
|
|
|
|
// Tie breaker using GUID so we have stable/deterministic inlining order
|
|
assert(LHS.CalleeSamples && RHS.CalleeSamples &&
|
|
"Expect non-null FunctionSamples");
|
|
return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
|
|
RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
|
|
}
|
|
};
|
|
|
|
using ProfiledCandidateQueue =
|
|
PriorityQueue<ProfiledInlineCandidate, std::vector<ProfiledInlineCandidate>,
|
|
ProfiledCandidateComparer>;
|
|
|
|
// Pre-compilation inliner based on context-sensitive profile.
|
|
// The PreInliner estimates inline decision using hotness from profile
|
|
// and cost estimation from machine code size. It helps merges context
|
|
// profile globally and achieves better post-inine profile quality, which
|
|
// otherwise won't be possible for ThinLTO. It also reduce context profile
|
|
// size by only keep context that is estimated to be inlined.
|
|
class CSPreInliner {
|
|
public:
|
|
CSPreInliner(SampleContextTracker &Tracker, ProfiledBinary &Binary,
|
|
ProfileSummary *Summary);
|
|
void run();
|
|
|
|
private:
|
|
bool getInlineCandidates(ProfiledCandidateQueue &CQueue,
|
|
const FunctionSamples *FCallerContextSamples);
|
|
std::vector<StringRef> buildTopDownOrder();
|
|
void processFunction(StringRef Name);
|
|
bool shouldInline(ProfiledInlineCandidate &Candidate);
|
|
uint32_t getFuncSize(const ContextTrieNode *ContextNode);
|
|
bool UseContextCost;
|
|
SampleContextTracker &ContextTracker;
|
|
ProfiledBinary &Binary;
|
|
ProfileSummary *Summary;
|
|
};
|
|
|
|
} // end namespace sampleprof
|
|
} // end namespace llvm
|
|
|
|
#endif
|