2014-10-30 18:00:06 +00:00
|
|
|
//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
|
|
|
|
//
|
2019-01-19 08:50:56 +00:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-10-30 18:00:06 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements the class that writes LLVM sample profiles. It
|
|
|
|
// supports two file formats: text and binary. The textual representation
|
|
|
|
// is useful for debugging and testing purposes. The binary representation
|
|
|
|
// is more compact, resulting in smaller file sizes. However, they can
|
|
|
|
// both be used interchangeably.
|
|
|
|
//
|
|
|
|
// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the
|
|
|
|
// supported formats.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-06-06 11:49:48 +00:00
|
|
|
#include "llvm/ProfileData/SampleProfWriter.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ProfileData/ProfileCommon.h"
|
|
|
|
#include "llvm/ProfileData/SampleProf.h"
|
2019-10-07 16:12:37 +00:00
|
|
|
#include "llvm/Support/Compression.h"
|
2018-09-14 20:52:59 +00:00
|
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
#include "llvm/Support/EndianStream.h"
|
2014-10-30 18:00:06 +00:00
|
|
|
#include "llvm/Support/ErrorOr.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2014-10-30 18:00:06 +00:00
|
|
|
#include "llvm/Support/LEB128.h"
|
2018-06-11 22:40:43 +00:00
|
|
|
#include "llvm/Support/MD5.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <algorithm>
|
2023-02-08 02:59:12 +00:00
|
|
|
#include <cmath>
|
2017-03-03 01:07:34 +00:00
|
|
|
#include <cstdint>
|
|
|
|
#include <memory>
|
2017-05-11 23:43:44 +00:00
|
|
|
#include <set>
|
2017-03-03 01:07:34 +00:00
|
|
|
#include <system_error>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
2014-10-30 18:00:06 +00:00
|
|
|
|
2023-02-08 02:59:12 +00:00
|
|
|
#define DEBUG_TYPE "llvm-profdata"
|
|
|
|
|
2014-10-30 18:00:06 +00:00
|
|
|
using namespace llvm;
|
2017-03-03 01:07:34 +00:00
|
|
|
using namespace sampleprof;
|
2014-10-30 18:00:06 +00:00
|
|
|
|
2023-02-08 02:59:12 +00:00
|
|
|
namespace llvm {
|
|
|
|
namespace support {
|
|
|
|
namespace endian {
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Adapter class to llvm::support::endian::Writer for pwrite().
|
|
|
|
struct SeekableWriter {
|
|
|
|
raw_pwrite_stream &OS;
|
|
|
|
endianness Endian;
|
|
|
|
SeekableWriter(raw_pwrite_stream &OS, endianness Endian)
|
|
|
|
: OS(OS), Endian(Endian) {}
|
|
|
|
|
|
|
|
template <typename ValueType>
|
|
|
|
void pwrite(ValueType Val, size_t Offset) {
|
|
|
|
std::string StringBuf;
|
|
|
|
raw_string_ostream SStream(StringBuf);
|
|
|
|
Writer(SStream, Endian).write(Val);
|
|
|
|
OS.pwrite(StringBuf.data(), StringBuf.size(), Offset);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
} // namespace endian
|
|
|
|
} // namespace support
|
|
|
|
} // namespace llvm
|
|
|
|
|
|
|
|
/// Construct the default pruning strategy. The map and the size limit are
/// forwarded to the FunctionPruningStrategy base, and SortedFunctions is
/// filled from \p ProfileMap by sortFuncProfiles().
DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy(
    SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
    : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) {
  sortFuncProfiles(ProfileMap, SortedFunctions);
}
|
|
|
|
|
|
|
|
void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) {
|
|
|
|
double D = (double)OutputSizeLimit / CurrentOutputSize;
|
|
|
|
size_t NewSize = (size_t)round(ProfileMap.size() * D * D);
|
|
|
|
size_t NumToRemove = ProfileMap.size() - NewSize;
|
|
|
|
if (NumToRemove < 1)
|
|
|
|
NumToRemove = 1;
|
|
|
|
|
|
|
|
assert(NumToRemove <= SortedFunctions.size());
|
|
|
|
llvm::for_each(
|
|
|
|
llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() -
|
|
|
|
NumToRemove,
|
|
|
|
SortedFunctions.end()),
|
|
|
|
[&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); });
|
|
|
|
SortedFunctions.resize(SortedFunctions.size() - NumToRemove);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Write \p ProfileMap so that the output does not exceed \p OutputSizeLimit
/// bytes, pruning the map with \p Strategy until it fits.
///
/// A limit of 0 disables the size check and forwards directly to write().
/// Otherwise the profile is repeatedly serialized into an in-memory buffer;
/// whenever the result is larger than the limit, Strategy->Erase() is asked to
/// shrink \p ProfileMap and the write is retried. Once the buffer fits, it is
/// copied to the real output stream.
///
/// \returns the error reported by write() if serialization fails,
/// sampleprof_error::too_large if the map is empty when the loop finishes,
/// and sampleprof_error::success otherwise.
///
/// The writer's original OutputStream is reinstalled on every exit path, so
/// the writer never keeps a stream that refers to the function-local buffer.
std::error_code SampleProfileWriter::writeWithSizeLimitInternal(
    SampleProfileMap &ProfileMap, size_t OutputSizeLimit,
    FunctionPruningStrategy *Strategy) {
  if (OutputSizeLimit == 0)
    return write(ProfileMap);

  size_t OriginalFunctionCount = ProfileMap.size();

  // Park the real output stream while trial writes go to a memory buffer.
  std::unique_ptr<raw_ostream> OriginalOutputStream;
  OutputStream.swap(OriginalOutputStream);

  size_t IterationCount = 0;
  size_t TotalSize;

  SmallVector<char> StringBuffer;

  // Puts the parked stream back into OutputStream. The temporary buffer
  // stream ends up in OriginalOutputStream and is destroyed on return.
  auto RestoreOutputStream = [&]() { OutputStream.swap(OriginalOutputStream); };

  do {
    StringBuffer.clear();
    OutputStream = std::make_unique<raw_svector_ostream>(StringBuffer);
    if (std::error_code EC = write(ProfileMap)) {
      // Do not leave OutputStream bound to the local StringBuffer.
      RestoreOutputStream();
      return EC;
    }

    TotalSize = StringBuffer.size();
    // On Windows every "\n" is actually written as "\r\n" to disk but not to
    // memory buffer, this difference should be added when considering the total
    // output size.
#ifdef _WIN32
    if (Format == SPF_Text)
      TotalSize += LineCount;
#endif
    if (TotalSize <= OutputSizeLimit)
      break;

    Strategy->Erase(TotalSize);
    IterationCount++;
  } while (ProfileMap.size() != 0);

  if (ProfileMap.size() == 0) {
    // Same as above: hand the original stream back before bailing out.
    RestoreOutputStream();
    return sampleprof_error::too_large;
  }

  RestoreOutputStream();
  OutputStream->write(StringBuffer.data(), StringBuffer.size());
  LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount
                    << " functions, reduced to " << ProfileMap.size() << " in "
                    << IterationCount << " iterations\n");
  // Silence warning on Release build.
  (void)OriginalFunctionCount;
  (void)IterationCount;
  return sampleprof_error::success;
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
|
2017-05-11 23:43:44 +00:00
|
|
|
std::vector<NameFunctionSamples> V;
|
2021-08-16 14:17:43 -07:00
|
|
|
sortFuncProfiles(ProfileMap, V);
|
2017-05-11 23:43:44 +00:00
|
|
|
for (const auto &I : V) {
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeSample(*I.second))
|
2017-05-11 23:43:44 +00:00
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code SampleProfileWriter::write(const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeHeader(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
if (std::error_code EC = writeFuncProfiles(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the current position and prepare to use it as the start
|
2020-12-14 14:49:20 -08:00
|
|
|
/// position of a section given the section type \p Type and its position
|
|
|
|
/// \p LayoutIdx in SectionHdrLayout.
|
|
|
|
uint64_t
|
|
|
|
SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type,
|
|
|
|
uint32_t LayoutIdx) {
|
2019-10-07 16:12:37 +00:00
|
|
|
uint64_t SectionStart = OutputStream->tell();
|
2020-12-14 14:49:20 -08:00
|
|
|
assert(LayoutIdx < SectionHdrLayout.size() && "LayoutIdx out of range");
|
|
|
|
const auto &Entry = SectionHdrLayout[LayoutIdx];
|
|
|
|
assert(Entry.Type == Type && "Unexpected section type");
|
2019-10-07 16:12:37 +00:00
|
|
|
// Use LocalBuf as a temporary output for writting data.
|
2020-03-03 13:19:32 -08:00
|
|
|
if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
|
2019-10-07 16:12:37 +00:00
|
|
|
LocalBufStream.swap(OutputStream);
|
|
|
|
return SectionStart;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
|
2022-07-08 11:19:05 -07:00
|
|
|
if (!llvm::compression::zlib::isAvailable())
|
2019-10-07 16:12:37 +00:00
|
|
|
return sampleprof_error::zlib_unavailable;
|
|
|
|
std::string &UncompressedStrings =
|
|
|
|
static_cast<raw_string_ostream *>(LocalBufStream.get())->str();
|
|
|
|
if (UncompressedStrings.size() == 0)
|
|
|
|
return sampleprof_error::success;
|
|
|
|
auto &OS = *OutputStream;
|
2022-07-13 16:26:54 -07:00
|
|
|
SmallVector<uint8_t, 128> CompressedStrings;
|
|
|
|
compression::zlib::compress(arrayRefFromStringRef(UncompressedStrings),
|
|
|
|
CompressedStrings,
|
2022-07-08 11:19:05 -07:00
|
|
|
compression::zlib::BestSizeCompression);
|
2019-10-07 16:12:37 +00:00
|
|
|
encodeULEB128(UncompressedStrings.size(), OS);
|
|
|
|
encodeULEB128(CompressedStrings.size(), OS);
|
2022-07-13 16:26:54 -07:00
|
|
|
OS << toStringRef(CompressedStrings);
|
2019-10-07 16:12:37 +00:00
|
|
|
UncompressedStrings.clear();
|
|
|
|
return sampleprof_error::success;
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
/// Add a new section into section header table given the section type
|
|
|
|
/// \p Type, its position \p LayoutIdx in SectionHdrLayout and the
|
|
|
|
/// location \p SectionStart where the section should be written to.
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
|
|
|
|
SecType Type, uint32_t LayoutIdx, uint64_t SectionStart) {
|
|
|
|
assert(LayoutIdx < SectionHdrLayout.size() && "LayoutIdx out of range");
|
|
|
|
const auto &Entry = SectionHdrLayout[LayoutIdx];
|
|
|
|
assert(Entry.Type == Type && "Unexpected section type");
|
2020-03-03 13:19:32 -08:00
|
|
|
if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) {
|
2019-10-07 16:12:37 +00:00
|
|
|
LocalBufStream.swap(OutputStream);
|
|
|
|
if (std::error_code EC = compressAndOutput())
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart,
|
2020-12-14 14:49:20 -08:00
|
|
|
OutputStream->tell() - SectionStart, LayoutIdx});
|
2019-10-07 16:12:37 +00:00
|
|
|
return sampleprof_error::success;
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
|
2023-02-08 02:59:12 +00:00
|
|
|
// When calling write on a different profile map, existing states should be
|
|
|
|
// cleared.
|
|
|
|
NameTable.clear();
|
|
|
|
CSNameTable.clear();
|
|
|
|
SecHdrTable.clear();
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeHeader(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
2019-10-07 16:12:37 +00:00
|
|
|
std::string LocalBuf;
|
|
|
|
LocalBufStream = std::make_unique<raw_string_ostream>(LocalBuf);
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeSections(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
if (std::error_code EC = writeSecHdrTable())
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeContextIdx(
|
|
|
|
const SampleContext &Context) {
|
|
|
|
if (Context.hasContext())
|
|
|
|
return writeCSNameIdx(Context);
|
|
|
|
else
|
|
|
|
return SampleProfileWriterBinary::writeNameIdx(Context.getName());
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterExtBinaryBase::writeCSNameIdx(const SampleContext &Context) {
|
|
|
|
const auto &Ret = CSNameTable.find(Context);
|
|
|
|
if (Ret == CSNameTable.end())
|
|
|
|
return sampleprof_error::truncated_name_table;
|
|
|
|
encodeULEB128(Ret->second, *OutputStream);
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2019-10-09 21:36:03 +00:00
|
|
|
std::error_code
|
2020-10-15 15:17:28 -07:00
|
|
|
SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
|
2019-10-09 21:36:03 +00:00
|
|
|
uint64_t Offset = OutputStream->tell();
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
auto &Context = S.getContext();
|
|
|
|
FuncOffsetTable[Context] = Offset - SecLBRProfileStart;
|
2019-10-09 21:36:03 +00:00
|
|
|
encodeULEB128(S.getHeadSamples(), *OutputStream);
|
|
|
|
return writeBody(S);
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
|
2019-10-09 21:36:03 +00:00
|
|
|
auto &OS = *OutputStream;
|
|
|
|
|
|
|
|
// Write out the table size.
|
|
|
|
encodeULEB128(FuncOffsetTable.size(), OS);
|
|
|
|
|
|
|
|
// Write out FuncOffsetTable.
|
2021-08-31 16:30:49 -07:00
|
|
|
auto WriteItem = [&](const SampleContext &Context, uint64_t Offset) {
|
|
|
|
if (std::error_code EC = writeContextIdx(Context))
|
2021-04-07 23:06:39 -07:00
|
|
|
return EC;
|
2021-08-31 16:30:49 -07:00
|
|
|
encodeULEB128(Offset, OS);
|
|
|
|
return (std::error_code)sampleprof_error::success;
|
|
|
|
};
|
|
|
|
|
2022-04-28 11:31:02 -07:00
|
|
|
if (FunctionSamples::ProfileIsCS) {
|
2021-08-31 16:30:49 -07:00
|
|
|
// Sort the contexts before writing them out. This is to help fast load all
|
|
|
|
// context profiles for a function as well as their callee contexts which
|
|
|
|
// can help profile-guided importing for ThinLTO.
|
|
|
|
std::map<SampleContext, uint64_t> OrderedFuncOffsetTable(
|
|
|
|
FuncOffsetTable.begin(), FuncOffsetTable.end());
|
|
|
|
for (const auto &Entry : OrderedFuncOffsetTable) {
|
|
|
|
if (std::error_code EC = WriteItem(Entry.first, Entry.second))
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
addSectionFlag(SecFuncOffsetTable, SecFuncOffsetFlags::SecFlagOrdered);
|
|
|
|
} else {
|
|
|
|
for (const auto &Entry : FuncOffsetTable) {
|
|
|
|
if (std::error_code EC = WriteItem(Entry.first, Entry.second))
|
|
|
|
return EC;
|
|
|
|
}
|
2019-10-09 21:36:03 +00:00
|
|
|
}
|
2021-08-31 16:30:49 -07:00
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
FuncOffsetTable.clear();
|
2019-10-09 21:36:03 +00:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2021-12-14 10:03:05 -08:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
|
|
|
|
const FunctionSamples &FunctionProfile) {
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
if (std::error_code EC = writeContextIdx(FunctionProfile.getContext()))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
if (FunctionSamples::ProfileIsProbeBased)
|
|
|
|
encodeULEB128(FunctionProfile.getFunctionHash(), OS);
|
2022-04-28 11:31:02 -07:00
|
|
|
if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined) {
|
2021-12-14 10:03:05 -08:00
|
|
|
encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS);
|
|
|
|
}
|
|
|
|
|
2022-04-28 11:31:02 -07:00
|
|
|
if (!FunctionSamples::ProfileIsCS) {
|
2021-12-14 10:03:05 -08:00
|
|
|
// Recursively emit attributes for all callee samples.
|
|
|
|
uint64_t NumCallsites = 0;
|
|
|
|
for (const auto &J : FunctionProfile.getCallsiteSamples())
|
|
|
|
NumCallsites += J.second.size();
|
|
|
|
encodeULEB128(NumCallsites, OS);
|
|
|
|
for (const auto &J : FunctionProfile.getCallsiteSamples()) {
|
|
|
|
for (const auto &FS : J.second) {
|
|
|
|
LineLocation Loc = J.first;
|
|
|
|
encodeULEB128(Loc.LineOffset, OS);
|
|
|
|
encodeULEB128(Loc.Discriminator, OS);
|
|
|
|
if (std::error_code EC = writeFuncMetadata(FS.second))
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-12-16 12:54:50 -08:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &Profiles) {
|
2022-04-28 11:31:02 -07:00
|
|
|
if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS &&
|
|
|
|
!FunctionSamples::ProfileIsPreInlined)
|
2020-12-16 12:54:50 -08:00
|
|
|
return sampleprof_error::success;
|
|
|
|
for (const auto &Entry : Profiles) {
|
2021-12-14 10:03:05 -08:00
|
|
|
if (std::error_code EC = writeFuncMetadata(Entry.second))
|
2021-04-07 23:06:39 -07:00
|
|
|
return EC;
|
2020-12-16 12:54:50 -08:00
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
|
2020-03-03 13:19:32 -08:00
|
|
|
if (!UseMD5)
|
|
|
|
return SampleProfileWriterBinary::writeNameTable();
|
|
|
|
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
std::set<StringRef> V;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
stablizeNameTable(NameTable, V);
|
2020-03-03 13:19:32 -08:00
|
|
|
|
2020-12-03 12:19:25 -08:00
|
|
|
// Write out the MD5 name table. We wrote unencoded MD5 so reader can
|
|
|
|
// retrieve the name using the name index without having to read the
|
|
|
|
// whole name table.
|
2020-03-03 13:19:32 -08:00
|
|
|
encodeULEB128(NameTable.size(), OS);
|
2020-12-03 12:19:25 -08:00
|
|
|
support::endian::Writer Writer(OS, support::little);
|
|
|
|
for (auto N : V)
|
|
|
|
Writer.write(MD5Hash(N));
|
2020-03-03 13:19:32 -08:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
for (const auto &I : ProfileMap) {
|
2023-06-23 17:58:22 -07:00
|
|
|
assert(I.first == I.second.getContext() && "Inconsistent profile map");
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
addContext(I.second.getContext());
|
2019-08-23 19:05:30 +00:00
|
|
|
addNames(I.second);
|
|
|
|
}
|
2021-01-19 09:20:13 -08:00
|
|
|
|
|
|
|
// If NameTable contains ".__uniq." suffix, set SecFlagUniqSuffix flag
|
|
|
|
// so compiler won't strip the suffix during profile matching after
|
|
|
|
// seeing the flag in the profile.
|
|
|
|
for (const auto &I : NameTable) {
|
2021-10-23 08:45:27 -07:00
|
|
|
if (I.first.contains(FunctionSamples::UniqSuffix)) {
|
2021-01-19 09:20:13 -08:00
|
|
|
addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
if (auto EC = writeNameTable())
|
2019-10-07 16:12:37 +00:00
|
|
|
return EC;
|
2020-10-15 15:17:28 -07:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2019-08-23 19:05:30 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section that stores all full contexts logically in the form of `vector<int>`, where each element is an offset pointing into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is a no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() {
|
|
|
|
// Sort the names to make CSNameTable deterministic.
|
|
|
|
std::set<SampleContext> OrderedContexts;
|
|
|
|
for (const auto &I : CSNameTable)
|
|
|
|
OrderedContexts.insert(I.first);
|
|
|
|
assert(OrderedContexts.size() == CSNameTable.size() &&
|
|
|
|
"Unmatched ordered and unordered contexts");
|
|
|
|
uint64_t I = 0;
|
|
|
|
for (auto &Context : OrderedContexts)
|
|
|
|
CSNameTable[Context] = I++;
|
|
|
|
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
encodeULEB128(OrderedContexts.size(), OS);
|
|
|
|
support::endian::Writer Writer(OS, support::little);
|
|
|
|
for (auto Context : OrderedContexts) {
|
|
|
|
auto Frames = Context.getContextFrames();
|
|
|
|
encodeULEB128(Frames.size(), OS);
|
|
|
|
for (auto &Callsite : Frames) {
|
2021-10-01 16:51:38 -07:00
|
|
|
if (std::error_code EC = writeNameIdx(Callsite.FuncName))
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
return EC;
|
2021-10-01 16:51:38 -07:00
|
|
|
encodeULEB128(Callsite.Location.LineOffset, OS);
|
|
|
|
encodeULEB128(Callsite.Location.Discriminator, OS);
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
|
|
|
|
if (ProfSymList && ProfSymList->size() > 0)
|
|
|
|
if (std::error_code EC = ProfSymList->write(*OutputStream))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2019-10-07 16:12:37 +00:00
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
SecType Type, uint32_t LayoutIdx, const SampleProfileMap &ProfileMap) {
|
2020-10-15 15:17:28 -07:00
|
|
|
// The setting of SecFlagCompress should happen before markSectionStart.
|
|
|
|
if (Type == SecProfileSymbolList && ProfSymList && ProfSymList->toCompress())
|
2019-10-07 16:12:37 +00:00
|
|
|
setToCompressSection(SecProfileSymbolList);
|
2020-12-16 12:54:50 -08:00
|
|
|
if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
|
|
|
|
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
|
2021-12-14 10:03:05 -08:00
|
|
|
if (Type == SecFuncMetadata &&
|
2022-04-28 11:31:02 -07:00
|
|
|
(FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined))
|
2021-02-19 22:46:30 -08:00
|
|
|
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute);
|
2022-04-28 11:31:02 -07:00
|
|
|
if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
|
2021-12-14 10:03:05 -08:00
|
|
|
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
|
2022-04-28 11:31:02 -07:00
|
|
|
if (Type == SecProfSummary && FunctionSamples::ProfileIsPreInlined)
|
|
|
|
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
|
2021-05-27 11:34:22 -07:00
|
|
|
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
|
|
|
|
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
|
2019-08-31 02:27:26 +00:00
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
|
2020-10-15 15:17:28 -07:00
|
|
|
switch (Type) {
|
|
|
|
case SecProfSummary:
|
|
|
|
computeSummary(ProfileMap);
|
|
|
|
if (auto EC = writeSummary())
|
|
|
|
return EC;
|
|
|
|
break;
|
|
|
|
case SecNameTable:
|
|
|
|
if (auto EC = writeNameTableSection(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
break;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
case SecCSNameTable:
|
|
|
|
if (auto EC = writeCSNameTableSection())
|
|
|
|
return EC;
|
|
|
|
break;
|
2020-10-15 15:17:28 -07:00
|
|
|
case SecLBRProfile:
|
|
|
|
SecLBRProfileStart = OutputStream->tell();
|
|
|
|
if (std::error_code EC = writeFuncProfiles(ProfileMap))
|
2019-08-31 02:27:26 +00:00
|
|
|
return EC;
|
2020-10-15 15:17:28 -07:00
|
|
|
break;
|
|
|
|
case SecFuncOffsetTable:
|
|
|
|
if (auto EC = writeFuncOffsetTable())
|
|
|
|
return EC;
|
|
|
|
break;
|
2020-12-16 12:54:50 -08:00
|
|
|
case SecFuncMetadata:
|
|
|
|
if (std::error_code EC = writeFuncMetadata(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
break;
|
2020-10-15 15:17:28 -07:00
|
|
|
case SecProfileSymbolList:
|
|
|
|
if (auto EC = writeProfileSymbolListSection())
|
|
|
|
return EC;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (auto EC = writeCustomSection(Type))
|
|
|
|
return EC;
|
|
|
|
break;
|
|
|
|
}
|
2020-12-14 14:49:20 -08:00
|
|
|
if (std::error_code EC = addNewSection(Type, LayoutIdx, SectionStart))
|
2019-10-07 16:12:37 +00:00
|
|
|
return EC;
|
2020-10-15 15:17:28 -07:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2019-08-23 19:05:30 +00:00
|
|
|
|
2021-01-05 23:24:43 -08:00
|
|
|
std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2020-12-14 14:49:20 -08:00
|
|
|
// The const indices passed to writeOneSection below are specifying the
|
|
|
|
// positions of the sections in SectionHdrLayout. Look at
|
|
|
|
// initSectionHdrLayout to find out where each section is located in
|
|
|
|
// SectionHdrLayout.
|
|
|
|
if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
|
2019-10-09 21:36:03 +00:00
|
|
|
return EC;
|
2020-12-14 14:49:20 -08:00
|
|
|
if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
|
2020-10-15 15:17:28 -07:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecCSNameTable, 2, ProfileMap))
|
2020-10-15 15:17:28 -07:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecLBRProfile, 4, ProfileMap))
|
2020-10-15 15:17:28 -07:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecProfileSymbolList, 5, ProfileMap))
|
2019-10-09 21:36:03 +00:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecFuncOffsetTable, 3, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecFuncMetadata, 6, ProfileMap))
|
2020-12-16 12:54:50 -08:00
|
|
|
return EC;
|
2019-08-23 19:05:30 +00:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section that stores all full contexts logically in the form of `vector<int>`, where each element is an offset pointing into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is a no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
static void splitProfileMapToTwo(const SampleProfileMap &ProfileMap,
|
|
|
|
SampleProfileMap &ContextProfileMap,
|
|
|
|
SampleProfileMap &NoContextProfileMap) {
|
2021-01-05 23:24:43 -08:00
|
|
|
for (const auto &I : ProfileMap) {
|
|
|
|
if (I.second.getCallsiteSamples().size())
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
ContextProfileMap.insert({I.first, I.second});
|
2021-01-05 23:24:43 -08:00
|
|
|
else
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
NoContextProfileMap.insert({I.first, I.second});
|
2021-01-05 23:24:43 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
|
|
|
SampleProfileMap ContextProfileMap, NoContextProfileMap;
|
2021-01-05 23:24:43 -08:00
|
|
|
splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap);
|
|
|
|
|
|
|
|
if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
// Mark the section to have no context. Note section flag needs to be set
|
|
|
|
// before writing the section.
|
|
|
|
addSectionFlag(5, SecCommonFlags::SecFlagFlat);
|
|
|
|
if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
// Mark the section to have no context. Note section flag needs to be set
|
|
|
|
// before writing the section.
|
|
|
|
addSectionFlag(4, SecCommonFlags::SecFlagFlat);
|
|
|
|
if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinary::writeSections(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2021-01-05 23:24:43 -08:00
|
|
|
std::error_code EC;
|
|
|
|
if (SecLayout == DefaultLayout)
|
|
|
|
EC = writeDefaultLayout(ProfileMap);
|
|
|
|
else if (SecLayout == CtxSplitLayout)
|
|
|
|
EC = writeCtxSplitLayout(ProfileMap);
|
|
|
|
else
|
|
|
|
llvm_unreachable("Unsupported layout");
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Write samples to a text file.
|
2015-11-13 20:24:28 +00:00
|
|
|
///
|
|
|
|
/// Note: it may be tempting to implement this in terms of
|
2015-11-19 15:33:08 +00:00
|
|
|
/// FunctionSamples::print(). Please don't. The dump functionality is intended
|
2015-11-13 20:24:28 +00:00
|
|
|
/// for debugging and has no specified form.
|
|
|
|
///
|
|
|
|
/// The format used here is more structured and deliberate because
|
|
|
|
/// it needs to be parsed by the SampleProfileReaderText class.
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
|
2015-12-10 17:21:42 +00:00
|
|
|
auto &OS = *OutputStream;
|
2022-04-28 11:31:02 -07:00
|
|
|
if (FunctionSamples::ProfileIsCS)
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
|
2021-04-07 23:06:39 -07:00
|
|
|
else
|
|
|
|
OS << S.getName() << ":" << S.getTotalSamples();
|
|
|
|
|
2015-10-08 19:40:37 +00:00
|
|
|
if (Indent == 0)
|
|
|
|
OS << ":" << S.getHeadSamples();
|
|
|
|
OS << "\n";
|
2023-02-08 02:59:12 +00:00
|
|
|
LineCount++;
|
2014-10-30 18:00:06 +00:00
|
|
|
|
2015-11-19 15:33:08 +00:00
|
|
|
SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
|
|
|
|
for (const auto &I : SortedSamples.get()) {
|
|
|
|
LineLocation Loc = I->first;
|
|
|
|
const SampleRecord &Sample = I->second;
|
2015-10-08 19:40:37 +00:00
|
|
|
OS.indent(Indent + 1);
|
2014-10-30 18:00:06 +00:00
|
|
|
if (Loc.Discriminator == 0)
|
|
|
|
OS << Loc.LineOffset << ": ";
|
|
|
|
else
|
|
|
|
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
|
|
|
|
|
|
|
|
OS << Sample.getSamples();
|
|
|
|
|
2019-08-20 20:52:00 +00:00
|
|
|
for (const auto &J : Sample.getSortedCallTargets())
|
|
|
|
OS << " " << J.first << ":" << J.second;
|
2014-10-30 18:00:06 +00:00
|
|
|
OS << "\n";
|
2023-02-08 02:59:12 +00:00
|
|
|
LineCount++;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
|
|
|
|
2017-04-13 19:52:10 +00:00
|
|
|
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
|
2015-11-19 15:33:08 +00:00
|
|
|
S.getCallsiteSamples());
|
2015-10-08 19:40:37 +00:00
|
|
|
Indent += 1;
|
2017-04-13 19:52:10 +00:00
|
|
|
for (const auto &I : SortedCallsiteSamples.get())
|
|
|
|
for (const auto &FS : I->second) {
|
|
|
|
LineLocation Loc = I->first;
|
|
|
|
const FunctionSamples &CalleeSamples = FS.second;
|
|
|
|
OS.indent(Indent);
|
|
|
|
if (Loc.Discriminator == 0)
|
|
|
|
OS << Loc.LineOffset << ": ";
|
|
|
|
else
|
|
|
|
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeSample(CalleeSamples))
|
2017-04-13 19:52:10 +00:00
|
|
|
return EC;
|
|
|
|
}
|
2015-10-08 19:40:37 +00:00
|
|
|
Indent -= 1;
|
|
|
|
|
2021-12-14 10:03:05 -08:00
|
|
|
if (FunctionSamples::ProfileIsProbeBased) {
|
|
|
|
OS.indent(Indent + 1);
|
|
|
|
OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
|
2023-02-08 02:59:12 +00:00
|
|
|
LineCount++;
|
2021-12-14 10:03:05 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (S.getContext().getAllAttributes()) {
|
|
|
|
OS.indent(Indent + 1);
|
|
|
|
OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n";
|
2023-02-08 02:59:12 +00:00
|
|
|
LineCount++;
|
2020-12-16 12:54:50 -08:00
|
|
|
}
|
|
|
|
|
2015-10-13 22:48:46 +00:00
|
|
|
return sampleprof_error::success;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently, context strings contain a lot of duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeContextIdx(const SampleContext &Context) {
|
|
|
|
assert(!Context.hasContext() && "cs profile is not supported");
|
|
|
|
return writeNameIdx(Context.getName());
|
|
|
|
}
|
2021-04-07 23:06:39 -07:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently, context strings contain a lot of duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
/// Look up \p FName in the name table and write its stored index to the
/// output stream as a ULEB128 value.
///
/// \returns sampleprof_error::truncated_name_table when \p FName is not in
/// the name table (nothing is written in that case), otherwise
/// sampleprof_error::success.
std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
  auto &Table = getNameTable();
  const auto Pos = Table.find(FName);
  if (Pos != Table.end()) {
    encodeULEB128(Pos->second, *OutputStream);
    return sampleprof_error::success;
  }
  return sampleprof_error::truncated_name_table;
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently, context strings contain a lot of duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
/// Register \p FName in the name table with a placeholder index of 0.
void SampleProfileWriterBinary::addName(StringRef FName) {
  getNameTable().insert(std::make_pair(FName, 0));
}
|
|
|
|
|
|
|
|
/// Register the name of \p Context in the name table via addName().
void SampleProfileWriterBinary::addContext(const SampleContext &Context) {
  const auto &FuncName = Context.getName();
  addName(FuncName);
}
|
|
|
|
|
|
|
|
void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
|
|
|
|
// Add all the names in indirect call targets.
|
|
|
|
for (const auto &I : S.getBodySamples()) {
|
|
|
|
const SampleRecord &Sample = I.second;
|
|
|
|
for (const auto &J : Sample.getCallTargets())
|
|
|
|
addName(J.first());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recursively add all the names for inlined callsites.
|
2017-04-13 19:52:10 +00:00
|
|
|
for (const auto &J : S.getCallsiteSamples())
|
|
|
|
for (const auto &FS : J.second) {
|
|
|
|
const FunctionSamples &CalleeSamples = FS.second;
|
|
|
|
addName(CalleeSamples.getName());
|
|
|
|
addNames(CalleeSamples);
|
|
|
|
}
|
2015-10-13 22:48:46 +00:00
|
|
|
}
|
2014-10-30 18:00:06 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently, context strings contain a lot of duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
void SampleProfileWriterExtBinaryBase::addContext(
|
|
|
|
const SampleContext &Context) {
|
|
|
|
if (Context.hasContext()) {
|
|
|
|
for (auto &Callsite : Context.getContextFrames())
|
2021-10-01 16:51:38 -07:00
|
|
|
SampleProfileWriterBinary::addName(Callsite.FuncName);
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
CSNameTable.insert(std::make_pair(Context, 0));
|
|
|
|
} else {
|
|
|
|
SampleProfileWriterBinary::addName(Context.getName());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Assign deterministic indices to the entries of \p NameTable.
///
/// All keys of \p NameTable are inserted into the ordered set \p V, and then
/// each name in \p V is given its position in the set's iteration order
/// (starting from 0) as its value in \p NameTable.
void SampleProfileWriterBinary::stablizeNameTable(
    MapVector<StringRef, uint32_t> &NameTable, std::set<StringRef> &V) {
  // Sort the names to make NameTable deterministic.
  for (const auto &Entry : NameTable)
    V.insert(Entry.first);

  int NextIndex = 0;
  for (const StringRef &Name : V) {
    NameTable[Name] = NextIndex;
    ++NextIndex;
  }
}
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code SampleProfileWriterBinary::writeNameTable() {
|
2015-12-10 17:21:42 +00:00
|
|
|
auto &OS = *OutputStream;
|
2018-06-11 22:40:43 +00:00
|
|
|
std::set<StringRef> V;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
stablizeNameTable(NameTable, V);
|
2018-06-11 22:40:43 +00:00
|
|
|
|
|
|
|
// Write out the name table.
|
|
|
|
encodeULEB128(NameTable.size(), OS);
|
|
|
|
for (auto N : V) {
|
|
|
|
OS << N;
|
|
|
|
encodeULEB128(0, OS);
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
|
2018-06-11 22:40:43 +00:00
|
|
|
auto &OS = *OutputStream;
|
|
|
|
// Write file magic identifier.
|
2019-08-23 19:05:30 +00:00
|
|
|
encodeULEB128(SPMagic(Format), OS);
|
2018-06-11 22:40:43 +00:00
|
|
|
encodeULEB128(SPVersion(), OS);
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section that stores all full contexts logically in the form of `vector<int>`, where each element is an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
|
2023-02-08 02:59:12 +00:00
|
|
|
// When calling write on a different profile map, existing names should be
|
|
|
|
// cleared.
|
|
|
|
NameTable.clear();
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
writeMagicIdent(Format);
|
2015-10-13 22:48:46 +00:00
|
|
|
|
2016-02-19 03:15:33 +00:00
|
|
|
computeSummary(ProfileMap);
|
|
|
|
if (auto EC = writeSummary())
|
|
|
|
return EC;
|
|
|
|
|
2015-10-13 22:48:46 +00:00
|
|
|
// Generate the name table for all the functions referenced in the profile.
|
|
|
|
for (const auto &I : ProfileMap) {
|
2023-06-23 17:58:22 -07:00
|
|
|
assert(I.first == I.second.getContext() && "Inconsistent profile map");
|
|
|
|
addContext(I.first);
|
2015-10-13 22:48:46 +00:00
|
|
|
addNames(I.second);
|
|
|
|
}
|
|
|
|
|
2018-06-11 22:40:43 +00:00
|
|
|
writeNameTable();
|
2015-10-13 22:48:46 +00:00
|
|
|
return sampleprof_error::success;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
|
|
|
|
2019-10-07 16:12:37 +00:00
|
|
|
void SampleProfileWriterExtBinaryBase::setToCompressAllSections() {
|
2019-10-09 21:36:03 +00:00
|
|
|
for (auto &Entry : SectionHdrLayout)
|
2020-03-03 13:19:32 -08:00
|
|
|
addSecFlag(Entry, SecCommonFlags::SecFlagCompress);
|
2019-10-07 16:12:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Add the SecFlagCompress flag to the section(s) of the given \p Type.
///
/// \param Type The section type to flag; forwarded to addSectionFlag().
void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) {
  constexpr auto CompressFlag = SecCommonFlags::SecFlagCompress;
  addSectionFlag(Type, CompressFlag);
}
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
|
|
|
|
support::endian::Writer Writer(*OutputStream, support::little);
|
|
|
|
|
2019-10-09 21:36:03 +00:00
|
|
|
Writer.write(static_cast<uint64_t>(SectionHdrLayout.size()));
|
2019-08-23 19:05:30 +00:00
|
|
|
SecHdrTableOffset = OutputStream->tell();
|
2019-10-09 21:36:03 +00:00
|
|
|
for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
|
2019-08-23 19:05:30 +00:00
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
|
2020-12-14 14:49:20 -08:00
|
|
|
assert(SecHdrTable.size() == SectionHdrLayout.size() &&
|
|
|
|
"SecHdrTable entries doesn't match SectionHdrLayout");
|
|
|
|
SmallVector<uint32_t, 16> IndexMap(SecHdrTable.size(), -1);
|
|
|
|
for (uint32_t TableIdx = 0; TableIdx < SecHdrTable.size(); TableIdx++) {
|
|
|
|
IndexMap[SecHdrTable[TableIdx].LayoutIndex] = TableIdx;
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
2019-10-09 21:36:03 +00:00
|
|
|
// Write the section header table in the order specified in
|
2020-12-14 14:49:20 -08:00
|
|
|
// SectionHdrLayout. SectionHdrLayout specifies the sections
|
|
|
|
// order in which profile reader expect to read, so the section
|
|
|
|
// header table should be written in the order in SectionHdrLayout.
|
|
|
|
// Note that the section order in SecHdrTable may be different
|
|
|
|
// from the order in SectionHdrLayout, for example, SecFuncOffsetTable
|
|
|
|
// needs to be computed after SecLBRProfile (the order in SecHdrTable),
|
|
|
|
// but it needs to be read before SecLBRProfile (the order in
|
|
|
|
// SectionHdrLayout). So we use IndexMap above to switch the order.
|
2023-02-08 02:59:12 +00:00
|
|
|
support::endian::SeekableWriter Writer(
|
|
|
|
static_cast<raw_pwrite_stream &>(*OutputStream), support::little);
|
2020-12-14 14:49:20 -08:00
|
|
|
for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
|
|
|
|
LayoutIdx++) {
|
|
|
|
assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
|
|
|
|
"Incorrect LayoutIdx in SecHdrTable");
|
|
|
|
auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
|
2023-02-08 02:59:12 +00:00
|
|
|
Writer.pwrite(static_cast<uint64_t>(Entry.Type),
|
|
|
|
SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t));
|
|
|
|
Writer.pwrite(static_cast<uint64_t>(Entry.Flags),
|
|
|
|
SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t));
|
|
|
|
Writer.pwrite(static_cast<uint64_t>(Entry.Offset),
|
|
|
|
SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t));
|
|
|
|
Writer.pwrite(static_cast<uint64_t>(Entry.Size),
|
|
|
|
SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t));
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
auto &OS = *OutputStream;
|
|
|
|
FileStart = OS.tell();
|
|
|
|
writeMagicIdent(Format);
|
|
|
|
|
|
|
|
allocSecHdrTable();
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2016-02-19 03:15:33 +00:00
|
|
|
std::error_code SampleProfileWriterBinary::writeSummary() {
|
|
|
|
auto &OS = *OutputStream;
|
2016-05-19 21:53:28 +00:00
|
|
|
encodeULEB128(Summary->getTotalCount(), OS);
|
|
|
|
encodeULEB128(Summary->getMaxCount(), OS);
|
2016-03-28 23:14:29 +00:00
|
|
|
encodeULEB128(Summary->getMaxFunctionCount(), OS);
|
2016-05-19 21:53:28 +00:00
|
|
|
encodeULEB128(Summary->getNumCounts(), OS);
|
2016-02-19 03:15:33 +00:00
|
|
|
encodeULEB128(Summary->getNumFunctions(), OS);
|
2021-10-29 08:23:51 -07:00
|
|
|
const std::vector<ProfileSummaryEntry> &Entries =
|
|
|
|
Summary->getDetailedSummary();
|
2016-02-19 03:15:33 +00:00
|
|
|
encodeULEB128(Entries.size(), OS);
|
|
|
|
for (auto Entry : Entries) {
|
|
|
|
encodeULEB128(Entry.Cutoff, OS);
|
|
|
|
encodeULEB128(Entry.MinCount, OS);
|
|
|
|
encodeULEB128(Entry.NumCounts, OS);
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2016-03-03 18:09:32 +00:00
|
|
|
std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
|
2015-12-10 17:21:42 +00:00
|
|
|
auto &OS = *OutputStream;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (std::error_code EC = writeContextIdx(S.getContext()))
|
2015-10-13 22:48:46 +00:00
|
|
|
return EC;
|
|
|
|
|
2014-10-30 18:00:06 +00:00
|
|
|
encodeULEB128(S.getTotalSamples(), OS);
|
2015-10-09 17:54:24 +00:00
|
|
|
|
|
|
|
// Emit all the body samples.
|
2015-10-16 18:54:35 +00:00
|
|
|
encodeULEB128(S.getBodySamples().size(), OS);
|
2014-11-01 00:56:55 +00:00
|
|
|
for (const auto &I : S.getBodySamples()) {
|
|
|
|
LineLocation Loc = I.first;
|
|
|
|
const SampleRecord &Sample = I.second;
|
2014-10-30 18:00:06 +00:00
|
|
|
encodeULEB128(Loc.LineOffset, OS);
|
|
|
|
encodeULEB128(Loc.Discriminator, OS);
|
|
|
|
encodeULEB128(Sample.getSamples(), OS);
|
|
|
|
encodeULEB128(Sample.getCallTargets().size(), OS);
|
2019-08-20 20:52:00 +00:00
|
|
|
for (const auto &J : Sample.getSortedCallTargets()) {
|
|
|
|
StringRef Callee = J.first;
|
2015-10-15 16:36:21 +00:00
|
|
|
uint64_t CalleeSamples = J.second;
|
2015-10-13 22:48:46 +00:00
|
|
|
if (std::error_code EC = writeNameIdx(Callee))
|
|
|
|
return EC;
|
2014-10-30 18:00:06 +00:00
|
|
|
encodeULEB128(CalleeSamples, OS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-09 17:54:24 +00:00
|
|
|
// Recursively emit all the callsite samples.
|
2017-08-03 00:09:18 +00:00
|
|
|
uint64_t NumCallsites = 0;
|
|
|
|
for (const auto &J : S.getCallsiteSamples())
|
|
|
|
NumCallsites += J.second.size();
|
|
|
|
encodeULEB128(NumCallsites, OS);
|
2017-04-13 19:52:10 +00:00
|
|
|
for (const auto &J : S.getCallsiteSamples())
|
|
|
|
for (const auto &FS : J.second) {
|
|
|
|
LineLocation Loc = J.first;
|
|
|
|
const FunctionSamples &CalleeSamples = FS.second;
|
|
|
|
encodeULEB128(Loc.LineOffset, OS);
|
|
|
|
encodeULEB128(Loc.Discriminator, OS);
|
|
|
|
if (std::error_code EC = writeBody(CalleeSamples))
|
|
|
|
return EC;
|
|
|
|
}
|
2015-10-09 17:54:24 +00:00
|
|
|
|
2015-10-13 22:48:46 +00:00
|
|
|
return sampleprof_error::success;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
2014-11-01 00:56:55 +00:00
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Write samples of a top-level function to a binary file.
|
2015-10-16 18:54:35 +00:00
|
|
|
///
|
|
|
|
/// \returns true if the samples were written successfully, false otherwise.
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeSample(const FunctionSamples &S) {
|
2015-12-10 17:21:42 +00:00
|
|
|
encodeULEB128(S.getHeadSamples(), *OutputStream);
|
2018-09-14 20:52:59 +00:00
|
|
|
return writeBody(S);
|
|
|
|
}
|
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Create a sample profile file writer based on the specified format.
|
2014-11-01 00:56:55 +00:00
|
|
|
///
|
|
|
|
/// \param Filename The file to create.
|
|
|
|
///
|
|
|
|
/// \param Format Encoding format for the profile file.
|
|
|
|
///
|
|
|
|
/// \returns an error code indicating the status of the created writer.
|
2014-11-03 00:51:45 +00:00
|
|
|
ErrorOr<std::unique_ptr<SampleProfileWriter>>
|
|
|
|
SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
|
2014-11-01 00:56:55 +00:00
|
|
|
std::error_code EC;
|
2015-12-10 17:21:42 +00:00
|
|
|
std::unique_ptr<raw_ostream> OS;
|
2023-04-29 00:30:25 +00:00
|
|
|
if (Format == SPF_Binary || Format == SPF_Ext_Binary)
|
2019-08-05 05:43:48 +00:00
|
|
|
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None));
|
2015-12-10 17:21:42 +00:00
|
|
|
else
|
2021-04-06 07:22:41 -04:00
|
|
|
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_TextWithCRLF));
|
2015-12-10 17:21:42 +00:00
|
|
|
if (EC)
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return create(OS, Format);
|
|
|
|
}
|
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Create a sample profile stream writer based on the specified format.
|
2015-12-10 17:21:42 +00:00
|
|
|
///
|
|
|
|
/// \param OS The output stream to store the profile data to.
|
|
|
|
///
|
|
|
|
/// \param Format Encoding format for the profile file.
|
|
|
|
///
|
|
|
|
/// \returns an error code indicating the status of the created writer.
|
|
|
|
ErrorOr<std::unique_ptr<SampleProfileWriter>>
|
|
|
|
SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
|
|
|
|
SampleProfileFormat Format) {
|
|
|
|
std::error_code EC;
|
2014-11-03 00:51:45 +00:00
|
|
|
std::unique_ptr<SampleProfileWriter> Writer;
|
2014-11-01 00:56:55 +00:00
|
|
|
|
2021-04-24 10:37:55 -07:00
|
|
|
// Currently only Text and Extended Binary format are supported for CSSPGO.
|
2022-04-28 11:31:02 -07:00
|
|
|
if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
|
2023-04-29 00:30:25 +00:00
|
|
|
Format == SPF_Binary)
|
2021-04-24 10:37:55 -07:00
|
|
|
return sampleprof_error::unsupported_writing_format;
|
|
|
|
|
2018-06-12 05:53:49 +00:00
|
|
|
if (Format == SPF_Binary)
|
2018-06-11 22:40:43 +00:00
|
|
|
Writer.reset(new SampleProfileWriterRawBinary(OS));
|
2019-08-23 19:05:30 +00:00
|
|
|
else if (Format == SPF_Ext_Binary)
|
|
|
|
Writer.reset(new SampleProfileWriterExtBinary(OS));
|
2014-11-01 00:56:55 +00:00
|
|
|
else if (Format == SPF_Text)
|
2015-12-10 17:21:42 +00:00
|
|
|
Writer.reset(new SampleProfileWriterText(OS));
|
2015-10-13 22:48:46 +00:00
|
|
|
else if (Format == SPF_GCC)
|
|
|
|
EC = sampleprof_error::unsupported_writing_format;
|
2014-11-01 00:56:55 +00:00
|
|
|
else
|
|
|
|
EC = sampleprof_error::unrecognized_format;
|
|
|
|
|
2014-11-03 00:51:45 +00:00
|
|
|
if (EC)
|
|
|
|
return EC;
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
Writer->Format = Format;
|
2020-02-10 07:06:45 -08:00
|
|
|
return std::move(Writer);
|
2014-11-01 00:56:55 +00:00
|
|
|
}
|
2016-02-19 03:15:33 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section that stores all full contexts logically in the form of `vector<int>`, where each element is an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
void SampleProfileWriter::computeSummary(const SampleProfileMap &ProfileMap) {
|
2016-05-19 21:07:12 +00:00
|
|
|
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
2021-02-03 13:27:35 -08:00
|
|
|
Summary = Builder.computeSummaryForProfiles(ProfileMap);
|
2016-02-19 03:15:33 +00:00
|
|
|
}
|