2014-10-30 18:00:06 +00:00
|
|
|
//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
|
|
|
|
//
|
2019-01-19 08:50:56 +00:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-10-30 18:00:06 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements the class that writes LLVM sample profiles. It
|
|
|
|
// supports two file formats: text and binary. The textual representation
|
|
|
|
// is useful for debugging and testing purposes. The binary representation
|
|
|
|
// is more compact, resulting in smaller file sizes. However, they can
|
|
|
|
// both be used interchangeably.
|
|
|
|
//
|
|
|
|
// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the
|
|
|
|
// supported formats.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-06-06 11:49:48 +00:00
|
|
|
#include "llvm/ProfileData/SampleProfWriter.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
2021-01-05 23:24:43 -08:00
|
|
|
#include "llvm/ADT/StringSet.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/ProfileData/ProfileCommon.h"
|
|
|
|
#include "llvm/ProfileData/SampleProf.h"
|
2019-10-07 16:12:37 +00:00
|
|
|
#include "llvm/Support/Compression.h"
|
2018-09-14 20:52:59 +00:00
|
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
#include "llvm/Support/EndianStream.h"
|
2014-10-30 18:00:06 +00:00
|
|
|
#include "llvm/Support/ErrorOr.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2014-10-30 18:00:06 +00:00
|
|
|
#include "llvm/Support/LEB128.h"
|
2018-06-11 22:40:43 +00:00
|
|
|
#include "llvm/Support/MD5.h"
|
2017-03-03 01:07:34 +00:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <memory>
|
2017-05-11 23:43:44 +00:00
|
|
|
#include <set>
|
2017-03-03 01:07:34 +00:00
|
|
|
#include <system_error>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
2014-10-30 18:00:06 +00:00
|
|
|
|
|
|
|
using namespace llvm;
|
2017-03-03 01:07:34 +00:00
|
|
|
using namespace sampleprof;
|
2014-10-30 18:00:06 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
|
2017-05-11 23:43:44 +00:00
|
|
|
std::vector<NameFunctionSamples> V;
|
2021-08-16 14:17:43 -07:00
|
|
|
sortFuncProfiles(ProfileMap, V);
|
2017-05-11 23:43:44 +00:00
|
|
|
for (const auto &I : V) {
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeSample(*I.second))
|
2017-05-11 23:43:44 +00:00
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code SampleProfileWriter::write(const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeHeader(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
if (std::error_code EC = writeFuncProfiles(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the current position and prepare to use it as the start
|
2020-12-14 14:49:20 -08:00
|
|
|
/// position of a section given the section type \p Type and its position
|
|
|
|
/// \p LayoutIdx in SectionHdrLayout.
|
|
|
|
uint64_t
|
|
|
|
SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type,
|
|
|
|
uint32_t LayoutIdx) {
|
2019-10-07 16:12:37 +00:00
|
|
|
uint64_t SectionStart = OutputStream->tell();
|
2020-12-14 14:49:20 -08:00
|
|
|
assert(LayoutIdx < SectionHdrLayout.size() && "LayoutIdx out of range");
|
|
|
|
const auto &Entry = SectionHdrLayout[LayoutIdx];
|
|
|
|
assert(Entry.Type == Type && "Unexpected section type");
|
2019-10-07 16:12:37 +00:00
|
|
|
// Use LocalBuf as a temporary output for writting data.
|
2020-03-03 13:19:32 -08:00
|
|
|
if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
|
2019-10-07 16:12:37 +00:00
|
|
|
LocalBufStream.swap(OutputStream);
|
|
|
|
return SectionStart;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
|
|
|
|
if (!llvm::zlib::isAvailable())
|
|
|
|
return sampleprof_error::zlib_unavailable;
|
|
|
|
std::string &UncompressedStrings =
|
|
|
|
static_cast<raw_string_ostream *>(LocalBufStream.get())->str();
|
|
|
|
if (UncompressedStrings.size() == 0)
|
|
|
|
return sampleprof_error::success;
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
SmallString<128> CompressedStrings;
|
|
|
|
llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
|
|
|
|
zlib::BestSizeCompression);
|
|
|
|
if (E)
|
|
|
|
return sampleprof_error::compress_failed;
|
|
|
|
encodeULEB128(UncompressedStrings.size(), OS);
|
|
|
|
encodeULEB128(CompressedStrings.size(), OS);
|
|
|
|
OS << CompressedStrings.str();
|
|
|
|
UncompressedStrings.clear();
|
|
|
|
return sampleprof_error::success;
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
/// Add a new section into section header table given the section type
|
|
|
|
/// \p Type, its position \p LayoutIdx in SectionHdrLayout and the
|
|
|
|
/// location \p SectionStart where the section should be written to.
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
|
|
|
|
SecType Type, uint32_t LayoutIdx, uint64_t SectionStart) {
|
|
|
|
assert(LayoutIdx < SectionHdrLayout.size() && "LayoutIdx out of range");
|
|
|
|
const auto &Entry = SectionHdrLayout[LayoutIdx];
|
|
|
|
assert(Entry.Type == Type && "Unexpected section type");
|
2020-03-03 13:19:32 -08:00
|
|
|
if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) {
|
2019-10-07 16:12:37 +00:00
|
|
|
LocalBufStream.swap(OutputStream);
|
|
|
|
if (std::error_code EC = compressAndOutput())
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart,
|
2020-12-14 14:49:20 -08:00
|
|
|
OutputStream->tell() - SectionStart, LayoutIdx});
|
2019-10-07 16:12:37 +00:00
|
|
|
return sampleprof_error::success;
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeHeader(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
2019-10-07 16:12:37 +00:00
|
|
|
std::string LocalBuf;
|
|
|
|
LocalBufStream = std::make_unique<raw_string_ostream>(LocalBuf);
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeSections(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
if (std::error_code EC = writeSecHdrTable())
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeContextIdx(
|
|
|
|
const SampleContext &Context) {
|
|
|
|
if (Context.hasContext())
|
|
|
|
return writeCSNameIdx(Context);
|
|
|
|
else
|
|
|
|
return SampleProfileWriterBinary::writeNameIdx(Context.getName());
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterExtBinaryBase::writeCSNameIdx(const SampleContext &Context) {
|
|
|
|
const auto &Ret = CSNameTable.find(Context);
|
|
|
|
if (Ret == CSNameTable.end())
|
|
|
|
return sampleprof_error::truncated_name_table;
|
|
|
|
encodeULEB128(Ret->second, *OutputStream);
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2019-10-09 21:36:03 +00:00
|
|
|
std::error_code
|
2020-10-15 15:17:28 -07:00
|
|
|
SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
|
2019-10-09 21:36:03 +00:00
|
|
|
uint64_t Offset = OutputStream->tell();
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
auto &Context = S.getContext();
|
|
|
|
FuncOffsetTable[Context] = Offset - SecLBRProfileStart;
|
2019-10-09 21:36:03 +00:00
|
|
|
encodeULEB128(S.getHeadSamples(), *OutputStream);
|
|
|
|
return writeBody(S);
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
|
2019-10-09 21:36:03 +00:00
|
|
|
auto &OS = *OutputStream;
|
|
|
|
|
|
|
|
// Write out the table size.
|
|
|
|
encodeULEB128(FuncOffsetTable.size(), OS);
|
|
|
|
|
|
|
|
// Write out FuncOffsetTable.
|
2021-08-31 16:30:49 -07:00
|
|
|
auto WriteItem = [&](const SampleContext &Context, uint64_t Offset) {
|
|
|
|
if (std::error_code EC = writeContextIdx(Context))
|
2021-04-07 23:06:39 -07:00
|
|
|
return EC;
|
2021-08-31 16:30:49 -07:00
|
|
|
encodeULEB128(Offset, OS);
|
|
|
|
return (std::error_code)sampleprof_error::success;
|
|
|
|
};
|
|
|
|
|
|
|
|
if (FunctionSamples::ProfileIsCS) {
|
|
|
|
// Sort the contexts before writing them out. This is to help fast load all
|
|
|
|
// context profiles for a function as well as their callee contexts which
|
|
|
|
// can help profile-guided importing for ThinLTO.
|
|
|
|
std::map<SampleContext, uint64_t> OrderedFuncOffsetTable(
|
|
|
|
FuncOffsetTable.begin(), FuncOffsetTable.end());
|
|
|
|
for (const auto &Entry : OrderedFuncOffsetTable) {
|
|
|
|
if (std::error_code EC = WriteItem(Entry.first, Entry.second))
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
addSectionFlag(SecFuncOffsetTable, SecFuncOffsetFlags::SecFlagOrdered);
|
|
|
|
} else {
|
|
|
|
for (const auto &Entry : FuncOffsetTable) {
|
|
|
|
if (std::error_code EC = WriteItem(Entry.first, Entry.second))
|
|
|
|
return EC;
|
|
|
|
}
|
2019-10-09 21:36:03 +00:00
|
|
|
}
|
2021-08-31 16:30:49 -07:00
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
FuncOffsetTable.clear();
|
2019-10-09 21:36:03 +00:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-12-16 12:54:50 -08:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &Profiles) {
|
2021-02-19 22:46:30 -08:00
|
|
|
if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS)
|
2020-12-16 12:54:50 -08:00
|
|
|
return sampleprof_error::success;
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
for (const auto &Entry : Profiles) {
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (std::error_code EC = writeContextIdx(Entry.second.getContext()))
|
2021-04-07 23:06:39 -07:00
|
|
|
return EC;
|
2021-02-19 22:46:30 -08:00
|
|
|
if (FunctionSamples::ProfileIsProbeBased)
|
|
|
|
encodeULEB128(Entry.second.getFunctionHash(), OS);
|
|
|
|
if (FunctionSamples::ProfileIsCS)
|
|
|
|
encodeULEB128(Entry.second.getContext().getAllAttributes(), OS);
|
2020-12-16 12:54:50 -08:00
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
|
2020-03-03 13:19:32 -08:00
|
|
|
if (!UseMD5)
|
|
|
|
return SampleProfileWriterBinary::writeNameTable();
|
|
|
|
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
std::set<StringRef> V;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
stablizeNameTable(NameTable, V);
|
2020-03-03 13:19:32 -08:00
|
|
|
|
2020-12-03 12:19:25 -08:00
|
|
|
// Write out the MD5 name table. We wrote unencoded MD5 so reader can
|
|
|
|
// retrieve the name using the name index without having to read the
|
|
|
|
// whole name table.
|
2020-03-03 13:19:32 -08:00
|
|
|
encodeULEB128(NameTable.size(), OS);
|
2020-12-03 12:19:25 -08:00
|
|
|
support::endian::Writer Writer(OS, support::little);
|
|
|
|
for (auto N : V)
|
|
|
|
Writer.write(MD5Hash(N));
|
2020-03-03 13:19:32 -08:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
for (const auto &I : ProfileMap) {
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
assert(I.first == I.second.getContext() && "Inconsistent profile map");
|
|
|
|
addContext(I.second.getContext());
|
2019-08-23 19:05:30 +00:00
|
|
|
addNames(I.second);
|
|
|
|
}
|
2021-01-19 09:20:13 -08:00
|
|
|
|
|
|
|
// If NameTable contains ".__uniq." suffix, set SecFlagUniqSuffix flag
|
|
|
|
// so compiler won't strip the suffix during profile matching after
|
|
|
|
// seeing the flag in the profile.
|
|
|
|
for (const auto &I : NameTable) {
|
2021-10-23 08:45:27 -07:00
|
|
|
if (I.first.contains(FunctionSamples::UniqSuffix)) {
|
2021-01-19 09:20:13 -08:00
|
|
|
addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
if (auto EC = writeNameTable())
|
2019-10-07 16:12:37 +00:00
|
|
|
return EC;
|
2020-10-15 15:17:28 -07:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2019-08-23 19:05:30 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently, context strings contain many duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
As for the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() {
|
|
|
|
// Sort the names to make CSNameTable deterministic.
|
|
|
|
std::set<SampleContext> OrderedContexts;
|
|
|
|
for (const auto &I : CSNameTable)
|
|
|
|
OrderedContexts.insert(I.first);
|
|
|
|
assert(OrderedContexts.size() == CSNameTable.size() &&
|
|
|
|
"Unmatched ordered and unordered contexts");
|
|
|
|
uint64_t I = 0;
|
|
|
|
for (auto &Context : OrderedContexts)
|
|
|
|
CSNameTable[Context] = I++;
|
|
|
|
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
encodeULEB128(OrderedContexts.size(), OS);
|
|
|
|
support::endian::Writer Writer(OS, support::little);
|
|
|
|
for (auto Context : OrderedContexts) {
|
|
|
|
auto Frames = Context.getContextFrames();
|
|
|
|
encodeULEB128(Frames.size(), OS);
|
|
|
|
for (auto &Callsite : Frames) {
|
2021-10-01 16:51:38 -07:00
|
|
|
if (std::error_code EC = writeNameIdx(Callsite.FuncName))
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
return EC;
|
2021-10-01 16:51:38 -07:00
|
|
|
encodeULEB128(Callsite.Location.LineOffset, OS);
|
|
|
|
encodeULEB128(Callsite.Location.Discriminator, OS);
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
|
|
|
|
if (ProfSymList && ProfSymList->size() > 0)
|
|
|
|
if (std::error_code EC = ProfSymList->write(*OutputStream))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2019-10-07 16:12:37 +00:00
|
|
|
|
2020-10-15 15:17:28 -07:00
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
SecType Type, uint32_t LayoutIdx, const SampleProfileMap &ProfileMap) {
|
2020-10-15 15:17:28 -07:00
|
|
|
// The setting of SecFlagCompress should happen before markSectionStart.
|
|
|
|
if (Type == SecProfileSymbolList && ProfSymList && ProfSymList->toCompress())
|
2019-10-07 16:12:37 +00:00
|
|
|
setToCompressSection(SecProfileSymbolList);
|
2020-12-16 12:54:50 -08:00
|
|
|
if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
|
|
|
|
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
|
[CSSPGO] Load context profile for external functions in PreLink and populate ThinLTO import list
For ThinLTO's prelink compilation, we need to put external inline candidates into an import list attached to function's entry count metadata. This enables ThinLink to treat such cross module callee as hot in summary index, and later helps postlink to import them for profile guided cross module inlining.
For AutoFDO, the import list is retrieved by traversing the nested inlinee functions. For CSSPGO, since profile is flatterned, a few things need to happen for it to work:
- When loading input profile in extended binary format, we need to load all child context profile whose parent is in current module, so context trie for current module includes potential cross module inlinee.
- In order to make the above happen, we need to know whether input profile is CSSPGO profile before start reading function profile, hence a flag for profile summary section is added.
- When searching for cross module inline candidate, we need to walk through the context trie instead of nested inlinee profile (callsite sample of AutoFDO profile).
- Now that we have more accurate counts with CSSPGO, we swtiched to use entry count instead of total count to decided if an external callee is potentially beneficial to inline. This make it consistent with how we determine whether call tagert is potential inline candidate.
Differential Revision: https://reviews.llvm.org/D98590
2021-03-13 13:55:28 -08:00
|
|
|
if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
|
|
|
|
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
|
2021-02-19 22:46:30 -08:00
|
|
|
if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCS)
|
|
|
|
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute);
|
2021-05-27 11:34:22 -07:00
|
|
|
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
|
|
|
|
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
|
2019-08-31 02:27:26 +00:00
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
|
2020-10-15 15:17:28 -07:00
|
|
|
switch (Type) {
|
|
|
|
case SecProfSummary:
|
|
|
|
computeSummary(ProfileMap);
|
|
|
|
if (auto EC = writeSummary())
|
|
|
|
return EC;
|
|
|
|
break;
|
|
|
|
case SecNameTable:
|
|
|
|
if (auto EC = writeNameTableSection(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
break;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
case SecCSNameTable:
|
|
|
|
if (auto EC = writeCSNameTableSection())
|
|
|
|
return EC;
|
|
|
|
break;
|
2020-10-15 15:17:28 -07:00
|
|
|
case SecLBRProfile:
|
|
|
|
SecLBRProfileStart = OutputStream->tell();
|
|
|
|
if (std::error_code EC = writeFuncProfiles(ProfileMap))
|
2019-08-31 02:27:26 +00:00
|
|
|
return EC;
|
2020-10-15 15:17:28 -07:00
|
|
|
break;
|
|
|
|
case SecFuncOffsetTable:
|
|
|
|
if (auto EC = writeFuncOffsetTable())
|
|
|
|
return EC;
|
|
|
|
break;
|
2020-12-16 12:54:50 -08:00
|
|
|
case SecFuncMetadata:
|
|
|
|
if (std::error_code EC = writeFuncMetadata(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
break;
|
2020-10-15 15:17:28 -07:00
|
|
|
case SecProfileSymbolList:
|
|
|
|
if (auto EC = writeProfileSymbolListSection())
|
|
|
|
return EC;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (auto EC = writeCustomSection(Type))
|
|
|
|
return EC;
|
|
|
|
break;
|
|
|
|
}
|
2020-12-14 14:49:20 -08:00
|
|
|
if (std::error_code EC = addNewSection(Type, LayoutIdx, SectionStart))
|
2019-10-07 16:12:37 +00:00
|
|
|
return EC;
|
2020-10-15 15:17:28 -07:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2019-08-23 19:05:30 +00:00
|
|
|
|
2021-01-05 23:24:43 -08:00
|
|
|
std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2020-12-14 14:49:20 -08:00
|
|
|
// The const indices passed to writeOneSection below are specifying the
|
|
|
|
// positions of the sections in SectionHdrLayout. Look at
|
|
|
|
// initSectionHdrLayout to find out where each section is located in
|
|
|
|
// SectionHdrLayout.
|
|
|
|
if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
|
2019-10-09 21:36:03 +00:00
|
|
|
return EC;
|
2020-12-14 14:49:20 -08:00
|
|
|
if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
|
2020-10-15 15:17:28 -07:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecCSNameTable, 2, ProfileMap))
|
2020-10-15 15:17:28 -07:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecLBRProfile, 4, ProfileMap))
|
2020-10-15 15:17:28 -07:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecProfileSymbolList, 5, ProfileMap))
|
2019-10-09 21:36:03 +00:00
|
|
|
return EC;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (auto EC = writeOneSection(SecFuncOffsetTable, 3, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecFuncMetadata, 6, ProfileMap))
|
2020-12-16 12:54:50 -08:00
|
|
|
return EC;
|
2019-08-23 19:05:30 +00:00
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently, context strings contain many duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
As for the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is changed only for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
static void splitProfileMapToTwo(const SampleProfileMap &ProfileMap,
|
|
|
|
SampleProfileMap &ContextProfileMap,
|
|
|
|
SampleProfileMap &NoContextProfileMap) {
|
2021-01-05 23:24:43 -08:00
|
|
|
for (const auto &I : ProfileMap) {
|
|
|
|
if (I.second.getCallsiteSamples().size())
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
ContextProfileMap.insert({I.first, I.second});
|
2021-01-05 23:24:43 -08:00
|
|
|
else
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
NoContextProfileMap.insert({I.first, I.second});
|
2021-01-05 23:24:43 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
|
|
|
SampleProfileMap ContextProfileMap, NoContextProfileMap;
|
2021-01-05 23:24:43 -08:00
|
|
|
splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap);
|
|
|
|
|
|
|
|
if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
// Mark the section to have no context. Note section flag needs to be set
|
|
|
|
// before writing the section.
|
|
|
|
addSectionFlag(5, SecCommonFlags::SecFlagFlat);
|
|
|
|
if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
// Mark the section to have no context. Note section flag needs to be set
|
|
|
|
// before writing the section.
|
|
|
|
addSectionFlag(4, SecCommonFlags::SecFlagFlat);
|
|
|
|
if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinary::writeSections(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2021-01-05 23:24:43 -08:00
|
|
|
std::error_code EC;
|
|
|
|
if (SecLayout == DefaultLayout)
|
|
|
|
EC = writeDefaultLayout(ProfileMap);
|
|
|
|
else if (SecLayout == CtxSplitLayout)
|
|
|
|
EC = writeCtxSplitLayout(ProfileMap);
|
|
|
|
else
|
|
|
|
llvm_unreachable("Unsupported layout");
|
|
|
|
return EC;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
|
2018-09-14 20:52:59 +00:00
|
|
|
if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
if (std::error_code EC = writeFuncOffsetTable())
|
|
|
|
return EC;
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Write samples to a text file.
|
2015-11-13 20:24:28 +00:00
|
|
|
///
|
|
|
|
/// Note: it may be tempting to implement this in terms of
|
2015-11-19 15:33:08 +00:00
|
|
|
/// FunctionSamples::print(). Please don't. The dump functionality is intended
|
2015-11-13 20:24:28 +00:00
|
|
|
/// for debugging and has no specified form.
|
|
|
|
///
|
|
|
|
/// The format used here is more structured and deliberate because
|
|
|
|
/// it needs to be parsed by the SampleProfileReaderText class.
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
|
2015-12-10 17:21:42 +00:00
|
|
|
auto &OS = *OutputStream;
|
2021-04-07 23:06:39 -07:00
|
|
|
if (FunctionSamples::ProfileIsCS)
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
|
2021-04-07 23:06:39 -07:00
|
|
|
else
|
|
|
|
OS << S.getName() << ":" << S.getTotalSamples();
|
|
|
|
|
2015-10-08 19:40:37 +00:00
|
|
|
if (Indent == 0)
|
|
|
|
OS << ":" << S.getHeadSamples();
|
|
|
|
OS << "\n";
|
2014-10-30 18:00:06 +00:00
|
|
|
|
2015-11-19 15:33:08 +00:00
|
|
|
SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
|
|
|
|
for (const auto &I : SortedSamples.get()) {
|
|
|
|
LineLocation Loc = I->first;
|
|
|
|
const SampleRecord &Sample = I->second;
|
2015-10-08 19:40:37 +00:00
|
|
|
OS.indent(Indent + 1);
|
2014-10-30 18:00:06 +00:00
|
|
|
if (Loc.Discriminator == 0)
|
|
|
|
OS << Loc.LineOffset << ": ";
|
|
|
|
else
|
|
|
|
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
|
|
|
|
|
|
|
|
OS << Sample.getSamples();
|
|
|
|
|
2019-08-20 20:52:00 +00:00
|
|
|
for (const auto &J : Sample.getSortedCallTargets())
|
|
|
|
OS << " " << J.first << ":" << J.second;
|
2014-10-30 18:00:06 +00:00
|
|
|
OS << "\n";
|
|
|
|
}
|
|
|
|
|
2017-04-13 19:52:10 +00:00
|
|
|
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
|
2015-11-19 15:33:08 +00:00
|
|
|
S.getCallsiteSamples());
|
2015-10-08 19:40:37 +00:00
|
|
|
Indent += 1;
|
2017-04-13 19:52:10 +00:00
|
|
|
for (const auto &I : SortedCallsiteSamples.get())
|
|
|
|
for (const auto &FS : I->second) {
|
|
|
|
LineLocation Loc = I->first;
|
|
|
|
const FunctionSamples &CalleeSamples = FS.second;
|
|
|
|
OS.indent(Indent);
|
|
|
|
if (Loc.Discriminator == 0)
|
|
|
|
OS << Loc.LineOffset << ": ";
|
|
|
|
else
|
|
|
|
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
|
2019-08-23 19:05:30 +00:00
|
|
|
if (std::error_code EC = writeSample(CalleeSamples))
|
2017-04-13 19:52:10 +00:00
|
|
|
return EC;
|
|
|
|
}
|
2015-10-08 19:40:37 +00:00
|
|
|
Indent -= 1;
|
|
|
|
|
2020-12-16 12:54:50 -08:00
|
|
|
if (Indent == 0) {
|
|
|
|
if (FunctionSamples::ProfileIsProbeBased) {
|
|
|
|
OS.indent(Indent + 1);
|
|
|
|
OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
|
|
|
|
}
|
2021-02-19 22:46:30 -08:00
|
|
|
if (FunctionSamples::ProfileIsCS) {
|
|
|
|
OS.indent(Indent + 1);
|
|
|
|
OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n";
|
|
|
|
}
|
2020-12-16 12:54:50 -08:00
|
|
|
}
|
|
|
|
|
2015-10-13 22:48:46 +00:00
|
|
|
return sampleprof_error::success;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeContextIdx(const SampleContext &Context) {
|
|
|
|
assert(!Context.hasContext() && "cs profile is not supported");
|
|
|
|
return writeNameIdx(Context.getName());
|
|
|
|
}
|
2021-04-07 23:06:39 -07:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
/// Write the name-table index of \p FName to the output stream as ULEB128.
///
/// \returns sampleprof_error::truncated_name_table if \p FName is not present
/// in the name table (nothing is written in that case), otherwise
/// sampleprof_error::success.
std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
  auto &Table = getNameTable();
  auto Entry = Table.find(FName);
  // A name that was never registered cannot be encoded as an index.
  if (Entry == Table.end())
    return sampleprof_error::truncated_name_table;

  encodeULEB128(Entry->second, *OutputStream);
  return sampleprof_error::success;
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
/// Register \p FName in the name table with a placeholder value of 0.
///
/// The entry is added via the table's insert(), so the call only attempts to
/// add a new entry for \p FName.
void SampleProfileWriterBinary::addName(StringRef FName) {
  getNameTable().insert(std::make_pair(FName, 0));
}
|
|
|
|
|
|
|
|
void SampleProfileWriterBinary::addContext(const SampleContext &Context) {
|
|
|
|
addName(Context.getName());
|
2015-10-13 22:48:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
|
|
|
|
// Add all the names in indirect call targets.
|
|
|
|
for (const auto &I : S.getBodySamples()) {
|
|
|
|
const SampleRecord &Sample = I.second;
|
|
|
|
for (const auto &J : Sample.getCallTargets())
|
|
|
|
addName(J.first());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recursively add all the names for inlined callsites.
|
2017-04-13 19:52:10 +00:00
|
|
|
for (const auto &J : S.getCallsiteSamples())
|
|
|
|
for (const auto &FS : J.second) {
|
|
|
|
const FunctionSamples &CalleeSamples = FS.second;
|
|
|
|
addName(CalleeSamples.getName());
|
|
|
|
addNames(CalleeSamples);
|
|
|
|
}
|
2015-10-13 22:48:46 +00:00
|
|
|
}
|
2014-10-30 18:00:06 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
void SampleProfileWriterExtBinaryBase::addContext(
|
|
|
|
const SampleContext &Context) {
|
|
|
|
if (Context.hasContext()) {
|
|
|
|
for (auto &Callsite : Context.getContextFrames())
|
2021-10-01 16:51:38 -07:00
|
|
|
SampleProfileWriterBinary::addName(Callsite.FuncName);
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
CSNameTable.insert(std::make_pair(Context, 0));
|
|
|
|
} else {
|
|
|
|
SampleProfileWriterBinary::addName(Context.getName());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Give every name in \p NameTable a deterministic index.
///
/// All keys of \p NameTable are inserted into the ordered set \p V, then each
/// name is assigned its position in that set's iteration order as its value
/// in \p NameTable. \p V is left populated for the caller.
void SampleProfileWriterBinary::stablizeNameTable(
    MapVector<StringRef, uint32_t> &NameTable, std::set<StringRef> &V) {
  // Collect the names into the ordered set so the numbering below does not
  // depend on the order in which names were added.
  for (const auto &NameAndIdx : NameTable) {
    V.insert(NameAndIdx.first);
  }

  int NextIdx = 0;
  for (const StringRef &Name : V) {
    NameTable[Name] = NextIdx++;
  }
}
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code SampleProfileWriterBinary::writeNameTable() {
|
2015-12-10 17:21:42 +00:00
|
|
|
auto &OS = *OutputStream;
|
2018-06-11 22:40:43 +00:00
|
|
|
std::set<StringRef> V;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
stablizeNameTable(NameTable, V);
|
2018-06-11 22:40:43 +00:00
|
|
|
|
|
|
|
// Write out the name table.
|
|
|
|
encodeULEB128(NameTable.size(), OS);
|
|
|
|
for (auto N : V) {
|
|
|
|
OS << N;
|
|
|
|
encodeULEB128(0, OS);
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2018-09-14 20:52:59 +00:00
|
|
|
std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
|
|
|
|
// Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
|
|
|
|
auto &OFS = static_cast<raw_fd_ostream &>(OS);
|
|
|
|
uint64_t FuncOffsetTableStart = OS.tell();
|
|
|
|
if (OFS.seek(TableOffset) == (uint64_t)-1)
|
|
|
|
return sampleprof_error::ostream_seek_unsupported;
|
|
|
|
support::endian::Writer Writer(*OutputStream, support::little);
|
|
|
|
Writer.write(FuncOffsetTableStart);
|
|
|
|
if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
|
|
|
|
return sampleprof_error::ostream_seek_unsupported;
|
|
|
|
|
|
|
|
// Write out the table size.
|
|
|
|
encodeULEB128(FuncOffsetTable.size(), OS);
|
|
|
|
|
|
|
|
// Write out FuncOffsetTable.
|
2021-04-07 23:06:39 -07:00
|
|
|
for (auto Entry : FuncOffsetTable) {
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (std::error_code EC = writeNameIdx(Entry.first))
|
2021-04-07 23:06:39 -07:00
|
|
|
return EC;
|
|
|
|
encodeULEB128(Entry.second, OS);
|
2018-09-14 20:52:59 +00:00
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2018-06-11 22:40:43 +00:00
|
|
|
std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
|
|
|
|
auto &OS = *OutputStream;
|
|
|
|
std::set<StringRef> V;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
stablizeNameTable(NameTable, V);
|
2018-06-11 22:40:43 +00:00
|
|
|
|
|
|
|
// Write out the name table.
|
|
|
|
encodeULEB128(NameTable.size(), OS);
|
|
|
|
for (auto N : V) {
|
|
|
|
encodeULEB128(MD5Hash(N), OS);
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2015-12-10 17:21:42 +00:00
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
|
2018-06-11 22:40:43 +00:00
|
|
|
auto &OS = *OutputStream;
|
|
|
|
// Write file magic identifier.
|
2019-08-23 19:05:30 +00:00
|
|
|
encodeULEB128(SPMagic(Format), OS);
|
2018-06-11 22:40:43 +00:00
|
|
|
encodeULEB128(SPVersion(), OS);
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
writeMagicIdent(Format);
|
2015-10-13 22:48:46 +00:00
|
|
|
|
2016-02-19 03:15:33 +00:00
|
|
|
computeSummary(ProfileMap);
|
|
|
|
if (auto EC = writeSummary())
|
|
|
|
return EC;
|
|
|
|
|
2015-10-13 22:48:46 +00:00
|
|
|
// Generate the name table for all the functions referenced in the profile.
|
|
|
|
for (const auto &I : ProfileMap) {
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
assert(I.first == I.second.getContext() && "Inconsistent profile map");
|
|
|
|
addContext(I.first);
|
2015-10-13 22:48:46 +00:00
|
|
|
addNames(I.second);
|
|
|
|
}
|
|
|
|
|
2018-06-11 22:40:43 +00:00
|
|
|
writeNameTable();
|
2015-10-13 22:48:46 +00:00
|
|
|
return sampleprof_error::success;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
|
|
|
|
2019-10-07 16:12:37 +00:00
|
|
|
void SampleProfileWriterExtBinaryBase::setToCompressAllSections() {
|
2019-10-09 21:36:03 +00:00
|
|
|
for (auto &Entry : SectionHdrLayout)
|
2020-03-03 13:19:32 -08:00
|
|
|
addSecFlag(Entry, SecCommonFlags::SecFlagCompress);
|
2019-10-07 16:12:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Add SecCommonFlags::SecFlagCompress to the section(s) of kind \p Type by
/// forwarding to addSectionFlag().
void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) {
  const auto CompressFlag = SecCommonFlags::SecFlagCompress;
  addSectionFlag(Type, CompressFlag);
}
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
|
|
|
|
support::endian::Writer Writer(*OutputStream, support::little);
|
|
|
|
|
2019-10-09 21:36:03 +00:00
|
|
|
Writer.write(static_cast<uint64_t>(SectionHdrLayout.size()));
|
2019-08-23 19:05:30 +00:00
|
|
|
SecHdrTableOffset = OutputStream->tell();
|
2019-10-09 21:36:03 +00:00
|
|
|
for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
|
2019-08-23 19:05:30 +00:00
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
Writer.write(static_cast<uint64_t>(-1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
|
|
|
|
auto &OFS = static_cast<raw_fd_ostream &>(*OutputStream);
|
|
|
|
uint64_t Saved = OutputStream->tell();
|
|
|
|
|
|
|
|
// Set OutputStream to the location saved in SecHdrTableOffset.
|
|
|
|
if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
|
|
|
|
return sampleprof_error::ostream_seek_unsupported;
|
|
|
|
support::endian::Writer Writer(*OutputStream, support::little);
|
|
|
|
|
2020-12-14 14:49:20 -08:00
|
|
|
assert(SecHdrTable.size() == SectionHdrLayout.size() &&
|
|
|
|
"SecHdrTable entries doesn't match SectionHdrLayout");
|
|
|
|
SmallVector<uint32_t, 16> IndexMap(SecHdrTable.size(), -1);
|
|
|
|
for (uint32_t TableIdx = 0; TableIdx < SecHdrTable.size(); TableIdx++) {
|
|
|
|
IndexMap[SecHdrTable[TableIdx].LayoutIndex] = TableIdx;
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
2019-10-09 21:36:03 +00:00
|
|
|
// Write the section header table in the order specified in
|
2020-12-14 14:49:20 -08:00
|
|
|
// SectionHdrLayout. SectionHdrLayout specifies the sections
|
|
|
|
// order in which profile reader expect to read, so the section
|
|
|
|
// header table should be written in the order in SectionHdrLayout.
|
|
|
|
// Note that the section order in SecHdrTable may be different
|
|
|
|
// from the order in SectionHdrLayout, for example, SecFuncOffsetTable
|
|
|
|
// needs to be computed after SecLBRProfile (the order in SecHdrTable),
|
|
|
|
// but it needs to be read before SecLBRProfile (the order in
|
|
|
|
// SectionHdrLayout). So we use IndexMap above to switch the order.
|
|
|
|
for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
|
|
|
|
LayoutIdx++) {
|
|
|
|
assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
|
|
|
|
"Incorrect LayoutIdx in SecHdrTable");
|
|
|
|
auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
|
|
|
|
Writer.write(static_cast<uint64_t>(Entry.Type));
|
|
|
|
Writer.write(static_cast<uint64_t>(Entry.Flags));
|
|
|
|
Writer.write(static_cast<uint64_t>(Entry.Offset));
|
|
|
|
Writer.write(static_cast<uint64_t>(Entry.Size));
|
2019-08-23 19:05:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Reset OutputStream.
|
|
|
|
if (OFS.seek(Saved) == (uint64_t)-1)
|
|
|
|
return sampleprof_error::ostream_seek_unsupported;
|
|
|
|
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2019-08-23 19:05:30 +00:00
|
|
|
auto &OS = *OutputStream;
|
|
|
|
FileStart = OS.tell();
|
|
|
|
writeMagicIdent(Format);
|
|
|
|
|
|
|
|
allocSecHdrTable();
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2018-09-14 20:52:59 +00:00
|
|
|
std::error_code SampleProfileWriterCompactBinary::writeHeader(
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
const SampleProfileMap &ProfileMap) {
|
2018-09-14 20:52:59 +00:00
|
|
|
support::endian::Writer Writer(*OutputStream, support::little);
|
|
|
|
if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
// Reserve a slot for the offset of function offset table. The slot will
|
|
|
|
// be populated with the offset of FuncOffsetTable later.
|
|
|
|
TableOffset = OutputStream->tell();
|
|
|
|
Writer.write(static_cast<uint64_t>(-2));
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
|
|
|
|
2016-02-19 03:15:33 +00:00
|
|
|
std::error_code SampleProfileWriterBinary::writeSummary() {
|
|
|
|
auto &OS = *OutputStream;
|
2016-05-19 21:53:28 +00:00
|
|
|
encodeULEB128(Summary->getTotalCount(), OS);
|
|
|
|
encodeULEB128(Summary->getMaxCount(), OS);
|
2016-03-28 23:14:29 +00:00
|
|
|
encodeULEB128(Summary->getMaxFunctionCount(), OS);
|
2016-05-19 21:53:28 +00:00
|
|
|
encodeULEB128(Summary->getNumCounts(), OS);
|
2016-02-19 03:15:33 +00:00
|
|
|
encodeULEB128(Summary->getNumFunctions(), OS);
|
|
|
|
std::vector<ProfileSummaryEntry> &Entries = Summary->getDetailedSummary();
|
|
|
|
encodeULEB128(Entries.size(), OS);
|
|
|
|
for (auto Entry : Entries) {
|
|
|
|
encodeULEB128(Entry.Cutoff, OS);
|
|
|
|
encodeULEB128(Entry.MinCount, OS);
|
|
|
|
encodeULEB128(Entry.NumCounts, OS);
|
|
|
|
}
|
|
|
|
return sampleprof_error::success;
|
|
|
|
}
|
2016-03-03 18:09:32 +00:00
|
|
|
std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
|
2015-12-10 17:21:42 +00:00
|
|
|
auto &OS = *OutputStream;
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names and that significantly increase the profile size. This change split the context into a series of {name, offset, discriminator} tuples so function names used in the context can be replaced by the index into the name table and that significantly reduce the size consumed by context.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to profile format, nothing has changed to the text format, though internally CS context is implemented as a vector. Extbinary format is only changed for CS profile, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, which each element as an offset points to `SecNameTable`. All occurrences of contexts elsewhere are redirected to using the offset of `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
if (std::error_code EC = writeContextIdx(S.getContext()))
|
2015-10-13 22:48:46 +00:00
|
|
|
return EC;
|
|
|
|
|
2014-10-30 18:00:06 +00:00
|
|
|
encodeULEB128(S.getTotalSamples(), OS);
|
2015-10-09 17:54:24 +00:00
|
|
|
|
|
|
|
// Emit all the body samples.
|
2015-10-16 18:54:35 +00:00
|
|
|
encodeULEB128(S.getBodySamples().size(), OS);
|
2014-11-01 00:56:55 +00:00
|
|
|
for (const auto &I : S.getBodySamples()) {
|
|
|
|
LineLocation Loc = I.first;
|
|
|
|
const SampleRecord &Sample = I.second;
|
2014-10-30 18:00:06 +00:00
|
|
|
encodeULEB128(Loc.LineOffset, OS);
|
|
|
|
encodeULEB128(Loc.Discriminator, OS);
|
|
|
|
encodeULEB128(Sample.getSamples(), OS);
|
|
|
|
encodeULEB128(Sample.getCallTargets().size(), OS);
|
2019-08-20 20:52:00 +00:00
|
|
|
for (const auto &J : Sample.getSortedCallTargets()) {
|
|
|
|
StringRef Callee = J.first;
|
2015-10-15 16:36:21 +00:00
|
|
|
uint64_t CalleeSamples = J.second;
|
2015-10-13 22:48:46 +00:00
|
|
|
if (std::error_code EC = writeNameIdx(Callee))
|
|
|
|
return EC;
|
2014-10-30 18:00:06 +00:00
|
|
|
encodeULEB128(CalleeSamples, OS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-09 17:54:24 +00:00
|
|
|
// Recursively emit all the callsite samples.
|
2017-08-03 00:09:18 +00:00
|
|
|
uint64_t NumCallsites = 0;
|
|
|
|
for (const auto &J : S.getCallsiteSamples())
|
|
|
|
NumCallsites += J.second.size();
|
|
|
|
encodeULEB128(NumCallsites, OS);
|
2017-04-13 19:52:10 +00:00
|
|
|
for (const auto &J : S.getCallsiteSamples())
|
|
|
|
for (const auto &FS : J.second) {
|
|
|
|
LineLocation Loc = J.first;
|
|
|
|
const FunctionSamples &CalleeSamples = FS.second;
|
|
|
|
encodeULEB128(Loc.LineOffset, OS);
|
|
|
|
encodeULEB128(Loc.Discriminator, OS);
|
|
|
|
if (std::error_code EC = writeBody(CalleeSamples))
|
|
|
|
return EC;
|
|
|
|
}
|
2015-10-09 17:54:24 +00:00
|
|
|
|
2015-10-13 22:48:46 +00:00
|
|
|
return sampleprof_error::success;
|
2014-10-30 18:00:06 +00:00
|
|
|
}
|
2014-11-01 00:56:55 +00:00
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Write samples of a top-level function to a binary file.
|
2015-10-16 18:54:35 +00:00
|
|
|
///
|
|
|
|
/// \returns true if the samples were written successfully, false otherwise.
|
2019-08-23 19:05:30 +00:00
|
|
|
std::error_code
|
|
|
|
SampleProfileWriterBinary::writeSample(const FunctionSamples &S) {
|
2015-12-10 17:21:42 +00:00
|
|
|
encodeULEB128(S.getHeadSamples(), *OutputStream);
|
2018-09-14 20:52:59 +00:00
|
|
|
return writeBody(S);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code
|
2019-08-23 19:05:30 +00:00
|
|
|
SampleProfileWriterCompactBinary::writeSample(const FunctionSamples &S) {
|
2018-09-14 20:52:59 +00:00
|
|
|
uint64_t Offset = OutputStream->tell();
|
|
|
|
StringRef Name = S.getName();
|
|
|
|
FuncOffsetTable[Name] = Offset;
|
|
|
|
encodeULEB128(S.getHeadSamples(), *OutputStream);
|
2016-03-03 18:09:32 +00:00
|
|
|
return writeBody(S);
|
2015-10-16 18:54:35 +00:00
|
|
|
}
|
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Create a sample profile file writer based on the specified format.
|
2014-11-01 00:56:55 +00:00
|
|
|
///
|
|
|
|
/// \param Filename The file to create.
|
|
|
|
///
|
|
|
|
/// \param Format Encoding format for the profile file.
|
|
|
|
///
|
|
|
|
/// \returns an error code indicating the status of the created writer.
|
2014-11-03 00:51:45 +00:00
|
|
|
ErrorOr<std::unique_ptr<SampleProfileWriter>>
|
|
|
|
SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
|
2014-11-01 00:56:55 +00:00
|
|
|
std::error_code EC;
|
2015-12-10 17:21:42 +00:00
|
|
|
std::unique_ptr<raw_ostream> OS;
|
2019-08-23 19:05:30 +00:00
|
|
|
if (Format == SPF_Binary || Format == SPF_Ext_Binary ||
|
|
|
|
Format == SPF_Compact_Binary)
|
2019-08-05 05:43:48 +00:00
|
|
|
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None));
|
2015-12-10 17:21:42 +00:00
|
|
|
else
|
2021-04-06 07:22:41 -04:00
|
|
|
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_TextWithCRLF));
|
2015-12-10 17:21:42 +00:00
|
|
|
if (EC)
|
|
|
|
return EC;
|
|
|
|
|
|
|
|
return create(OS, Format);
|
|
|
|
}
|
|
|
|
|
2018-05-01 15:54:18 +00:00
|
|
|
/// Create a sample profile stream writer based on the specified format.
|
2015-12-10 17:21:42 +00:00
|
|
|
///
|
|
|
|
/// \param OS The output stream to store the profile data to.
|
|
|
|
///
|
|
|
|
/// \param Format Encoding format for the profile file.
|
|
|
|
///
|
|
|
|
/// \returns an error code indicating the status of the created writer.
|
|
|
|
ErrorOr<std::unique_ptr<SampleProfileWriter>>
|
|
|
|
SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
|
|
|
|
SampleProfileFormat Format) {
|
|
|
|
std::error_code EC;
|
2014-11-03 00:51:45 +00:00
|
|
|
std::unique_ptr<SampleProfileWriter> Writer;
|
2014-11-01 00:56:55 +00:00
|
|
|
|
2021-04-24 10:37:55 -07:00
|
|
|
// Currently only Text and Extended Binary format are supported for CSSPGO.
|
|
|
|
if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
|
|
|
|
(Format == SPF_Binary || Format == SPF_Compact_Binary))
|
|
|
|
return sampleprof_error::unsupported_writing_format;
|
|
|
|
|
2018-06-12 05:53:49 +00:00
|
|
|
if (Format == SPF_Binary)
|
2018-06-11 22:40:43 +00:00
|
|
|
Writer.reset(new SampleProfileWriterRawBinary(OS));
|
2019-08-23 19:05:30 +00:00
|
|
|
else if (Format == SPF_Ext_Binary)
|
|
|
|
Writer.reset(new SampleProfileWriterExtBinary(OS));
|
2018-06-11 22:40:43 +00:00
|
|
|
else if (Format == SPF_Compact_Binary)
|
|
|
|
Writer.reset(new SampleProfileWriterCompactBinary(OS));
|
2014-11-01 00:56:55 +00:00
|
|
|
else if (Format == SPF_Text)
|
2015-12-10 17:21:42 +00:00
|
|
|
Writer.reset(new SampleProfileWriterText(OS));
|
2015-10-13 22:48:46 +00:00
|
|
|
else if (Format == SPF_GCC)
|
|
|
|
EC = sampleprof_error::unsupported_writing_format;
|
2014-11-01 00:56:55 +00:00
|
|
|
else
|
|
|
|
EC = sampleprof_error::unrecognized_format;
|
|
|
|
|
2014-11-03 00:51:45 +00:00
|
|
|
if (EC)
|
|
|
|
return EC;
|
|
|
|
|
2019-08-23 19:05:30 +00:00
|
|
|
Writer->Format = Format;
|
2020-02-10 07:06:45 -08:00
|
|
|
return std::move(Writer);
|
2014-11-01 00:56:55 +00:00
|
|
|
}
|
2016-02-19 03:15:33 +00:00
|
|
|
|
[CSSPGO] Split context string to deduplicate function name used in the context.
Currently context strings contain a lot of duplicated function names, which significantly increases the profile size. This change splits the context into a series of {name, offset, discriminator} tuples so that function names used in the context can be replaced by an index into the name table, which significantly reduces the size consumed by contexts.
A follow-up improvement made in the compiler and profiling tools is to avoid reconstructing full context strings which is time- and memory- consuming. Instead a context vector of `StringRef` is adopted to represent the full context in all scenarios. As a result, the previous prevalent profile map which was implemented as a `StringRef` is now engineered as an unordered map keyed by `SampleContext`. `SampleContext` is reshaped to using an `ArrayRef` to represent a full context for CS profile. For non-CS profile, it falls back to use `StringRef` to represent a contextless function name. Both the `ArrayRef` and `StringRef` objects are underpinned by real array and string objects that are stored in producer buffers. For compiler, they are maintained by the sample reader. For llvm-profgen, they are maintained in `ProfiledBinary` and `ProfileGenerator`. Full context strings can be generated only in those cases of debugging and printing.
When it comes to the profile format, nothing has changed in the text format, though internally the CS context is implemented as a vector. The extbinary format is only changed for CS profiles, with an additional `SecCSNameTable` section which stores all full contexts logically in the form of `vector<int>`, with each element being an offset that points into `SecNameTable`. All occurrences of contexts elsewhere are redirected to use the offset into `SecCSNameTable`.
Testing
This is no-diff change in terms of code quality and profile content (for text profile).
For our internal large service (aka ads), the profile generation is cut to half, with a 20x smaller string-based extbinary format generated.
The compile time of ads is dropped by 25%.
Differential Revision: https://reviews.llvm.org/D107299
2021-08-25 11:40:34 -07:00
|
|
|
void SampleProfileWriter::computeSummary(const SampleProfileMap &ProfileMap) {
|
2016-05-19 21:07:12 +00:00
|
|
|
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
2021-02-03 13:27:35 -08:00
|
|
|
Summary = Builder.computeSummaryForProfiles(ProfileMap);
|
2016-02-19 03:15:33 +00:00
|
|
|
}
|