llvm-project/compiler-rt/lib/profile/InstrProfilingWriter.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

380 lines
14 KiB
C
Raw Normal View History

/*===- InstrProfilingWriter.c - Write instrumentation to a file or buffer -===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
// Note: This is linked into the Darwin kernel, and must remain compatible
// with freestanding compilation. See `darwin_add_builtin_libraries`.
#ifdef _MSC_VER
/* For _alloca */
#include <malloc.h>
#endif
#include <string.h>
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
[profile] Add a mode to continuously sync counter updates to a file Add support for continuously syncing profile counter updates to a file. The motivation for this is that programs do not always exit cleanly. On iOS, for example, programs are usually killed via a signal from the OS. Running atexit() handlers after catching a signal is unreliable, so some method for progressively writing out profile data is necessary. The approach taken here is to mmap() the `__llvm_prf_cnts` section onto a raw profile. To do this, the linker must page-align the counter and data sections, and the runtime must ensure that counters are mapped to a page-aligned offset within a raw profile. Continuous mode is (for the moment) incompatible with the online merging mode. This limitation is lifted in https://reviews.llvm.org/D69586. Continuous mode is also (for the moment) incompatible with value profiling, as I'm not sure whether there is interest in this and the implementation may be tricky. As I have not been able to test extensively on non-Darwin platforms, only Darwin support is included for the moment. However, continuous mode may "just work" without modification on Linux and some UNIX-likes. AIUI the default value for the GNU linker's `--section-alignment` flag is set to the page size on many systems. This appears to be true for LLD as well, as its `no_nmagic` option is on by default. Continuous mode will not "just work" on Fuchsia or Windows, as it's not possible to mmap() a section on these platforms. There is a proposal to add a layer of indirection to the profile instrumentation to support these platforms. rdar://54210980 Differential Revision: https://reviews.llvm.org/D68351
2019-09-19 11:56:43 -07:00
#include "InstrProfilingPort.h"
#define INSTR_PROF_VALUE_PROF_DATA
#include "profile/InstrProfData.inc"
COMPILER_RT_VISIBILITY void (*FreeHook)(void *) = NULL;
static ProfBufferIO TheBufferIO;
#define VP_BUFFER_SIZE 8 * 1024
static uint8_t BufferIOBuffer[VP_BUFFER_SIZE];
static InstrProfValueData VPDataArray[16];
static uint32_t VPDataArraySize = sizeof(VPDataArray) / sizeof(*VPDataArray);
COMPILER_RT_VISIBILITY uint8_t *DynamicBufferIOBuffer = 0;
COMPILER_RT_VISIBILITY uint32_t VPBufferSize = 0;
/* The buffer writer is responsible in keeping writer state
* across the call.
*/
COMPILER_RT_VISIBILITY uint32_t lprofBufferWriter(ProfDataWriter *This,
ProfDataIOVec *IOVecs,
uint32_t NumIOVecs) {
uint32_t I;
char **Buffer = (char **)&This->WriterCtx;
for (I = 0; I < NumIOVecs; I++) {
size_t Length = IOVecs[I].ElmSize * IOVecs[I].NumElm;
if (IOVecs[I].Data)
memcpy(*Buffer, IOVecs[I].Data, Length);
else if (IOVecs[I].UseZeroPadding) {
/* Allocating the buffer should zero fill. */
}
*Buffer += Length;
}
return 0;
}
static void llvmInitBufferIO(ProfBufferIO *BufferIO, ProfDataWriter *FileWriter,
uint8_t *Buffer, uint32_t BufferSz) {
BufferIO->FileWriter = FileWriter;
BufferIO->OwnFileWriter = 0;
BufferIO->BufferStart = Buffer;
BufferIO->BufferSz = BufferSz;
BufferIO->CurOffset = 0;
}
COMPILER_RT_VISIBILITY ProfBufferIO *
lprofCreateBufferIO(ProfDataWriter *FileWriter) {
uint8_t *Buffer = DynamicBufferIOBuffer;
uint32_t BufferSize = VPBufferSize;
if (!Buffer) {
Buffer = &BufferIOBuffer[0];
BufferSize = sizeof(BufferIOBuffer);
}
llvmInitBufferIO(&TheBufferIO, FileWriter, Buffer, BufferSize);
return &TheBufferIO;
}
COMPILER_RT_VISIBILITY void lprofDeleteBufferIO(ProfBufferIO *BufferIO) {
if (BufferIO->OwnFileWriter)
FreeHook(BufferIO->FileWriter);
if (DynamicBufferIOBuffer) {
FreeHook(DynamicBufferIOBuffer);
DynamicBufferIOBuffer = 0;
VPBufferSize = 0;
}
}
COMPILER_RT_VISIBILITY int
lprofBufferIOWrite(ProfBufferIO *BufferIO, const uint8_t *Data, uint32_t Size) {
/* Buffer is not large enough, it is time to flush. */
if (Size + BufferIO->CurOffset > BufferIO->BufferSz) {
if (lprofBufferIOFlush(BufferIO) != 0)
return -1;
}
/* Special case, bypass the buffer completely. */
ProfDataIOVec IO[] = {{Data, sizeof(uint8_t), Size, 0}};
if (Size > BufferIO->BufferSz) {
if (BufferIO->FileWriter->Write(BufferIO->FileWriter, IO, 1))
return -1;
} else {
/* Write the data to buffer */
uint8_t *Buffer = BufferIO->BufferStart + BufferIO->CurOffset;
ProfDataWriter BufferWriter;
initBufferWriter(&BufferWriter, (char *)Buffer);
lprofBufferWriter(&BufferWriter, IO, 1);
BufferIO->CurOffset =
(uint8_t *)BufferWriter.WriterCtx - BufferIO->BufferStart;
}
return 0;
}
COMPILER_RT_VISIBILITY int lprofBufferIOFlush(ProfBufferIO *BufferIO) {
if (BufferIO->CurOffset) {
ProfDataIOVec IO[] = {
{BufferIO->BufferStart, sizeof(uint8_t), BufferIO->CurOffset, 0}};
if (BufferIO->FileWriter->Write(BufferIO->FileWriter, IO, 1))
return -1;
BufferIO->CurOffset = 0;
}
return 0;
}
/* Write out value profile data for function specified with \c Data.
* The implementation does not use the method \c serializeValueProfData
* which depends on dynamic memory allocation. In this implementation,
* value profile data is written out to \c BufferIO piecemeal.
*/
static int writeOneValueProfData(ProfBufferIO *BufferIO,
VPDataReaderType *VPDataReader,
const __llvm_profile_data *Data) {
unsigned I, NumValueKinds = 0;
ValueProfData VPHeader;
uint8_t *SiteCountArray[IPVK_Last + 1];
for (I = 0; I <= IPVK_Last; I++) {
if (!Data->NumValueSites[I])
SiteCountArray[I] = 0;
else {
uint32_t Sz =
VPDataReader->GetValueProfRecordHeaderSize(Data->NumValueSites[I]) -
offsetof(ValueProfRecord, SiteCountArray);
/* Only use alloca for this small byte array to avoid excessive
* stack growth. */
SiteCountArray[I] = (uint8_t *)COMPILER_RT_ALLOCA(Sz);
memset(SiteCountArray[I], 0, Sz);
}
}
/* If NumValueKinds returned is 0, there is nothing to write, report
success and return. This should match the raw profile reader's behavior. */
if (!(NumValueKinds = VPDataReader->InitRTRecord(Data, SiteCountArray)))
return 0;
/* First write the header structure. */
VPHeader.TotalSize = VPDataReader->GetValueProfDataSize();
VPHeader.NumValueKinds = NumValueKinds;
if (lprofBufferIOWrite(BufferIO, (const uint8_t *)&VPHeader,
sizeof(ValueProfData)))
return -1;
/* Make sure nothing else needs to be written before value profile
* records. */
if ((void *)VPDataReader->GetFirstValueProfRecord(&VPHeader) !=
(void *)(&VPHeader + 1))
return -1;
/* Write out the value profile record for each value kind
* one by one. */
for (I = 0; I <= IPVK_Last; I++) {
uint32_t J;
ValueProfRecord RecordHeader;
/* The size of the value prof record header without counting the
* site count array .*/
uint32_t RecordHeaderSize = offsetof(ValueProfRecord, SiteCountArray);
uint32_t SiteCountArraySize;
if (!Data->NumValueSites[I])
continue;
/* Write out the record header. */
RecordHeader.Kind = I;
RecordHeader.NumValueSites = Data->NumValueSites[I];
if (lprofBufferIOWrite(BufferIO, (const uint8_t *)&RecordHeader,
RecordHeaderSize))
return -1;
/* Write out the site value count array including padding space. */
SiteCountArraySize =
VPDataReader->GetValueProfRecordHeaderSize(Data->NumValueSites[I]) -
RecordHeaderSize;
if (lprofBufferIOWrite(BufferIO, SiteCountArray[I], SiteCountArraySize))
return -1;
/* Write out the value profile data for each value site. */
for (J = 0; J < Data->NumValueSites[I]; J++) {
uint32_t NRead, NRemain;
ValueProfNode *NextStartNode = 0;
NRemain = VPDataReader->GetNumValueDataForSite(I, J);
if (!NRemain)
continue;
/* Read and write out value data in small chunks till it is done. */
do {
NRead = (NRemain > VPDataArraySize ? VPDataArraySize : NRemain);
NextStartNode =
VPDataReader->GetValueData(I, /* ValueKind */
J, /* Site */
&VPDataArray[0], NextStartNode, NRead);
if (lprofBufferIOWrite(BufferIO, (const uint8_t *)&VPDataArray[0],
NRead * sizeof(InstrProfValueData)))
return -1;
NRemain -= NRead;
} while (NRemain != 0);
}
}
/* All done report success. */
return 0;
}
static int writeValueProfData(ProfDataWriter *Writer,
VPDataReaderType *VPDataReader,
const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd) {
ProfBufferIO *BufferIO;
const __llvm_profile_data *DI = 0;
if (!VPDataReader)
return 0;
BufferIO = lprofCreateBufferIO(Writer);
for (DI = DataBegin; DI < DataEnd; DI++) {
if (writeOneValueProfData(BufferIO, VPDataReader, DI))
return -1;
}
if (lprofBufferIOFlush(BufferIO) != 0)
return -1;
lprofDeleteBufferIO(BufferIO);
return 0;
}
COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
VPDataReaderType *VPDataReader,
int SkipNameDataWrite) {
/* Match logic in __llvm_profile_write_buffer(). */
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const char *CountersBegin = __llvm_profile_begin_counters();
const char *CountersEnd = __llvm_profile_end_counters();
const char *BitmapBegin = __llvm_profile_begin_bitmap();
const char *BitmapEnd = __llvm_profile_end_bitmap();
const char *NamesBegin = __llvm_profile_begin_names();
const char *NamesEnd = __llvm_profile_end_names();
Reland "[TypeProf][InstrPGO] Introduce raw and instr profile format change for type profiling." (#82711) New change on top of [reviewed patch](https://github.com/llvm/llvm-project/pull/81691) are [in commits after this one](https://github.com/llvm/llvm-project/pull/82711/commits/d0757f46b3e3865b5f7c552bc0744309a363e0ac). Previous commits are restored from the remote branch with timestamps. 1. Fix build breakage for non-ELF platforms, by defining the missing functions {`__llvm_profile_begin_vtables`, `__llvm_profile_end_vtables`, `__llvm_profile_begin_vtabnames `, `__llvm_profile_end_vtabnames`} everywhere. * Tested on mac laptop (for darwins) and Windows. Specifically, functions in `InstrProfilingPlatformWindows.c` returns `NULL` to make it more explicit that type prof isn't supported; see comments for the reason. * For the rest (AIX, other), mostly follow existing examples (like this [one](https://github.com/llvm/llvm-project/commit/f95b2f1acf1171abb0d00089fd4c9238753847e3)) 2. Rename `__llvm_prf_vtabnames` -> `__llvm_prf_vns` for shorter section name, and make returned pointers [const](https://github.com/llvm/llvm-project/pull/82711/commits/a825d2a4ec00f07772a373091a702f149c3b0c34#diff-4de780ce726d76b7abc9d3353aef95013e7b21e7bda01be8940cc6574fb0b5ffR120-R121) **Original Description** * Raw profile format - Header: records the byte size of compressed vtable names, and the number of profiled vtable entries (call it `VTableProfData`). Header also records padded bytes of each section. - Payload: adds a section for compressed vtable names, and a section to store `VTableProfData`. Both sections are padded so the size is a multiple of 8. * Indexed profile format - Header: records the byte offset of compressed vtable names. - Payload: adds a section to store compressed vtable names. This section is used by `llvm-profdata` to show the list of vtables profiled for an instrumented site. [The originally reviewed patch](https://github.com/llvm/llvm-project/pull/66825) will have profile reader/write change and llvm-profdata change. - To ensure this PR has all the necessary profile format change along with profile version bump, created a copy of the originally reviewed patch in https://github.com/llvm/llvm-project/pull/80761. The copy doesn't have profile format change, but it has the set of tests which covers type profile generation, profile read and profile merge. Tests pass there. rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 --------- Co-authored-by: modiking <modiking213@gmail.com>
2024-02-27 11:07:40 -08:00
const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
const char *VNamesEnd = __llvm_profile_end_vtabnames();
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
Reland "[TypeProf][InstrPGO] Introduce raw and instr profile format change for type profiling." (#82711) New change on top of [reviewed patch](https://github.com/llvm/llvm-project/pull/81691) are [in commits after this one](https://github.com/llvm/llvm-project/pull/82711/commits/d0757f46b3e3865b5f7c552bc0744309a363e0ac). Previous commits are restored from the remote branch with timestamps. 1. Fix build breakage for non-ELF platforms, by defining the missing functions {`__llvm_profile_begin_vtables`, `__llvm_profile_end_vtables`, `__llvm_profile_begin_vtabnames `, `__llvm_profile_end_vtabnames`} everywhere. * Tested on mac laptop (for darwins) and Windows. Specifically, functions in `InstrProfilingPlatformWindows.c` returns `NULL` to make it more explicit that type prof isn't supported; see comments for the reason. * For the rest (AIX, other), mostly follow existing examples (like this [one](https://github.com/llvm/llvm-project/commit/f95b2f1acf1171abb0d00089fd4c9238753847e3)) 2. Rename `__llvm_prf_vtabnames` -> `__llvm_prf_vns` for shorter section name, and make returned pointers [const](https://github.com/llvm/llvm-project/pull/82711/commits/a825d2a4ec00f07772a373091a702f149c3b0c34#diff-4de780ce726d76b7abc9d3353aef95013e7b21e7bda01be8940cc6574fb0b5ffR120-R121) **Original Description** * Raw profile format - Header: records the byte size of compressed vtable names, and the number of profiled vtable entries (call it `VTableProfData`). Header also records padded bytes of each section. - Payload: adds a section for compressed vtable names, and a section to store `VTableProfData`. Both sections are padded so the size is a multiple of 8. * Indexed profile format - Header: records the byte offset of compressed vtable names. - Payload: adds a section to store compressed vtable names. This section is used by `llvm-profdata` to show the list of vtables profiled for an instrumented site. [The originally reviewed patch](https://github.com/llvm/llvm-project/pull/66825) will have profile reader/write change and llvm-profdata change. - To ensure this PR has all the necessary profile format change along with profile version bump, created a copy of the originally reviewed patch in https://github.com/llvm/llvm-project/pull/80761. The copy doesn't have profile format change, but it has the set of tests which covers type profile generation, profile read and profile merge. Tests pass there. rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 --------- Co-authored-by: modiking <modiking213@gmail.com>
2024-02-27 11:07:40 -08:00
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
VNamesBegin, VNamesEnd, SkipNameDataWrite);
}
COMPILER_RT_VISIBILITY int
lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd,
const char *CountersBegin, const char *CountersEnd,
const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
Reland "[TypeProf][InstrPGO] Introduce raw and instr profile format change for type profiling." (#82711) New change on top of [reviewed patch](https://github.com/llvm/llvm-project/pull/81691) are [in commits after this one](https://github.com/llvm/llvm-project/pull/82711/commits/d0757f46b3e3865b5f7c552bc0744309a363e0ac). Previous commits are restored from the remote branch with timestamps. 1. Fix build breakage for non-ELF platforms, by defining the missing functions {`__llvm_profile_begin_vtables`, `__llvm_profile_end_vtables`, `__llvm_profile_begin_vtabnames `, `__llvm_profile_end_vtabnames`} everywhere. * Tested on mac laptop (for darwins) and Windows. Specifically, functions in `InstrProfilingPlatformWindows.c` returns `NULL` to make it more explicit that type prof isn't supported; see comments for the reason. * For the rest (AIX, other), mostly follow existing examples (like this [one](https://github.com/llvm/llvm-project/commit/f95b2f1acf1171abb0d00089fd4c9238753847e3)) 2. Rename `__llvm_prf_vtabnames` -> `__llvm_prf_vns` for shorter section name, and make returned pointers [const](https://github.com/llvm/llvm-project/pull/82711/commits/a825d2a4ec00f07772a373091a702f149c3b0c34#diff-4de780ce726d76b7abc9d3353aef95013e7b21e7bda01be8940cc6574fb0b5ffR120-R121) **Original Description** * Raw profile format - Header: records the byte size of compressed vtable names, and the number of profiled vtable entries (call it `VTableProfData`). Header also records padded bytes of each section. - Payload: adds a section for compressed vtable names, and a section to store `VTableProfData`. Both sections are padded so the size is a multiple of 8. * Indexed profile format - Header: records the byte offset of compressed vtable names. - Payload: adds a section to store compressed vtable names. This section is used by `llvm-profdata` to show the list of vtables profiled for an instrumented site. [The originally reviewed patch](https://github.com/llvm/llvm-project/pull/66825) will have profile reader/write change and llvm-profdata change. - To ensure this PR has all the necessary profile format change along with profile version bump, created a copy of the originally reviewed patch in https://github.com/llvm/llvm-project/pull/80761. The copy doesn't have profile format change, but it has the set of tests which covers type profile generation, profile read and profile merge. Tests pass there. rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 --------- Co-authored-by: modiking <modiking213@gmail.com>
2024-02-27 11:07:40 -08:00
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite) {
/* Calculate size of sections. */
const uint64_t DataSectionSize =
__llvm_profile_get_data_size(DataBegin, DataEnd);
const uint64_t NumData = __llvm_profile_get_num_data(DataBegin, DataEnd);
const uint64_t CountersSectionSize =
__llvm_profile_get_counters_size(CountersBegin, CountersEnd);
const uint64_t NumCounters =
__llvm_profile_get_num_counters(CountersBegin, CountersEnd);
const uint64_t NumBitmapBytes =
__llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd);
const uint64_t NamesSize = __llvm_profile_get_name_size(NamesBegin, NamesEnd);
Reland "[TypeProf][InstrPGO] Introduce raw and instr profile format change for type profiling." (#82711) New change on top of [reviewed patch](https://github.com/llvm/llvm-project/pull/81691) are [in commits after this one](https://github.com/llvm/llvm-project/pull/82711/commits/d0757f46b3e3865b5f7c552bc0744309a363e0ac). Previous commits are restored from the remote branch with timestamps. 1. Fix build breakage for non-ELF platforms, by defining the missing functions {`__llvm_profile_begin_vtables`, `__llvm_profile_end_vtables`, `__llvm_profile_begin_vtabnames `, `__llvm_profile_end_vtabnames`} everywhere. * Tested on mac laptop (for darwins) and Windows. Specifically, functions in `InstrProfilingPlatformWindows.c` returns `NULL` to make it more explicit that type prof isn't supported; see comments for the reason. * For the rest (AIX, other), mostly follow existing examples (like this [one](https://github.com/llvm/llvm-project/commit/f95b2f1acf1171abb0d00089fd4c9238753847e3)) 2. Rename `__llvm_prf_vtabnames` -> `__llvm_prf_vns` for shorter section name, and make returned pointers [const](https://github.com/llvm/llvm-project/pull/82711/commits/a825d2a4ec00f07772a373091a702f149c3b0c34#diff-4de780ce726d76b7abc9d3353aef95013e7b21e7bda01be8940cc6574fb0b5ffR120-R121) **Original Description** * Raw profile format - Header: records the byte size of compressed vtable names, and the number of profiled vtable entries (call it `VTableProfData`). Header also records padded bytes of each section. - Payload: adds a section for compressed vtable names, and a section to store `VTableProfData`. Both sections are padded so the size is a multiple of 8. * Indexed profile format - Header: records the byte offset of compressed vtable names. - Payload: adds a section to store compressed vtable names. This section is used by `llvm-profdata` to show the list of vtables profiled for an instrumented site. [The originally reviewed patch](https://github.com/llvm/llvm-project/pull/66825) will have profile reader/write change and llvm-profdata change. - To ensure this PR has all the necessary profile format change along with profile version bump, created a copy of the originally reviewed patch in https://github.com/llvm/llvm-project/pull/80761. The copy doesn't have profile format change, but it has the set of tests which covers type profile generation, profile read and profile merge. Tests pass there. rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 --------- Co-authored-by: modiking <modiking213@gmail.com>
2024-02-27 11:07:40 -08:00
const uint64_t NumVTables =
__llvm_profile_get_num_vtable(VTableBegin, VTableEnd);
const uint64_t VTableSectionSize =
__llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd);
const uint64_t VNamesSize =
__llvm_profile_get_name_size(VNamesBegin, VNamesEnd);
/* Create the header. */
__llvm_profile_header Header;
[profile] Add a mode to continuously sync counter updates to a file Add support for continuously syncing profile counter updates to a file. The motivation for this is that programs do not always exit cleanly. On iOS, for example, programs are usually killed via a signal from the OS. Running atexit() handlers after catching a signal is unreliable, so some method for progressively writing out profile data is necessary. The approach taken here is to mmap() the `__llvm_prf_cnts` section onto a raw profile. To do this, the linker must page-align the counter and data sections, and the runtime must ensure that counters are mapped to a page-aligned offset within a raw profile. Continuous mode is (for the moment) incompatible with the online merging mode. This limitation is lifted in https://reviews.llvm.org/D69586. Continuous mode is also (for the moment) incompatible with value profiling, as I'm not sure whether there is interest in this and the implementation may be tricky. As I have not been able to test extensively on non-Darwin platforms, only Darwin support is included for the moment. However, continuous mode may "just work" without modification on Linux and some UNIX-likes. AIUI the default value for the GNU linker's `--section-alignment` flag is set to the page size on many systems. This appears to be true for LLD as well, as its `no_nmagic` option is on by default. Continuous mode will not "just work" on Fuchsia or Windows, as it's not possible to mmap() a section on these platforms. There is a proposal to add a layer of indirection to the profile instrumentation to support these platforms. rdar://54210980 Differential Revision: https://reviews.llvm.org/D68351
2019-09-19 11:56:43 -07:00
/* Determine how much padding is needed before/after the counters and after
* the names. */
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
Reland "[TypeProf][InstrPGO] Introduce raw and instr profile format change for type profiling." (#82711) New change on top of [reviewed patch](https://github.com/llvm/llvm-project/pull/81691) are [in commits after this one](https://github.com/llvm/llvm-project/pull/82711/commits/d0757f46b3e3865b5f7c552bc0744309a363e0ac). Previous commits are restored from the remote branch with timestamps. 1. Fix build breakage for non-ELF platforms, by defining the missing functions {`__llvm_profile_begin_vtables`, `__llvm_profile_end_vtables`, `__llvm_profile_begin_vtabnames `, `__llvm_profile_end_vtabnames`} everywhere. * Tested on mac laptop (for darwins) and Windows. Specifically, functions in `InstrProfilingPlatformWindows.c` returns `NULL` to make it more explicit that type prof isn't supported; see comments for the reason. * For the rest (AIX, other), mostly follow existing examples (like this [one](https://github.com/llvm/llvm-project/commit/f95b2f1acf1171abb0d00089fd4c9238753847e3)) 2. Rename `__llvm_prf_vtabnames` -> `__llvm_prf_vns` for shorter section name, and make returned pointers [const](https://github.com/llvm/llvm-project/pull/82711/commits/a825d2a4ec00f07772a373091a702f149c3b0c34#diff-4de780ce726d76b7abc9d3353aef95013e7b21e7bda01be8940cc6574fb0b5ffR120-R121) **Original Description** * Raw profile format - Header: records the byte size of compressed vtable names, and the number of profiled vtable entries (call it `VTableProfData`). Header also records padded bytes of each section. - Payload: adds a section for compressed vtable names, and a section to store `VTableProfData`. Both sections are padded so the size is a multiple of 8. * Indexed profile format - Header: records the byte offset of compressed vtable names. - Payload: adds a section to store compressed vtable names. This section is used by `llvm-profdata` to show the list of vtables profiled for an instrumented site. [The originally reviewed patch](https://github.com/llvm/llvm-project/pull/66825) will have profile reader/write change and llvm-profdata change. - To ensure this PR has all the necessary profile format change along with profile version bump, created a copy of the originally reviewed patch in https://github.com/llvm/llvm-project/pull/80761. The copy doesn't have profile format change, but it has the set of tests which covers type profile generation, profile read and profile merge. Tests pass there. rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 --------- Co-authored-by: modiking <modiking213@gmail.com>
2024-02-27 11:07:40 -08:00
PaddingBytesAfterBitmapBytes, PaddingBytesAfterNames,
PaddingBytesAfterVTable, PaddingBytesAfterVNames;
if (__llvm_profile_get_padding_sizes_for_counters(
DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize,
VTableSectionSize, VNamesSize, &PaddingBytesBeforeCounters,
&PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes,
&PaddingBytesAfterNames, &PaddingBytesAfterVTable,
&PaddingBytesAfterVNames) == -1)
return -1;
[profile] Add a mode to continuously sync counter updates to a file Add support for continuously syncing profile counter updates to a file. The motivation for this is that programs do not always exit cleanly. On iOS, for example, programs are usually killed via a signal from the OS. Running atexit() handlers after catching a signal is unreliable, so some method for progressively writing out profile data is necessary. The approach taken here is to mmap() the `__llvm_prf_cnts` section onto a raw profile. To do this, the linker must page-align the counter and data sections, and the runtime must ensure that counters are mapped to a page-aligned offset within a raw profile. Continuous mode is (for the moment) incompatible with the online merging mode. This limitation is lifted in https://reviews.llvm.org/D69586. Continuous mode is also (for the moment) incompatible with value profiling, as I'm not sure whether there is interest in this and the implementation may be tricky. As I have not been able to test extensively on non-Darwin platforms, only Darwin support is included for the moment. However, continuous mode may "just work" without modification on Linux and some UNIX-likes. AIUI the default value for the GNU linker's `--section-alignment` flag is set to the page size on many systems. This appears to be true for LLD as well, as its `no_nmagic` option is on by default. Continuous mode will not "just work" on Fuchsia or Windows, as it's not possible to mmap() a section on these platforms. There is a proposal to add a layer of indirection to the profile instrumentation to support these platforms. rdar://54210980 Differential Revision: https://reviews.llvm.org/D68351
2019-09-19 11:56:43 -07:00
{
/* Initialize header structure. */
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
#include "profile/InstrProfData.inc"
}
[InstrProfiling] Make CountersPtr in __profd_ relative Change `CountersPtr` in `__profd_` to a label difference, which is a link-time constant. On ELF, when linking a shared object, this requires that `__profc_` is either private or linkonce/linkonce_odr hidden. On COFF, we need D104564 so that `.quad a-b` (64-bit label difference) can lower to a 32-bit PC-relative relocation. ``` # ELF: R_X86_64_PC64 (PC-relative) .quad .L__profc_foo-.L__profd_foo # Mach-O: a pair of 8-byte X86_64_RELOC_UNSIGNED and X86_64_RELOC_SUBTRACTOR .quad l___profc_foo-l___profd_foo # COFF: we actually use IMAGE_REL_AMD64_REL32/IMAGE_REL_ARM64_REL32 so # the high 32-bit value is zero even if .L__profc_foo < .L__profd_foo # As compensation, we truncate CountersDelta in the header so that # __llvm_profile_merge_from_buffer and llvm-profdata reader keep working. .quad .L__profc_foo-.L__profd_foo ``` (Note: link.exe sorts `.lprfc` before `.lprfd` even if the object writer has `.lprfd` before `.lprfc`, so we cannot work around by reordering `.lprfc` and `.lprfd`.) With this change, a stage 2 (`-DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_INSTRUMENTED=IR`) `ld -pie` linked clang is 1.74% smaller due to fewer R_X86_64_RELATIVE relocations. ``` % readelf -r pie | awk '$3~/R.*/{s[$3]++} END {for (k in s) print k, s[k]}' R_X86_64_JUMP_SLO 331 R_X86_64_TPOFF64 2 R_X86_64_RELATIVE 476059 # was: 607712 R_X86_64_64 2616 R_X86_64_GLOB_DAT 31 ``` The absolute function address (used by llvm-profdata to collect indirect call targets) can be converted to relative as well, but is not done in this patch. Differential Revision: https://reviews.llvm.org/D104556
2021-07-30 11:52:18 -07:00
/* On WIN64, label differences are truncated 32-bit values. Truncate
* CountersDelta to match. */
#ifdef _WIN64
Header.CountersDelta = (uint32_t)Header.CountersDelta;
Header.BitmapDelta = (uint32_t)Header.BitmapDelta;
[InstrProfiling] Make CountersPtr in __profd_ relative Change `CountersPtr` in `__profd_` to a label difference, which is a link-time constant. On ELF, when linking a shared object, this requires that `__profc_` is either private or linkonce/linkonce_odr hidden. On COFF, we need D104564 so that `.quad a-b` (64-bit label difference) can lower to a 32-bit PC-relative relocation. ``` # ELF: R_X86_64_PC64 (PC-relative) .quad .L__profc_foo-.L__profd_foo # Mach-O: a pair of 8-byte X86_64_RELOC_UNSIGNED and X86_64_RELOC_SUBTRACTOR .quad l___profc_foo-l___profd_foo # COFF: we actually use IMAGE_REL_AMD64_REL32/IMAGE_REL_ARM64_REL32 so # the high 32-bit value is zero even if .L__profc_foo < .L__profd_foo # As compensation, we truncate CountersDelta in the header so that # __llvm_profile_merge_from_buffer and llvm-profdata reader keep working. .quad .L__profc_foo-.L__profd_foo ``` (Note: link.exe sorts `.lprfc` before `.lprfd` even if the object writer has `.lprfd` before `.lprfc`, so we cannot work around by reordering `.lprfc` and `.lprfd`.) With this change, a stage 2 (`-DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_INSTRUMENTED=IR`) `ld -pie` linked clang is 1.74% smaller due to fewer R_X86_64_RELATIVE relocations. ``` % readelf -r pie | awk '$3~/R.*/{s[$3]++} END {for (k in s) print k, s[k]}' R_X86_64_JUMP_SLO 331 R_X86_64_TPOFF64 2 R_X86_64_RELATIVE 476059 # was: 607712 R_X86_64_64 2616 R_X86_64_GLOB_DAT 31 ``` The absolute function address (used by llvm-profdata to collect indirect call targets) can be converted to relative as well, but is not done in this patch. Differential Revision: https://reviews.llvm.org/D104556
2021-07-30 11:52:18 -07:00
#endif
/* The data and names sections are omitted in lightweight mode. */
if (NumData == 0 && NamesSize == 0) {
Header.CountersDelta = 0;
Header.NamesDelta = 0;
}
/* Write the profile header. */
ProfDataIOVec IOVec[] = {{&Header, sizeof(__llvm_profile_header), 1, 0}};
if (Writer->Write(Writer, IOVec, sizeof(IOVec) / sizeof(*IOVec)))
return -1;
/* Write the binary id lengths and data. */
if (__llvm_write_binary_ids(Writer) == -1)
return -1;
/* Write the profile data. */
ProfDataIOVec IOVecData[] = {
{DataBegin, sizeof(uint8_t), DataSectionSize, 0},
{NULL, sizeof(uint8_t), PaddingBytesBeforeCounters, 1},
{CountersBegin, sizeof(uint8_t), CountersSectionSize, 0},
{NULL, sizeof(uint8_t), PaddingBytesAfterCounters, 1},
{BitmapBegin, sizeof(uint8_t), NumBitmapBytes, 0},
{NULL, sizeof(uint8_t), PaddingBytesAfterBitmapBytes, 1},
{SkipNameDataWrite ? NULL : NamesBegin, sizeof(uint8_t), NamesSize, 0},
Reland "[TypeProf][InstrPGO] Introduce raw and instr profile format change for type profiling." (#82711) New change on top of [reviewed patch](https://github.com/llvm/llvm-project/pull/81691) are [in commits after this one](https://github.com/llvm/llvm-project/pull/82711/commits/d0757f46b3e3865b5f7c552bc0744309a363e0ac). Previous commits are restored from the remote branch with timestamps. 1. Fix build breakage for non-ELF platforms, by defining the missing functions {`__llvm_profile_begin_vtables`, `__llvm_profile_end_vtables`, `__llvm_profile_begin_vtabnames `, `__llvm_profile_end_vtabnames`} everywhere. * Tested on mac laptop (for darwins) and Windows. Specifically, functions in `InstrProfilingPlatformWindows.c` returns `NULL` to make it more explicit that type prof isn't supported; see comments for the reason. * For the rest (AIX, other), mostly follow existing examples (like this [one](https://github.com/llvm/llvm-project/commit/f95b2f1acf1171abb0d00089fd4c9238753847e3)) 2. Rename `__llvm_prf_vtabnames` -> `__llvm_prf_vns` for shorter section name, and make returned pointers [const](https://github.com/llvm/llvm-project/pull/82711/commits/a825d2a4ec00f07772a373091a702f149c3b0c34#diff-4de780ce726d76b7abc9d3353aef95013e7b21e7bda01be8940cc6574fb0b5ffR120-R121) **Original Description** * Raw profile format - Header: records the byte size of compressed vtable names, and the number of profiled vtable entries (call it `VTableProfData`). Header also records padded bytes of each section. - Payload: adds a section for compressed vtable names, and a section to store `VTableProfData`. Both sections are padded so the size is a multiple of 8. * Indexed profile format - Header: records the byte offset of compressed vtable names. - Payload: adds a section to store compressed vtable names. This section is used by `llvm-profdata` to show the list of vtables profiled for an instrumented site. [The originally reviewed patch](https://github.com/llvm/llvm-project/pull/66825) will have profile reader/write change and llvm-profdata change. - To ensure this PR has all the necessary profile format change along with profile version bump, created a copy of the originally reviewed patch in https://github.com/llvm/llvm-project/pull/80761. The copy doesn't have profile format change, but it has the set of tests which covers type profile generation, profile read and profile merge. Tests pass there. rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 --------- Co-authored-by: modiking <modiking213@gmail.com>
2024-02-27 11:07:40 -08:00
{NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1},
{VTableBegin, sizeof(uint8_t), VTableSectionSize, 0},
{NULL, sizeof(uint8_t), PaddingBytesAfterVTable, 1},
{SkipNameDataWrite ? NULL : VNamesBegin, sizeof(uint8_t), VNamesSize, 0},
{NULL, sizeof(uint8_t), PaddingBytesAfterVNames, 1}};
if (Writer->Write(Writer, IOVecData, sizeof(IOVecData) / sizeof(*IOVecData)))
return -1;
/* Value profiling is not yet supported in continuous mode and profile
* correlation mode. */
if (__llvm_profile_is_continuous_mode_enabled() ||
(NumData == 0 && NamesSize == 0))
[profile] Add a mode to continuously sync counter updates to a file Add support for continuously syncing profile counter updates to a file. The motivation for this is that programs do not always exit cleanly. On iOS, for example, programs are usually killed via a signal from the OS. Running atexit() handlers after catching a signal is unreliable, so some method for progressively writing out profile data is necessary. The approach taken here is to mmap() the `__llvm_prf_cnts` section onto a raw profile. To do this, the linker must page-align the counter and data sections, and the runtime must ensure that counters are mapped to a page-aligned offset within a raw profile. Continuous mode is (for the moment) incompatible with the online merging mode. This limitation is lifted in https://reviews.llvm.org/D69586. Continuous mode is also (for the moment) incompatible with value profiling, as I'm not sure whether there is interest in this and the implementation may be tricky. As I have not been able to test extensively on non-Darwin platforms, only Darwin support is included for the moment. However, continuous mode may "just work" without modification on Linux and some UNIX-likes. AIUI the default value for the GNU linker's `--section-alignment` flag is set to the page size on many systems. This appears to be true for LLD as well, as its `no_nmagic` option is on by default. Continuous mode will not "just work" on Fuchsia or Windows, as it's not possible to mmap() a section on these platforms. There is a proposal to add a layer of indirection to the profile instrumentation to support these platforms. rdar://54210980 Differential Revision: https://reviews.llvm.org/D68351
2019-09-19 11:56:43 -07:00
return 0;
return writeValueProfData(Writer, VPDataReader, DataBegin, DataEnd);
}
/*
* Write binary id length and then its data, because binary id does not
* have a fixed length.
*/
COMPILER_RT_VISIBILITY
int lprofWriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
const uint8_t *BinaryIdData,
uint64_t BinaryIdPadding) {
ProfDataIOVec BinaryIdIOVec[] = {
{&BinaryIdLen, sizeof(uint64_t), 1, 0},
{BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0},
{NULL, sizeof(uint8_t), BinaryIdPadding, 1},
};
if (Writer->Write(Writer, BinaryIdIOVec,
sizeof(BinaryIdIOVec) / sizeof(*BinaryIdIOVec)))
return -1;
/* Successfully wrote binary id, report success. */
return 0;
}