mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-21 19:57:00 +00:00
This pull request is the second part of an ongoing effort to extend PGO instrumentation to GPU device code and depends on #76587. This PR makes the following changes: - Introduces `__llvm_write_custom_profile` to the PGO compiler-rt library. This is an external function that can be used to write profiles with custom data to target-specific files. - Adds `__llvm_write_custom_profile` as a weak symbol to libomptarget so that it can write the collected data to a profraw file. - Adds a `PGODump` debug flag and only displays the dump when that flag is set.
This commit is contained in:
parent
84e3c6ff95
commit
9e5c136d5a
compiler-rt/lib/profile
offload
include/Shared
plugins-nextgen/common
test
openmp/docs/design
@ -304,6 +304,17 @@ int __llvm_profile_get_padding_sizes_for_counters(
|
||||
*/
|
||||
void __llvm_profile_set_dumped(void);
|
||||
|
||||
/*!
|
||||
* \brief Write custom target-specific profiling data to a separate file.
|
||||
* Used by offload PGO.
|
||||
*/
|
||||
int __llvm_write_custom_profile(const char *Target,
|
||||
const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd,
|
||||
const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin,
|
||||
const char *NamesEnd);
|
||||
|
||||
/*!
|
||||
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
|
||||
* symbol. Its main purpose is to enable profile runtime user to
|
||||
|
@ -541,6 +541,17 @@ static FILE *getFileObject(const char *OutputName) {
|
||||
return fopen(OutputName, "ab");
|
||||
}
|
||||
|
||||
/* Release \c OutputFile after a profile write.
 *
 * A stream that is not the runtime's registered profile file is simply
 * closed. The registered profile file is kept open: it is flushed and, when
 * merging is active outside continuous mode, its file lock is released. */
static void closeFileObject(FILE *OutputFile) {
  /* Any file other than the registered profile file is closed outright. */
  if (OutputFile != getProfileFile()) {
    fclose(OutputFile);
    return;
  }

  /* The registered profile file stays open; only push out buffered data. */
  fflush(OutputFile);

  if (!doMerging() || __llvm_profile_is_continuous_mode_enabled())
    return;

  lprofUnlockFileHandle(OutputFile);
}
|
||||
|
||||
/* Write profile data to file \c OutputName. */
|
||||
static int writeFile(const char *OutputName) {
|
||||
int RetVal;
|
||||
@ -562,15 +573,7 @@ static int writeFile(const char *OutputName) {
|
||||
initFileWriter(&fileWriter, OutputFile);
|
||||
RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone);
|
||||
|
||||
if (OutputFile == getProfileFile()) {
|
||||
fflush(OutputFile);
|
||||
if (doMerging() && !__llvm_profile_is_continuous_mode_enabled()) {
|
||||
lprofUnlockFileHandle(OutputFile);
|
||||
}
|
||||
} else {
|
||||
fclose(OutputFile);
|
||||
}
|
||||
|
||||
closeFileObject(OutputFile);
|
||||
return RetVal;
|
||||
}
|
||||
|
||||
@ -1359,4 +1362,107 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __llvm_write_custom_profile(const char *Target,
|
||||
const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd,
|
||||
const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin,
|
||||
const char *NamesEnd) {
|
||||
int ReturnValue = 0, FilenameLength, TargetLength;
|
||||
char *FilenameBuf, *TargetFilename;
|
||||
const char *Filename;
|
||||
|
||||
/* Save old profile data */
|
||||
FILE *oldFile = getProfileFile();
|
||||
|
||||
// Temporarily suspend getting SIGKILL when the parent exits.
|
||||
int PDeathSig = lprofSuspendSigKill();
|
||||
|
||||
if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) {
|
||||
PROF_NOTE("Profile data not written to file: %s.\n", "already written");
|
||||
if (PDeathSig == 1)
|
||||
lprofRestoreSigKill();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check if there is llvm/runtime version mismatch. */
|
||||
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
|
||||
PROF_ERR("Runtime and instrumentation version mismatch : "
|
||||
"expected %d, but get %d\n",
|
||||
INSTR_PROF_RAW_VERSION,
|
||||
(int)GET_VERSION(__llvm_profile_get_version()));
|
||||
if (PDeathSig == 1)
|
||||
lprofRestoreSigKill();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Get current filename */
|
||||
FilenameLength = getCurFilenameLength();
|
||||
FilenameBuf = (char *)COMPILER_RT_ALLOCA(FilenameLength + 1);
|
||||
Filename = getCurFilename(FilenameBuf, 0);
|
||||
|
||||
/* Check the filename. */
|
||||
if (!Filename) {
|
||||
PROF_ERR("Failed to write file : %s\n", "Filename not set");
|
||||
if (PDeathSig == 1)
|
||||
lprofRestoreSigKill();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Allocate new space for our target-specific PGO filename */
|
||||
TargetLength = strlen(Target);
|
||||
TargetFilename =
|
||||
(char *)COMPILER_RT_ALLOCA(FilenameLength + TargetLength + 2);
|
||||
|
||||
/* Find file basename and path sizes */
|
||||
int32_t DirEnd = FilenameLength - 1;
|
||||
while (DirEnd >= 0 && !IS_DIR_SEPARATOR(Filename[DirEnd])) {
|
||||
DirEnd--;
|
||||
}
|
||||
uint32_t DirSize = DirEnd + 1, BaseSize = FilenameLength - DirSize;
|
||||
|
||||
/* Prepend "TARGET." to current filename */
|
||||
if (DirSize > 0) {
|
||||
memcpy(TargetFilename, Filename, DirSize);
|
||||
}
|
||||
memcpy(TargetFilename + DirSize, Target, TargetLength);
|
||||
TargetFilename[TargetLength + DirSize] = '.';
|
||||
memcpy(TargetFilename + DirSize + 1 + TargetLength, Filename + DirSize,
|
||||
BaseSize);
|
||||
TargetFilename[FilenameLength + 1 + TargetLength] = 0;
|
||||
|
||||
/* Open and truncate target-specific PGO file */
|
||||
FILE *OutputFile = fopen(TargetFilename, "w");
|
||||
setProfileFile(OutputFile);
|
||||
|
||||
if (!OutputFile) {
|
||||
PROF_ERR("Failed to open file : %s\n", TargetFilename);
|
||||
if (PDeathSig == 1)
|
||||
lprofRestoreSigKill();
|
||||
return -1;
|
||||
}
|
||||
|
||||
FreeHook = &free;
|
||||
setupIOBuffer();
|
||||
|
||||
/* Write custom data */
|
||||
ProfDataWriter fileWriter;
|
||||
initFileWriter(&fileWriter, OutputFile);
|
||||
|
||||
/* Write custom data to the file */
|
||||
ReturnValue = lprofWriteDataImpl(
|
||||
&fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
|
||||
lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
|
||||
closeFileObject(OutputFile);
|
||||
|
||||
// Restore SIGKILL.
|
||||
if (PDeathSig == 1)
|
||||
lprofRestoreSigKill();
|
||||
|
||||
/* Restore old profiling file */
|
||||
setProfileFile(oldFile);
|
||||
|
||||
return ReturnValue;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -30,6 +30,7 @@ enum class DeviceDebugKind : uint32_t {
|
||||
FunctionTracing = 1U << 1,
|
||||
CommonIssues = 1U << 2,
|
||||
AllocationTracker = 1U << 3,
|
||||
PGODump = 1U << 4,
|
||||
};
|
||||
|
||||
struct DeviceEnvironmentTy {
|
||||
|
@ -63,14 +63,22 @@ struct __llvm_profile_data {
|
||||
#include "llvm/ProfileData/InstrProfData.inc"
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
extern int __attribute__((weak)) __llvm_write_custom_profile(
|
||||
const char *Target, const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd, const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
|
||||
}
|
||||
|
||||
/// PGO profiling data extracted from a GPU device
|
||||
struct GPUProfGlobals {
|
||||
SmallVector<uint8_t> NamesData;
|
||||
SmallVector<SmallVector<int64_t>> Counts;
|
||||
SmallVector<int64_t> Counts;
|
||||
SmallVector<__llvm_profile_data> Data;
|
||||
SmallVector<uint8_t> NamesData;
|
||||
Triple TargetTriple;
|
||||
|
||||
void dump() const;
|
||||
Error write() const;
|
||||
};
|
||||
|
||||
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
|
||||
|
@ -206,7 +206,7 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
|
||||
GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data());
|
||||
if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
|
||||
return Err;
|
||||
DeviceProfileData.Counts.push_back(std::move(Counts));
|
||||
DeviceProfileData.Counts.append(std::move(Counts));
|
||||
} else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) {
|
||||
// Read profiling data for this global variable
|
||||
__llvm_profile_data Data{};
|
||||
@ -224,15 +224,14 @@ void GPUProfGlobals::dump() const {
|
||||
<< "\n";
|
||||
|
||||
outs() << "======== Counters =========\n";
|
||||
for (const auto &Count : Counts) {
|
||||
outs() << "[";
|
||||
for (size_t i = 0; i < Count.size(); i++) {
|
||||
if (i == 0)
|
||||
outs() << " ";
|
||||
outs() << Count[i] << " ";
|
||||
}
|
||||
outs() << "]\n";
|
||||
for (size_t i = 0; i < Counts.size(); i++) {
|
||||
if (i > 0 && i % 10 == 0)
|
||||
outs() << "\n";
|
||||
else if (i != 0)
|
||||
outs() << " ";
|
||||
outs() << Counts[i];
|
||||
}
|
||||
outs() << "\n";
|
||||
|
||||
outs() << "========== Data ===========\n";
|
||||
for (const auto &ProfData : Data) {
|
||||
@ -264,3 +263,43 @@ void GPUProfGlobals::dump() const {
|
||||
Symtab.dumpNames(outs());
|
||||
outs() << "===========================\n";
|
||||
}
|
||||
|
||||
/// Hand the collected device profile to the compiler-rt profiling runtime.
///
/// The counters, data records and names are copied back to back (in that
/// order) into one temporary buffer, and the begin/end pointers of each
/// region are passed to the weakly-linked \c __llvm_write_custom_profile
/// together with the target triple string.
///
/// \returns an error if the weak symbol is not available or if the runtime
/// reports a non-zero status; success otherwise.
Error GPUProfGlobals::write() const {
  // The symbol is declared weak, so it is null unless the profiling runtime
  // was linked in.
  if (!__llvm_write_custom_profile)
    return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
                         "The compiler-rt profiling library must be linked for "
                         "GPU PGO to work.");

  // Byte sizes of the three regions.
  const size_t CountsBytes = Counts.size() * sizeof(int64_t);
  const size_t DataBytes = Data.size() * sizeof(__llvm_profile_data);
  const size_t NamesBytes = NamesData.size();

  // One buffer for all regions, so that each section is contiguous and the
  // PGO library can compute deltas properly.
  SmallVector<uint8_t> Buffer(NamesBytes + DataBytes + CountsBytes);
  uint8_t *Base = Buffer.data();

  // Layout: [ counters | data records | names ]
  uint8_t *CountersStart = Base;
  uint8_t *DataStart = CountersStart + CountsBytes;
  uint8_t *NamesStart = DataStart + DataBytes;
  uint8_t *NamesStop = NamesStart + NamesBytes;

  char *CountersBegin = reinterpret_cast<char *>(CountersStart);
  char *CountersEnd = reinterpret_cast<char *>(DataStart);
  auto *DataBegin = reinterpret_cast<__llvm_profile_data *>(DataStart);
  auto *DataEnd = reinterpret_cast<__llvm_profile_data *>(NamesStart);
  char *NamesBegin = reinterpret_cast<char *>(NamesStart);
  char *NamesEnd = reinterpret_cast<char *>(NamesStop);

  // Fill each region from its source container.
  memcpy(DataBegin, Data.data(), DataBytes);
  memcpy(CountersBegin, Counts.data(), CountsBytes);
  memcpy(NamesBegin, NamesData.data(), NamesBytes);

  // Invoke the compiler-rt entry point.
  const int Status = __llvm_write_custom_profile(
      TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
      CountersEnd, NamesBegin, NamesEnd);
  if (Status == 0)
    return Plugin::success();

  return Plugin::error("Error writing GPU PGO data to file");
}
|
||||
|
@ -861,8 +861,14 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
|
||||
if (!ProfOrErr)
|
||||
return ProfOrErr.takeError();
|
||||
|
||||
// TODO: write data to profiling file
|
||||
ProfOrErr->dump();
|
||||
// Dump out profdata
|
||||
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
|
||||
uint32_t(DeviceDebugKind::PGODump))
|
||||
ProfOrErr->dump();
|
||||
|
||||
// Write data to profiling file
|
||||
if (auto Err = ProfOrErr->write())
|
||||
return Err;
|
||||
}
|
||||
|
||||
// Delete the memory manager before deinitializing the device. Otherwise,
|
||||
|
@ -112,8 +112,10 @@ config.available_features.add(config.libomptarget_current_target)
|
||||
if config.libomptarget_has_libc:
|
||||
config.available_features.add('libc')
|
||||
|
||||
profdata_path = os.path.join(config.bin_llvm_tools_dir, "llvm-profdata")
|
||||
if config.libomptarget_test_pgo:
|
||||
config.available_features.add('pgo')
|
||||
config.substitutions.append(("%profdata", profdata_path))
|
||||
|
||||
# Determine whether the test system supports unified memory.
|
||||
# For CUDA, this is the case with compute capability 70 (Volta) or higher.
|
||||
@ -407,6 +409,8 @@ if config.test_fortran_compiler:
|
||||
config.available_features.add('flang')
|
||||
config.substitutions.append(("%flang", config.test_fortran_compiler))
|
||||
|
||||
config.substitutions.append(("%target_triple", config.libomptarget_current_target))
|
||||
|
||||
config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
|
||||
if config.libomptarget_current_target.startswith('nvptx') and config.cuda_path:
|
||||
config.substitutions.append(("%cuda_flags", "--cuda-path=" + config.cuda_path))
|
||||
|
@ -1,6 +1,6 @@
|
||||
@AUTO_GEN_COMMENT@
|
||||
|
||||
config.bin_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin"
|
||||
config.bin_llvm_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@"
|
||||
config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
|
||||
config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
|
||||
config.test_fortran_compiler="@OPENMP_TEST_Fortran_COMPILER@"
|
||||
|
@ -1,12 +1,17 @@
|
||||
// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
|
||||
// RUN: -Xclang "-fprofile-instrument=clang"
|
||||
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
|
||||
// RUN: --check-prefix="CLANG-PGO"
|
||||
// RUN: %libomptarget-compile-generic -fprofile-generate \
|
||||
// RUN: -Xclang "-fprofile-instrument=llvm"
|
||||
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
|
||||
// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
|
||||
// RUN: %profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.llvm.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="LLVM-PGO"
|
||||
|
||||
// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
|
||||
// RUN: -Xclang "-fprofile-instrument=clang"
|
||||
// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
|
||||
// RUN: %profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.clang.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="CLANG-PGO"
|
||||
|
||||
// REQUIRES: gpu
|
||||
// REQUIRES: pgo
|
||||
|
||||
|
@ -1522,3 +1522,4 @@ debugging features are supported.
|
||||
* Enable debugging assertions in the device. ``0x01``
|
||||
* Enable diagnosing common problems during offloading. ``0x4``
|
||||
* Enable device malloc statistics (amdgpu only). ``0x8``
|
||||
* Dump device PGO counters (only if PGO on GPU is enabled). ``0x10``
|
||||
|
Loading…
x
Reference in New Issue
Block a user