mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-16 01:06:32 +00:00
[ELF] Add BPSectionOrderer options (#125559)
Reland #120514 after 2f6e3df08a8b7cd29273980e47310cf09c6fdbd8 fixed iteration order issue and libstdc++/libc++ differences. --- Both options instruct the linker to optimize section layout with the following goals: * `--bp-compression-sort=[data|function|both]`: Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size. * `--bp-startup-sort=function --irpgo-profile=<file>`: Utilize a temporal profile file to reduce page faults during program startup. The linker determines the section order by considering three groups: * Function sections ordered according to the temporal profile (`--irpgo-profile=`), prioritizing early-accessed and frequently accessed functions. * Function sections. Sections containing similar functions are placed together, maximizing compression opportunities. * Data sections. Similar data sections are placed together. Within each group, the sections are ordered using the Balanced Partitioning algorithm. The linker constructs a bipartite graph with two sets of vertices: sections and utility vertices. * For profile-guided function sections: + The number of utility vertices is determined by the symbol order within the profile file. + If `--bp-compression-sort-startup-functions` is specified, extra utility vertices are allocated to prioritize nearby function similarity. * For sections ordered for compression: Utility vertices are determined by analyzing k-mers of the section content and relocations. The call graph profile is disabled during this optimization. When `--symbol-ordering-file=` is specified, sections described in that file are placed earlier. Co-authored-by: Pengying Xu <xpy66swsry@gmail.com>
This commit is contained in:
parent
0c7bd879d2
commit
6ab034b828
95
lld/ELF/BPSectionOrderer.cpp
Normal file
95
lld/ELF/BPSectionOrderer.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
//===- BPSectionOrderer.cpp -----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "BPSectionOrderer.h"
|
||||
#include "InputFiles.h"
|
||||
#include "InputSection.h"
|
||||
#include "SymbolTable.h"
|
||||
#include "Symbols.h"
|
||||
#include "lld/Common/BPSectionOrdererBase.inc"
|
||||
#include "llvm/Support/Endian.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace lld::elf;
|
||||
|
||||
namespace {
|
||||
struct BPOrdererELF;
|
||||
}
|
||||
template <> struct lld::BPOrdererTraits<struct BPOrdererELF> {
|
||||
using Section = elf::InputSectionBase;
|
||||
using Defined = elf::Defined;
|
||||
};
|
||||
namespace {
|
||||
struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
|
||||
DenseMap<const InputSectionBase *, Defined *> secToSym;
|
||||
|
||||
static uint64_t getSize(const Section &sec) { return sec.getSize(); }
|
||||
static bool isCodeSection(const Section &sec) {
|
||||
return sec.flags & ELF::SHF_EXECINSTR;
|
||||
}
|
||||
ArrayRef<Defined *> getSymbols(const Section &sec) {
|
||||
auto it = secToSym.find(&sec);
|
||||
if (it == secToSym.end())
|
||||
return {};
|
||||
return ArrayRef(it->second);
|
||||
}
|
||||
|
||||
static void
|
||||
getSectionHashes(const Section &sec, SmallVectorImpl<uint64_t> &hashes,
|
||||
const DenseMap<const void *, uint64_t> §ionToIdx) {
|
||||
constexpr unsigned windowSize = 4;
|
||||
|
||||
// Calculate content hashes: k-mers and the last k-1 bytes.
|
||||
ArrayRef<uint8_t> data = sec.content();
|
||||
if (data.size() >= windowSize)
|
||||
for (size_t i = 0; i <= data.size() - windowSize; ++i)
|
||||
hashes.push_back(support::endian::read32le(data.data() + i));
|
||||
for (uint8_t byte : data.take_back(windowSize - 1))
|
||||
hashes.push_back(byte);
|
||||
|
||||
llvm::sort(hashes);
|
||||
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
|
||||
}
|
||||
|
||||
static StringRef getSymName(const Defined &sym) { return sym.getName(); }
|
||||
static uint64_t getSymValue(const Defined &sym) { return sym.value; }
|
||||
static uint64_t getSymSize(const Defined &sym) { return sym.size; }
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Collects one (section, symbol) pair for every non-empty input section that
// is reachable through a defined symbol, then delegates the actual ordering to
// BPOrdererELF::computeOrder.
DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
    Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
    bool forDataCompression, bool compressionSortStartupFunctions,
    bool verbose) {
  SmallVector<InputSectionBase *> sections;
  DenseMap<CachedHashStringRef, std::set<unsigned>> rootSymbolToSectionIdxs;
  BPOrdererELF orderer;

  // Registers the section that `sym` is defined in, unless the symbol is not
  // a Defined, has no input section, the section is empty, or the section was
  // already registered through an earlier symbol.
  auto recordSymbol = [&](Symbol &sym) {
    auto *definedSym = dyn_cast<Defined>(&sym);
    if (!definedSym)
      return;

    auto *isec = dyn_cast_or_null<InputSectionBase>(definedSym->section);
    if (!isec)
      return;
    if (isec->size == 0)
      return;

    // Only the first symbol seen for a section is kept.
    if (!orderer.secToSym.try_emplace(isec, definedSym).second)
      return;

    // The index recorded for the root symbol is the slot the section is about
    // to occupy in `sections`.
    const unsigned sectionIdx = sections.size();
    CachedHashStringRef rootName(getRootSymbol(sym.getName()));
    rootSymbolToSectionIdxs[rootName].insert(sectionIdx);
    sections.emplace_back(isec);
  };

  // Symbol-table symbols first, then each object file's local symbols.
  for (Symbol *globalSym : ctx.symtab->getSymbols())
    recordSymbol(*globalSym);
  for (ELFFileBase *objFile : ctx.objectFiles) {
    for (Symbol *localSym : objFile->getLocalSymbols())
      recordSymbol(*localSym);
  }

  return orderer.computeOrder(profilePath, forFunctionCompression,
                              forDataCompression,
                              compressionSortStartupFunctions, verbose,
                              sections, rootSymbolToSectionIdxs);
}
|
37
lld/ELF/BPSectionOrderer.h
Normal file
37
lld/ELF/BPSectionOrderer.h
Normal file
@ -0,0 +1,37 @@
|
||||
//===- BPSectionOrderer.h -------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file uses Balanced Partitioning to order sections to improve startup
|
||||
/// time and compressed size.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLD_ELF_BPSECTION_ORDERER_H
|
||||
#define LLD_ELF_BPSECTION_ORDERER_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
namespace lld::elf {
|
||||
struct Ctx;
|
||||
class InputSectionBase;
|
||||
|
||||
/// Run Balanced Partitioning to find the optimal function and data order to
|
||||
/// improve startup time and compressed size.
|
||||
///
|
||||
/// It is important that -ffunction-sections and -fdata-sections compiler flags
|
||||
/// are used to ensure functions and data are in their own sections and thus
|
||||
/// can be reordered.
|
||||
llvm::DenseMap<const InputSectionBase *, int>
|
||||
runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
|
||||
bool forFunctionCompression, bool forDataCompression,
|
||||
bool compressionSortStartupFunctions, bool verbose);
|
||||
|
||||
} // namespace lld::elf
|
||||
|
||||
#endif
|
@ -37,6 +37,7 @@ add_lld_library(lldELF
|
||||
Arch/X86.cpp
|
||||
Arch/X86_64.cpp
|
||||
ARMErrataFix.cpp
|
||||
BPSectionOrderer.cpp
|
||||
CallGraphSort.cpp
|
||||
DWARF.cpp
|
||||
Driver.cpp
|
||||
@ -72,6 +73,7 @@ add_lld_library(lldELF
|
||||
Object
|
||||
Option
|
||||
Passes
|
||||
ProfileData
|
||||
Support
|
||||
TargetParser
|
||||
TransformUtils
|
||||
|
@ -264,6 +264,12 @@ struct Config {
|
||||
bool armBe8 = false;
|
||||
BsymbolicKind bsymbolic = BsymbolicKind::None;
|
||||
CGProfileSortKind callGraphProfileSort;
|
||||
llvm::StringRef irpgoProfilePath;
|
||||
bool bpStartupFunctionSort = false;
|
||||
bool bpCompressionSortStartupFunctions = false;
|
||||
bool bpFunctionOrderForCompression = false;
|
||||
bool bpDataOrderForCompression = false;
|
||||
bool bpVerboseSectionOrderer = false;
|
||||
bool checkSections;
|
||||
bool checkDynamicRelocs;
|
||||
std::optional<llvm::DebugCompressionType> compressDebugSections;
|
||||
|
@ -1121,6 +1121,53 @@ static CGProfileSortKind getCGProfileSortKind(Ctx &ctx,
|
||||
return CGProfileSortKind::None;
|
||||
}
|
||||
|
||||
// Translates the balanced-partitioning command-line options
// (--bp-compression-sort=, --bp-startup-sort=,
// --[no-]bp-compression-sort-startup-functions, --verbose-bp-section-orderer,
// --irpgo-profile=) into ctx.arg, reporting invalid values and unsupported
// option combinations as errors.
static void parseBPOrdererOptions(Ctx &ctx, opt::InputArgList &args) {
  if (auto *compressionArg = args.getLastArg(OPT_bp_compression_sort)) {
    const StringRef mode = compressionArg->getValue();
    const bool sortFunctions = mode == "function" || mode == "both";
    const bool sortData = mode == "data" || mode == "both";

    if (sortFunctions)
      ctx.arg.bpFunctionOrderForCompression = true;
    if (sortData)
      ctx.arg.bpDataOrderForCompression = true;
    if (!sortFunctions && !sortData && mode != "none")
      ErrAlways(ctx) << compressionArg->getSpelling()
                     << ": expected [none|function|data|both]";

    // Any value other than "none" conflicts with a call graph ordering file.
    if (mode != "none" && args.hasArg(OPT_call_graph_ordering_file))
      ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
                        "--call-graph-ordering-file";
  }

  if (auto *startupArg = args.getLastArg(OPT_bp_startup_sort)) {
    const StringRef mode = startupArg->getValue();
    const bool isNone = mode == "none";

    if (mode == "function")
      ctx.arg.bpStartupFunctionSort = true;
    else if (!isNone)
      ErrAlways(ctx) << startupArg->getSpelling()
                     << ": expected [none|function]";

    if (!isNone && args.hasArg(OPT_call_graph_ordering_file))
      ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
                        "--call-graph-ordering-file";
  }

  ctx.arg.bpCompressionSortStartupFunctions =
      args.hasFlag(OPT_bp_compression_sort_startup_functions,
                   OPT_no_bp_compression_sort_startup_functions, false);
  ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
  ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);

  // The remaining checks only apply when no profile was supplied.
  if (!ctx.arg.irpgoProfilePath.empty())
    return;

  if (ctx.arg.bpStartupFunctionSort)
    ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
                      "--irpgo-profile";
  if (ctx.arg.bpCompressionSortStartupFunctions)
    ErrAlways(ctx)
        << "--bp-compression-sort-startup-functions must be used with "
           "--irpgo-profile";
}
|
||||
|
||||
static DebugCompressionType getCompressionType(Ctx &ctx, StringRef s,
|
||||
StringRef option) {
|
||||
DebugCompressionType type = StringSwitch<DebugCompressionType>(s)
|
||||
@ -1262,6 +1309,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
|
||||
ctx.arg.bsymbolic = BsymbolicKind::All;
|
||||
}
|
||||
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
|
||||
parseBPOrdererOptions(ctx, args);
|
||||
ctx.arg.checkSections =
|
||||
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
|
||||
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
|
||||
|
@ -141,6 +141,19 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
|
||||
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
|
||||
Flags<[HelpHidden]>;
|
||||
|
||||
// Path to the temporal profile consumed by --bp-startup-sort=.
defm irpgo_profile: EEq<"irpgo-profile",
  "Read a temporal profile file for use with --bp-startup-sort=">;
|
||||
def bp_compression_sort: JJ<"bp-compression-sort=">, MetaVarName<"[none,function,data,both]">,
|
||||
HelpText<"Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size">;
|
||||
def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">,
|
||||
HelpText<"Utilize a temporal profile file to reduce page faults during program startup">;
|
||||
|
||||
// Auxiliary options related to balanced partitioning
|
||||
// Boolean option pair: --bp-compression-sort-startup-functions and its
// --no- form.
defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
  "When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">;
|
||||
def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
|
||||
HelpText<"Print information on balanced partitioning">;
|
||||
|
||||
// --chroot doesn't have a help text because it is an internal option.
|
||||
def chroot: Separate<["--"], "chroot">;
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "Writer.h"
|
||||
#include "AArch64ErrataFix.h"
|
||||
#include "ARMErrataFix.h"
|
||||
#include "BPSectionOrderer.h"
|
||||
#include "CallGraphSort.h"
|
||||
#include "Config.h"
|
||||
#include "InputFiles.h"
|
||||
@ -1080,8 +1081,18 @@ static void maybeShuffle(Ctx &ctx,
|
||||
// that don't appear in the order file.
|
||||
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
|
||||
DenseMap<const InputSectionBase *, int> sectionOrder;
|
||||
if (!ctx.arg.callGraphProfile.empty())
|
||||
if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
|
||||
ctx.arg.bpDataOrderForCompression) {
|
||||
TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
|
||||
sectionOrder = runBalancedPartitioning(
|
||||
ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
|
||||
ctx.arg.bpFunctionOrderForCompression,
|
||||
ctx.arg.bpDataOrderForCompression,
|
||||
ctx.arg.bpCompressionSortStartupFunctions,
|
||||
ctx.arg.bpVerboseSectionOrderer);
|
||||
} else if (!ctx.arg.callGraphProfile.empty()) {
|
||||
sectionOrder = computeCallGraphProfileOrder(ctx);
|
||||
}
|
||||
|
||||
if (ctx.arg.symbolOrderingFile.empty())
|
||||
return sectionOrder;
|
||||
|
@ -64,6 +64,10 @@ template <class D> struct BPOrderer {
|
||||
const DenseMap<CachedHashStringRef, std::set<unsigned>>
|
||||
&rootSymbolToSectionIdxs)
|
||||
-> llvm::DenseMap<const Section *, int>;
|
||||
|
||||
std::optional<StringRef> static getResolvedLinkageName(StringRef name) {
|
||||
return {};
|
||||
}
|
||||
};
|
||||
} // namespace lld
|
||||
|
||||
@ -98,10 +102,11 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
|
||||
// Merge sections that are nearly identical
|
||||
SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
|
||||
DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
|
||||
unsigned threshold = sectionHashes.size() > 10000 ? 5 : 0;
|
||||
for (auto &[sectionIdx, hashes] : sectionHashes) {
|
||||
uint64_t wholeHash = 0;
|
||||
for (auto hash : hashes)
|
||||
if (hashFrequency[hash] > 5)
|
||||
if (hashFrequency[hash] > threshold)
|
||||
wholeHash ^= hash;
|
||||
auto [it, wasInserted] =
|
||||
wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
|
||||
|
104
lld/test/ELF/bp-section-orderer-stress.s
Normal file
104
lld/test/ELF/bp-section-orderer-stress.s
Normal file
@ -0,0 +1,104 @@
|
||||
# REQUIRES: aarch64
|
||||
|
||||
## Generate a large test case and check that the output is deterministic.
|
||||
|
||||
# RUN: %python %s %t.s %t.proftext
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t.s -o %t.o
|
||||
# RUN: llvm-profdata merge %t.proftext -o %t.profdata
|
||||
|
||||
# RUN: ld.lld --icf=all -o %t1.o %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both
|
||||
# RUN: ld.lld --icf=all -o %t2.o %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both
|
||||
# RUN: cmp %t1.o %t2.o
|
||||
|
||||
import random
|
||||
import sys
|
||||
|
||||
assembly_filepath = sys.argv[1]
|
||||
proftext_filepath = sys.argv[2]
|
||||
|
||||
random.seed(1234)
|
||||
num_functions = 1000
|
||||
num_data = 100
|
||||
num_traces = 10
|
||||
|
||||
function_names = [f"f{n}" for n in range(num_functions)]
|
||||
data_names = [f"d{n}" for n in range(num_data)]
|
||||
profiled_functions = function_names[: int(num_functions / 2)]
|
||||
|
||||
function_contents = [
|
||||
f"""
|
||||
{name}:
|
||||
add w0, w0, #{i % 4096}
|
||||
add w1, w1, #{i % 10}
|
||||
add w2, w0, #{i % 20}
|
||||
adrp x3, {name}
|
||||
ret
|
||||
"""
|
||||
for i, name in enumerate(function_names)
|
||||
]
|
||||
|
||||
data_contents = [
|
||||
f"""
|
||||
{name}:
|
||||
.ascii "s{i % 2}-{i % 3}-{i % 5}"
|
||||
.xword {name}
|
||||
"""
|
||||
for i, name in enumerate(data_names)
|
||||
]
|
||||
|
||||
trace_contents = [
|
||||
f"""
|
||||
# Weight
|
||||
1
|
||||
{", ".join(random.sample(profiled_functions, len(profiled_functions)))}
|
||||
"""
|
||||
for i in range(num_traces)
|
||||
]
|
||||
|
||||
profile_contents = [
|
||||
f"""
|
||||
{name}
|
||||
# Func Hash:
|
||||
{i}
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
1
|
||||
"""
|
||||
for i, name in enumerate(profiled_functions)
|
||||
]
|
||||
|
||||
with open(assembly_filepath, "w") as f:
|
||||
f.write(
|
||||
f"""
|
||||
.text
|
||||
.globl _start
|
||||
|
||||
_start:
|
||||
ret
|
||||
|
||||
{"".join(function_contents)}
|
||||
|
||||
.data
|
||||
{"".join(data_contents)}
|
||||
|
||||
"""
|
||||
)
|
||||
|
||||
with open(proftext_filepath, "w") as f:
|
||||
f.write(
|
||||
f"""
|
||||
:ir
|
||||
:temporal_prof_traces
|
||||
|
||||
# Num Traces
|
||||
{num_traces}
|
||||
# Trace Stream Size:
|
||||
{num_traces}
|
||||
|
||||
{"".join(trace_contents)}
|
||||
|
||||
{"".join(profile_contents)}
|
||||
"""
|
||||
)
|
335
lld/test/ELF/bp-section-orderer.s
Normal file
335
lld/test/ELF/bp-section-orderer.s
Normal file
@ -0,0 +1,335 @@
|
||||
# REQUIRES: aarch64
|
||||
# RUN: rm -rf %t && split-file %s %t && cd %t
|
||||
|
||||
## Check for incompatible cases
|
||||
# RUN: not ld.lld %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-CALLGRAPH-ERR
|
||||
# RUN: not ld.lld --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
|
||||
# RUN: not ld.lld --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-ERR
|
||||
# RUN: not ld.lld --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-COMPRESSION-ERR
|
||||
# RUN: not ld.lld --bp-startup-sort=invalid --bp-compression-sort=invalid 2>&1 | FileCheck %s --check-prefix=BP-INVALID
|
||||
|
||||
# BP-STARTUP-CALLGRAPH-ERR: error: --bp-startup-sort=function is incompatible with --call-graph-ordering-file
|
||||
# BP-COMPRESSION-CALLGRAPH-ERR: error: --bp-compression-sort is incompatible with --call-graph-ordering-file
|
||||
# BP-STARTUP-ERR: error: --bp-startup-sort=function must be used with --irpgo-profile
|
||||
# BP-STARTUP-COMPRESSION-ERR: error: --bp-compression-sort-startup-functions must be used with --irpgo-profile
|
||||
|
||||
# BP-INVALID: error: --bp-compression-sort=: expected [none|function|data|both]
|
||||
# BP-INVALID: error: --bp-startup-sort=: expected [none|function]
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
|
||||
# RUN: llvm-profdata merge a.proftext -o a.profdata
|
||||
# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
|
||||
|
||||
# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
|
||||
# STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
|
||||
|
||||
# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
|
||||
# RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
|
||||
# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
|
||||
|
||||
# RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
|
||||
# RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
|
||||
# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
|
||||
|
||||
# RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
|
||||
# RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
|
||||
# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
|
||||
|
||||
# RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
|
||||
# RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
|
||||
# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
|
||||
|
||||
# RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
|
||||
# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
|
||||
|
||||
# RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
|
||||
# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
|
||||
# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
|
||||
|
||||
# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
|
||||
# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
|
||||
# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
|
||||
|
||||
#--- a.proftext
|
||||
:ir
|
||||
:temporal_prof_traces
|
||||
# Num Traces
|
||||
1
|
||||
# Trace Stream Size:
|
||||
1
|
||||
# Weight
|
||||
1
|
||||
A, B, C
|
||||
|
||||
A
|
||||
# Func Hash:
|
||||
1111
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
1
|
||||
|
||||
B
|
||||
# Func Hash:
|
||||
2222
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
1
|
||||
|
||||
C
|
||||
# Func Hash:
|
||||
3333
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
1
|
||||
|
||||
D
|
||||
# Func Hash:
|
||||
4444
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
1
|
||||
|
||||
#--- a.txt
|
||||
A
|
||||
F
|
||||
E
|
||||
D
|
||||
s2
|
||||
s1
|
||||
d3
|
||||
d2
|
||||
|
||||
#--- a.c
|
||||
const char s5[] = "engineering";
|
||||
const char s4[] = "computer program";
|
||||
const char s3[] = "hardware engineer";
|
||||
const char s2[] = "computer software";
|
||||
const char s1[] = "hello world program";
|
||||
int d4[] = {1,2,3,4,5,6};
|
||||
int d3[] = {5,6,7,8};
|
||||
int d2[] = {7,8,9,10};
|
||||
int d1[] = {3,4,5,6};
|
||||
|
||||
int C(int a);
|
||||
int B(int a);
|
||||
void A();
|
||||
|
||||
int F(int a) { return C(a + 3); }
|
||||
int E(int a) { return C(a + 2); }
|
||||
int D(int a) { return B(a + 2); }
|
||||
int C(int a) { A(); return a + 2; }
|
||||
int B(int a) { A(); return a + 1; }
|
||||
void A() {}
|
||||
|
||||
int _start() { return 0; }
|
||||
|
||||
#--- gen
|
||||
clang --target=aarch64-linux-gnu -O0 -ffunction-sections -fdata-sections -fno-asynchronous-unwind-tables -S a.c -o -
|
||||
;--- a.s
|
||||
.file "a.c"
|
||||
.section .text.F,"ax",@progbits
|
||||
.globl F // -- Begin function F
|
||||
.p2align 2
|
||||
.type F,@function
|
||||
F: // @F
|
||||
// %bb.0: // %entry
|
||||
sub sp, sp, #32
|
||||
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
||||
add x29, sp, #16
|
||||
stur w0, [x29, #-4]
|
||||
ldur w8, [x29, #-4]
|
||||
add w0, w8, #3
|
||||
bl C
|
||||
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
||||
add sp, sp, #32
|
||||
ret
|
||||
.Lfunc_end0:
|
||||
.size F, .Lfunc_end0-F
|
||||
// -- End function
|
||||
.section .text.C,"ax",@progbits
|
||||
.globl C // -- Begin function C
|
||||
.p2align 2
|
||||
.type C,@function
|
||||
C: // @C
|
||||
// %bb.0: // %entry
|
||||
sub sp, sp, #32
|
||||
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
||||
add x29, sp, #16
|
||||
stur w0, [x29, #-4]
|
||||
bl A
|
||||
ldur w8, [x29, #-4]
|
||||
add w0, w8, #2
|
||||
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
||||
add sp, sp, #32
|
||||
ret
|
||||
.Lfunc_end1:
|
||||
.size C, .Lfunc_end1-C
|
||||
// -- End function
|
||||
.section .text.E,"ax",@progbits
|
||||
.globl E // -- Begin function E
|
||||
.p2align 2
|
||||
.type E,@function
|
||||
E: // @E
|
||||
// %bb.0: // %entry
|
||||
sub sp, sp, #32
|
||||
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
||||
add x29, sp, #16
|
||||
stur w0, [x29, #-4]
|
||||
ldur w8, [x29, #-4]
|
||||
add w0, w8, #2
|
||||
bl C
|
||||
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
||||
add sp, sp, #32
|
||||
ret
|
||||
.Lfunc_end2:
|
||||
.size E, .Lfunc_end2-E
|
||||
// -- End function
|
||||
.section .text.D,"ax",@progbits
|
||||
.globl D // -- Begin function D
|
||||
.p2align 2
|
||||
.type D,@function
|
||||
D: // @D
|
||||
// %bb.0: // %entry
|
||||
sub sp, sp, #32
|
||||
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
||||
add x29, sp, #16
|
||||
stur w0, [x29, #-4]
|
||||
ldur w8, [x29, #-4]
|
||||
add w0, w8, #2
|
||||
bl B
|
||||
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
||||
add sp, sp, #32
|
||||
ret
|
||||
.Lfunc_end3:
|
||||
.size D, .Lfunc_end3-D
|
||||
// -- End function
|
||||
.section .text.B,"ax",@progbits
|
||||
.globl B // -- Begin function B
|
||||
.p2align 2
|
||||
.type B,@function
|
||||
B: // @B
|
||||
// %bb.0: // %entry
|
||||
sub sp, sp, #32
|
||||
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
||||
add x29, sp, #16
|
||||
stur w0, [x29, #-4]
|
||||
bl A
|
||||
ldur w8, [x29, #-4]
|
||||
add w0, w8, #1
|
||||
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
||||
add sp, sp, #32
|
||||
ret
|
||||
.Lfunc_end4:
|
||||
.size B, .Lfunc_end4-B
|
||||
// -- End function
|
||||
.section .text.A,"ax",@progbits
|
||||
.globl A // -- Begin function A
|
||||
.p2align 2
|
||||
.type A,@function
|
||||
A: // @A
|
||||
// %bb.0: // %entry
|
||||
ret
|
||||
.Lfunc_end5:
|
||||
.size A, .Lfunc_end5-A
|
||||
// -- End function
|
||||
.section .text._start,"ax",@progbits
|
||||
.globl _start // -- Begin function _start
|
||||
.p2align 2
|
||||
.type _start,@function
|
||||
_start: // @_start
|
||||
// %bb.0: // %entry
|
||||
mov w0, wzr
|
||||
ret
|
||||
.Lfunc_end6:
|
||||
.size _start, .Lfunc_end6-_start
|
||||
// -- End function
|
||||
.type s5,@object // @s5
|
||||
.section .rodata.s5,"a",@progbits
|
||||
.globl s5
|
||||
s5:
|
||||
.asciz "engineering"
|
||||
.size s5, 12
|
||||
|
||||
.type s4,@object // @s4
|
||||
.section .rodata.s4,"a",@progbits
|
||||
.globl s4
|
||||
s4:
|
||||
.asciz "computer program"
|
||||
.size s4, 17
|
||||
|
||||
.type s3,@object // @s3
|
||||
.section .rodata.s3,"a",@progbits
|
||||
.globl s3
|
||||
s3:
|
||||
.asciz "hardware engineer"
|
||||
.size s3, 18
|
||||
|
||||
.type s2,@object // @s2
|
||||
.section .rodata.s2,"a",@progbits
|
||||
.globl s2
|
||||
s2:
|
||||
.asciz "computer software"
|
||||
.size s2, 18
|
||||
|
||||
.type s1,@object // @s1
|
||||
.section .rodata.s1,"a",@progbits
|
||||
.globl s1
|
||||
s1:
|
||||
.asciz "hello world program"
|
||||
.size s1, 20
|
||||
|
||||
.type d4,@object // @d4
|
||||
.section .data.d4,"aw",@progbits
|
||||
.globl d4
|
||||
.p2align 2, 0x0
|
||||
d4:
|
||||
.word 1 // 0x1
|
||||
.word 2 // 0x2
|
||||
.word 3 // 0x3
|
||||
.word 4 // 0x4
|
||||
.word 5 // 0x5
|
||||
.word 6 // 0x6
|
||||
.size d4, 24
|
||||
|
||||
.type d3,@object // @d3
|
||||
.section .data.d3,"aw",@progbits
|
||||
.globl d3
|
||||
.p2align 2, 0x0
|
||||
d3:
|
||||
.word 5 // 0x5
|
||||
.word 6 // 0x6
|
||||
.word 7 // 0x7
|
||||
.word 8 // 0x8
|
||||
.size d3, 16
|
||||
|
||||
.type d2,@object // @d2
|
||||
.section .data.d2,"aw",@progbits
|
||||
.globl d2
|
||||
.p2align 2, 0x0
|
||||
d2:
|
||||
.word 7 // 0x7
|
||||
.word 8 // 0x8
|
||||
.word 9 // 0x9
|
||||
.word 10 // 0xa
|
||||
.size d2, 16
|
||||
|
||||
.type d1,@object // @d1
|
||||
.section .data.d1,"aw",@progbits
|
||||
.globl d1
|
||||
.p2align 2, 0x0
|
||||
d1:
|
||||
.word 3 // 0x3
|
||||
.word 4 // 0x4
|
||||
.word 5 // 0x5
|
||||
.word 6 // 0x6
|
||||
.size d1, 16
|
||||
|
||||
.section ".note.GNU-stack","",@progbits
|
||||
.addrsig
|
||||
.addrsig_sym C
|
||||
.addrsig_sym B
|
||||
.addrsig_sym A
|
Loading…
x
Reference in New Issue
Block a user