mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-18 07:16:40 +00:00
[lld-macho][arm64] Enhance safe ICF with thunk-based deduplication (#106573)
Currently, our `safe` ICF mode only merges non-address-significant code, leaving duplicate address-significant functions in the output. This patch introduces `safe_thunks` ICF mode, which keeps a single master copy of each function and replaces address-significant duplicates with thunks that branch to the master copy. Currently `--icf=safe_thunks` is only supported for `arm64` architectures.

**Perf stats for a large binary:**

| ICF Option          | Total Size | __text Size | __unwind_info | % total |
|---------------------|------------|-------------|---------------|---------|
| `--icf=none`        | 91.738 MB  | 55.220 MB   | 1.424 MB      | 0%      |
| `--icf=safe`        | 85.042 MB  | 49.572 MB   | 1.168 MB      | 7.30%   |
| `--icf=safe_thunks` | 84.650 MB  | 49.219 MB   | 1.143 MB      | 7.72%   |
| `--icf=all`         | 82.060 MB  | 48.726 MB   | 1.111 MB      | 10.55%  |

So overall we can expect a `~0.45%` binary size reduction for a typical large binary compared to the `--icf=safe` option.

**Runtime:** Linking the above binary took ~10 seconds. Comparing the link performance of --icf=safe_thunks vs --icf=safe, a ~2% slowdown was observed.
This commit is contained in:
parent
1be9a80768
commit
d1756165a9
@ -41,6 +41,10 @@ struct ARM64 : ARM64Common {
|
||||
Symbol *objcMsgSend) const override;
|
||||
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
|
||||
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
|
||||
|
||||
void initICFSafeThunkBody(InputSection *thunk,
|
||||
InputSection *branchTarget) const override;
|
||||
uint32_t getICFSafeThunkSize() const override;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
@ -175,6 +179,25 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
|
||||
/*offset=*/0, /*addend=*/0,
|
||||
/*referent=*/funcSym);
|
||||
}
|
||||
// ICF safe thunk body: just a single direct branch to the target function.
// The instruction is stored with a zero branch displacement; the
// ARM64_RELOC_BRANCH26 reloc added in initICFSafeThunkBody supplies the target.
|
||||
static constexpr uint32_t icfSafeThunkCode[] = {
|
||||
0x14000000, // 00: b target
|
||||
};
|
||||
|
||||
// Turns 'thunk' into an ICF safe thunk: its data becomes the shared
// icfSafeThunkCode template and a single branch reloc at offset 0 points it at
// 'branchTarget'.
void ARM64::initICFSafeThunkBody(InputSection *thunk,
|
||||
InputSection *branchTarget) const {
|
||||
// The base data here will not be itself modified, we'll just be adding a
|
||||
// reloc below. So we can directly use the constexpr above as the data.
|
||||
thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
|
||||
sizeof(icfSafeThunkCode)};
|
||||
|
|
||||
// The only instruction in the thunk is the branch, hence offset 0. The
// referent is the section itself rather than a symbol.
thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
|
||||
/*pcrel=*/true, /*length=*/2,
|
||||
/*offset=*/0, /*addend=*/0,
|
||||
/*referent=*/branchTarget);
|
||||
}
|
||||
|
||||
// Size in bytes of the ICF safe thunk body, i.e. of icfSafeThunkCode.
uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
|
||||
|
||||
ARM64::ARM64() : ARM64Common(LP64()) {
|
||||
cpuType = CPU_TYPE_ARM64;
|
||||
|
@ -68,6 +68,7 @@ enum class ICFLevel {
|
||||
unknown,
|
||||
none,
|
||||
safe,
|
||||
safe_thunks,
|
||||
all,
|
||||
};
|
||||
|
||||
|
@ -847,8 +847,14 @@ static ICFLevel getICFLevel(const ArgList &args) {
|
||||
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
|
||||
.Cases("none", "", ICFLevel::none)
|
||||
.Case("safe", ICFLevel::safe)
|
||||
.Case("safe_thunks", ICFLevel::safe_thunks)
|
||||
.Case("all", ICFLevel::all)
|
||||
.Default(ICFLevel::unknown);
|
||||
|
||||
if ((icfLevel == ICFLevel::safe_thunks) && (config->arch() != AK_arm64)) {
|
||||
error("--icf=safe_thunks is only supported on arm64 targets");
|
||||
}
|
||||
|
||||
if (icfLevel == ICFLevel::unknown) {
|
||||
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
|
||||
"', defaulting to `none'");
|
||||
@ -2116,7 +2122,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
|
||||
// foldIdenticalLiterals before foldIdenticalSections.
|
||||
foldIdenticalLiterals();
|
||||
if (config->icfLevel != ICFLevel::none) {
|
||||
if (config->icfLevel == ICFLevel::safe)
|
||||
if (config->icfLevel == ICFLevel::safe ||
|
||||
config->icfLevel == ICFLevel::safe_thunks)
|
||||
markAddrSigSymbols();
|
||||
foldIdenticalSections(/*onlyCfStrings=*/false);
|
||||
} else if (config->dedupStrings) {
|
||||
|
@ -45,6 +45,7 @@ public:
|
||||
const ConcatInputSection *ib);
|
||||
bool equalsVariable(const ConcatInputSection *ia,
|
||||
const ConcatInputSection *ib);
|
||||
void applySafeThunksToRange(size_t begin, size_t end);
|
||||
|
||||
// ICF needs a copy of the inputs vector because its equivalence-class
|
||||
// segregation algorithm destroys the proper sequence.
|
||||
@ -251,6 +252,50 @@ void ICF::forEachClassRange(size_t begin, size_t end,
|
||||
}
|
||||
}
|
||||
|
||||
// Given a range of identical icfInputs, replace address significant functions
|
||||
// with a thunk that is just a direct branch to the first function in the
|
||||
// series. This way we keep only one main body of the function but we still
|
||||
// retain the address uniqueness of relevant functions by having them be a
|
||||
// direct branch thunk rather than containing a full copy of the actual function
|
||||
// body.
|
||||
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
|
||||
// If the functions we're dealing with are smaller than the thunk size, then
|
||||
// just leave them all as-is - creating thunks would be a net loss.
|
||||
uint32_t thunkSize = target->getICFSafeThunkSize();
|
||||
if (icfInputs[begin]->data.size() <= thunkSize)
|
||||
return;
|
||||
|
|
||||
// When creating a unique ICF thunk, use the first section as the section that
|
||||
// all thunks will branch to.
|
||||
// icfInputs[begin] itself is never replaced here; it stays the full-body copy.
ConcatInputSection *masterIsec = icfInputs[begin];
|
||||
|
|
||||
for (size_t i = begin + 1; i < end; ++i) {
|
||||
ConcatInputSection *isec = icfInputs[i];
|
||||
// When we're done processing keepUnique entries, we can stop. Sorting
|
||||
// guarantees that all keepUnique will be at the front.
|
||||
if (!isec->keepUnique)
|
||||
break;
|
||||
|
|
||||
// Create a new synthetic section with the same segment and section name as
// the function it stands in for.
ConcatInputSection *thunk =
|
||||
makeSyntheticInputSection(isec->getSegName(), isec->getName());
|
||||
addInputSection(thunk);
|
||||
|
|
||||
// Give the thunk its branch-to-master body, then fold the original function
// into the thunk (not into the master) and tag its symbols as Thunk-folded.
target->initICFSafeThunkBody(thunk, masterIsec);
|
||||
thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
|
||||
|
|
||||
// Since we're folding the target function into a thunk, we need to adjust
|
||||
// the symbols that now got relocated from the target function to the thunk.
|
||||
// Since the thunk is only one branch, we move all symbols to offset 0 and
|
||||
// make sure that the size of all non-zero-size symbols is equal to the size
|
||||
// of the branch.
|
||||
for (auto *sym : thunk->symbols) {
|
||||
sym->value = 0;
|
||||
if (sym->size != 0)
|
||||
sym->size = thunkSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
|
||||
// with matching equivalence class
|
||||
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
|
||||
@ -312,6 +357,12 @@ void ICF::run() {
|
||||
|
||||
llvm::stable_sort(
|
||||
icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
|
||||
// When using safe_thunks, ensure that we first sort by icfEqClass and
|
||||
// then by keepUnique (descending). This guarantees that within an
|
||||
// equivalence class, the keepUnique inputs are always first.
|
||||
if (config->icfLevel == ICFLevel::safe_thunks)
|
||||
if (a->icfEqClass[0] == b->icfEqClass[0])
|
||||
return a->keepUnique > b->keepUnique;
|
||||
return a->icfEqClass[0] < b->icfEqClass[0];
|
||||
});
|
||||
forEachClass([&](size_t begin, size_t end) {
|
||||
@ -331,13 +382,37 @@ void ICF::run() {
|
||||
log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
|
||||
}
|
||||
|
||||
// When using safe_thunks, we need to create thunks for all keepUnique
|
||||
// functions that can be deduplicated. Since we're creating / adding new
|
||||
// InputSections, we can't parallelize this.
|
||||
if (config->icfLevel == ICFLevel::safe_thunks)
|
||||
forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
|
||||
applySafeThunksToRange(begin, end);
|
||||
});
|
||||
|
||||
// Fold sections within equivalence classes
|
||||
forEachClass([&](size_t begin, size_t end) {
|
||||
if (end - begin < 2)
|
||||
return;
|
||||
bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
|
||||
|
||||
// For ICF level safe_thunks, replace keepUnique function bodies with
|
||||
// thunks. For all other ICF levels, directly merge the functions.
|
||||
|
||||
ConcatInputSection *beginIsec = icfInputs[begin];
|
||||
for (size_t i = begin + 1; i < end; ++i)
|
||||
for (size_t i = begin + 1; i < end; ++i) {
|
||||
// Skip keepUnique inputs when using safe_thunks (already handled above)
|
||||
if (useSafeThunks && icfInputs[i]->keepUnique) {
|
||||
// Assert keepUnique sections are either small or replaced with thunks.
|
||||
assert(!icfInputs[i]->live ||
|
||||
icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
|
||||
assert(!icfInputs[i]->replacement ||
|
||||
icfInputs[i]->replacement->data.size() ==
|
||||
target->getICFSafeThunkSize());
|
||||
continue;
|
||||
}
|
||||
beginIsec->foldIdentical(icfInputs[i]);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -421,11 +496,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
|
||||
// can still fold it.
|
||||
bool hasFoldableFlags = (isSelRefsSection(isec) ||
|
||||
sectionType(isec->getFlags()) == MachO::S_REGULAR);
|
||||
|
||||
bool isCodeSec = isCodeSection(isec);
|
||||
|
||||
// When keepUnique is true, the section is not foldable. Unless we are at
|
||||
// icf level safe_thunks, in which case we still want to fold code sections.
|
||||
// When using safe_thunks we'll apply the safe_thunks logic at merge time
|
||||
// based on the 'keepUnique' flag.
|
||||
bool noUniqueRequirement =
|
||||
!isec->keepUnique ||
|
||||
((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
|
||||
|
||||
// FIXME: consider non-code __text sections as foldable?
|
||||
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
|
||||
(isCodeSection(isec) || isFoldableWithAddendsRemoved ||
|
||||
(isCodeSec || isFoldableWithAddendsRemoved ||
|
||||
isGccExceptTabSection(isec)) &&
|
||||
!isec->keepUnique && !isec->hasAltEntry &&
|
||||
noUniqueRequirement && !isec->hasAltEntry &&
|
||||
!isec->shouldOmitFromOutput() && hasFoldableFlags;
|
||||
if (isFoldable) {
|
||||
foldable.push_back(isec);
|
||||
|
@ -190,13 +190,14 @@ const Reloc *InputSection::getRelocAt(uint32_t off) const {
|
||||
return &*it;
|
||||
}
|
||||
|
||||
void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
|
||||
void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
|
||||
Symbol::ICFFoldKind foldKind) {
|
||||
align = std::max(align, copy->align);
|
||||
copy->live = false;
|
||||
copy->wasCoalesced = true;
|
||||
copy->replacement = this;
|
||||
for (auto ©Sym : copy->symbols)
|
||||
copySym->wasIdenticalCodeFolded = true;
|
||||
copySym->identicalCodeFoldingKind = foldKind;
|
||||
|
||||
symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
|
||||
copy->symbols.clear();
|
||||
|
@ -117,7 +117,8 @@ public:
|
||||
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
|
||||
void writeTo(uint8_t *buf);
|
||||
|
||||
void foldIdentical(ConcatInputSection *redundant);
|
||||
void foldIdentical(ConcatInputSection *redundant,
|
||||
Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
|
||||
ConcatInputSection *canonical() override {
|
||||
return replacement ? replacement : this;
|
||||
}
|
||||
|
@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
|
||||
}
|
||||
|
||||
static uint64_t getSymSizeForMap(Defined *sym) {
|
||||
if (sym->wasIdenticalCodeFolded)
|
||||
if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
|
||||
return 0;
|
||||
return sym->size;
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ Defined::Defined(StringRef name, InputFile *file, InputSection *isec,
|
||||
bool interposable)
|
||||
: Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
|
||||
privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
|
||||
wasIdenticalCodeFolded(false),
|
||||
identicalCodeFoldingKind(ICFFoldKind::None),
|
||||
referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
|
||||
interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
|
||||
weakDef(isWeakDef), external(isExternal), originalIsec(isec),
|
||||
|
@ -33,6 +33,15 @@ public:
|
||||
AliasKind,
|
||||
};
|
||||
|
||||
// Enum that describes the type of Identical Code Folding (ICF) applied to a
|
||||
// symbol. This information is crucial for accurately representing symbol
|
||||
// sizes in the map file.
|
||||
enum ICFFoldKind {
|
||||
None, // No folding is applied.
|
||||
Body, // The entire body (function or data) is folded.
|
||||
Thunk // The function body is folded into a single branch thunk.
|
||||
};
|
||||
|
||||
virtual ~Symbol() {}
|
||||
|
||||
Kind kind() const { return symbolKind; }
|
||||
@ -142,8 +151,8 @@ public:
|
||||
bool privateExtern : 1;
|
||||
// Whether this symbol should appear in the output symbol table.
|
||||
bool includeInSymtab : 1;
|
||||
// Whether this symbol was folded into a different symbol during ICF.
|
||||
bool wasIdenticalCodeFolded : 1;
|
||||
// The ICF folding kind of this symbol: None / Body / Thunk.
|
||||
ICFFoldKind identicalCodeFoldingKind : 2;
|
||||
// Symbols marked referencedDynamically won't be removed from the output's
|
||||
// symbol table by tools like strip. In theory, this could be set on arbitrary
|
||||
// symbols in input object files. In practice, it's used solely for the
|
||||
|
@ -1231,7 +1231,8 @@ void SymtabSection::emitStabs() {
|
||||
|
||||
// Constant-folded symbols go in the executable's symbol table, but don't
|
||||
// get a stabs entry unless --keep-icf-stabs flag is specified
|
||||
if (!config->keepICFStabs && defined->wasIdenticalCodeFolded)
|
||||
if (!config->keepICFStabs &&
|
||||
defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
|
||||
continue;
|
||||
|
||||
ObjFile *file = defined->getObjectFile();
|
||||
|
@ -74,6 +74,16 @@ public:
|
||||
uint64_t selrefVA,
|
||||
Symbol *objcMsgSend) const = 0;
|
||||
|
||||
// Init 'thunk' so that it is a direct jump to 'branchTarget'.
|
||||
virtual void initICFSafeThunkBody(InputSection *thunk,
|
||||
InputSection *branchTarget) const {
|
||||
llvm_unreachable("target does not support ICF safe thunks");
|
||||
}
|
||||
|
||||
virtual uint32_t getICFSafeThunkSize() const {
|
||||
llvm_unreachable("target does not support ICF safe thunks");
|
||||
}
|
||||
|
||||
// Symbols may be referenced via either the GOT or the stubs section,
|
||||
// depending on the relocation type. prepareSymbolRelocation() will set up the
|
||||
// GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
|
||||
|
254
lld/test/MachO/icf-safe-thunks.ll
Normal file
254
lld/test/MachO/icf-safe-thunks.ll
Normal file
@ -0,0 +1,254 @@
|
||||
; REQUIRES: aarch64
|
||||
|
||||
; RUN: rm -rf %t; mkdir %t
|
||||
; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj-safe-thunks.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
|
||||
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib -map %t/icf-safe.map %t/icf-obj-safe-thunks.o
|
||||
; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefixes=CHECK-ARM64
|
||||
; RUN: cat %t/icf-safe.map | FileCheck %s --check-prefixes=CHECK-ARM64-MAP
|
||||
|
||||
; CHECK-ARM64: (__TEXT,__text) section
|
||||
; CHECK-ARM64-NEXT: _func_unique_1:
|
||||
; CHECK-ARM64-NEXT: mov {{.*}}, #0x1
|
||||
;
|
||||
; CHECK-ARM64: _func_unique_2_canmerge:
|
||||
; CHECK-ARM64-NEXT: _func_2identical_v1:
|
||||
; CHECK-ARM64-NEXT: mov {{.*}}, #0x2
|
||||
;
|
||||
; CHECK-ARM64: _func_3identical_v1:
|
||||
; CHECK-ARM64-NEXT: mov {{.*}}, #0x3
|
||||
;
|
||||
; CHECK-ARM64: _func_3identical_v1_canmerge:
|
||||
; CHECK-ARM64-NEXT: _func_3identical_v2_canmerge:
|
||||
; CHECK-ARM64-NEXT: _func_3identical_v3_canmerge:
|
||||
; CHECK-ARM64-NEXT: mov {{.*}}, #0x21
|
||||
;
|
||||
; CHECK-ARM64: _call_all_funcs:
|
||||
; CHECK-ARM64-NEXT: stp x29
|
||||
;
|
||||
; CHECK-ARM64: _take_func_addr:
|
||||
; CHECK-ARM64-NEXT: adr
|
||||
;
|
||||
; CHECK-ARM64: _func_2identical_v2:
|
||||
; CHECK-ARM64-NEXT: b _func_2identical_v1
|
||||
; CHECK-ARM64-NEXT: _func_3identical_v2:
|
||||
; CHECK-ARM64-NEXT: b _func_3identical_v1
|
||||
; CHECK-ARM64-NEXT: _func_3identical_v3:
|
||||
; CHECK-ARM64-NEXT: b _func_3identical_v1
|
||||
|
||||
|
||||
; CHECK-ARM64-MAP: 0x00000010 [ 2] _func_unique_1
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_2identical_v1
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_unique_2_canmerge
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_3identical_v1
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_3identical_v1_canmerge
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v2_canmerge
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v3_canmerge
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000034 [ 2] _call_all_funcs
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000050 [ 2] _take_func_addr
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_2identical_v2
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_3identical_v2
|
||||
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_3identical_v3
|
||||
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
|
||||
target triple = "arm64-apple-macosx11.0.0"
|
||||
|
||||
@g_val = global i8 0, align 1
|
||||
@g_ptr = global ptr null, align 8
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_unique_1() #0 {
|
||||
entry:
|
||||
store volatile i8 1, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_unique_2_canmerge() local_unnamed_addr #0 {
|
||||
entry:
|
||||
store volatile i8 2, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_2identical_v1() #0 {
|
||||
entry:
|
||||
store volatile i8 2, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_2identical_v2() #0 {
|
||||
entry:
|
||||
store volatile i8 2, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_3identical_v1() #0 {
|
||||
entry:
|
||||
store volatile i8 3, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_3identical_v2() #0 {
|
||||
entry:
|
||||
store volatile i8 3, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_3identical_v3() #0 {
|
||||
entry:
|
||||
store volatile i8 3, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_3identical_v1_canmerge() local_unnamed_addr #0 {
|
||||
entry:
|
||||
store volatile i8 33, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_3identical_v2_canmerge() local_unnamed_addr #0 {
|
||||
entry:
|
||||
store volatile i8 33, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @func_3identical_v3_canmerge() local_unnamed_addr #0 {
|
||||
entry:
|
||||
store volatile i8 33, ptr @g_val, align 1, !tbaa !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
|
||||
define void @call_all_funcs() local_unnamed_addr #1 {
|
||||
entry:
|
||||
tail call void @func_unique_1()
|
||||
tail call void @func_unique_2_canmerge()
|
||||
tail call void @func_2identical_v1()
|
||||
tail call void @func_2identical_v2()
|
||||
tail call void @func_3identical_v1()
|
||||
tail call void @func_3identical_v2()
|
||||
tail call void @func_3identical_v3()
|
||||
tail call void @func_3identical_v1_canmerge()
|
||||
tail call void @func_3identical_v2_canmerge()
|
||||
tail call void @func_3identical_v3_canmerge()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
|
||||
define void @take_func_addr() local_unnamed_addr #0 {
|
||||
entry:
|
||||
store volatile ptr @func_unique_1, ptr @g_ptr, align 8, !tbaa !8
|
||||
store volatile ptr @func_2identical_v1, ptr @g_ptr, align 8, !tbaa !8
|
||||
store volatile ptr @func_2identical_v2, ptr @g_ptr, align 8, !tbaa !8
|
||||
store volatile ptr @func_3identical_v1, ptr @g_ptr, align 8, !tbaa !8
|
||||
store volatile ptr @func_3identical_v2, ptr @g_ptr, align 8, !tbaa !8
|
||||
store volatile ptr @func_3identical_v3, ptr @g_ptr, align 8, !tbaa !8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
|
||||
attributes #1 = { mustprogress nofree noinline norecurse nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
|
||||
|
||||
!llvm.module.flags = !{!0, !1, !2, !3}
|
||||
!llvm.ident = !{!4}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 8, !"PIC Level", i32 2}
|
||||
!2 = !{i32 7, !"uwtable", i32 1}
|
||||
!3 = !{i32 7, !"frame-pointer", i32 1}
|
||||
!4 = !{!"clang"}
|
||||
!5 = !{!6, !6, i64 0}
|
||||
!6 = !{!"omnipotent char", !7, i64 0}
|
||||
!7 = !{!"Simple C++ TBAA"}
|
||||
!8 = !{!9, !9, i64 0}
|
||||
!9 = !{!"any pointer", !6, i64 0}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;; Generate the above LLVM IR with the below script ;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; #!/bin/bash
|
||||
; set -ex
|
||||
; TOOLCHAIN_BIN="llvm-project/build/Debug/bin"
|
||||
;
|
||||
; # Create icf-safe-thunks.cpp file
|
||||
; cat > icf-safe-thunks.cpp <<EOF
|
||||
;
|
||||
; #define ATTR __attribute__((noinline)) extern "C"
|
||||
; typedef unsigned long long ULL;
|
||||
;
|
||||
; volatile char g_val = 0;
|
||||
; void *volatile g_ptr = 0;
|
||||
;
|
||||
; ATTR void func_unique_1() {
|
||||
; g_val = 1;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_unique_2_canmerge() {
|
||||
; g_val = 2;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_2identical_v1() {
|
||||
; g_val = 2;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_2identical_v2() {
|
||||
; g_val = 2;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_3identical_v1() {
|
||||
; g_val = 3;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_3identical_v2() {
|
||||
; g_val = 3;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_3identical_v3() {
|
||||
; g_val = 3;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_3identical_v1_canmerge() {
|
||||
; g_val = 33;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_3identical_v2_canmerge() {
|
||||
; g_val = 33;
|
||||
; }
|
||||
;
|
||||
; ATTR void func_3identical_v3_canmerge() {
|
||||
; g_val = 33;
|
||||
; }
|
||||
;
|
||||
; ATTR void call_all_funcs() {
|
||||
; func_unique_1();
|
||||
; func_unique_2_canmerge();
|
||||
; func_2identical_v1();
|
||||
; func_2identical_v2();
|
||||
; func_3identical_v1();
|
||||
; func_3identical_v2();
|
||||
; func_3identical_v3();
|
||||
; func_3identical_v1_canmerge();
|
||||
; func_3identical_v2_canmerge();
|
||||
; func_3identical_v3_canmerge();
|
||||
; }
|
||||
;
|
||||
; ATTR void take_func_addr() {
|
||||
; g_ptr = (void*)func_unique_1;
|
||||
; g_ptr = (void*)func_2identical_v1;
|
||||
; g_ptr = (void*)func_2identical_v2;
|
||||
; g_ptr = (void*)func_3identical_v1;
|
||||
; g_ptr = (void*)func_3identical_v2;
|
||||
; g_ptr = (void*)func_3identical_v3;
|
||||
; }
|
||||
; EOF
|
||||
;
|
||||
; $TOOLCHAIN_BIN/clang -target arm64-apple-macos11.0 -S -emit-llvm \
|
||||
; icf-safe-thunks.cpp -O3 -o icf-safe-thunks.ll
|
Loading…
x
Reference in New Issue
Block a user