[lld-macho][arm64] Enhance safe ICF with thunk-based deduplication (#106573)

Currently, our `safe` ICF mode only merges non-address-significant code,
leaving duplicate address-significant functions in the output. This
patch introduces `safe_thunks` ICF mode, which keeps a single master
copy of each function and replaces address-significant duplicates with
thunks that branch to the master copy.
Currently `--icf=safe_thunks` is only supported for `arm64`
architectures.

**Perf stats for a large binary:**
| ICF Option | Total Size | __text Size | __unwind_info | % total size reduction |
|-------------------|------------|-------------|---------------------|---------------------------|
| `--icf=none` | 91.738 MB | 55.220 MB | 1.424 MB | 0% |
| `--icf=safe` | 85.042 MB | 49.572 MB | 1.168 MB | 7.30% |
| `--icf=safe_thunks` | 84.650 MB | 49.219 MB | 1.143 MB | 7.72% |
| `--icf=all` | 82.060 MB | 48.726 MB | 1.111 MB | 10.55% |

So overall we can expect a `~0.45%` binary size reduction for a typical
large binary compared to the `--icf=safe` option.

**Runtime:**
Linking the above binary took ~10 seconds. Comparing the link
performance of --icf=safe_thunks vs --icf=safe, a ~2% slowdown was
observed.
This commit is contained in:
alx32 2024-09-05 16:36:21 -07:00 committed by GitHub
parent 1be9a80768
commit d1756165a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 405 additions and 12 deletions

View File

@ -41,6 +41,10 @@ struct ARM64 : ARM64Common {
Symbol *objcMsgSend) const override;
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
void initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const override;
uint32_t getICFSafeThunkSize() const override;
};
} // namespace
@ -175,6 +179,25 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
/*offset=*/0, /*addend=*/0,
/*referent=*/funcSym);
}
// Machine code of an ICF safe thunk: exactly one unconditional branch
// instruction (`b target`, located at offset 00 of the thunk). The branch is
// encoded here with a zero displacement; the real target is provided by the
// ARM64_RELOC_BRANCH26 relocation that is attached at offset 0 of the thunk.
static constexpr uint32_t icfSafeThunkCode[] = {0x14000000};
// Turns 'thunk' into a one-instruction section that branches straight to
// 'branchTarget'.
void ARM64::initICFSafeThunkBody(InputSection *thunk,
                                 InputSection *branchTarget) const {
  // The thunk bytes are never patched in place - only a relocation is attached
  // to them - so every thunk can point at the shared constexpr template
  // instead of owning a private copy.
  const auto *thunkBytes = reinterpret_cast<const uint8_t *>(icfSafeThunkCode);
  thunk->data = {thunkBytes, sizeof(icfSafeThunkCode)};

  // Resolve the branch at offset 0 of the thunk against the target section.
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/branchTarget);
}
// Size in bytes of the thunk body installed by initICFSafeThunkBody().
uint32_t ARM64::getICFSafeThunkSize() const {
  return sizeof(icfSafeThunkCode);
}
ARM64::ARM64() : ARM64Common(LP64()) {
cpuType = CPU_TYPE_ARM64;

View File

@ -68,6 +68,7 @@ enum class ICFLevel {
unknown,
none,
safe,
safe_thunks,
all,
};

View File

@ -847,8 +847,14 @@ static ICFLevel getICFLevel(const ArgList &args) {
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
.Cases("none", "", ICFLevel::none)
.Case("safe", ICFLevel::safe)
.Case("safe_thunks", ICFLevel::safe_thunks)
.Case("all", ICFLevel::all)
.Default(ICFLevel::unknown);
if ((icfLevel == ICFLevel::safe_thunks) && (config->arch() != AK_arm64)) {
error("--icf=safe_thunks is only supported on arm64 targets");
}
if (icfLevel == ICFLevel::unknown) {
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
"', defaulting to `none'");
@ -2116,7 +2122,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
// foldIdenticalLiterals before foldIdenticalSections.
foldIdenticalLiterals();
if (config->icfLevel != ICFLevel::none) {
if (config->icfLevel == ICFLevel::safe)
if (config->icfLevel == ICFLevel::safe ||
config->icfLevel == ICFLevel::safe_thunks)
markAddrSigSymbols();
foldIdenticalSections(/*onlyCfStrings=*/false);
} else if (config->dedupStrings) {

View File

@ -45,6 +45,7 @@ public:
const ConcatInputSection *ib);
bool equalsVariable(const ConcatInputSection *ia,
const ConcatInputSection *ib);
void applySafeThunksToRange(size_t begin, size_t end);
// ICF needs a copy of the inputs vector because its equivalence-class
// segregation algorithm destroys the proper sequence.
@ -251,6 +252,50 @@ void ICF::forEachClassRange(size_t begin, size_t end,
}
}
// Given a range of identical icfInputs, replace address significant functions
// with a thunk that is just a direct branch to the first function in the
// series. This way we keep only one main body of the function but we still
// retain the address uniqueness of relevant functions by having them be a
// direct branch thunk rather than containing a full copy of the actual function
// body.
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
// If the functions we're dealing with are smaller than the thunk size, then
// just leave them all as-is - creating thunks would be a net loss.
uint32_t thunkSize = target->getICFSafeThunkSize();
if (icfInputs[begin]->data.size() <= thunkSize)
return;
// When creating a unique ICF thunk, use the first section as the section that
// all thunks will branch to.
ConcatInputSection *masterIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i) {
ConcatInputSection *isec = icfInputs[i];
// When we're done processing keepUnique entries, we can stop. Sorting
// guaratees that all keepUnique will be at the front.
if (!isec->keepUnique)
break;
ConcatInputSection *thunk =
makeSyntheticInputSection(isec->getSegName(), isec->getName());
addInputSection(thunk);
target->initICFSafeThunkBody(thunk, masterIsec);
thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
// Since we're folding the target function into a thunk, we need to adjust
// the symbols that now got relocated from the target function to the thunk.
// Since the thunk is only one branch, we move all symbols to offset 0 and
// make sure that the size of all non-zero-size symbols is equal to the size
// of the branch.
for (auto *sym : thunk->symbols) {
sym->value = 0;
if (sym->size != 0)
sym->size = thunkSize;
}
}
}
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
// with matching equivalence class
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
@ -312,6 +357,12 @@ void ICF::run() {
llvm::stable_sort(
icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
// When using safe_thunks, ensure that we first sort by icfEqClass and
// then by keepUnique (descending). This guarantees that within an
// equivalence class, the keepUnique inputs are always first.
if (config->icfLevel == ICFLevel::safe_thunks)
if (a->icfEqClass[0] == b->icfEqClass[0])
return a->keepUnique > b->keepUnique;
return a->icfEqClass[0] < b->icfEqClass[0];
});
forEachClass([&](size_t begin, size_t end) {
@ -331,13 +382,37 @@ void ICF::run() {
log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
}
// When using safe_thunks, we need to create thunks for all keepUnique
// functions that can be deduplicated. Since we're creating / adding new
// InputSections, we can't parallelize this.
if (config->icfLevel == ICFLevel::safe_thunks)
forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
applySafeThunksToRange(begin, end);
});
// Fold sections within equivalence classes
forEachClass([&](size_t begin, size_t end) {
if (end - begin < 2)
return;
bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
// For ICF level safe_thunks, replace keepUnique function bodies with
// thunks. For all other ICF levels, directly merge the functions.
ConcatInputSection *beginIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i)
for (size_t i = begin + 1; i < end; ++i) {
// Skip keepUnique inputs when using safe_thunks (already handled above)
if (useSafeThunks && icfInputs[i]->keepUnique) {
// Assert keepUnique sections are either small or replaced with thunks.
assert(!icfInputs[i]->live ||
icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
assert(!icfInputs[i]->replacement ||
icfInputs[i]->replacement->data.size() ==
target->getICFSafeThunkSize());
continue;
}
beginIsec->foldIdentical(icfInputs[i]);
}
});
}
@ -421,11 +496,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
// can still fold it.
bool hasFoldableFlags = (isSelRefsSection(isec) ||
sectionType(isec->getFlags()) == MachO::S_REGULAR);
bool isCodeSec = isCodeSection(isec);
// When keepUnique is true, the section is not foldable. Unless we are at
// icf level safe_thunks, in which case we still want to fold code sections.
// When using safe_thunks we'll apply the safe_thunks logic at merge time
// based on the 'keepUnique' flag.
bool noUniqueRequirement =
!isec->keepUnique ||
((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
// FIXME: consider non-code __text sections as foldable?
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
(isCodeSection(isec) || isFoldableWithAddendsRemoved ||
(isCodeSec || isFoldableWithAddendsRemoved ||
isGccExceptTabSection(isec)) &&
!isec->keepUnique && !isec->hasAltEntry &&
noUniqueRequirement && !isec->hasAltEntry &&
!isec->shouldOmitFromOutput() && hasFoldableFlags;
if (isFoldable) {
foldable.push_back(isec);

View File

@ -190,13 +190,14 @@ const Reloc *InputSection::getRelocAt(uint32_t off) const {
return &*it;
}
void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
Symbol::ICFFoldKind foldKind) {
align = std::max(align, copy->align);
copy->live = false;
copy->wasCoalesced = true;
copy->replacement = this;
for (auto &copySym : copy->symbols)
copySym->wasIdenticalCodeFolded = true;
copySym->identicalCodeFoldingKind = foldKind;
symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
copy->symbols.clear();

View File

@ -117,7 +117,8 @@ public:
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
void writeTo(uint8_t *buf);
void foldIdentical(ConcatInputSection *redundant);
void foldIdentical(ConcatInputSection *redundant,
Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}

View File

@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
}
static uint64_t getSymSizeForMap(Defined *sym) {
if (sym->wasIdenticalCodeFolded)
if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
return 0;
return sym->size;
}

View File

@ -60,7 +60,7 @@ Defined::Defined(StringRef name, InputFile *file, InputSection *isec,
bool interposable)
: Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
wasIdenticalCodeFolded(false),
identicalCodeFoldingKind(ICFFoldKind::None),
referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
weakDef(isWeakDef), external(isExternal), originalIsec(isec),

View File

@ -33,6 +33,15 @@ public:
AliasKind,
};
// Enum that describes the type of Identical Code Folding (ICF) applied to a
// symbol. This information is crucial for accurately representing symbol
// sizes in the map file.
//
// The underlying type is fixed to an unsigned type on purpose: the value is
// stored in a 2-bit bit-field (identicalCodeFoldingKind). Without a fixed
// unsigned underlying type, compilers that treat such an enum bit-field as
// signed (notably MSVC) can only represent -2..1 in two bits, so Thunk (2)
// would not read back equal to itself.
enum ICFFoldKind : uint8_t {
  None, // No folding is applied.
  Body, // The entire body (function or data) is folded.
  Thunk // The function body is folded into a single branch thunk.
};
virtual ~Symbol() {}
Kind kind() const { return symbolKind; }
@ -142,8 +151,8 @@ public:
bool privateExtern : 1;
// Whether this symbol should appear in the output symbol table.
bool includeInSymtab : 1;
// Whether this symbol was folded into a different symbol during ICF.
bool wasIdenticalCodeFolded : 1;
// The ICF folding kind of this symbol: None / Body / Thunk.
ICFFoldKind identicalCodeFoldingKind : 2;
// Symbols marked referencedDynamically won't be removed from the output's
// symbol table by tools like strip. In theory, this could be set on arbitrary
// symbols in input object files. In practice, it's used solely for the

View File

@ -1231,7 +1231,8 @@ void SymtabSection::emitStabs() {
// Constant-folded symbols go in the executable's symbol table, but don't
// get a stabs entry unless --keep-icf-stabs flag is specified
if (!config->keepICFStabs && defined->wasIdenticalCodeFolded)
if (!config->keepICFStabs &&
defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
continue;
ObjFile *file = defined->getObjectFile();

View File

@ -74,6 +74,16 @@ public:
uint64_t selrefVA,
Symbol *objcMsgSend) const = 0;
// Initializes 'thunk' so that it is a direct jump to 'branchTarget'.
// Called by ICF when the safe_thunks level replaces a function body with a
// thunk. This default implementation only traps via llvm_unreachable; a
// target that supports ICF safe thunks has to override it.
virtual void initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const {
llvm_unreachable("target does not support ICF safe thunks");
}
// Returns the size of an ICF safe thunk. ICF compares this value against a
// section's data size to decide whether replacing the body with a thunk is
// worthwhile. This default implementation only traps via llvm_unreachable; a
// target that supports ICF safe thunks has to override it.
virtual uint32_t getICFSafeThunkSize() const {
llvm_unreachable("target does not support ICF safe thunks");
}
// Symbols may be referenced via either the GOT or the stubs section,
// depending on the relocation type. prepareSymbolRelocation() will set up the
// GOT/stubs entries, and resolveSymbolVA() will return the addresses of those

View File

@ -0,0 +1,254 @@
; REQUIRES: aarch64
; RUN: rm -rf %t; mkdir %t
; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj-safe-thunks.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib -map %t/icf-safe.map %t/icf-obj-safe-thunks.o
; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefixes=CHECK-ARM64
; RUN: cat %t/icf-safe.map | FileCheck %s --check-prefixes=CHECK-ARM64-MAP
; CHECK-ARM64: (__TEXT,__text) section
; CHECK-ARM64-NEXT: _func_unique_1:
; CHECK-ARM64-NEXT: mov {{.*}}, #0x1
;
; CHECK-ARM64: _func_unique_2_canmerge:
; CHECK-ARM64-NEXT: _func_2identical_v1:
; CHECK-ARM64-NEXT: mov {{.*}}, #0x2
;
; CHECK-ARM64: _func_3identical_v1:
; CHECK-ARM64-NEXT: mov {{.*}}, #0x3
;
; CHECK-ARM64: _func_3identical_v1_canmerge:
; CHECK-ARM64-NEXT: _func_3identical_v2_canmerge:
; CHECK-ARM64-NEXT: _func_3identical_v3_canmerge:
; CHECK-ARM64-NEXT: mov {{.*}}, #0x21
;
; CHECK-ARM64: _call_all_funcs:
; CHECK-ARM64-NEXT: stp x29
;
; CHECK-ARM64: _take_func_addr:
; CHECK-ARM64-NEXT: adr
;
; CHECK-ARM64: _func_2identical_v2:
; CHECK-ARM64-NEXT: b _func_2identical_v1
; CHECK-ARM64-NEXT: _func_3identical_v2:
; CHECK-ARM64-NEXT: b _func_3identical_v1
; CHECK-ARM64-NEXT: _func_3identical_v3:
; CHECK-ARM64-NEXT: b _func_3identical_v1
; CHECK-ARM64-MAP: 0x00000010 [ 2] _func_unique_1
; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_2identical_v1
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_unique_2_canmerge
; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_3identical_v1
; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_3identical_v1_canmerge
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v2_canmerge
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v3_canmerge
; CHECK-ARM64-MAP-NEXT: 0x00000034 [ 2] _call_all_funcs
; CHECK-ARM64-MAP-NEXT: 0x00000050 [ 2] _take_func_addr
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_2identical_v2
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_3identical_v2
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_3identical_v3
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"
@g_val = global i8 0, align 1
@g_ptr = global ptr null, align 8
; Stores 1 to @g_val. No other function in this file has the same body, and
; its address is taken in @take_func_addr. The expected disassembly keeps it
; as a regular function with its own body.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_unique_1() #0 {
entry:
store volatile i8 1, ptr @g_val, align 1, !tbaa !5
ret void
}
; Stores 2 to @g_val, i.e. the same body as @func_2identical_v1/_v2, but it is
; marked local_unnamed_addr and its address is not taken in this file. The
; expected disassembly puts its label directly on @func_2identical_v1's body
; (a plain fold, no thunk).
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_unique_2_canmerge() local_unnamed_addr #0 {
entry:
store volatile i8 2, ptr @g_val, align 1, !tbaa !5
ret void
}
; First of two identical functions that store 2 to @g_val; its address is
; taken in @take_func_addr. The expected disassembly keeps the real body here
; and makes it the branch target of @func_2identical_v2's thunk.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_2identical_v1() #0 {
entry:
store volatile i8 2, ptr @g_val, align 1, !tbaa !5
ret void
}
; Second of two identical functions that store 2 to @g_val; its address is
; taken in @take_func_addr. The expected disassembly replaces its body with a
; single branch to @func_2identical_v1.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_2identical_v2() #0 {
entry:
store volatile i8 2, ptr @g_val, align 1, !tbaa !5
ret void
}
; First of three identical functions that store 3 to @g_val; its address is
; taken in @take_func_addr. The expected disassembly keeps the real body here
; and makes it the branch target of the _v2 and _v3 thunks.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_3identical_v1() #0 {
entry:
store volatile i8 3, ptr @g_val, align 1, !tbaa !5
ret void
}
; Second of three identical functions that store 3 to @g_val; its address is
; taken in @take_func_addr. The expected disassembly replaces its body with a
; single branch to @func_3identical_v1.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_3identical_v2() #0 {
entry:
store volatile i8 3, ptr @g_val, align 1, !tbaa !5
ret void
}
; Third of three identical functions that store 3 to @g_val; its address is
; taken in @take_func_addr. The expected disassembly replaces its body with a
; single branch to @func_3identical_v1.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_3identical_v3() #0 {
entry:
store volatile i8 3, ptr @g_val, align 1, !tbaa !5
ret void
}
; First of three identical functions that store 33 (0x21) to @g_val. All three
; are marked local_unnamed_addr and none has its address taken in this file;
; the expected disassembly shows the three labels sharing one body.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_3identical_v1_canmerge() local_unnamed_addr #0 {
entry:
store volatile i8 33, ptr @g_val, align 1, !tbaa !5
ret void
}
; Second of the three identical local_unnamed_addr functions that store 33
; (0x21) to @g_val; expected to share the single body that is emitted for
; @func_3identical_v1_canmerge.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_3identical_v2_canmerge() local_unnamed_addr #0 {
entry:
store volatile i8 33, ptr @g_val, align 1, !tbaa !5
ret void
}
; Third of the three identical local_unnamed_addr functions that store 33
; (0x21) to @g_val; expected to share the single body that is emitted for
; @func_3identical_v1_canmerge.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @func_3identical_v3_canmerge() local_unnamed_addr #0 {
entry:
store volatile i8 33, ptr @g_val, align 1, !tbaa !5
ret void
}
; Calls each of the ten test functions once, so that every one of them has a
; direct call site in the linked output.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
define void @call_all_funcs() local_unnamed_addr #1 {
entry:
tail call void @func_unique_1()
tail call void @func_unique_2_canmerge()
tail call void @func_2identical_v1()
tail call void @func_2identical_v2()
tail call void @func_3identical_v1()
tail call void @func_3identical_v2()
tail call void @func_3identical_v3()
tail call void @func_3identical_v1_canmerge()
tail call void @func_3identical_v2_canmerge()
tail call void @func_3identical_v3_canmerge()
ret void
}
; Stores the addresses of the six functions that are not marked
; local_unnamed_addr into @g_ptr. These are the functions whose addresses the
; test observes; the *_canmerge functions are deliberately absent here.
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
define void @take_func_addr() local_unnamed_addr #0 {
entry:
store volatile ptr @func_unique_1, ptr @g_ptr, align 8, !tbaa !8
store volatile ptr @func_2identical_v1, ptr @g_ptr, align 8, !tbaa !8
store volatile ptr @func_2identical_v2, ptr @g_ptr, align 8, !tbaa !8
store volatile ptr @func_3identical_v1, ptr @g_ptr, align 8, !tbaa !8
store volatile ptr @func_3identical_v2, ptr @g_ptr, align 8, !tbaa !8
store volatile ptr @func_3identical_v3, ptr @g_ptr, align 8, !tbaa !8
ret void
}
attributes #0 = { mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
attributes #1 = { mustprogress nofree noinline norecurse nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"uwtable", i32 1}
!3 = !{i32 7, !"frame-pointer", i32 1}
!4 = !{!"clang"}
!5 = !{!6, !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C++ TBAA"}
!8 = !{!9, !9, i64 0}
!9 = !{!"any pointer", !6, i64 0}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;; Generate the above LLVM IR with the below script ;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; #!/bin/bash
; set -ex
; TOOLCHAIN_BIN="llvm-project/build/Debug/bin"
;
; # Create icf-safe-thunks.cpp file
; cat > icf-safe-thunks.cpp <<EOF
;
; #define ATTR __attribute__((noinline)) extern "C"
; typedef unsigned long long ULL;
;
; volatile char g_val = 0;
; void *volatile g_ptr = 0;
;
; ATTR void func_unique_1() {
; g_val = 1;
; }
;
; ATTR void func_unique_2_canmerge() {
; g_val = 2;
; }
;
; ATTR void func_2identical_v1() {
; g_val = 2;
; }
;
; ATTR void func_2identical_v2() {
; g_val = 2;
; }
;
; ATTR void func_3identical_v1() {
; g_val = 3;
; }
;
; ATTR void func_3identical_v2() {
; g_val = 3;
; }
;
; ATTR void func_3identical_v3() {
; g_val = 3;
; }
;
; ATTR void func_3identical_v1_canmerge() {
; g_val = 33;
; }
;
; ATTR void func_3identical_v2_canmerge() {
; g_val = 33;
; }
;
; ATTR void func_3identical_v3_canmerge() {
; g_val = 33;
; }
;
; ATTR void call_all_funcs() {
; func_unique_1();
; func_unique_2_canmerge();
; func_2identical_v1();
; func_2identical_v2();
; func_3identical_v1();
; func_3identical_v2();
; func_3identical_v3();
; func_3identical_v1_canmerge();
; func_3identical_v2_canmerge();
; func_3identical_v3_canmerge();
; }
;
; ATTR void take_func_addr() {
; g_ptr = (void*)func_unique_1;
; g_ptr = (void*)func_2identical_v1;
; g_ptr = (void*)func_2identical_v2;
; g_ptr = (void*)func_3identical_v1;
; g_ptr = (void*)func_3identical_v2;
; g_ptr = (void*)func_3identical_v3;
; }
; EOF
;
; $TOOLCHAIN_BIN/clang -target arm64-apple-macos11.0 -S -emit-llvm \
; icf-safe-thunks.cpp -O3 -o icf-safe-thunks.ll