[BOLT] Avoid EH trampolines for PIEs/DSOs (#117106)

We used to emit EH trampolines for PIE/DSO whenever a function fragment
contained a landing pad outside of it. However, it is common to have all
landing pads in a cold fragment even when their throwers are in a hot
one.

To reduce the number of trampolines, analyze landing pads for any given
function fragment, and if they all belong to the same (possibly
different) fragment, designate that fragment as a landing pad fragment
for the "thrower" fragment. Later, emit landing pad fragment symbol as
an LPStart for the thrower LSDA.
This commit is contained in:
Maksim Panchenko 2024-11-21 18:18:30 -08:00 committed by GitHub
parent dc580c9cf6
commit 105ecd8bb2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 200 additions and 34 deletions

View File

@ -527,6 +527,11 @@ private:
/// fragment of the function.
SmallVector<MCSymbol *, 0> LSDASymbols;
/// Each function fragment may have another fragment containing all landing
/// pads for it. If that's the case, the LP fragment will be stored in the
/// vector below with indexing starting with the main fragment.
SmallVector<std::optional<FragmentNum>, 0> LPFragments;
/// Map to discover which CFIs are attached to a given instruction offset.
/// Maps an instruction offset into a FrameInstructions offset.
/// This is only relevant to the buildCFG phase and is discarded afterwards.
@ -1885,6 +1890,42 @@ public:
return LSDASymbols[F.get()];
}
/// If all landing pads for the function fragment \p F are located in fragment
/// \p LPF, designate \p LPF as a landing-pad fragment for \p F. Passing
/// std::nullopt in LPF, means that landing pads for \p F are located in more
/// than one fragment.
void setLPFragment(const FragmentNum F, std::optional<FragmentNum> LPF) {
if (F.get() >= LPFragments.size())
LPFragments.resize(F.get() + 1);
LPFragments[F.get()] = LPF;
}
/// If function fragment \p F has a designated landing pad fragment, i.e. a
/// fragment that contains all landing pads for throwers in \p F, then return
/// that landing pad fragment number. If \p F does not need landing pads,
/// return \p F. Return nullptr if landing pads for \p F are scattered among
/// several function fragments.
std::optional<FragmentNum> getLPFragment(const FragmentNum F) {
if (!isSplit()) {
assert(F == FragmentNum::main() && "Invalid fragment number");
return FragmentNum::main();
}
if (F.get() >= LPFragments.size())
return std::nullopt;
return LPFragments[F.get()];
}
/// Return a symbol corresponding to a landing pad fragment for fragment \p F.
/// See getLPFragment().
MCSymbol *getLPStartSymbol(const FragmentNum F) {
if (std::optional<FragmentNum> LPFragment = getLPFragment(F))
return getSymbol(*LPFragment);
return nullptr;
}
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
uint64_t getOutputDataAddress() const { return OutputDataOffset; }

View File

@ -140,7 +140,7 @@ private:
void emitCFIInstruction(const MCCFIInstruction &Inst) const;
/// Emit exception handling ranges for the function.
/// Emit exception handling ranges for the function fragment.
void emitLSDA(BinaryFunction &BF, const FunctionFragment &FF);
/// Emit line number information corresponding to \p NewLoc. \p PrevLoc
@ -915,15 +915,15 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
// Emit the LSDA header.
// If LPStart is omitted, then the start of the FDE is used as a base for
// landing pad displacements. Then, if a cold fragment starts with a landing
// pad, this means that the first landing pad offset will be 0. However, C++
// runtime treats 0 as if there is no landing pad present, thus we *must* emit
// non-zero offsets for all valid LPs.
// landing pad displacements. Then, if a cold fragment starts with
// a landing pad, this means that the first landing pad offset will be 0.
// However, C++ runtime will treat 0 as if there is no landing pad, thus we
// cannot emit LP offset as 0.
//
// As a solution, for fixed-address binaries we set LPStart to 0, and for
// position-independent binaries we set LP start to FDE start minus one byte
// for FDEs that start with a landing pad.
const bool NeedsLPAdjustment = !FF.empty() && FF.front()->isLandingPad();
// position-independent binaries we offset LP start by one byte.
bool NeedsLPAdjustment = false;
const MCSymbol *LPStartSymbol = nullptr;
std::function<void(const MCSymbol *)> emitLandingPad;
if (BC.HasFixedLoadAddress) {
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
@ -935,17 +935,26 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
Streamer.emitIntValue(0, 4);
};
} else {
if (NeedsLPAdjustment) {
// Use relative LPStart format and emit LPStart as [SymbolStart - 1].
std::optional<FragmentNum> LPFN = BF.getLPFragment(FF.getFragmentNum());
LPStartSymbol = BF.getLPStartSymbol(FF.getFragmentNum());
assert(LPFN && LPStartSymbol && "Expected LPStart symbol to be set");
const FunctionFragment &LPFragment = BF.getLayout().getFragment(*LPFN);
NeedsLPAdjustment =
(!LPFragment.empty() && LPFragment.front()->isLandingPad());
// Emit LPStart encoding and optionally LPStart.
if (NeedsLPAdjustment || LPStartSymbol != StartSymbol) {
Streamer.emitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
MCSymbol *DotSymbol = BC.Ctx->createTempSymbol("LPBase");
Streamer.emitLabel(DotSymbol);
const MCExpr *LPStartExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx),
MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx),
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx);
LPStartExpr = MCBinaryExpr::createSub(
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
if (NeedsLPAdjustment)
LPStartExpr = MCBinaryExpr::createSub(
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
Streamer.emitValue(LPStartExpr, 4);
} else {
// DW_EH_PE_omit means FDE start (StartSymbol) will be used as LPStart.
@ -955,7 +964,7 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
if (LPSymbol) {
const MCExpr *LPOffsetExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(LPSymbol, *BC.Ctx),
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx), *BC.Ctx);
MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx), *BC.Ctx);
if (NeedsLPAdjustment)
LPOffsetExpr = MCBinaryExpr::createAdd(
LPOffsetExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);

View File

@ -901,8 +901,43 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
// have to be placed in the same fragment. When we split them, create
// trampoline landing pads that will redirect the execution to real LPs.
TrampolineSetType Trampolines;
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
Trampolines = createEHTrampolines(BF);
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) {
// If all landing pads for this fragment are grouped in one (potentially
// different) fragment, we can set LPStart to the start of that fragment
// and avoid trampoline code.
bool NeedsTrampolines = false;
for (FunctionFragment &FF : BF.getLayout().fragments()) {
// Vector of fragments that contain landing pads for this fragment.
SmallVector<FragmentNum, 4> LandingPadFragments;
for (const BinaryBasicBlock *BB : FF)
for (const BinaryBasicBlock *LPB : BB->landing_pads())
LandingPadFragments.push_back(LPB->getFragmentNum());
// Eliminate duplicate entries from the vector.
llvm::sort(LandingPadFragments);
auto Last = llvm::unique(LandingPadFragments);
LandingPadFragments.erase(Last, LandingPadFragments.end());
if (LandingPadFragments.size() == 0) {
// If the fragment has no landing pads, we can safely set itself as its
// landing pad fragment.
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
} else if (LandingPadFragments.size() == 1) {
BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front());
} else {
NeedsTrampolines = true;
break;
}
}
// Trampolines guarantee that all landing pads for any given fragment will
// be contained in the same fragment.
if (NeedsTrampolines) {
for (FunctionFragment &FF : BF.getLayout().fragments())
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
Trampolines = createEHTrampolines(BF);
}
}
// Check the new size to see if it's worth splitting the function.
if (BC.isX86() && LayoutUpdated) {
@ -933,6 +968,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
}
// Restore LP fragment for the main fragment if the splitting was undone.
if (BF.hasEHRanges() && !BF.isSplit())
BF.setLPFragment(FragmentNum::main(), FragmentNum::main());
// Fix branches if the splitting decision of the pass after function
// reordering is different from that of the pass before function reordering.
if (LayoutUpdated && BC.HasFinalizedFunctionOrder)

View File

@ -0,0 +1,86 @@
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: ld.lld --pie %t.o -o %t.exe -q
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --split-functions --split-eh \
# RUN: --split-all-cold --print-after-lowering --print-only=_start 2>&1 \
# RUN: | FileCheck %s
## _start has two landing pads: one hot and one cold. Hence, BOLT will introduce
## a landing pad trampoline. However, the trampoline code will make the main
## split fragment larger than the whole function before split. Then BOLT will
## undo the splitting and remove the trampoline.
# CHECK: Binary Function "_start"
# CHECK: IsSplit :
# CHECK-SAME: 0
## Check that a landing pad trampoline was created, but contains no instructions
## and falls though to the real landing pad.
# CHECK: {{^[^[:space:]]+}} (0 instructions
# CHECK-NEXT: Landing Pad{{$}}
# CHECK: Exec Count
# CHECK-SAME: : 0
# CHECK: Successors:
# CHECK-SAME: [[LP:[^[:space:]]+]]
# CHECK-EMPTY:
# CHECK-NEXT: [[LP]]
.text
.global foo
.type foo, %function
foo:
.cfi_startproc
ret
.cfi_endproc
.size foo, .-foo
.globl _start
.type _start, %function
_start:
# FDATA: 0 [unknown] 0 1 _start 0 1 100
.Lfunc_begin0:
.cfi_startproc
.cfi_lsda 27, .Lexception0
call foo
.Ltmp0:
call foo
.Ltmp1:
ret
## Cold landing pad.
.LLP1:
ret
## Hot landing pad.
LLP0:
# FDATA: 0 [unknown] 0 1 _start #LLP0# 1 100
ret
.cfi_endproc
.Lfunc_end0:
.size _start, .-_start
## EH table.
.section .gcc_except_table,"a",@progbits
.p2align 2
GCC_except_table0:
.Lexception0:
.byte 255 # @LPStart Encoding = omit
.byte 255 # @TType Encoding = omit
.byte 1 # Call site Encoding = uleb128
.uleb128 .Lcst_end0-.Lcst_begin0
.Lcst_begin0:
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.uleb128 .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
.uleb128 LLP0-.Lfunc_begin0 # jumps to LLP0
.byte 0 # On action: cleanup
.uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
.uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
.uleb128 .LLP1-.Lfunc_begin0 # jumps to .LLP1
.byte 0 # On action: cleanup
.Lcst_end0:

View File

@ -1,4 +1,4 @@
# Assembly generated from building the followingC++ code with the following
# Assembly generated from building the following C++ code with the following
# command using trunk clang. Then, basic block at .LBB1_7 was moved before the
# landing pad.
#

View File

@ -11,25 +11,16 @@ RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
RUN: --split-functions --split-eh --print-after-lowering \
RUN: --print-only=_Z10throw_testiPPc 2>&1 | FileCheck %s
## Hot code in the test case gets larger after splitting because of jump
## instruction relaxation. Check that BOLT reverts the split correctly.
## Check that a landing pad is split from its thrower and does not require a
## trampoline LP.
CHECK: Binary Function "_Z10throw_testiPPc"
CHECK: IsSplit :
CHECK-SAME: 0
## Check that the landing pad trampoline was created, but contains no
## instructions and falls to the real landing pad.
CHECK: {{^[^[:space:]]+}} (0 instructions
CHECK-NEXT: Landing Pad{{$}}
CHECK: Exec Count
CHECK-SAME: : 0
CHECK: Successors:
CHECK-SAME: [[LP:[^[:space:]]+]]
CHECK-EMPTY:
CHECK-NEXT: [[LP]]
CHECK-DAG: Exec Count
CHECK-NOT: Exec Count
CHECK-DAG: callq __cxa_begin_catch
CHECK-SAME: 1
CHECK: callq {{.*}} # handler: [[LPAD:.*]];
CHECK-NOT: Landing Pad{{$}}
CHECK: HOT-COLD SPLIT POINT
CHECK: {{^}}[[LPAD]]
CHECK-NEXT: Landing Pad
## Verify the output still executes correctly when the exception path is being
## taken.