From fa4ac19f0fc937e30fd7711dad98d0fcdb34f8ba Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 11 Apr 2025 21:26:19 -0700 Subject: [PATCH] [BOLT] Accept PLT fall-throughs as valid traces (#129481) We used to report PLT traces as invalid (mismatching disassembled function contents) because PLT functions are marked as pseudo and ignored, thus missing CFG. However, such traces are not mismatching the function contents. Accept them without attaching the profile. Test Plan: updated callcont-fallthru.s --- bolt/lib/Profile/DataAggregator.cpp | 19 ++++++++++++++----- bolt/test/X86/callcont-fallthru.s | 10 ++++++++++ bolt/test/link_fdata.py | 11 ++++++++++- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index d20626bd5062..f016576ff61a 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -871,11 +871,6 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, BinaryContext &BC = BF.getBinaryContext(); - if (!BF.isSimple()) - return std::nullopt; - - assert(BF.hasCFG() && "can only record traces in CFG state"); - // Offsets of the trace within this function. const uint64_t From = FirstLBR.To - BF.getAddress(); const uint64_t To = SecondLBR.From - BF.getAddress(); @@ -883,6 +878,20 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, if (From > To) return std::nullopt; + // Accept fall-throughs inside pseudo functions (PLT/thunks). + // This check has to be above BF.empty as pseudo functions would pass it: + // pseudo => ignored => CFG not built => empty. + // If we return nullopt, trace would be reported as mismatching disassembled + // function contents which it is not. To avoid this, return an empty + // fall-through list instead. + if (BF.isPseudo()) + return Branches; + + if (!BF.isSimple()) + return std::nullopt; + + assert(BF.hasCFG() && "can only record traces in CFG state"); + const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index 95cb4c5fc2df..ee72d8f62e03 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -9,6 +9,7 @@ # RUN: link_fdata %s %t %t.pa3 PREAGG3 # RUN: link_fdata %s %t %t.pat PREAGGT1 # RUN: link_fdata %s %t %t.pat2 PREAGGT2 +# RUN: link_fdata %s %t %t.patplt PREAGGPLT ## Check normal case: fallthrough is not LP or secondary entry. # RUN: llvm-strip --strip-unneeded %t -o %t.strip @@ -42,6 +43,12 @@ # RUN: llvm-bolt %t.strip --pa -p %t.pat2 -o %t.out \ # RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 +## Check pre-aggregated traces don't report zero-sized PLT fall-through as +## invalid trace +# RUN: llvm-bolt %t.strip --pa -p %t.patplt -o %t.out | FileCheck %s \ +# RUN: --check-prefix=CHECK-PLT +# CHECK-PLT: traces mismatching disassembled function contents: 0 + .globl foo .type foo, %function foo: @@ -65,7 +72,10 @@ main: movl $0x0, -0x4(%rbp) movl %edi, -0x8(%rbp) movq %rsi, -0x10(%rbp) +Ltmp0_br: callq puts@PLT +## Check PLT traces are accepted +# PREAGGPLT: T #Ltmp0_br# #puts@plt# #puts@plt# 3 ## Target is an external-origin call continuation # PREAGG1: B X:0 #Ltmp1# 2 0 # PREAGGT1: T X:0 #Ltmp1# #Ltmp4_br# 2 diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index 028823a69ce0..f1d83458346d 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -8,6 +8,7 @@ respective anchor symbols, and prints the resulting file to stdout. """ import argparse +import os import subprocess import sys import re @@ -84,8 +85,16 @@ with open(args.input, "r") as f: exit("ERROR: unexpected input:\n%s" % line) # Read nm output: +is_llvm_nm = os.path.basename(args.nmtool) == "llvm-nm" nm_output = subprocess.run( - [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True + [ + args.nmtool, + "--defined-only", + "--special-syms" if is_llvm_nm else "--synthetic", + args.objfile, + ], + text=True, + capture_output=True, ).stdout # Populate symbol map symbols = {}