0
0
mirror of https://github.com/llvm/llvm-project.git synced 2025-04-21 09:46:49 +00:00

[lld-macho] Fix STABS entries for --icf=safe_thunks and --keep-icf-stabs ()

When using the linker flags `--icf=safe_thunks` and `--keep-icf-stabs`
together, an issue arises with the STABS debugging entries in the linked
output. The problem affects STABS entries for functions that are folded
via ICF using thunks.
For instance, if `func1` is merged into `func2` through a thunk, the
STABS entry for `func1` incorrectly points to the object file of
`func2`. Each function’s STABS entry should instead point to its own
original object file (e.g., the STABS entry for `func1` should reference
`func1`’s object file). Because of this mismatch, `dsymutil` cannot
retrieve the debug information for the affected function.
This patch fixes the problem so that each STABS entry always points to
the object file in which its symbol was originally defined.
This commit is contained in:
alx32 2025-03-27 14:11:19 -07:00 committed by GitHub
parent 7c3ecffe9b
commit 57e5b82e8c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 166 additions and 25 deletions

@ -1205,18 +1205,6 @@ void SymtabSection::emitEndFunStab(Defined *defined) {
stabs.emplace_back(std::move(stab));
}
// Maps a function symbol to the symbol whose body actually ends up in the
// final binary. A symbol whose fold kind is ICFFoldKind::None or
// ICFFoldKind::Body is returned unchanged; for any other fold kind (i.e. the
// symbol was folded through a thunk) the body symbol is obtained from
// getBodyForThunkFoldedSym().
Defined *SymtabSection::getFuncBodySym(Defined *originalSym) {
  const auto foldKind = originalSym->identicalCodeFoldingKind;
  const bool keepsOwnBody = foldKind == Symbol::ICFFoldKind::None ||
                            foldKind == Symbol::ICFFoldKind::Body;
  if (!keepsOwnBody)
    return macho::getBodyForThunkFoldedSym(originalSym);
  return originalSym;
}
void SymtabSection::emitStabs() {
if (config->omitDebugInfo)
return;
@ -1252,10 +1240,11 @@ void SymtabSection::emitStabs() {
if (!file || !file->compileUnit)
continue;
// We use 'originalIsec' to get the file id of the symbol since 'isec()'
// might point to the merged ICF symbol's file
symbolsNeedingStabs.emplace_back(
defined, getFuncBodySym(defined)->originalIsec->getFile()->id);
// We use the symbol's original InputSection to get the file id,
// even for ICF folded symbols, to ensure STABS entries point to the
// correct object file where the symbol was originally defined
symbolsNeedingStabs.emplace_back(defined,
defined->originalIsec->getFile()->id);
}
}
@ -1270,10 +1259,12 @@ void SymtabSection::emitStabs() {
InputFile *lastFile = nullptr;
for (SortingPair &pair : symbolsNeedingStabs) {
Defined *defined = pair.first;
// We use 'originalIsec' of the symbol since we care about the actual origin
// of the symbol, not the canonical location returned by `isec()`.
Defined *funcBodySym = getFuncBodySym(defined);
InputSection *isec = funcBodySym->originalIsec;
// When emitting STABS entries for a symbol, always use the original
// InputSection of the defined symbol, not the section of the function body
// (which might be a different function entirely if ICF folded this
// function). This ensures STABS entries point back to the original object
// file.
InputSection *isec = defined->originalIsec;
ObjFile *file = cast<ObjFile>(isec->getFile());
if (lastFile == nullptr || lastFile != file) {
@ -1288,12 +1279,30 @@ void SymtabSection::emitStabs() {
StabsEntry symStab;
symStab.sect = isec->parent->index;
symStab.strx = stringTableSection.addString(defined->getName());
symStab.value = funcBodySym->getVA();
// When using --keep-icf-stabs, we need to use the VA of the actual function
// body that the linker will place in the binary. This is the function that
// the symbol refers to after ICF folding.
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
// For thunks, we need to get the function they point to
Defined *target = getBodyForThunkFoldedSym(defined);
symStab.value = target->getVA();
} else {
symStab.value = defined->getVA();
}
if (isCodeSection(isec)) {
symStab.type = N_FUN;
stabs.emplace_back(std::move(symStab));
emitEndFunStab(funcBodySym);
// For the end function marker in STABS, we need to use the size of the
// actual function body that exists in the output binary
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
// For thunks, we use the target's size
Defined *target = getBodyForThunkFoldedSym(defined);
emitEndFunStab(target);
} else {
emitEndFunStab(defined);
}
} else {
symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
stabs.emplace_back(std::move(symStab));

@ -3,6 +3,7 @@
; RUN: rm -rf %t && split-file %s %t
; Test single object file case
; RUN: llc -filetype=obj %t/a.ll -O3 -o %t/a.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/a.dylib %t/a.o
@ -26,6 +27,42 @@
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/a_thunks.dylib %t/a.o
; RUN: dsymutil -s %t/a_thunks.dylib > %t/a_thunks.txt
;;;;;;;;;;;;;;;;;;;;;;;;;;;; Test multiple object files with identical functions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; RUN: llc -filetype=obj %t/b.ll -O3 -o %t/b.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/multi_thunks.dylib %t/a.o %t/b.o
; RUN: dsymutil -s %t/multi_thunks.dylib | FileCheck %s --check-prefix=VERIFY-MULTI-STABS
# Check that STABS entries correctly associate functions with their originating object files
# VERIFY-MULTI-STABS-LABEL: Symbol table for: '{{.*}}/multi_thunks.dylib'
# First object file's source and object file entries
# VERIFY-MULTI-STABS: N_SO{{.*}}a.cpp
# VERIFY-MULTI-STABS-NEXT: N_OSO{{.*}}a.o
# Functions from the first object file - all functions share the same address but belong to a.o
# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR:[0-9a-f]+]] '_func_A'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_B'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_C'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT-NEXT: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr'
# End of first object file's entries
# VERIFY-MULTI-STABS: N_SO{{.*}}01 0000 0000000000000000
# Second object file's source and object file entries
# VERIFY-MULTI-STABS: N_SO{{.*}}b.cpp
# VERIFY-MULTI-STABS-NEXT: N_OSO{{.*}}b.o
# Functions from the second object file - same addresses but different object file
# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_D'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_E'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_F'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT-NEXT: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr_b'
; RUN: dsymutil --flat --verify-dwarf=none %t/a_thunks.dylib -o %t/a_thunks.dSYM
; RUN: dsymutil -s %t/a_thunks.dSYM >> %t/a_thunks.txt
@ -73,6 +110,9 @@
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_C")
;--- a.cpp
#define ATTR __attribute__((noinline)) extern "C"
typedef unsigned long long ULL;
@ -89,32 +129,56 @@ ATTR ULL take_func_addr() {
return val;
}
;--- b.cpp
#define ATTR __attribute__((noinline)) extern "C"
typedef unsigned long long ULL;
// Identical functions in a different object file
ATTR int func_D() { return 1; }
ATTR int func_E() { return 1; }
ATTR int func_F() { return 1; }
// Adds the addresses of func_D, func_E and func_F (each converted to ULL via
// void*) into a running total and returns that total.
// NOTE(review): presumably taking the addresses is what makes these functions
// address-significant for the --icf=safe_thunks RUN lines -- confirm.
ATTR ULL take_func_addr_b() {
ULL val = 0;
val += (ULL)(void*)func_D;
val += (ULL)(void*)func_E;
val += (ULL)(void*)func_F;
return val;
}
;--- gen
clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -o -
clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
echo ""
echo ";--- b.ll"
clang -target arm64-apple-macos11.0 -S -emit-llvm b.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
;--- a.ll
; ModuleID = 'a.cpp'
source_filename = "a.cpp"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_A() #0 !dbg !12 {
entry:
ret i32 1, !dbg !16
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_B() #0 !dbg !17 {
entry:
ret i32 1, !dbg !18
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_C() #0 !dbg !19 {
entry:
ret i32 1, !dbg !20
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i64 @take_func_addr() local_unnamed_addr #0 !dbg !21 {
entry:
#dbg_value(i64 0, !25, !DIExpression(), !26)
#dbg_value(i64 ptrtoint (ptr @func_A to i64), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), !25, !DIExpression(), !26)
@ -122,7 +186,7 @@ define noundef i64 @take_func_addr() local_unnamed_addr #0 !dbg !21 {
ret i64 add (i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), i64 ptrtoint (ptr @func_C to i64)), !dbg !27
}
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
@ -155,3 +219,71 @@ attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp wil
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 9, type: !3)
!26 = !DILocation(line: 0, scope: !21)
!27 = !DILocation(line: 13, column: 5, scope: !21)
;--- b.ll
; ModuleID = 'b.cpp'
source_filename = "b.cpp"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_D() #0 !dbg !12 {
entry:
ret i32 1, !dbg !16
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_E() #0 !dbg !17 {
entry:
ret i32 1, !dbg !18
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_F() #0 !dbg !19 {
entry:
ret i32 1, !dbg !20
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i64 @take_func_addr_b() local_unnamed_addr #0 !dbg !21 {
entry:
#dbg_value(i64 0, !25, !DIExpression(), !26)
#dbg_value(i64 ptrtoint (ptr @func_D to i64), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !25, !DIExpression(), !26)
ret i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !dbg !27
}
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
!1 = !DIFile(filename: "b.cpp", directory: "/proc/self/cwd")
!2 = !{!3, !5}
!3 = !DIDerivedType(tag: DW_TAG_typedef, name: "ULL", file: !1, line: 2, baseType: !4)
!4 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
!6 = !{i32 7, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
!8 = !{i32 1, !"wchar_size", i32 4}
!9 = !{i32 8, !"PIC Level", i32 2}
!10 = !{i32 7, !"uwtable", i32 1}
!11 = !{i32 7, !"frame-pointer", i32 1}
!12 = distinct !DISubprogram(name: "func_D", scope: !1, file: !1, line: 5, type: !13, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!13 = !DISubroutineType(types: !14)
!14 = !{!15}
!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!16 = !DILocation(line: 5, column: 21, scope: !12)
!17 = distinct !DISubprogram(name: "func_E", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!18 = !DILocation(line: 6, column: 21, scope: !17)
!19 = distinct !DISubprogram(name: "func_F", scope: !1, file: !1, line: 7, type: !13, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!20 = !DILocation(line: 7, column: 21, scope: !19)
!21 = distinct !DISubprogram(name: "take_func_addr_b", scope: !1, file: !1, line: 9, type: !22, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !24)
!22 = !DISubroutineType(types: !23)
!23 = !{!3}
!24 = !{!25}
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 10, type: !3)
!26 = !DILocation(line: 0, scope: !21)
!27 = !DILocation(line: 14, column: 5, scope: !21)