mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-18 21:06:50 +00:00

Reland #120514 after 2f6e3df08a8b7cd29273980e47310cf09c6fdbd8 fixed iteration order issue and libstdc++/libc++ differences. --- Both options instruct the linker to optimize section layout with the following goals: * `--bp-compression-sort=[data|function|both]`: Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size. * `--bp-startup-sort=function --irpgo-profile=<file>`: Utilize a temporal profile file to reduce page faults during program startup. The linker determines the section order by considering three groups: * Function sections ordered according to the temporal profile (`--irpgo-profile=`), prioritizing early-accessed and frequently accessed functions. * Function sections. Sections containing similar functions are placed together, maximizing compression opportunities. * Data sections. Similar data sections are placed together. Within each group, the sections are ordered using the Balanced Partitioning algorithm. The linker constructs a bipartite graph with two sets of vertices: sections and utility vertices. * For profile-guided function sections: + The number of utility vertices is determined by the symbol order within the profile file. + If `--bp-compression-sort-startup-functions` is specified, extra utility vertices are allocated to prioritize nearby function similarity. * For sections ordered for compression: Utility vertices are determined by analyzing k-mers of the section content and relocations. The call graph profile is disabled during this optimization. When `--symbol-ordering-file=` is specified, sections described in that file are placed earlier. Co-authored-by: Pengying Xu <xpy66swsry@gmail.com>
336 lines
10 KiB
ArmAsm
336 lines
10 KiB
ArmAsm
# REQUIRES: aarch64
|
|
## Unpack the parts of this file (a.proftext, a.txt, a.c, gen, a.s) into a
## fresh directory and work from inside it.
# RUN: rm -rf %t && split-file %s %t && cd %t
|
|
|
|
## Check for incompatible cases
|
|
# RUN: not ld.lld %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-CALLGRAPH-ERR
|
|
# RUN: not ld.lld --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
|
|
# RUN: not ld.lld --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-ERR
|
|
# RUN: not ld.lld --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-COMPRESSION-ERR
|
|
# RUN: not ld.lld --bp-startup-sort=invalid --bp-compression-sort=invalid 2>&1 | FileCheck %s --check-prefix=BP-INVALID
|
|
|
|
## Expected diagnostics for the failing invocations above, one per prefix.
# BP-STARTUP-CALLGRAPH-ERR: error: --bp-startup-sort=function is incompatible with --call-graph-ordering-file
|
|
# BP-COMPRESSION-CALLGRAPH-ERR: error: --bp-compression-sort is incompatible with --call-graph-ordering-file
|
|
# BP-STARTUP-ERR: error: --bp-startup-sort=function must be used with --irpgo-profile
|
|
# BP-STARTUP-COMPRESSION-ERR: error: --bp-compression-sort-startup-functions must be used with --irpgo-profile
|
|
|
|
## Both invalid values are diagnosed; the two patterns share a prefix, so the
## compression-sort message must appear before the startup-sort message.
# BP-INVALID: error: --bp-compression-sort=: expected [none|function|data|both]
|
|
# BP-INVALID: error: --bp-startup-sort=: expected [none|function]
|
|
|
|
## Build the inputs shared by every ordering check below: the object file
## from a.s and the indexed profile from a.proftext.
# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
|
|
# RUN: llvm-profdata merge a.proftext -o a.profdata
|
|
## Startup ordering together with --icf=all; only the verbose statistics are
## checked here (number of ordered sections and the reported page-fault area).
# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
|
|
|
|
# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
|
|
# STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
|
|
|
|
## Startup ordering on its own. For this and the following layout checks,
## llvm-nm -jn prints symbol names in address order and tr joins them with
## commas so that one pattern pins the entire layout.
# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
|
|
# RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
|
|
# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
|
|
|
|
## Startup ordering plus --symbol-ordering-file: the symbols listed in a.txt
## are expected ahead of the remaining symbols of the same kind.
# RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
|
|
# RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
|
|
# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
|
|
|
|
## Compression sort restricted to function sections.
# RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
|
|
# RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
|
|
# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
|
|
|
|
## Compression sort restricted to data sections.
# RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
|
|
# RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
|
|
# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
|
|
|
|
## Compression sort of functions and data together. The expected layout is the
## same as in the data-only case, so the layout check reuses that prefix.
# RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
|
|
# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
|
|
|
|
## Compression sort of both kinds combined with startup ordering: functions are
## expected in the startup-only order, data in the data-compression order.
# RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
|
|
# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
|
|
# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
|
|
|
|
## Section counts printed by --verbose-bp-section-orderer for each compression
## mode (the count for "both" is the sum of the other two).
# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
|
|
# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
|
|
# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
|
|
|
|
#--- a.proftext
|
|
:ir
|
|
:temporal_prof_traces
|
|
# Num Traces
|
|
1
|
|
# Trace Stream Size:
|
|
1
|
|
# Weight
|
|
1
|
|
A, B, C
|
|
|
|
A
|
|
# Func Hash:
|
|
1111
|
|
# Num Counters:
|
|
1
|
|
# Counter Values:
|
|
1
|
|
|
|
B
|
|
# Func Hash:
|
|
2222
|
|
# Num Counters:
|
|
1
|
|
# Counter Values:
|
|
1
|
|
|
|
C
|
|
# Func Hash:
|
|
3333
|
|
# Num Counters:
|
|
1
|
|
# Counter Values:
|
|
1
|
|
|
|
D
|
|
# Func Hash:
|
|
4444
|
|
# Num Counters:
|
|
1
|
|
# Counter Values:
|
|
1
|
|
|
|
#--- a.txt
|
|
A
|
|
F
|
|
E
|
|
D
|
|
s2
|
|
s1
|
|
d3
|
|
d2
|
|
|
|
#--- a.c
|
|
// Read-only strings. Several share substrings ("computer", "program",
// "engineer"); presumably chosen so that the data compression sort has
// similar contents to group -- confirm against the expected layouts.
const char s5[] = "engineering";
|
|
const char s4[] = "computer program";
|
|
const char s3[] = "hardware engineer";
|
|
const char s2[] = "computer software";
|
|
const char s1[] = "hello world program";
|
|
// Writable int arrays with partially overlapping values.
int d4[] = {1,2,3,4,5,6};
|
|
int d3[] = {5,6,7,8};
|
|
int d2[] = {7,8,9,10};
|
|
int d1[] = {3,4,5,6};
|
|
|
|
// Forward declarations: F, E and D below call C and B before they are defined.
int C(int a);
|
|
int B(int a);
|
|
void A();
|
|
|
|
// Call structure: F and E call C, D calls B, and both C and B call A.
int F(int a) { return C(a + 3); }
|
|
int E(int a) { return C(a + 2); }
|
|
int D(int a) { return B(a + 2); }
|
|
int C(int a) { A(); return a + 2; }
|
|
int B(int a) { A(); return a + 1; }
|
|
void A() {}
|
|
|
|
// Entry symbol for the link; calls none of the functions above.
int _start() { return 0; }
|
|
|
|
#--- gen
|
|
clang --target=aarch64-linux-gnu -O0 -ffunction-sections -fdata-sections -fno-asynchronous-unwind-tables -S a.c -o -
|
|
;--- a.s
|
|
.file "a.c"
|
|
// int F(int a) { return C(a + 3); }  -- unoptimized compiler output.
// In:  w0 = a
// Out: w0 = whatever C leaves in w0 (not touched after the call)
// Non-leaf: x29/x30 are saved in a 32-byte frame. Lives in its own section.
.section .text.F,"ax",@progbits
|
|
.globl F // -- Begin function F
|
|
.p2align 2
|
|
.type F,@function
|
|
F: // @F
|
|
// %bb.0: // %entry
|
|
sub sp, sp, #32 // allocate the 32-byte frame
|
|
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
|
add x29, sp, #16 // x29 = address of the saved x29/x30 pair
|
|
stur w0, [x29, #-4] // store a to its stack slot
|
|
ldur w8, [x29, #-4] // reload a
|
|
add w0, w8, #3 // first argument for C = a + 3
|
|
bl C
|
|
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
|
add sp, sp, #32 // release the frame
|
|
ret
|
|
.Lfunc_end0:
|
|
.size F, .Lfunc_end0-F
|
|
// -- End function
|
|
// int C(int a) { A(); return a + 2; }  -- unoptimized compiler output.
// In:  w0 = a
// Out: w0 = a + 2
// a is kept in a stack slot across the call to A and reloaded afterwards.
.section .text.C,"ax",@progbits
|
|
.globl C // -- Begin function C
|
|
.p2align 2
|
|
.type C,@function
|
|
C: // @C
|
|
// %bb.0: // %entry
|
|
sub sp, sp, #32 // allocate the 32-byte frame
|
|
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
|
add x29, sp, #16 // x29 = address of the saved x29/x30 pair
|
|
stur w0, [x29, #-4] // store a before calling A
|
|
bl A
|
|
ldur w8, [x29, #-4] // reload a after the call
|
|
add w0, w8, #2 // return value = a + 2
|
|
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
|
add sp, sp, #32 // release the frame
|
|
ret
|
|
.Lfunc_end1:
|
|
.size C, .Lfunc_end1-C
|
|
// -- End function
|
|
// int E(int a) { return C(a + 2); }  -- unoptimized compiler output.
// In:  w0 = a
// Out: w0 = whatever C leaves in w0 (not touched after the call)
// Same instruction sequence as F except for the added constant (#2 here).
.section .text.E,"ax",@progbits
|
|
.globl E // -- Begin function E
|
|
.p2align 2
|
|
.type E,@function
|
|
E: // @E
|
|
// %bb.0: // %entry
|
|
sub sp, sp, #32 // allocate the 32-byte frame
|
|
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
|
add x29, sp, #16 // x29 = address of the saved x29/x30 pair
|
|
stur w0, [x29, #-4] // store a to its stack slot
|
|
ldur w8, [x29, #-4] // reload a
|
|
add w0, w8, #2 // first argument for C = a + 2
|
|
bl C
|
|
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
|
add sp, sp, #32 // release the frame
|
|
ret
|
|
.Lfunc_end2:
|
|
.size E, .Lfunc_end2-E
|
|
// -- End function
|
|
// int D(int a) { return B(a + 2); }  -- unoptimized compiler output.
// In:  w0 = a
// Out: w0 = whatever B leaves in w0 (not touched after the call)
// Same instruction sequence as E except that the callee is B instead of C.
.section .text.D,"ax",@progbits
|
|
.globl D // -- Begin function D
|
|
.p2align 2
|
|
.type D,@function
|
|
D: // @D
|
|
// %bb.0: // %entry
|
|
sub sp, sp, #32 // allocate the 32-byte frame
|
|
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
|
add x29, sp, #16 // x29 = address of the saved x29/x30 pair
|
|
stur w0, [x29, #-4] // store a to its stack slot
|
|
ldur w8, [x29, #-4] // reload a
|
|
add w0, w8, #2 // first argument for B = a + 2
|
|
bl B
|
|
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
|
add sp, sp, #32 // release the frame
|
|
ret
|
|
.Lfunc_end3:
|
|
.size D, .Lfunc_end3-D
|
|
// -- End function
|
|
// int B(int a) { A(); return a + 1; }  -- unoptimized compiler output.
// In:  w0 = a
// Out: w0 = a + 1
// Same instruction sequence as C except for the added constant (#1 here).
.section .text.B,"ax",@progbits
|
|
.globl B // -- Begin function B
|
|
.p2align 2
|
|
.type B,@function
|
|
B: // @B
|
|
// %bb.0: // %entry
|
|
sub sp, sp, #32 // allocate the 32-byte frame
|
|
stp x29, x30, [sp, #16] // 16-byte Folded Spill
|
|
add x29, sp, #16 // x29 = address of the saved x29/x30 pair
|
|
stur w0, [x29, #-4] // store a before calling A
|
|
bl A
|
|
ldur w8, [x29, #-4] // reload a after the call
|
|
add w0, w8, #1 // return value = a + 1
|
|
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
|
|
add sp, sp, #32 // release the frame
|
|
ret
|
|
.Lfunc_end4:
|
|
.size B, .Lfunc_end4-B
|
|
// -- End function
|
|
// void A() {}  -- leaf function: no frame, no register use, returns at once.
// Called from B and C.
.section .text.A,"ax",@progbits
|
|
.globl A // -- Begin function A
|
|
.p2align 2
|
|
.type A,@function
|
|
A: // @A
|
|
// %bb.0: // %entry
|
|
ret
|
|
.Lfunc_end5:
|
|
.size A, .Lfunc_end5-A
|
|
// -- End function
|
|
// int _start() { return 0; }  -- leaf function, no frame.
// Out: w0 = 0
.section .text._start,"ax",@progbits
|
|
.globl _start // -- Begin function _start
|
|
.p2align 2
|
|
.type _start,@function
|
|
_start: // @_start
|
|
// %bb.0: // %entry
|
|
mov w0, wzr // return value = 0
|
|
ret
|
|
.Lfunc_end6:
|
|
.size _start, .Lfunc_end6-_start
|
|
// -- End function
|
|
// const char s5[] = "engineering";
// Global read-only object in its own section; 12 bytes = 11 characters plus
// the terminating NUL written by .asciz.
.type s5,@object // @s5
|
|
.section .rodata.s5,"a",@progbits
|
|
.globl s5
|
|
s5:
|
|
.asciz "engineering"
|
|
.size s5, 12
|
|
|
|
// const char s4[] = "computer program";
// Global read-only object in its own section; 17 bytes = 16 characters plus
// the terminating NUL written by .asciz.
.type s4,@object // @s4
|
|
.section .rodata.s4,"a",@progbits
|
|
.globl s4
|
|
s4:
|
|
.asciz "computer program"
|
|
.size s4, 17
|
|
|
|
// const char s3[] = "hardware engineer";
// Global read-only object in its own section; 18 bytes = 17 characters plus
// the terminating NUL written by .asciz.
.type s3,@object // @s3
|
|
.section .rodata.s3,"a",@progbits
|
|
.globl s3
|
|
s3:
|
|
.asciz "hardware engineer"
|
|
.size s3, 18
|
|
|
|
// const char s2[] = "computer software";
// Global read-only object in its own section; 18 bytes = 17 characters plus
// the terminating NUL written by .asciz. Named in the symbol ordering file.
.type s2,@object // @s2
|
|
.section .rodata.s2,"a",@progbits
|
|
.globl s2
|
|
s2:
|
|
.asciz "computer software"
|
|
.size s2, 18
|
|
|
|
// const char s1[] = "hello world program";
// Global read-only object in its own section; 20 bytes = 19 characters plus
// the terminating NUL written by .asciz. Named in the symbol ordering file.
.type s1,@object // @s1
|
|
.section .rodata.s1,"a",@progbits
|
|
.globl s1
|
|
s1:
|
|
.asciz "hello world program"
|
|
.size s1, 20
|
|
|
|
// int d4[] = {1,2,3,4,5,6};
// Global writable object in its own section; six 32-bit words = 24 bytes,
// 4-byte aligned.
.type d4,@object // @d4
|
|
.section .data.d4,"aw",@progbits
|
|
.globl d4
|
|
.p2align 2, 0x0
|
|
d4:
|
|
.word 1 // 0x1
|
|
.word 2 // 0x2
|
|
.word 3 // 0x3
|
|
.word 4 // 0x4
|
|
.word 5 // 0x5
|
|
.word 6 // 0x6
|
|
.size d4, 24
|
|
|
|
// int d3[] = {5,6,7,8};
// Global writable object in its own section; four 32-bit words = 16 bytes,
// 4-byte aligned. Named in the symbol ordering file.
.type d3,@object // @d3
|
|
.section .data.d3,"aw",@progbits
|
|
.globl d3
|
|
.p2align 2, 0x0
|
|
d3:
|
|
.word 5 // 0x5
|
|
.word 6 // 0x6
|
|
.word 7 // 0x7
|
|
.word 8 // 0x8
|
|
.size d3, 16
|
|
|
|
// int d2[] = {7,8,9,10};
// Global writable object in its own section; four 32-bit words = 16 bytes,
// 4-byte aligned. Named in the symbol ordering file.
.type d2,@object // @d2
|
|
.section .data.d2,"aw",@progbits
|
|
.globl d2
|
|
.p2align 2, 0x0
|
|
d2:
|
|
.word 7 // 0x7
|
|
.word 8 // 0x8
|
|
.word 9 // 0x9
|
|
.word 10 // 0xa
|
|
.size d2, 16
|
|
|
|
// int d1[] = {3,4,5,6};
// Global writable object in its own section; four 32-bit words = 16 bytes,
// 4-byte aligned. Its values are the last four words of d4.
.type d1,@object // @d1
|
|
.section .data.d1,"aw",@progbits
|
|
.globl d1
|
|
.p2align 2, 0x0
|
|
d1:
|
|
.word 3 // 0x3
|
|
.word 4 // 0x4
|
|
.word 5 // 0x5
|
|
.word 6 // 0x6
|
|
.size d1, 16
|
|
|
|
// Compiler-emitted file trailer. The empty .note.GNU-stack section is the
// usual marker that the object does not need an executable stack.
.section ".note.GNU-stack","",@progbits
|
|
// Address-significance table: the symbols listed below are recorded as
// address-significant. They are the three functions that are call targets
// in this file (C, B and A).
.addrsig
|
|
.addrsig_sym C
|
|
.addrsig_sym B
|
|
.addrsig_sym A
|