mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-29 01:16:05 +00:00

BlockFrequencyInfo calculates block frequencies as Scaled64 numbers but, as a last step, converts them to unsigned 64-bit integers (`BlockFrequency`). This improves the factors picked for this conversion so that they: * Avoid big numbers close to UINT64_MAX, so that users do not overflow/saturate when adding multiple frequencies together or when multiplying with integers. This leaves the topmost 10 bits unused to allow for some room. * Spread the difference between the hottest and coldest block as much as possible to increase precision. * If the hot/cold spread cannot be represented, lose precision at the lower end, but keep the frequencies at the upper end for hot blocks differentiable.
635 lines
18 KiB
LLVM
635 lines
18 KiB
LLVM
; Block-layout / tail-duplication tests for 64-bit little-endian PowerPC.
; The file is compiled twice, once at -O2 and once at -O3. Directives with the
; plain prefix apply to both invocations; the -O2 / -O3 specific prefixes are
; selected per invocation below.
; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O2 %s
; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O3 %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; straight_test: a straight-line chain of four "test N -> optional N" triangles.
; Every conditional branch uses !prof !1, whose first weight (5) is on the edge
; to the next test and whose second weight (3) is on the edge to the optional
; block.
;
; Intended layout:
; The chain-based outlining produces the layout
;   test1
;   test2
;   test3
;   test4
;   optional1
;   optional2
;   optional3
;   optional4
;   exit
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: straight_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
; The first andi. reuses the register captured by the mr above (it is not
; rebound here), so every bit test is tied to the same copy of the tag.
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT4LABEL]]:
;CHECK: b .[[EXITLABEL]]

define void @straight_test(i32 %tag) {
entry:
  br label %test1
test1:
  ; Bit 0 of %tag clear -> skip optional1.
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  ; Bit 1 of %tag clear -> skip optional2.
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  ; Bit 2 of %tag clear -> skip optional3.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  ; Bit 3 of %tag clear -> skip optional4.
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %exit
exit:
  ret void
}

; straight_test_50: three "test N -> optional N" triangles with a single call
; in each optional block. test1 and test2 branch with the 50/50 profile !2;
; test3 uses !1.
;
; Intended layout:
; The chain-of-triangles based duplicating produces the layout
;   test1
;   test2
;   test3
;   optional1
;   optional2
;   optional3
;   exit
; even for 50/50 branches.
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: straight_test_50:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: b .[[EXITLABEL]]

define void @straight_test_50(i32 %tag) {
entry:
  br label %test1
test1:
  ; Bit 0 of %tag clear -> skip optional1 (50/50 profile).
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
optional1:
  call void @a()
  br label %test2
test2:
  ; Bit 1 of %tag clear -> skip optional2 (50/50 profile).
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
optional2:
  call void @b()
  br label %test3
test3:
  ; Bit 2 of %tag clear -> skip optional3. Note this branch uses !1, not !2.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
optional3:
  call void @c()
  br label %exit
exit:
  ret void
}

; straight_test_3_instr_test: like straight_test_50, but each test compares a
; two-bit field of %tag against a non-zero constant. The -O3 directives below
; expect the first layout and the -O2 directives expect the second.
;
; Intended layout:
; The chain-of-triangles based duplicating produces the layout when 3
; instructions are allowed for tail-duplication.
;   test1
;   test2
;   test3
;   optional1
;   optional2
;   optional3
;   exit
;
; Otherwise it produces the layout:
;   test1
;   optional1
;   test2
;   optional2
;   test3
;   optional3
;   exit

;CHECK-LABEL: straight_test_3_instr_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
;CHECK-NEXT: cmplwi {{[0-9]+}}, 2

; Expectations for the -O3 invocation: tests first, then the optional blocks,
; each ending in a duplicated copy of the following test.
;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: # %test2
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK-O3: blr
;CHECK-O3-NEXT: .[[OPT1LABEL]]:
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-O3-NEXT: .[[OPT2LABEL]]:
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-O3-NEXT: .[[OPT3LABEL]]:
;CHECK-O3: b .[[EXITLABEL]]

; Expectations for the -O2 invocation: tests and optional blocks interleaved.
;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional1
;CHECK-O2: .[[TEST2LABEL]]: # %test2
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional2
;CHECK-O2: .[[TEST3LABEL]]: # %test3
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional3
; Reuse the exit label captured by the beq above (as done for the test2 and
; test3 labels) so the label line is tied to the branch target.
;CHECK-O2: .[[EXITLABEL]]: # %exit
;CHECK-O2: blr


define void @straight_test_3_instr_test(i32 %tag) {
entry:
  br label %test1
test1:
  ; Despite the name, %tagbit1eq0 compares the low two bits against 2, not 0.
  %tagbit1 = and i32 %tag, 3
  %tagbit1eq0 = icmp eq i32 %tagbit1, 2
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
optional1:
  call void @a()
  br label %test2
test2:
  ; Bits 2-3 of %tag compared against 8.
  %tagbit2 = and i32 %tag, 12
  %tagbit2eq0 = icmp eq i32 %tagbit2, 8
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
optional2:
  call void @b()
  br label %test3
test3:
  ; Bits 4-5 of %tag compared against 32. Note this branch uses !1, not !2.
  %tagbit3 = and i32 %tag, 48
  %tagbit3eq0 = icmp eq i32 %tagbit3, 32
  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
optional3:
  call void @c()
  br label %exit
exit:
  ret void
}

; loop_test: the four-triangle chain of straight_test placed inside a loop.
; for.check loads a tag and either enters the chain (test1) or leaves via exit;
; test4 / optional4 branch to for.latch, which decrements the counter and
; returns to for.check.
;
; Intended layout:
; The chain-based outlining produces the layout
;   entry
;   --- Begin loop ---
;   for.latch
;   for.check
;   test1
;   test2
;   test3
;   test4
;   optional1
;   optional2
;   optional3
;   optional4
;   --- End loop ---
;   exit
; The CHECK statements check for the whole string of tests and exit block,
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: loop_test:
;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
;CHECK: addi
;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: # %bb.{{[0-9]+}}: # %test1
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
;CHECK: [[OPT1LABEL]]
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK: [[OPT4LABEL]]:
;CHECK: b .[[LATCHLABEL]]
define void @loop_test(ptr %tags, i32 %count) {
entry:
  br label %for.check
for.check:
  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
  %done.count = icmp ugt i32 %count.loop, 0
  ; NOTE(review): the address is indexed with %count, not %count.loop, so it
  ; does not change across iterations; the add directive above appears to
  ; rely on this -- confirm before changing.
  %tag_ptr = getelementptr inbounds i32, ptr %tags, i32 %count
  %tag = load i32, ptr %tag_ptr
  %done.tag = icmp eq i32 %tag, 0
  ; The chain is entered when %done is true; otherwise control goes to exit.
  %done = and i1 %done.count, %done.tag
  br i1 %done, label %test1, label %exit, !prof !1
test1:
  ; Bit 0 of %tag clear -> skip optional1.
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  ; Bit 1 of %tag clear -> skip optional2.
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  ; Bit 2 of %tag clear -> skip optional3.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  ; Bit 3 of %tag clear -> go straight to the latch.
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %for.latch
for.latch:
  %count.sub = sub i32 %count.loop, 1
  br label %for.check
exit:
  ret void
}

; The block then2 is not unavoidable, meaning it does not dominate the exit.
; But since it can be tail-duplicated, it should be placed as a fallthrough from
; test2 and copied. The purpose here is to make sure that the tail-duplication
; code is independent of the outlining code, which works by choosing the
; "unavoidable" blocks.
; NOTE(review): the paragraph above names then2 as the fallthrough from test2,
; while the directives below pin else2 directly after test2 -- the prose may be
; stale; confirm.
; CHECK-LABEL: avoidable_test:
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: andi.
; CHECK: # %bb.{{[0-9]+}}: # %test2
; Make sure else2 falls through from test2
; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
; CHECK: # %bb.{{[0-9]+}}: # %else2
; CHECK: bl c
; CHECK: # %else1
; CHECK: bl a
; CHECK: bl a
; CHECK: # %then2
; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4
; CHECK: # %end1
; CHECK: bl d
; CHECK: # %end2
define void @avoidable_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
else1:
  ; Reaches then2 without passing through test2.
  call void @a()
  call void @a()
  br label %then2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
then2:
  ; Predecessors: else1 and test2.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
else2:
  ; Goes to end2 directly, bypassing then2.
  call void @c()
  br label %end2
end2:
  ret void
end1:
  call void @d()
  ret void
}

; CHECK-LABEL: trellis_test
; The number in the block labels is the expected block frequency given the
; probabilities annotated. There is a conflict in the b;c->d;e trellis that
; should be resolved as c->e;b->d.
; The d;e->f;g trellis should be resolved as e->g;d->f.
; The f;g->h;i trellis should be resolved as f->i;g->h.
; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
; h->j->ret
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %c10
; CHECK: # %e9
; CHECK: # %g10
; CHECK: # %h10
; CHECK: # %j8
; CHECK: # %ret
; CHECK: # %b6
; CHECK: # %d7
; CHECK: # %f6
; CHECK: # %i6
define void @trellis_test(i32 %tag) {
entry:
  br label %a16
a16:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
c10:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  ; Both of these edges should be hotter than the other incoming edge
  ; for e9 or d7
  br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
e9:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
g10:
  call void @g()
  call void @g()
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
i6:
  call void @i()
  call void @i()
  %tagbits.i = and i32 %tag, 768
  %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
  br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
b6:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
d7:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
f6:
  call void @f()
  call void @f()
  %tagbits.f = and i32 %tag, 192
  %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
  br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
h10:
  call void @h()
  call void @h()
  %tagbits.h = and i32 %tag, 768
  %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
  br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
j8:
  call void @j()
  call void @j()
  br label %ret
ret:
  ret void
}

; Verify that we still consider tail-duplication opportunities if we find a
; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
; of both F and G. The basic trellis algorithm picks the F->G edge, but after
; checking, it's profitable to duplicate G into F. The weights here are not
; really important. They are there to help make the test stable.
; CHECK-LABEL: trellis_then_dup_test
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %b
; CHECK: # %d
; CHECK: # %g
; CHECK: # %ret1
; CHECK: # %c
; CHECK: # %e
; CHECK: # %f
; CHECK: # %ret2
; CHECK: # %ret
define void @trellis_then_dup_test(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
d:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
f:
  ; Middle of the triangle: f's only successor is g.
  call void @f()
  call void @f()
  br label %g
g:
  ; Contains no calls, only the bit test and branch.
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
e:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
ret1:
  call void @a()
  br label %ret
ret2:
  call void @b()
  br label %ret
ret:
  ret void
}

; Verify that we did not mis-identify triangle trellises if it is not
; really a triangle.
; CHECK-LABEL: trellis_no_triangle
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %b
; CHECK: # %d
; CHECK: # %ret
; CHECK: # %c
; CHECK: # %e
define void @trellis_no_triangle(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
d:
  ; d branches to ret or e; e is also reached from b and c.
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
e:
  call void @e()
  call void @e()
  br label %ret
ret:
  call void @f()
  ret void
}

; External functions called by the test bodies above. They take no arguments
; and return nothing; no definitions are provided in this file.
declare void @a()
declare void @b()
declare void @c()
declare void @d()
declare void @e()
declare void @f()
declare void @g()
declare void @h()
declare void @i()
declare void @j()

; Branch-weight profiles attached to conditional branches via !prof in the
; tests above. The per-branch ratio comments in the tests give the resulting
; edge frequencies.
!1 = !{!"branch_weights", i32 5, i32 3}
!2 = !{!"branch_weights", i32 50, i32 50}
!3 = !{!"branch_weights", i32 6, i32 4}
!4 = !{!"branch_weights", i32 7, i32 2}
!5 = !{!"branch_weights", i32 2, i32 8}
!6 = !{!"branch_weights", i32 3, i32 4}
!7 = !{!"branch_weights", i32 4, i32 2}
!8 = !{!"branch_weights", i32 98, i32 2}
!9 = !{!"branch_weights", i32 97, i32 1}
!10 = !{!"branch_weights", i32 96, i32 2}