85 Commits

Author SHA1 Message Date
Chris Lattner
bd22d83d15 Fold:
// (X != 0) | (Y != 0) -> (X|Y != 0)
        // (X == 0) & (Y == 0) -> (X|Y == 0)

Compiling this:

int %bar(int %a, int %b) {
        entry:
        %tmp.1 = setne int %a, 0
        %tmp.2 = setne int %b, 0
        %tmp.3 = or bool %tmp.1, %tmp.2
        %retval = cast bool %tmp.3 to int
        ret int %retval
        }

to this:

_bar:
        or r2, r3, r4
        addic r3, r2, -1
        subfe r3, r3, r2
        blr

instead of:

_bar:
        addic r2, r3, -1
        subfe r2, r2, r3
        addic r3, r4, -1
        subfe r3, r3, r4
        or r3, r2, r3
        blr

llvm-svn: 21316
2005-04-18 03:59:53 +00:00
Chris Lattner
d929f8bcd3 Make the AND elimination operation recursive and significantly more powerful,
eliminating an and for Nate's testcase:

int %bar(int %a, int %b) {
        entry:
        %tmp.1 = setne int %a, 0
        %tmp.2 = setne int %b, 0
        %tmp.3 = or bool %tmp.1, %tmp.2
        %retval = cast bool %tmp.3 to int
        ret int %retval
        }

generating:

_bar:
        addic r2, r3, -1
        subfe r2, r2, r3
        addic r3, r4, -1
        subfe r3, r3, r4
        or r3, r2, r3
        blr

instead of:

_bar:
        addic r2, r3, -1
        subfe r2, r2, r3
        addic r3, r4, -1
        subfe r3, r3, r4
        or r2, r2, r3
        rlwinm r3, r2, 0, 31, 31
        blr

llvm-svn: 21315
2005-04-18 03:48:41 +00:00
Nate Begeman
80c095f422 Add a couple missing transforms in getSetCC that were triggering assertions
in the PPC Pattern ISel

llvm-svn: 21297
2005-04-14 08:56:52 +00:00
Nate Begeman
4ddd81657b Disbale the broken fold of shift + sz[ext] for now
Move the transform for select (a < 0) ? b : 0 into the dag from ppc isel
Enable the dag to fold and (setcc, 1) -> setcc for targets where setcc
  always produces zero or one.

llvm-svn: 21291
2005-04-13 21:23:31 +00:00
Chris Lattner
56d177a344 fix an infinite loop
llvm-svn: 21289
2005-04-13 20:06:29 +00:00
Chris Lattner
e3d17d8225 fix some serious miscompiles on ia64, alpha, and ppc
llvm-svn: 21288
2005-04-13 19:53:40 +00:00
Chris Lattner
8c3d409dc7 avoid work when possible, perhaps fix the problem nate and andrew are seeing
with != 0 comparisons vanishing.

llvm-svn: 21287
2005-04-13 19:41:05 +00:00
Chris Lattner
b1f25ac188 add back the optimization that Nate added for shl X, (zext_inreg y)
llvm-svn: 21273
2005-04-13 02:58:13 +00:00
Chris Lattner
39844ac337 Oops, remove these too.
llvm-svn: 21272
2005-04-13 02:47:57 +00:00
Chris Lattner
2b4e3fca38 Remove all foldings of ZERO_EXTEND_INREG, moving them to work for AND nodes
instead.  OVerall, this increases the amount of folding we can do.

llvm-svn: 21265
2005-04-13 02:38:18 +00:00
Nate Begeman
ca916ba4a0 Fold shift x, [sz]ext(y) -> shift x, y
llvm-svn: 21262
2005-04-12 23:32:28 +00:00
Nate Begeman
af1c0f7a00 Fold shift by size larger than type size to undef
Make llvm undef values generate ISD::UNDEF nodes

llvm-svn: 21261
2005-04-12 23:12:17 +00:00
Chris Lattner
af5b25f139 Remove some redundant checks, add a couple of new ones. This allows us to
compile this:

int foo (unsigned long a, unsigned long long g) {
  return a >= g;
}

To:

foo:
        movl 8(%esp), %eax
        cmpl %eax, 4(%esp)
        setae %al
        cmpl $0, 12(%esp)
        sete %cl
        andb %al, %cl
        movzbl %cl, %eax
        ret

instead of:

foo:
        movl 8(%esp), %eax
        cmpl %eax, 4(%esp)
        setae %al
        movzbw %al, %cx
        movl 12(%esp), %edx
        cmpl $0, %edx
        sete %al
        movzbw %al, %ax
        cmpl $0, %edx
        cmove %cx, %ax
        movzbl %al, %eax
        ret

llvm-svn: 21244
2005-04-12 02:54:39 +00:00
Chris Lattner
87bd69884a canonicalize x <u 1 -> x == 0. On this testcase:
unsigned long long g;
unsigned long foo (unsigned long a) {
  return (a >= g) ? 1 : 0;
}

It changes the ppc code from:

_foo:
.LBB_foo_0:     ; entry
        mflr r11
        stw r11, 8(r1)
        bl "L00000$pb"
"L00000$pb":
        mflr r2
        addis r2, r2, ha16(L_g$non_lazy_ptr-"L00000$pb")
        lwz r2, lo16(L_g$non_lazy_ptr-"L00000$pb")(r2)
        lwz r4, 0(r2)
        lwz r2, 4(r2)
        cmplw cr0, r3, r2
        li r2, 1
        li r3, 0
        bge .LBB_foo_2  ; entry
.LBB_foo_1:     ; entry
        or r2, r3, r3
.LBB_foo_2:     ; entry
        cmplwi cr0, r4, 1
        li r3, 1
        li r5, 0
        blt .LBB_foo_4  ; entry
.LBB_foo_3:     ; entry
        or r3, r5, r5
.LBB_foo_4:     ; entry
        cmpwi cr0, r4, 0
        beq .LBB_foo_6  ; entry
.LBB_foo_5:     ; entry
        or r2, r3, r3
.LBB_foo_6:     ; entry
        rlwinm r3, r2, 0, 31, 31
        lwz r11, 8(r1)
        mtlr r11
        blr


to:

_foo:
.LBB_foo_0:     ; entry
        mflr r11
        stw r11, 8(r1)
        bl "L00000$pb"
"L00000$pb":
        mflr r2
        addis r2, r2, ha16(L_g$non_lazy_ptr-"L00000$pb")
        lwz r2, lo16(L_g$non_lazy_ptr-"L00000$pb")(r2)
        lwz r4, 0(r2)
        lwz r2, 4(r2)
        cmplw cr0, r3, r2
        li r2, 1
        li r3, 0
        bge .LBB_foo_2  ; entry
.LBB_foo_1:     ; entry
        or r2, r3, r3
.LBB_foo_2:     ; entry
        cntlzw r3, r4
        srwi r3, r3, 5
        cmpwi cr0, r4, 0
        beq .LBB_foo_4  ; entry
.LBB_foo_3:     ; entry
        or r2, r3, r3
.LBB_foo_4:     ; entry
        rlwinm r3, r2, 0, 31, 31
        lwz r11, 8(r1)
        mtlr r11
        blr

llvm-svn: 21241
2005-04-12 00:28:49 +00:00
Chris Lattner
e2427c9afc Don't bother sign/zext_inreg'ing the result of an and operation if we know
the result does change as a result of the extend.

This improves codegen for Alpha on this testcase:

int %a(ushort* %i) {
        %tmp.1 = load ushort* %i
        %tmp.2 = cast ushort %tmp.1 to int
        %tmp.4 = and int %tmp.2, 1
        ret int %tmp.4
}

Generating:

a:
        ldgp $29, 0($27)
        ldwu $0,0($16)
        and $0,1,$0
        ret $31,($26),1

instead of:

a:
        ldgp $29, 0($27)
        ldwu $0,0($16)
        and $0,1,$0
        addl $0,0,$0
        ret $31,($26),1

btw, alpha really should switch to livein/outs for args :)

llvm-svn: 21213
2005-04-10 23:37:16 +00:00
Chris Lattner
f74c794ccf Fold zext_inreg(zextload), likewise for sext's
llvm-svn: 21204
2005-04-10 04:33:08 +00:00
Chris Lattner
f2bff92411 add a simple xform
llvm-svn: 21203
2005-04-10 04:04:49 +00:00
Chris Lattner
d8cbfe82ba Fix a thinko. If the operand is promoted, pass the promoted value into
the new zero extend, not the original operand.  This fixes cast bool -> long
on ppc.

Add an unrelated fixme

llvm-svn: 21196
2005-04-10 01:13:15 +00:00
Chris Lattner
da504741da add a little peephole optimization. This allows us to codegen:
int a(short i) {
        return i & 1;
}

as

_a:
        andi. r3, r3, 1
        blr

instead of:

_a:
        rlwinm r2, r3, 0, 16, 31
        andi. r3, r2, 1
        blr

on ppc.  It should also help the other risc targets.

llvm-svn: 21189
2005-04-09 21:43:54 +00:00
Chris Lattner
6a31b878f8 recognize some patterns as fabs operations, so that fabs at the source level
is deconstructed then reconstructed here.  This catches 19 fabs's in 177.mesa
9 in 168.wupwise, 5 in 171.swim, 3 in 172.mgrid, and 14 in 173.applu out of
specfp2000.

This allows the X86 code generator to make MUCH better code than before for
each of these and saves one instr on ppc.

This depends on the previous CFE patch to expose these correctly.

llvm-svn: 21171
2005-04-09 05:15:53 +00:00
Chris Lattner
b0713c74a2 print and fold BRCONDTWOWAY correctly
llvm-svn: 21165
2005-04-09 03:27:28 +00:00
Chris Lattner
0ea81f9db4 canonicalize a bunch of operations involving fneg
llvm-svn: 21160
2005-04-09 03:02:46 +00:00
Chris Lattner
b32d9318d2 If a target zero or sign extends the result of its setcc, allow folding of
this into sign/zero extension instructions later.

On PPC, for example, this testcase:

%G = external global sbyte
implementation
void %test(int %X, int %Y) {
  %C = setlt int %X, %Y
  %D = cast bool %C to sbyte
  store sbyte %D, sbyte* %G
  ret void
}

Now codegens to:

        cmpw cr0, r3, r4
        li r3, 1
        li r4, 0
        blt .LBB_test_2 ;
.LBB_test_1:    ;
        or r3, r4, r4
.LBB_test_2:    ;
        addis r2, r2, ha16(L_G$non_lazy_ptr-"L00000$pb")
        lwz r2, lo16(L_G$non_lazy_ptr-"L00000$pb")(r2)
        stb r3, 0(r2)

instead of:

        cmpw cr0, r3, r4
        li r3, 1
        li r4, 0
        blt .LBB_test_2 ;
.LBB_test_1:    ;
        or r3, r4, r4
.LBB_test_2:    ;
***     rlwinm r3, r3, 0, 31, 31
        addis r2, r2, ha16(L_G$non_lazy_ptr-"L00000$pb")
        lwz r2, lo16(L_G$non_lazy_ptr-"L00000$pb")(r2)
        stb r3, 0(r2)

llvm-svn: 21148
2005-04-07 19:43:53 +00:00
Chris Lattner
dfed7355c9 Remove somethign I had for testing
llvm-svn: 21144
2005-04-07 18:58:54 +00:00
Chris Lattner
6b03a0cba1 This patch does two things. First, it canonicalizes 'X >= C' -> 'X > C-1'
(likewise for <= >=u >=u).

Second, it implements a special case hack to turn 'X gtu SINTMAX' -> 'X lt 0'

On powerpc, for example, this changes this:

        lis r2, 32767
        ori r2, r2, 65535
        cmplw cr0, r3, r2
        bgt .LBB_test_2

into:

        cmpwi cr0, r3, 0
        blt .LBB_test_2

llvm-svn: 21142
2005-04-07 18:14:58 +00:00
Chris Lattner
7d13eae254 Fix a really scary bug that Nate found where we weren't deleting the right
elements auto of the autoCSE maps.

llvm-svn: 21128
2005-04-07 00:30:13 +00:00
Nate Begeman
55e8625c69 Add MULHU and MULHS nodes for the high part of an (un)signed 32x32=64b
multiply.

llvm-svn: 21102
2005-04-05 22:36:56 +00:00
Chris Lattner
c4a2046a88 print fneg/fabs
llvm-svn: 21008
2005-04-02 04:58:41 +00:00
Chris Lattner
4157c417a1 fix some bugs in the implementation of SHL_PARTS and friends.
llvm-svn: 21004
2005-04-02 04:00:59 +00:00
Chris Lattner
5b7bb56ef8 Print some new nodes
llvm-svn: 21001
2005-04-02 03:30:42 +00:00
Nate Begeman
cda9aa7fa9 Add ISD::UNDEF node
Teach the SelectionDAG code how to expand and promote it
Have PPC32 LowerCallTo generate ISD::UNDEF for int arg regs used up by fp
  arguments, but not shadowing their value.  This allows us to do the right
  thing with both fixed and vararg floating point arguments.

llvm-svn: 20988
2005-04-01 22:34:39 +00:00
Andrew Lenharth
dec53920b4 PCMarker support for DAG and Alpha
llvm-svn: 20965
2005-03-31 21:24:06 +00:00
Chris Lattner
85e7163947 Fix a bug where we would incorrectly do a sign ext instead of a zero ext
because we were checking the wrong thing.  Thanks to andrew for pointing
this out!

llvm-svn: 20554
2005-03-10 20:55:51 +00:00
Chris Lattner
7f26946709 constant fold FP_ROUND_INREG, ZERO_EXTEND_INREG, and SIGN_EXTEND_INREG
This allows the alpha backend to compile:

bool %test(uint %P) {
        %c = seteq uint %P, 0
        ret bool %c
}

into:

test:
        ldgp $29, 0($27)
        ZAP $16,240,$0
        CMPEQ $0,0,$0
        AND $0,1,$0
        ret $31,($26),1

instead of:

test:
        ldgp $29, 0($27)
        ZAP $16,240,$0
        ldiq $1,0
        ZAP $1,240,$1
        CMPEQ $0,$1,$0
        AND $0,1,$0
        ret $31,($26),1

... and fixes PR534.

llvm-svn: 20534
2005-03-09 18:37:12 +00:00
Chris Lattner
381dddc90c Don't rely on doubles comparing identical to each other, which doesn't work
for 0.0 and -0.0.

llvm-svn: 20230
2005-02-17 20:17:32 +00:00
Chris Lattner
90b7c13f3a Remove the 3 HACK HACK HACKs I put in before, fixing them properly with
the new TLI that is available.

Implement support for handling out of range shifts.  This allows us to
compile this code (a 64-bit rotate):

unsigned long long f3(unsigned long long x) {
  return (x << 32) | (x >> (64-32));
}

into this:

f3:
        mov %EDX, DWORD PTR [%ESP + 4]
        mov %EAX, DWORD PTR [%ESP + 8]
        ret

GCC produces this:

$ gcc t.c -masm=intel -O3 -S -o - -fomit-frame-pointer
..
f3:
        push    %ebx
        mov     %ebx, DWORD PTR [%esp+12]
        mov     %ecx, DWORD PTR [%esp+8]
        mov     %eax, %ebx
        mov     %edx, %ecx
        pop     %ebx
        ret

The Simple ISEL produces (eww gross):

f3:
        sub %ESP, 4
        mov DWORD PTR [%ESP], %ESI
        mov %EDX, DWORD PTR [%ESP + 8]
        mov %ECX, DWORD PTR [%ESP + 12]
        mov %EAX, 0
        mov %ESI, 0
        or %EAX, %ECX
        or %EDX, %ESI
        mov %ESI, DWORD PTR [%ESP]
        add %ESP, 4
        ret

llvm-svn: 19780
2005-01-23 04:39:44 +00:00
Chris Lattner
3bc78b2e0b More bugfixes for IA64 shifts.
llvm-svn: 19739
2005-01-22 00:33:03 +00:00
Chris Lattner
d637c96fac Add a nasty hack to fix Alpha/IA64 multiplies by a power of two.
llvm-svn: 19737
2005-01-22 00:20:42 +00:00
Chris Lattner
d53e763f18 Remove unneeded line.
llvm-svn: 19736
2005-01-21 23:43:12 +00:00
Chris Lattner
4f987bf16d test commit
llvm-svn: 19735
2005-01-21 23:38:56 +00:00
Chris Lattner
96e809c47d Unary token factor nodes are unneeded.
llvm-svn: 19727
2005-01-21 18:01:22 +00:00
Chris Lattner
1fe9b40981 implement add_parts/sub_parts.
llvm-svn: 19714
2005-01-20 18:50:55 +00:00
Chris Lattner
9b75e148fd Know some identities about tokenfactor nodes.
llvm-svn: 19699
2005-01-19 18:01:40 +00:00
Chris Lattner
32a5f02598 Know some simple identities. This improves codegen for (1LL << N).
llvm-svn: 19698
2005-01-19 17:29:49 +00:00
Chris Lattner
a9d53f9fb9 Keep track of the retval type as well.
llvm-svn: 19670
2005-01-18 19:26:36 +00:00
Chris Lattner
b07e2d2084 Allow setcc operations to have nonbool types.
llvm-svn: 19656
2005-01-18 02:52:03 +00:00
Chris Lattner
2b4b79581d Fix the completely broken FP constant folds for setcc's.
llvm-svn: 19651
2005-01-18 02:11:55 +00:00
Chris Lattner
16f64df93a Refactor code into a new method.
llvm-svn: 19635
2005-01-17 17:15:02 +00:00
Chris Lattner
4e550ebb38 Add assertions.
llvm-svn: 19596
2005-01-16 02:23:22 +00:00
Chris Lattner
0fe7776da5 Eliminate unneeded extensions.
llvm-svn: 19577
2005-01-16 00:17:20 +00:00