mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-16 19:56:38 +00:00
[BOLT][tests] Fix jrcxz instruction test (#95861)
Rewrite the test case that checks that BOLT does not move a jrcxz instruction farther from its destination than its one-byte signed offset can encode.
This commit is contained in:
parent
bea329ecb0
commit
c67ecf3853
@ -1,640 +1,33 @@
|
||||
## Test performs a BB reordering with unsupported
|
||||
## instruction jrcxz. Reordering works correctly with the
|
||||
## following options: None, Normal or Reverse. Other strategies
|
||||
## fail with the assertion `isIntN(Size * 8 + 1, Value)`.
|
||||
## The cause is the distance between two BBs, one of which contains the
|
||||
## jrcxz instruction.
|
||||
## Example: OpenSSL
|
||||
## https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
|
||||
## Check that BOLT handles code with jrcxz instruction that has a one-byte
|
||||
## signed offset restriction. If we try to separate jrcxz instruction from its
|
||||
## destination, e.g. by placing it in a different code fragment, then the link
|
||||
## step will fail.
|
||||
|
||||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
|
||||
# RUN: link_fdata %s %t.o %t.fdata
|
||||
# RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q
|
||||
# RUN: llvm-strip --strip-unneeded %t.o
|
||||
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
|
||||
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
|
||||
# RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \
|
||||
# RUN: --split-functions --split-all-cold --split-eh --dyno-stats \
|
||||
# RUN: --print-finalized 2>&1 | FileCheck %s
|
||||
## Disable relocation mode to leave main fragment in its original location.
|
||||
|
||||
# CHECK-NOT: value of -2105 is too large for field of 1 byte.
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
|
||||
# RUN: --split-functions --relocs=0
|
||||
|
||||
.text
|
||||
.section .text.startup,"ax",@progbits
|
||||
.p2align 5,,31
|
||||
.globl main
|
||||
.type main, @function
|
||||
.text
|
||||
.globl main
|
||||
.type main,@function
|
||||
main:
|
||||
jmp bn_sqrx8x_internal
|
||||
|
||||
.globl bn_sqrx8x_internal
|
||||
.hidden bn_sqrx8x_internal
|
||||
.type bn_sqrx8x_internal,@function
|
||||
.align 32
|
||||
bn_sqrx8x_internal:
|
||||
__bn_sqrx8x_internal:
|
||||
# FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
|
||||
# FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
|
||||
# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
|
||||
# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
|
||||
# FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
|
||||
# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
|
||||
# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
|
||||
# FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
|
||||
# FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
|
||||
# FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
|
||||
# FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
|
||||
# FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
|
||||
# FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
|
||||
# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
|
||||
# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
|
||||
# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
|
||||
# FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
|
||||
# FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
|
||||
# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
|
||||
# FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
|
||||
# FDATA: 0 [unknown] 0 1 main 0 0 1
|
||||
# FDATA: 1 main 0 1 main #.hot# 0 1
|
||||
.cfi_startproc
|
||||
leaq 48+8(%rsp),%rdi
|
||||
leaq (%rsi,%r9,1),%rbp
|
||||
movq %r9,0+8(%rsp)
|
||||
movq %rbp,8+8(%rsp)
|
||||
jmp .Lsqr8x_zero_start
|
||||
|
||||
.align 32
|
||||
.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
|
||||
.Lsqrx8x_zero:
|
||||
.byte 0x3e
|
||||
movdqa %xmm0,0(%rdi)
|
||||
movdqa %xmm0,16(%rdi)
|
||||
movdqa %xmm0,32(%rdi)
|
||||
movdqa %xmm0,48(%rdi)
|
||||
.Lsqr8x_zero_start:
|
||||
movdqa %xmm0,64(%rdi)
|
||||
movdqa %xmm0,80(%rdi)
|
||||
movdqa %xmm0,96(%rdi)
|
||||
movdqa %xmm0,112(%rdi)
|
||||
leaq 128(%rdi),%rdi
|
||||
subq $64,%r9
|
||||
jnz .Lsqrx8x_zero
|
||||
|
||||
movq 0(%rsi),%rdx
|
||||
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
xorq %r12,%r12
|
||||
xorq %r13,%r13
|
||||
xorq %r14,%r14
|
||||
xorq %r15,%r15
|
||||
leaq 48+8(%rsp),%rdi
|
||||
xorq %rbp,%rbp
|
||||
jmp .Lsqrx8x_outer_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_outer_loop:
|
||||
mulxq 8(%rsi),%r8,%rax
|
||||
adcxq %r9,%r8
|
||||
adoxq %rax,%r10
|
||||
mulxq 16(%rsi),%r9,%rax
|
||||
adcxq %r10,%r9
|
||||
adoxq %rax,%r11
|
||||
.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
|
||||
adcxq %r11,%r10
|
||||
adoxq %rax,%r12
|
||||
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
|
||||
adcxq %r12,%r11
|
||||
adoxq %rax,%r13
|
||||
mulxq 40(%rsi),%r12,%rax
|
||||
adcxq %r13,%r12
|
||||
adoxq %rax,%r14
|
||||
mulxq 48(%rsi),%r13,%rax
|
||||
adcxq %r14,%r13
|
||||
adoxq %r15,%rax
|
||||
mulxq 56(%rsi),%r14,%r15
|
||||
movq 8(%rsi),%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rbp,%r15
|
||||
adcq 64(%rdi),%r15
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,16(%rdi)
|
||||
sbbq %rcx,%rcx
|
||||
xorq %rbp,%rbp
|
||||
|
||||
mulxq 16(%rsi),%r8,%rbx
|
||||
mulxq 24(%rsi),%r9,%rax
|
||||
adcxq %r10,%r8
|
||||
adoxq %rbx,%r9
|
||||
mulxq 32(%rsi),%r10,%rbx
|
||||
adcxq %r11,%r9
|
||||
adoxq %rax,%r10
|
||||
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
|
||||
adcxq %r12,%r10
|
||||
adoxq %rbx,%r11
|
||||
.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
|
||||
adcxq %r13,%r11
|
||||
adoxq %r14,%r12
|
||||
.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
|
||||
movq 16(%rsi),%rdx
|
||||
adcxq %rax,%r12
|
||||
adoxq %rbx,%r13
|
||||
adcxq %r15,%r13
|
||||
adoxq %rbp,%r14
|
||||
adcxq %rbp,%r14
|
||||
|
||||
movq %r8,24(%rdi)
|
||||
movq %r9,32(%rdi)
|
||||
|
||||
mulxq 24(%rsi),%r8,%rbx
|
||||
mulxq 32(%rsi),%r9,%rax
|
||||
adcxq %r10,%r8
|
||||
adoxq %rbx,%r9
|
||||
mulxq 40(%rsi),%r10,%rbx
|
||||
adcxq %r11,%r9
|
||||
adoxq %rax,%r10
|
||||
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
|
||||
adcxq %r12,%r10
|
||||
adoxq %r13,%r11
|
||||
.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
|
||||
.byte 0x3e
|
||||
movq 24(%rsi),%rdx
|
||||
adcxq %rbx,%r11
|
||||
adoxq %rax,%r12
|
||||
adcxq %r14,%r12
|
||||
movq %r8,40(%rdi)
|
||||
movq %r9,48(%rdi)
|
||||
mulxq 32(%rsi),%r8,%rax
|
||||
adoxq %rbp,%r13
|
||||
adcxq %rbp,%r13
|
||||
|
||||
mulxq 40(%rsi),%r9,%rbx
|
||||
adcxq %r10,%r8
|
||||
adoxq %rax,%r9
|
||||
mulxq 48(%rsi),%r10,%rax
|
||||
adcxq %r11,%r9
|
||||
adoxq %r12,%r10
|
||||
mulxq 56(%rsi),%r11,%r12
|
||||
movq 32(%rsi),%rdx
|
||||
movq 40(%rsi),%r14
|
||||
adcxq %rbx,%r10
|
||||
adoxq %rax,%r11
|
||||
movq 48(%rsi),%r15
|
||||
adcxq %r13,%r11
|
||||
adoxq %rbp,%r12
|
||||
adcxq %rbp,%r12
|
||||
|
||||
movq %r8,56(%rdi)
|
||||
movq %r9,64(%rdi)
|
||||
|
||||
mulxq %r14,%r9,%rax
|
||||
movq 56(%rsi),%r8
|
||||
adcxq %r10,%r9
|
||||
mulxq %r15,%r10,%rbx
|
||||
adoxq %rax,%r10
|
||||
adcxq %r11,%r10
|
||||
mulxq %r8,%r11,%rax
|
||||
movq %r14,%rdx
|
||||
adoxq %rbx,%r11
|
||||
adcxq %r12,%r11
|
||||
|
||||
adcxq %rbp,%rax
|
||||
|
||||
mulxq %r15,%r14,%rbx
|
||||
mulxq %r8,%r12,%r13
|
||||
movq %r15,%rdx
|
||||
leaq 64(%rsi),%rsi
|
||||
adcxq %r14,%r11
|
||||
adoxq %rbx,%r12
|
||||
adcxq %rax,%r12
|
||||
adoxq %rbp,%r13
|
||||
|
||||
.byte 0x67,0x67
|
||||
mulxq %r8,%r8,%r14
|
||||
adcxq %r8,%r13
|
||||
adcxq %rbp,%r14
|
||||
|
||||
cmpq 8+8(%rsp),%rsi
|
||||
je .Lsqrx8x_outer_break
|
||||
|
||||
negq %rcx
|
||||
movq $-8,%rcx
|
||||
movq %rbp,%r15
|
||||
movq 64(%rdi),%r8
|
||||
adcxq 72(%rdi),%r9
|
||||
adcxq 80(%rdi),%r10
|
||||
adcxq 88(%rdi),%r11
|
||||
adcq 96(%rdi),%r12
|
||||
adcq 104(%rdi),%r13
|
||||
adcq 112(%rdi),%r14
|
||||
adcq 120(%rdi),%r15
|
||||
leaq (%rsi),%rbp
|
||||
leaq 128(%rdi),%rdi
|
||||
sbbq %rax,%rax
|
||||
|
||||
movq -64(%rsi),%rdx
|
||||
movq %rax,16+8(%rsp)
|
||||
movq %rdi,24+8(%rsp)
|
||||
|
||||
jrcxz .Lcold
|
||||
.hot:
|
||||
ret
|
||||
|
||||
.Lcold:
|
||||
xorl %eax,%eax
|
||||
jmp .Lsqrx8x_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_loop:
|
||||
movq %r8,%rbx
|
||||
mulxq 0(%rbp),%rax,%r8
|
||||
adcxq %rax,%rbx
|
||||
adoxq %r9,%r8
|
||||
|
||||
mulxq 8(%rbp),%rax,%r9
|
||||
adcxq %rax,%r8
|
||||
adoxq %r10,%r9
|
||||
|
||||
mulxq 16(%rbp),%rax,%r10
|
||||
adcxq %rax,%r9
|
||||
adoxq %r11,%r10
|
||||
|
||||
mulxq 24(%rbp),%rax,%r11
|
||||
adcxq %rax,%r10
|
||||
adoxq %r12,%r11
|
||||
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||
adcxq %rax,%r11
|
||||
adoxq %r13,%r12
|
||||
|
||||
mulxq 40(%rbp),%rax,%r13
|
||||
adcxq %rax,%r12
|
||||
adoxq %r14,%r13
|
||||
|
||||
mulxq 48(%rbp),%rax,%r14
|
||||
movq %rbx,(%rdi,%rcx,8)
|
||||
movl $0,%ebx
|
||||
adcxq %rax,%r13
|
||||
adoxq %r15,%r14
|
||||
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
|
||||
movq 8(%rsi,%rcx,8),%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rbx,%r15
|
||||
adcxq %rbx,%r15
|
||||
|
||||
.byte 0x67
|
||||
incq %rcx
|
||||
jnz .Lsqrx8x_loop
|
||||
|
||||
leaq 64(%rbp),%rbp
|
||||
movq $-8,%rcx
|
||||
cmpq 8+8(%rsp),%rbp
|
||||
je .Lsqrx8x_break
|
||||
|
||||
subq 16+8(%rsp),%rbx
|
||||
.byte 0x66
|
||||
movq -64(%rsi),%rdx
|
||||
adcxq 0(%rdi),%r8
|
||||
adcxq 8(%rdi),%r9
|
||||
adcq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
leaq 64(%rdi),%rdi
|
||||
.byte 0x67
|
||||
sbbq %rax,%rax
|
||||
xorl %ebx,%ebx
|
||||
movq %rax,16+8(%rsp)
|
||||
jmp .Lsqrx8x_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_break:
|
||||
xorq %rbp,%rbp
|
||||
subq 16+8(%rsp),%rbx
|
||||
adcxq %rbp,%r8
|
||||
movq 24+8(%rsp),%rcx
|
||||
adcxq %rbp,%r9
|
||||
movq 0(%rsi),%rdx
|
||||
adcq $0,%r10
|
||||
movq %r8,0(%rdi)
|
||||
adcq $0,%r11
|
||||
adcq $0,%r12
|
||||
adcq $0,%r13
|
||||
adcq $0,%r14
|
||||
adcq $0,%r15
|
||||
cmpq %rcx,%rdi
|
||||
je .Lsqrx8x_outer_loop
|
||||
|
||||
movq %r9,8(%rdi)
|
||||
movq 8(%rcx),%r9
|
||||
movq %r10,16(%rdi)
|
||||
movq 16(%rcx),%r10
|
||||
movq %r11,24(%rdi)
|
||||
movq 24(%rcx),%r11
|
||||
movq %r12,32(%rdi)
|
||||
movq 32(%rcx),%r12
|
||||
movq %r13,40(%rdi)
|
||||
movq 40(%rcx),%r13
|
||||
movq %r14,48(%rdi)
|
||||
movq 48(%rcx),%r14
|
||||
movq %r15,56(%rdi)
|
||||
movq 56(%rcx),%r15
|
||||
movq %rcx,%rdi
|
||||
jmp .Lsqrx8x_outer_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_outer_break:
|
||||
movq %r9,72(%rdi)
|
||||
.byte 102,72,15,126,217
|
||||
movq %r10,80(%rdi)
|
||||
movq %r11,88(%rdi)
|
||||
movq %r12,96(%rdi)
|
||||
movq %r13,104(%rdi)
|
||||
movq %r14,112(%rdi)
|
||||
leaq 48+8(%rsp),%rdi
|
||||
movq (%rsi,%rcx,1),%rdx
|
||||
|
||||
movq 8(%rdi),%r11
|
||||
xorq %r10,%r10
|
||||
movq 0+8(%rsp),%r9
|
||||
adoxq %r11,%r11
|
||||
movq 16(%rdi),%r12
|
||||
movq 24(%rdi),%r13
|
||||
|
||||
.align 32
|
||||
.Lsqrx4x_shift_n_add:
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r12,%r12
|
||||
adcxq %r10,%rax
|
||||
.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
|
||||
.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
|
||||
adoxq %r13,%r13
|
||||
adcxq %r11,%rbx
|
||||
movq 40(%rdi),%r11
|
||||
movq %rax,0(%rdi)
|
||||
movq %rbx,8(%rdi)
|
||||
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r10,%r10
|
||||
adcxq %r12,%rax
|
||||
movq 16(%rsi,%rcx,1),%rdx
|
||||
movq 48(%rdi),%r12
|
||||
adoxq %r11,%r11
|
||||
adcxq %r13,%rbx
|
||||
movq 56(%rdi),%r13
|
||||
movq %rax,16(%rdi)
|
||||
movq %rbx,24(%rdi)
|
||||
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r12,%r12
|
||||
adcxq %r10,%rax
|
||||
movq 24(%rsi,%rcx,1),%rdx
|
||||
leaq 32(%rcx),%rcx
|
||||
movq 64(%rdi),%r10
|
||||
adoxq %r13,%r13
|
||||
adcxq %r11,%rbx
|
||||
movq 72(%rdi),%r11
|
||||
movq %rax,32(%rdi)
|
||||
movq %rbx,40(%rdi)
|
||||
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r10,%r10
|
||||
adcxq %r12,%rax
|
||||
jrcxz .Lsqrx4x_shift_n_add_break
|
||||
.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
|
||||
adoxq %r11,%r11
|
||||
adcxq %r13,%rbx
|
||||
movq 80(%rdi),%r12
|
||||
movq 88(%rdi),%r13
|
||||
movq %rax,48(%rdi)
|
||||
movq %rbx,56(%rdi)
|
||||
leaq 64(%rdi),%rdi
|
||||
nop
|
||||
jmp .Lsqrx4x_shift_n_add
|
||||
|
||||
.align 32
|
||||
.Lsqrx4x_shift_n_add_break:
|
||||
adcxq %r13,%rbx
|
||||
movq %rax,48(%rdi)
|
||||
movq %rbx,56(%rdi)
|
||||
leaq 64(%rdi),%rdi
|
||||
.byte 102,72,15,126,213
|
||||
__bn_sqrx8x_reduction:
|
||||
xorl %eax,%eax
|
||||
movq 32+8(%rsp),%rbx
|
||||
movq 48+8(%rsp),%rdx
|
||||
leaq -64(%rbp,%r9,1),%rcx
|
||||
|
||||
movq %rcx,0+8(%rsp)
|
||||
movq %rdi,8+8(%rsp)
|
||||
|
||||
leaq 48+8(%rsp),%rdi
|
||||
jmp .Lsqrx8x_reduction_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_reduction_loop:
|
||||
movq 8(%rdi),%r9
|
||||
movq 16(%rdi),%r10
|
||||
movq 24(%rdi),%r11
|
||||
movq 32(%rdi),%r12
|
||||
movq %rdx,%r8
|
||||
imulq %rbx,%rdx
|
||||
movq 40(%rdi),%r13
|
||||
movq 48(%rdi),%r14
|
||||
movq 56(%rdi),%r15
|
||||
movq %rax,24+8(%rsp)
|
||||
|
||||
leaq 64(%rdi),%rdi
|
||||
xorq %rsi,%rsi
|
||||
movq $-8,%rcx
|
||||
jmp .Lsqrx8x_reduce
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_reduce:
|
||||
movq %r8,%rbx
|
||||
mulxq 0(%rbp),%rax,%r8
|
||||
adcxq %rbx,%rax
|
||||
adoxq %r9,%r8
|
||||
|
||||
mulxq 8(%rbp),%rbx,%r9
|
||||
adcxq %rbx,%r8
|
||||
adoxq %r10,%r9
|
||||
|
||||
mulxq 16(%rbp),%rbx,%r10
|
||||
adcxq %rbx,%r9
|
||||
adoxq %r11,%r10
|
||||
|
||||
mulxq 24(%rbp),%rbx,%r11
|
||||
adcxq %rbx,%r10
|
||||
adoxq %r12,%r11
|
||||
|
||||
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||
movq %rdx,%rax
|
||||
movq %r8,%rdx
|
||||
adcxq %rbx,%r11
|
||||
adoxq %r13,%r12
|
||||
|
||||
mulxq 32+8(%rsp),%rbx,%rdx
|
||||
movq %rax,%rdx
|
||||
movq %rax,64+48+8(%rsp,%rcx,8)
|
||||
|
||||
mulxq 40(%rbp),%rax,%r13
|
||||
adcxq %rax,%r12
|
||||
adoxq %r14,%r13
|
||||
|
||||
mulxq 48(%rbp),%rax,%r14
|
||||
adcxq %rax,%r13
|
||||
adoxq %r15,%r14
|
||||
|
||||
mulxq 56(%rbp),%rax,%r15
|
||||
movq %rbx,%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rsi,%r15
|
||||
adcxq %rsi,%r15
|
||||
|
||||
.byte 0x67,0x67,0x67
|
||||
incq %rcx
|
||||
jnz .Lsqrx8x_reduce
|
||||
|
||||
movq %rsi,%rax
|
||||
cmpq 0+8(%rsp),%rbp
|
||||
jae .Lsqrx8x_no_tail
|
||||
|
||||
movq 48+8(%rsp),%rdx
|
||||
addq 0(%rdi),%r8
|
||||
leaq 64(%rbp),%rbp
|
||||
movq $-8,%rcx
|
||||
adcxq 8(%rdi),%r9
|
||||
adcxq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
leaq 64(%rdi),%rdi
|
||||
sbbq %rax,%rax
|
||||
|
||||
xorq %rsi,%rsi
|
||||
movq %rax,16+8(%rsp)
|
||||
jmp .Lsqrx8x_tail
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_tail:
|
||||
movq %r8,%rbx
|
||||
mulxq 0(%rbp),%rax,%r8
|
||||
adcxq %rax,%rbx
|
||||
adoxq %r9,%r8
|
||||
|
||||
mulxq 8(%rbp),%rax,%r9
|
||||
adcxq %rax,%r8
|
||||
adoxq %r10,%r9
|
||||
|
||||
mulxq 16(%rbp),%rax,%r10
|
||||
adcxq %rax,%r9
|
||||
adoxq %r11,%r10
|
||||
|
||||
mulxq 24(%rbp),%rax,%r11
|
||||
adcxq %rax,%r10
|
||||
adoxq %r12,%r11
|
||||
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||
adcxq %rax,%r11
|
||||
adoxq %r13,%r12
|
||||
|
||||
mulxq 40(%rbp),%rax,%r13
|
||||
adcxq %rax,%r12
|
||||
adoxq %r14,%r13
|
||||
|
||||
mulxq 48(%rbp),%rax,%r14
|
||||
adcxq %rax,%r13
|
||||
adoxq %r15,%r14
|
||||
|
||||
mulxq 56(%rbp),%rax,%r15
|
||||
movq 72+48+8(%rsp,%rcx,8),%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rsi,%r15
|
||||
movq %rbx,(%rdi,%rcx,8)
|
||||
movq %r8,%rbx
|
||||
adcxq %rsi,%r15
|
||||
|
||||
incq %rcx
|
||||
jnz .Lsqrx8x_tail
|
||||
|
||||
cmpq 0+8(%rsp),%rbp
|
||||
jae .Lsqrx8x_tail_done
|
||||
|
||||
subq 16+8(%rsp),%rsi
|
||||
movq 48+8(%rsp),%rdx
|
||||
leaq 64(%rbp),%rbp
|
||||
adcq 0(%rdi),%r8
|
||||
adcq 8(%rdi),%r9
|
||||
adcq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
leaq 64(%rdi),%rdi
|
||||
sbbq %rax,%rax
|
||||
subq $8,%rcx
|
||||
|
||||
xorq %rsi,%rsi
|
||||
movq %rax,16+8(%rsp)
|
||||
jmp .Lsqrx8x_tail
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_tail_done:
|
||||
xorq %rax,%rax
|
||||
addq 24+8(%rsp),%r8
|
||||
adcq $0,%r9
|
||||
adcq $0,%r10
|
||||
adcq $0,%r11
|
||||
adcq $0,%r12
|
||||
adcq $0,%r13
|
||||
adcq $0,%r14
|
||||
adcq $0,%r15
|
||||
adcq $0,%rax
|
||||
|
||||
subq 16+8(%rsp),%rsi
|
||||
.Lsqrx8x_no_tail:
|
||||
adcq 0(%rdi),%r8
|
||||
.byte 102,72,15,126,217
|
||||
adcq 8(%rdi),%r9
|
||||
movq 56(%rbp),%rsi
|
||||
.byte 102,72,15,126,213
|
||||
adcq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
adcq $0,%rax
|
||||
|
||||
movq 32+8(%rsp),%rbx
|
||||
movq 64(%rdi,%rcx,1),%rdx
|
||||
|
||||
movq %r8,0(%rdi)
|
||||
leaq 64(%rdi),%r8
|
||||
movq %r9,8(%rdi)
|
||||
movq %r10,16(%rdi)
|
||||
movq %r11,24(%rdi)
|
||||
movq %r12,32(%rdi)
|
||||
movq %r13,40(%rdi)
|
||||
movq %r14,48(%rdi)
|
||||
movq %r15,56(%rdi)
|
||||
|
||||
leaq 64(%rdi,%rcx,1),%rdi
|
||||
cmpq 8+8(%rsp),%r8
|
||||
jb .Lsqrx8x_reduction_loop
|
||||
.byte 0xf3,0xc3
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
|
||||
.size main,.-main
|
||||
|
Loading…
x
Reference in New Issue
Block a user