security/nss/lib/freebl/arcfour-amd64-sun.s

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:5eb1e2ab89eb
1 / This Source Code Form is subject to the terms of the Mozilla Public
2 / License, v. 2.0. If a copy of the MPL was not distributed with this
3 / file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5 / ** ARCFOUR implementation optimized for AMD64.
6 / **
7 / ** The throughput achieved by this code is about 320 MBytes/sec, on
8 / ** a 1.8 GHz AMD Opteron (rev C0) processor.
9
/ ----------------------------------------------------------------------
/ ARCFOUR -- XOR a buffer with the rc4 keystream, updating the key state.
/
/ Incoming registers (same order as the System V AMD64 integer
/ argument registers):
/	%rdi	key	cipher state, accessed with this layout:
/			   0(key)  x index (64-bit load, low byte written back)
/			   8(key)  y index (64-bit load, low byte written back)
/			  16(key)  table d[], one entry per 8 bytes, indexed
/				   by a byte-sized value
/	%rsi	len	number of bytes to process
/	%rdx	in	source bytes
/	%rcx	out	destination bytes
/
/ Register roles once the prologue has run:
/	%rbp = d (key + 16)	%rcx = x	%rdx = y
/	%rsi = in cursor	%rdi = out cursor
/	%rax = tx, the table entry at x, always loaded one round ahead
/	%rbx = scratch (ty, then the output index)
/	%r8  = keystream accumulator	%r11 = per-block byte counter
/	%r9  = in + len - 8 during the block loop, in + len in the tail
/
/ %rbx and %rbp are saved on entry and restored before returning.
/ No return value is set up deliberately.
/ ----------------------------------------------------------------------
	.text
	.align	16
	.globl	ARCFOUR
	.type	ARCFOUR,@function
ARCFOUR:
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbp		/ rbp = key
	movq	%rsi, %rbx		/ rbx = len
	movq	%rdx, %rsi		/ rsi = in
	movq	%rcx, %rdi		/ rdi = out
	movq	(%rbp), %rcx		/ x = key->x
	movq	8(%rbp), %rdx		/ y = key->y
	addq	$16, %rbp		/ rbp = d, start of the table
	incq	%rcx			/ step x for the first round ...
	andq	$0xff, %rcx		/ ... and wrap it to one byte
	leaq	-8(%rbx,%rsi), %rbx	/ rbx = in + len - 8
	movq	%rbx, %r9		/ keep a copy: rbx becomes scratch
	movq	(%rbp,%rcx,8), %rax	/ tx = d[x], loaded ahead of use
	cmpq	%rsi, %rbx		/ in + len - 8  versus  in
	jl	.Ltail			/ less than one full 8-byte block

/ Main loop: one 8-byte block of input per iteration.
.Lblock8:
	addq	$8, %rsi		/ in  += 8 (block addressed at -8 below)
	addq	$8, %rdi		/ out += 8

	/ collect 8 keystream bytes in %r8, one per pass
	movq	$8, %r11		/ passes remaining
.Lnext_byte:
	addb	%al, %dl		/ y = (y + tx) & 0xff
	movl	(%rbp,%rdx,8), %ebx	/ ty = d[y]
	movl	%ebx, (%rbp,%rcx,8)	/ d[x] = ty
	addb	%al, %bl		/ bl = (ty + tx) & 0xff
	movl	%eax, (%rbp,%rdx,8)	/ d[y] = tx, swap finished
	incb	%cl			/ x for the following round
	movl	(%rbp,%rcx,8), %eax	/ tx for the following round
	movb	(%rbp,%rbx,8), %r8b	/ keystream byte = d[ty + tx]
	decb	%r11b			/ ZF set after the 8th pass
	rorq	$8, %r8			/ rotate byte along; ror keeps ZF
	jnz	.Lnext_byte

	/ combine the block with the input and store it
	xorq	-8(%rsi), %r8		/ r8 ^= 8 input bytes
	cmpq	%r9, %rsi		/ in  versus  in + len - 8
	movq	%r8, -8(%rdi)		/ write 8 output bytes
	jle	.Lblock8		/ another full block is available

/ Tail: remaining bytes, one per iteration.
.Ltail:
	addq	$8, %r9			/ r9 = in + len, end of the input

.Ltail_byte:
	cmpq	%rsi, %r9		/ end of input  versus  in cursor
	jle	.Ldone			/ nothing left to process
	addb	%al, %dl		/ y = (y + tx) & 0xff
	movl	(%rbp,%rdx,8), %ebx	/ ty = d[y]
	movl	%ebx, (%rbp,%rcx,8)	/ d[x] = ty
	addb	%al, %bl		/ bl = (ty + tx) & 0xff
	movl	%eax, (%rbp,%rdx,8)	/ d[y] = tx, swap finished
	incb	%cl			/ x for the following round
	movl	(%rbp,%rcx,8), %eax	/ tx for the following round
	movb	(%rbp,%rbx,8), %r8b	/ keystream byte = d[ty + tx]
	xorb	(%rsi), %r8b		/ combine with one input byte
	movb	%r8b, (%rdi)		/ write one output byte
	incq	%rsi			/ in++
	incq	%rdi			/ out++
	jmp	.Ltail_byte

/ Write the indices back; rbp still points 16 bytes past key.
.Ldone:
	decq	%rcx			/ take back the look-ahead step of x
	movb	%dl, -8(%rbp)		/ key->y = y
	movb	%cl, -16(%rbp)		/ key->x = x
	popq	%rbx
	popq	%rbp
	ret
.LARCFOUR_end:
	.size	ARCFOUR,.LARCFOUR_end-ARCFOUR

mercurial