security/nss/lib/freebl/arcfour-amd64-gas.s

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 # ** ARCFOUR implementation optimized for AMD64.
michael@0 6 # **
michael@0 7 # ** The throughput achieved by this code is about 320 MBytes/sec, on
michael@0 8 # ** a 1.8 GHz AMD Opteron (rev C0) processor.
michael@0 9
michael@0 10 .text
michael@0 11 .align 16
michael@0 12 .globl ARCFOUR
michael@0 13 .type ARCFOUR,@function
# ----------------------------------------------------------------------
# ARCFOUR: XOR `len` bytes at `in` with the rc4 stream generated from the
# state at `key`, store the result at `out`, and write the updated x / y
# indices back into `key`.
#
# Syntax: AT&T (GAS), x86-64.
# In (as received by the argument moves below):
#   %rdi = key, %rsi = len, %rdx = in, %rcx = out
# State layout as accessed here: x at key+0, y at key+8, table d[] starting
# at key+16 with one entry every 8 bytes (index scale 8).
# Register roles after setup:
#   %rbp = d          %rcx = x (already advanced for the next round)
#   %rdx = y          %rax = tx = d[x]
#   %rsi = in         %rdi = out
#   %r9  = in+len-8 (8-byte loop bound), later in+len (byte loop bound)
#   %rbx = scratch (ty, then val)   %r8 = stream bytes   %r11 = byte counter
# Saved and restored: %rbx, %rbp.
# Overwritten: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r11, flags.
# No return value is set up; %rax holds the last tx on exit.
# NOTE(review): the C prototype is not visible in this file - confirm it.
# NOTE(review): y is loaded as 64 bits and never masked, and val is indexed
# after a byte-wide add on a 32-bit load, so this assumes key->y < 256 and
# every table entry < 256 - confirm against the key setup code.
# ----------------------------------------------------------------------
michael@0 14 ARCFOUR:
michael@0 15 pushq %rbp # preserve %rbp for the caller (used below as table base)
michael@0 16 pushq %rbx # preserve %rbx for the caller (used below as scratch)
michael@0 17 movq %rdi, %rbp # key = ARG(key)
michael@0 18 movq %rsi, %rbx # rbx = ARG(len)
michael@0 19 movq %rdx, %rsi # in = ARG(in)
michael@0 20 movq %rcx, %rdi # out = ARG(out)
michael@0 21 movq (%rbp), %rcx # x = key->x (8-byte load from key+0)
michael@0 22 movq 8(%rbp), %rdx # y = key->y (8-byte load from key+8)
michael@0 23 addq $16, %rbp # d = key->data (rbp now points at key+16)
michael@0 24 incq %rcx # x++ (x is kept one step ahead from here on)
michael@0 25 andq $255, %rcx # x &= 0xff
michael@0 26 leaq -8(%rbx,%rsi), %rbx # rbx = in+len-8 (last start of a full 8-byte block)
michael@0 27 movq %rbx, %r9 # r9 = in+len-8; kept here because rbx is reused as scratch
michael@0 28 movq 0(%rbp,%rcx,8), %rax # tx = d[x] (pre-loaded for the first round)
michael@0 29 cmpq %rsi, %rbx # compare in+len-8 against in (signed)
michael@0 30 jl .Lend # in+len-8 < in: no full 8-byte block, go to the byte loop
michael@0 31
michael@0 32 .Lstart: # 8-byte loop: one 64-bit block per pass
michael@0 33 addq $8, %rsi # in += 8 (current block is addressed as -8(%rsi) below)
michael@0 34 addq $8, %rdi # out += 8 (current block is addressed as -8(%rdi) below)
michael@0 35
michael@0 36 # generate the next 8 bytes of the rc4 stream into %r8, one byte per round
michael@0 37 movq $8, %r11 # byte counter (only %r11b is decremented)
michael@0 38 1: addb %al, %dl # y += tx (byte add, wraps mod 256)
michael@0 39 movl 0(%rbp,%rdx,8), %ebx # ty = d[y] (32-bit load clears the upper half of rbx)
michael@0 40 movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
michael@0 41 addb %al, %bl # val = ty + tx (byte add, wraps mod 256)
michael@0 42 movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
michael@0 43 incb %cl # x++ (NEXT ROUND), wraps mod 256
michael@0 44 movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
michael@0 45 movb 0(%rbp,%rbx,8), %r8b # val = d[val], placed in the low byte of r8
michael@0 46 decb %r11b # sets ZF when the 8th byte has been produced
michael@0 47 rorq $8, %r8 # rotate the new byte to the top (ror does not change ZF)
michael@0 48 jnz 1b # ZF still comes from decb; after 8 rounds the first byte is lowest
michael@0 49
michael@0 50 # xor 8 bytes of input with the 8 stream bytes and store them
michael@0 51 xorq -8(%rsi), %r8 # r8 ^= input block
michael@0 52 cmpq %r9, %rsi # compare in against in+len-8 (signed)
michael@0 53 movq %r8, -8(%rdi) # store output block (mov leaves the cmp flags intact)
michael@0 54 jle .Lstart # jump if (in <= in+len-8): another full block fits
michael@0 55
michael@0 56 .Lend:
michael@0 57 addq $8, %r9 # r9 = in+len (end of input)
michael@0 58
michael@0 59 # handle the last bytes (fewer than 8), one by one
michael@0 60 1: cmpq %rsi, %r9 # compare in+len against in (signed)
michael@0 61 jle .Lfinished # jump if (in+len <= in): nothing left
michael@0 62 addb %al, %dl # y += tx (byte add, wraps mod 256)
michael@0 63 movl 0(%rbp,%rdx,8), %ebx # ty = d[y] (32-bit load clears the upper half of rbx)
michael@0 64 movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
michael@0 65 addb %al, %bl # val = ty + tx (byte add, wraps mod 256)
michael@0 66 movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
michael@0 67 incb %cl # x++ (NEXT ROUND), wraps mod 256
michael@0 68 movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
michael@0 69 movb 0(%rbp,%rbx,8), %r8b # val = d[val]
michael@0 70 xorb (%rsi), %r8b # xor 1 byte of input with the stream byte
michael@0 71 movb %r8b, (%rdi) # store 1 byte of output
michael@0 72 incq %rsi # in++
michael@0 73 incq %rdi # out++
michael@0 74 jmp 1b # back to the byte-loop test (nearest preceding 1:)
michael@0 75
michael@0 76 .Lfinished:
michael@0 77 decq %rcx # x-- (undo the look-ahead increment before saving)
michael@0 78 movb %dl, -8(%rbp) # key->y = y (low byte only; rbp = key+16, so this is key+8)
michael@0 79 movb %cl, -16(%rbp) # key->x = x (low byte only; this is key+0)
michael@0 80 popq %rbx # restore in reverse push order
michael@0 81 popq %rbp
michael@0 82 ret
michael@0 83 .L_ARCFOUR_end:
michael@0 84 .size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
michael@0 85
michael@0 86 # Magic indicating no need for an executable stack
michael@0 87 .section .note.GNU-stack,"",@progbits
michael@0 88 .previous

mercurial