security/nss/lib/freebl/arcfour-amd64-masm.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 ; This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 ; ** ARCFOUR implementation optimized for AMD64.
michael@0 6 ; **
michael@0 7 ; ** The throughput achieved by this code is about 320 MBytes/sec, on
michael@0 8 ; ** a 1.8 GHz AMD Opteron (rev C0) processor.
michael@0 9
.CODE

;-----------------------------------------------------------------------
; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
;                     const unsigned char *input, unsigned char *output);
;
; XORs inputLen bytes of `input` with the RC4 keystream generated from
; the state in `cx`, writes the result to `output`, and stores the
; updated x / y indices back into `cx`.
;
; Syntax: MASM (ml64).   ABI: Microsoft x64.
; In:     rcx = cx, rdx = inputLen, r8 = input, r9 = output
; Out:    no register result; *output and *cx are updated
; Saved:  rbp, rbx, rsi, rdi (pushed in the prolog, popped before ret)
; Clobb:  rax, rcx, rdx, r8, r9, r11, flags
;
; Context layout as accessed by this routine:
;   [cx+0]    x    read as a qword, written back as a single byte
;   [cx+8]    y    read as a qword, written back as a single byte
;   [cx+16]   d[]  table indexed by 8-bit values, one entry per 8-byte
;                  slot; only the low dword of a slot is loaded/stored
;                  inside the loops
; NOTE(review): y is used as an index without masking, so the upper
; bytes of the stored x / y fields are presumably always zero -- confirm
; against the C code that initialises the context.
;
; Register roles after setup:
;   rbp = d (table base)        rsi = in  (read cursor)
;   rcx = x                     rdi = out (write cursor)
;   rdx = y                     r9  = in+len-8, later in+len
;   rax = tx = d[x], preloaded  r8  = keystream accumulator
;   rbx = ty / lookup index     r11 = byte counter of the 8-byte block
;
; The prolog pushes nonvolatile registers, so in Win64 terms this is not
; a leaf function.  PROC FRAME with .pushreg / .endprolog emits the
; unwind data the OS needs to walk through this frame, e.g. when a fault
; is raised on a bad input or output pointer.
;
; NOTE(review): the pointer comparisons use signed conditions (jl / jle)
; exactly as the original code did; this relies on the buffer not
; straddling the signed boundary of the address space.
;-----------------------------------------------------------------------

ARCFOUR PROC FRAME

        push    rbp
        .pushreg rbp
        push    rbx
        .pushreg rbx
        push    rsi
        .pushreg rsi
        push    rdi
        .pushreg rdi
        .endprolog

        mov     rbp, rcx                ; rbp = cx
        mov     rbx, rdx                ; rbx = inputLen
        mov     rsi, r8                 ; in  = input
        mov     rdi, r9                 ; out = output
        mov     rcx, [rbp]              ; x = cx->x
        mov     rdx, [rbp+8]            ; y = cx->y
        add     rbp, 16                 ; rbp = d (table follows x and y)
        inc     rcx                     ; x++ (pre-increment for first round)
        and     rcx, 0ffh               ; x &= 0xff
        lea     rbx, [rbx+rsi-8]        ; rbx = in + len - 8
        mov     r9, rbx                 ; keep the limit; rbx is scratch below
        mov     rax, [rbp+rcx*8]        ; tx = d[x]
        cmp     rbx, rsi                ; in + len - 8  vs  in
        jl      Lend                    ; fewer than 8 bytes: tail loop only

Lstart:
        ; Cursors are advanced first; the block itself is addressed as
        ; [rsi-8] / [rdi-8] further down.
        add     rsi, 8                  ; in  += 8
        add     rdi, 8                  ; out += 8

        ;
        ; Generate the next 8 keystream bytes into r8.  Each round puts
        ; one byte into r8b and rotates r8 right by 8, so after eight
        ; rounds the first byte generated sits in the lowest byte of r8
        ; (memory order for the qword xor below).
        ;

        mov     r11, 8                  ; byte counter

@@:
        add     dl, al                  ; y += tx (wraps mod 256)
        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
        mov     [rbp+rcx*8], ebx        ; d[x] = ty
        add     bl, al                  ; idx = ty + tx (wraps mod 256)
        mov     [rbp+rdx*8], eax        ; d[y] = tx
        inc     cl                      ; x++ for the next round (wraps mod 256)
        mov     eax, [rbp+rcx*8]        ; tx = d[x] for the next round
        mov     r8b, [rbp+rbx*8]        ; keystream byte = d[idx]
        dec     r11b                    ; sets ZF for the jnz below
        ror     r8, 8                   ; ror leaves ZF untouched
        jnz     @b

        ;
        ; xor 8 bytes
        ;

        xor     r8, [rsi-8]             ; keystream ^ input block
        cmp     rsi, r9                 ; in  vs  in + len - 8
        mov     [rdi-8], r8             ; store block (mov keeps the flags)
        jle     Lstart                  ; at least 8 bytes left: next block

Lend:
        add     r9, 8                   ; r9 = in + len (end of input)

        ;
        ; Handle the remaining bytes one at a time.
        ;

@@:
        cmp     r9, rsi                 ; end  vs  in
        jle     Lfinished               ; nothing left
        add     dl, al                  ; y += tx (wraps mod 256)
        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
        mov     [rbp+rcx*8], ebx        ; d[x] = ty
        add     bl, al                  ; idx = ty + tx (wraps mod 256)
        mov     [rbp+rdx*8], eax        ; d[y] = tx
        inc     cl                      ; x++ for the next round (wraps mod 256)
        mov     eax, [rbp+rcx*8]        ; tx = d[x] for the next round
        mov     r8b, [rbp+rbx*8]        ; keystream byte = d[idx]
        xor     r8b, [rsi]              ; ^ input byte
        mov     [rdi], r8b              ; store output byte
        inc     rsi                     ; in++
        inc     rdi                     ; out++
        jmp     @b

Lfinished:
        dec     rcx                     ; undo the look-ahead increment of x
        mov     [rbp-8], dl             ; cx->y = y (low byte only)
        mov     [rbp-16], cl            ; cx->x = x (low byte only)

        pop     rdi
        pop     rsi
        pop     rbx
        pop     rbp
        ret

ARCFOUR ENDP

END

mercurial