security/nss/lib/freebl/arcfour-amd64-masm.asm

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 ; This Source Code Form is subject to the terms of the Mozilla Public
     2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
     3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 ; ** ARCFOUR implementation optimized for AMD64.
     6 ; **
     7 ; ** The throughput achieved by this code is about 320 MBytes/sec, on
     8 ; ** a 1.8 GHz AMD Opteron (rev C0) processor.
    10 .CODE
    12 ; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen, 
    13 ;                     const unsigned char *input, unsigned char *output);
    16 ARCFOUR PROC
    18         push    rbp
    19         push    rbx
    20         push    rsi
    21         push    rdi
    23         mov     rbp, rcx                        ; key = ARG(key)
    24         mov     rbx, rdx                        ; rbx = ARG(len)
    25         mov     rsi, r8                         ; in = ARG(in)
    26         mov     rdi, r9                         ; out = ARG(out)
    27         mov     rcx, [rbp]                      ; x = key->x
    28         mov     rdx, [rbp+8]                    ; y = key->y
    29         add     rbp, 16                         ; d = key->data
    30         inc     rcx                             ; x++
    31         and     rcx, 0ffh                       ; x &= 0xff
    32         lea     rbx, [rbx+rsi-8]                ; rbx = in+len-8
    33         mov     r9, rbx                         ; tmp = in+len-8
    34         mov     rax, [rbp+rcx*8]                ; tx = d[x]
    35         cmp     rbx, rsi                        ; cmp in with in+len-8
    36         jl      Lend                            ; jump if (in+len-8 < in)
    38 Lstart:
    39         add     rsi, 8                          ; increment in
    40         add     rdi, 8                          ; increment out
    42         ;
    43         ; generate the next 8 bytes of the rc4 stream into r8
    44         ;
    46         mov     r11, 8                          ; byte counter
    48 @@:
    49         add     dl, al                          ; y += tx
    50         mov     ebx, [rbp+rdx*8]                ; ty = d[y]
    51         mov     [rbp+rcx*8], ebx                ; d[x] = ty
    52         add     bl, al                          ; val = ty + tx
    53         mov     [rbp+rdx*8], eax                ; d[y] = tx
    54         inc     cl                              ; x++ (NEXT ROUND)
    55         mov     eax, [rbp+rcx*8]                ; tx = d[x] (NEXT ROUND)
    56         mov     r8b, [rbp+rbx*8]                ; val = d[val]
    57         dec     r11b
    58         ror     r8, 8                           ; (ror does not change ZF)
    59         jnz     @b
    61         ;
    62         ; xor 8 bytes
    63         ;
    65         xor     r8, [rsi-8]
    66         cmp     rsi, r9                         ; cmp in+len-8 with in
    67         mov     [rdi-8], r8
    68         jle     Lstart
    70 Lend:
    71         add     r9, 8                           ; tmp = in+len
    73         ;
    74         ; handle the last bytes, one by one
    75         ;
    77 @@:
    78         cmp     r9, rsi                         ; cmp in with in+len
    79         jle     Lfinished                       ; jump if (in+len <= in)
    80         add     dl, al                          ; y += tx
    81         mov     ebx, [rbp+rdx*8]                ; ty = d[y]
    82         mov     [rbp+rcx*8], ebx                ; d[x] = ty
    83         add     bl, al                          ; val = ty + tx
    84         mov     [rbp+rdx*8], eax                ; d[y] = tx
    85         inc     cl                              ; x++ (NEXT ROUND)
    86         mov     eax, [rbp+rcx*8]                ; tx = d[x] (NEXT ROUND)
    87         mov     r8b, [rbp+rbx*8]                ; val = d[val]
    88         xor     r8b, [rsi]                      ; xor 1 byte
    89         mov     [rdi], r8b
    90         inc     rsi                             ; in++
    91         inc     rdi
    92         jmp     @b
    94 Lfinished:
    95         dec     rcx                             ; x--
    96         mov     [rbp-8], dl                     ; key->y = y
    97         mov     [rbp-16], cl                    ; key->x = x
    99         pop     rdi
   100         pop     rsi
   101         pop     rbx
   102         pop     rbp
   103         ret
   105 ARCFOUR ENDP
   107 END

mercurial