Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | ; This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | ; License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | ; file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 4 | |
michael@0 | 5 | ; ** ARCFOUR implementation optimized for AMD64. |
michael@0 | 6 | ; ** |
michael@0 | 7 | ; ** The throughput achieved by this code is about 320 MBytes/sec, on |
michael@0 | 8 | ; ** a 1.8 GHz AMD Opteron (rev C0) processor. |
michael@0 | 9 | |
michael@0 | 10 | .CODE |
michael@0 | 11 | |
michael@0 | 12 | ; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen, const unsigned char *input, unsigned char *output); -- the four arguments arrive in rcx, rdx, r8, r9. |
michael@0 | 13 | ; XORs inputLen bytes of input with the cipher stream into output, updates the state table in place and stores the low bytes of x and y back into cx; rbp, rbx, rsi, rdi are pushed on entry and popped before ret. NOTE(review): the PROC pushes nonvolatile registers but declares no unwind data (no PROC FRAME / .pushreg / .endprolog) -- confirm this is acceptable for the target build. |
michael@0 | 14 | |
michael@0 | 15 | |
michael@0 | 16 | ARCFOUR PROC |
michael@0 | 17 | |
michael@0 | 18 | push rbp |
michael@0 | 19 | push rbx |
michael@0 | 20 | push rsi |
michael@0 | 21 | push rdi |
michael@0 | 22 | |
michael@0 | 23 | mov rbp, rcx ; rbp = key = cx (1st argument) |
michael@0 | 24 | mov rbx, rdx ; rbx = len = inputLen (2nd argument) |
michael@0 | 25 | mov rsi, r8 ; rsi = in = input (3rd argument) |
michael@0 | 26 | mov rdi, r9 ; rdi = out = output (4th argument) |
michael@0 | 27 | mov rcx, [rbp] ; rcx = x, 8-byte load from offset 0 of the context |
michael@0 | 28 | mov rdx, [rbp+8] ; rdx = y, 8-byte load from offset 8; never masked, so indexing by rdx assumes the stored y has no bits above bit 7 -- TODO confirm |
michael@0 | 29 | add rbp, 16 ; rbp = d, the state table at offset 16 (entries are indexed with an 8-byte stride) |
michael@0 | 30 | inc rcx ; x++ ahead of the first round (undone by the dec at Lfinished) |
michael@0 | 31 | and rcx, 0ffh ; x &= 0xff; upper bits of rcx stay zero afterwards because only cl is incremented |
michael@0 | 32 | lea rbx, [rbx+rsi-8] ; rbx = in+len-8, the last address at which a full 8-byte block can start |
michael@0 | 33 | mov r9, rbx ; r9 = in+len-8; kept in r9 because ebx/bl are reused as scratch inside the loops |
michael@0 | 34 | mov rax, [rbp+rcx*8] ; rax = tx = d[x], preloaded for the first round |
michael@0 | 35 | cmp rbx, rsi ; compare in+len-8 with in (signed compare) |
michael@0 | 36 | jl Lend ; fewer than 8 bytes in total: skip the 8-byte loop |
michael@0 | 37 | |
michael@0 | 38 | Lstart: |
michael@0 | 39 | add rsi, 8 ; in += 8 up front; the current block is addressed as [rsi-8] below |
michael@0 | 40 | add rdi, 8 ; out += 8 up front; the current block is addressed as [rdi-8] below |
michael@0 | 41 | |
michael@0 | 42 | ; Main loop body: one pass per 8 input bytes. |
michael@0 | 43 | ; Generate the next 8 bytes of the rc4 stream into r8: each round places one byte in r8b |
michael@0 | 44 | ; and rotates r8 right by 8, so after 8 rounds the first byte sits in the low byte of r8. |
michael@0 | 45 | |
michael@0 | 46 | mov r11, 8 ; r11 = rounds left for this 8-byte block |
michael@0 | 47 | |
michael@0 | 48 | @@: |
michael@0 | 49 | add dl, al ; y = (y + tx) mod 256 (byte add wraps) |
michael@0 | 50 | mov ebx, [rbp+rdx*8] ; ebx = ty = d[y] |
michael@0 | 51 | mov [rbp+rcx*8], ebx ; d[x] = ty (first half of the swap) |
michael@0 | 52 | add bl, al ; bl = (ty + tx) mod 256, index of the stream byte |
michael@0 | 53 | mov [rbp+rdx*8], eax ; d[y] = tx (second half of the swap) |
michael@0 | 54 | inc cl ; x = (x + 1) mod 256, already for the NEXT ROUND |
michael@0 | 55 | mov eax, [rbp+rcx*8] ; eax = tx = d[x] for the NEXT ROUND, loaded early |
michael@0 | 56 | mov r8b, [rbp+rbx*8] ; r8b = d[(ty + tx) mod 256], the stream byte; indexing by rbx assumes bits 8-31 of ty are zero -- TODO confirm |
michael@0 | 57 | dec r11b |
michael@0 | 58 | ror r8, 8 ; rotate the new byte to the top of r8; ror does not change ZF, so the jnz below still tests the dec above |
michael@0 | 59 | jnz @b |
michael@0 | 60 | |
michael@0 | 61 | ; |
michael@0 | 62 | ; xor 8 input bytes with the stream in r8 and store the result; rsi/rdi were |
michael@0 | 63 | ; already advanced past this block, hence the -8 displacements. |
michael@0 | 64 | |
michael@0 | 65 | xor r8, [rsi-8] |
michael@0 | 66 | cmp rsi, r9 ; compare in with in+len-8 (signed); the mov below leaves these flags untouched for the jle |
michael@0 | 67 | mov [rdi-8], r8 |
michael@0 | 68 | jle Lstart |
michael@0 | 69 | |
michael@0 | 70 | Lend: |
michael@0 | 71 | add r9, 8 ; r9 = in+len, one past the last input byte |
michael@0 | 72 | |
michael@0 | 73 | ; Tail loop: |
michael@0 | 74 | ; handle the last bytes, one by one (this is also the only loop that runs when len < 8). |
michael@0 | 75 | ; Same round as in the 8-byte loop, but each stream byte is consumed immediately. |
michael@0 | 76 | |
michael@0 | 77 | @@: |
michael@0 | 78 | cmp r9, rsi ; compare in+len with in (signed compare) |
michael@0 | 79 | jle Lfinished ; no input left (in+len <= in) |
michael@0 | 80 | add dl, al ; y = (y + tx) mod 256 |
michael@0 | 81 | mov ebx, [rbp+rdx*8] ; ebx = ty = d[y] |
michael@0 | 82 | mov [rbp+rcx*8], ebx ; d[x] = ty (first half of the swap) |
michael@0 | 83 | add bl, al ; bl = (ty + tx) mod 256, index of the stream byte |
michael@0 | 84 | mov [rbp+rdx*8], eax ; d[y] = tx (second half of the swap) |
michael@0 | 85 | inc cl ; x = (x + 1) mod 256, already for the NEXT ROUND |
michael@0 | 86 | mov eax, [rbp+rcx*8] ; eax = tx = d[x] for the NEXT ROUND, loaded early |
michael@0 | 87 | mov r8b, [rbp+rbx*8] ; r8b = d[(ty + tx) mod 256], the stream byte |
michael@0 | 88 | xor r8b, [rsi] ; r8b = stream byte xor *in |
michael@0 | 89 | mov [rdi], r8b |
michael@0 | 90 | inc rsi ; in++ |
michael@0 | 91 | inc rdi |
michael@0 | 92 | jmp @b |
michael@0 | 93 | |
michael@0 | 94 | Lfinished: |
michael@0 | 95 | dec rcx ; x-- undoes the look-ahead increment; only cl is stored below |
michael@0 | 96 | mov [rbp-8], dl ; low byte of key->y = y (rbp was advanced by 16, so -8 is context offset 8) |
michael@0 | 97 | mov [rbp-16], cl ; low byte of key->x = x (context offset 0) |
michael@0 | 98 | |
michael@0 | 99 | pop rdi |
michael@0 | 100 | pop rsi |
michael@0 | 101 | pop rbx |
michael@0 | 102 | pop rbp |
michael@0 | 103 | ret |
michael@0 | 104 | |
michael@0 | 105 | ARCFOUR ENDP |
michael@0 | 106 | |
michael@0 | 107 | END |