security/nss/lib/freebl/intel-aes.s

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 .text
michael@0 6
michael@0 7 #define IV_OFFSET 16
michael@0 8 #define EXPANDED_KEY_OFFSET 48
michael@0 9
michael@0 10
michael@0 11 /* Expand a 128-bit AES key into 11 round keys (176 bytes) for encryption. in %rdi : the key (16 bytes are read)
michael@0 12 in %rsi : buffer for expanded key (176 bytes are written; %rsi ends just past them)
michael@0 13 on return %eax is 0; %xmm1-%xmm3 are used as scratch */
michael@0 14 .type intel_aes_encrypt_init_128,@function
michael@0 15 .globl intel_aes_encrypt_init_128
michael@0 16 .align 16
michael@0 17 intel_aes_encrypt_init_128:
michael@0 18 movups (%rdi), %xmm1
michael@0 19 movups %xmm1, (%rsi) /* round key 0 is the cipher key itself */
michael@0 20 leaq 16(%rsi), %rsi /* %rsi = slot for round key 1 */
michael@0 21 xorl %eax, %eax /* key_expansion128 loads %eax into %xmm3 and needs it to be 0 */
michael@0 22 /* one aeskeygenassist + key_expansion128 pair per round key 1..10; the immediate is that round's constant */
michael@0 23 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
michael@0 24 call key_expansion128
michael@0 25 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
michael@0 26 call key_expansion128
michael@0 27 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
michael@0 28 call key_expansion128
michael@0 29 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
michael@0 30 call key_expansion128
michael@0 31 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
michael@0 32 call key_expansion128
michael@0 33 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
michael@0 34 call key_expansion128
michael@0 35 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
michael@0 36 call key_expansion128
michael@0 37 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
michael@0 38 call key_expansion128
michael@0 39 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
michael@0 40 call key_expansion128
michael@0 41 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
michael@0 42 call key_expansion128
michael@0 43
michael@0 44 ret
michael@0 45 .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
michael@0 46
michael@0 47
michael@0 48 /* Expand a 128-bit AES key into 11 round keys (176 bytes) for decryption. in %rdi : the key (16 bytes are read)
michael@0 49 in %rsi : buffer for expanded key (176 bytes are written; %rsi ends just past them)
michael@0 50 round keys 1..9 are stored after aesimc; round keys 0 and 10 are stored unchanged; on return %eax is 0; %xmm1-%xmm3 are used as scratch */
michael@0 51 .type intel_aes_decrypt_init_128,@function
michael@0 52 .globl intel_aes_decrypt_init_128
michael@0 53 .align 16
michael@0 54 intel_aes_decrypt_init_128:
michael@0 55 movups (%rdi), %xmm1
michael@0 56 movups %xmm1, (%rsi) /* round key 0 is the cipher key itself */
michael@0 57 leaq 16(%rsi), %rsi /* %rsi = slot for round key 1 */
michael@0 58 xorl %eax, %eax /* key_expansion128 loads %eax into %xmm3 and needs it to be 0 */
michael@0 59
michael@0 60 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
michael@0 61 call key_expansion128
michael@0 62 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 63 movups %xmm2, -16(%rsi) /* overwrite the key just stored by key_expansion128 with its aesimc form; %xmm1 keeps the untransformed key for the next expansion */
michael@0 64 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
michael@0 65 call key_expansion128
michael@0 66 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 67 movups %xmm2, -16(%rsi)
michael@0 68 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
michael@0 69 call key_expansion128
michael@0 70 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 71 movups %xmm2, -16(%rsi)
michael@0 72 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
michael@0 73 call key_expansion128
michael@0 74 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 75 movups %xmm2, -16(%rsi)
michael@0 76 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
michael@0 77 call key_expansion128
michael@0 78 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 79 movups %xmm2, -16(%rsi)
michael@0 80 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
michael@0 81 call key_expansion128
michael@0 82 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 83 movups %xmm2, -16(%rsi)
michael@0 84 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
michael@0 85 call key_expansion128
michael@0 86 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 87 movups %xmm2, -16(%rsi)
michael@0 88 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
michael@0 89 call key_expansion128
michael@0 90 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 91 movups %xmm2, -16(%rsi)
michael@0 92 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
michael@0 93 call key_expansion128
michael@0 94 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 95 movups %xmm2, -16(%rsi)
michael@0 96 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
michael@0 97 call key_expansion128 /* round key 10 is stored as is: no aesimc follows */
michael@0 98
michael@0 99 ret
michael@0 100 .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
michael@0 101
michael@0 102
michael@0 103 .type key_expansion128,@function /* helper called by intel_aes_encrypt_init_128 and intel_aes_decrypt_init_128. in: %xmm1 = previous round key (dwords k0..k3, low first), %xmm2 = aeskeygenassist result, %eax = 0, %rsi = destination. out: %xmm1 = next round key, also stored at (%rsi); %rsi += 16. clobbers %xmm2, %xmm3 */
michael@0 104 .align 16
michael@0 105 key_expansion128:
michael@0 106 movd %eax, %xmm3 /* %xmm3 = 0 (the init routines zero %eax before calling) */
michael@0 107 pshufd $0xff, %xmm2, %xmm2 /* broadcast the top dword of the aeskeygenassist result to all four lanes */
michael@0 108 shufps $0x10, %xmm1, %xmm3 /* %xmm3 = (0, 0, k1, k0), low dword first */
michael@0 109 pxor %xmm3, %xmm1 /* %xmm1 = (k0, k1, k2^k1, k3^k0) */
michael@0 110 shufps $0x8c, %xmm1, %xmm3 /* %xmm3 = (0, k0, k0, k1^k2) */
michael@0 111 pxor %xmm2, %xmm1
michael@0 112 pxor %xmm3, %xmm1 /* dword i of %xmm1 = k0^...^ki ^ broadcast dword */
michael@0 113 movdqu %xmm1, (%rsi)
michael@0 114 addq $16, %rsi /* advance to the next round-key slot */
michael@0 115 ret
michael@0 116 .size key_expansion128, .-key_expansion128
michael@0 117
michael@0 118
michael@0 119 /* in %rdi : cx - context
michael@0 120 in %rsi : output - pointer to output buffer
michael@0 121 in %rdx : outputLen - pointer to variable for length of output
michael@0 122 (filled by caller)
michael@0 123 in %rcx : maxOutputLen - length of output buffer
michael@0 124 in %r8 : input - pointer to input buffer
michael@0 125 in %r9 : inputLen - length of input buffer
michael@0 126 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 127 AES-128 ECB encrypt: 8 blocks per iteration while at least 128 bytes remain, then one block at a time; returns 0 in %eax; %rdx and %rcx are not used by this routine */
michael@0 128 .type intel_aes_encrypt_ecb_128,@function
michael@0 129 .globl intel_aes_encrypt_ecb_128
michael@0 130 .align 16
michael@0 131 intel_aes_encrypt_ecb_128:
michael@0 132 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 133 leaq 48(%rdi), %rdi /* %rdi = cx + 48 (EXPANDED_KEY_OFFSET): start of the round keys */
michael@0 134
michael@0 135 movdqu (%rdi), %xmm2 /* %xmm2 = round key 0 */
michael@0 136 movdqu 160(%rdi), %xmm12 /* %xmm12 = round key 10 (last) */
michael@0 137 xor %eax, %eax /* %rax = byte offset into input and output */
michael@0 138 // cmpq $8*16, %r9
michael@0 139 cmpq $128, %r9
michael@0 140 jb 1f /* fewer than 8 blocks: go straight to the single-block path */
michael@0 141 // leaq -8*16(%r9), %r11
michael@0 142 leaq -128(%r9), %r11 /* %r11 = inputLen - 128: largest offset at which 8 blocks still fit */
michael@0 143 2: movdqu (%r8, %rax), %xmm3 /* main loop: load 8 input blocks into %xmm3-%xmm10 */
michael@0 144 movdqu 16(%r8, %rax), %xmm4
michael@0 145 movdqu 32(%r8, %rax), %xmm5
michael@0 146 movdqu 48(%r8, %rax), %xmm6
michael@0 147 movdqu 64(%r8, %rax), %xmm7
michael@0 148 movdqu 80(%r8, %rax), %xmm8
michael@0 149 movdqu 96(%r8, %rax), %xmm9
michael@0 150 movdqu 112(%r8, %rax), %xmm10
michael@0 151 pxor %xmm2, %xmm3 /* xor round key 0 into every block */
michael@0 152 pxor %xmm2, %xmm4
michael@0 153 pxor %xmm2, %xmm5
michael@0 154 pxor %xmm2, %xmm6
michael@0 155 pxor %xmm2, %xmm7
michael@0 156 pxor %xmm2, %xmm8
michael@0 157 pxor %xmm2, %xmm9
michael@0 158 pxor %xmm2, %xmm10
michael@0 159
michael@0 160 // complete loop unrolling
michael@0 161 movdqu 16(%rdi), %xmm1 /* round key 1 */
michael@0 162 movdqu 32(%rdi), %xmm11 /* round key 2 */
michael@0 163 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 164 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 165 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 166 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 167 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 168 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 169 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 170 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 171 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 172 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 173 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 174 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 175 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 176 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 177 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 178 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 179
michael@0 180 movdqu 48(%rdi), %xmm1 /* round key 3 */
michael@0 181 movdqu 64(%rdi), %xmm11 /* round key 4 */
michael@0 182 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 183 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 184 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 185 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 186 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 187 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 188 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 189 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 190 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 191 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 192 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 193 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 194 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 195 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 196 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 197 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 198
michael@0 199 movdqu 80(%rdi), %xmm1 /* round key 5 */
michael@0 200 movdqu 96(%rdi), %xmm11 /* round key 6 */
michael@0 201 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 202 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 203 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 204 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 205 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 206 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 207 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 208 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 209 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 210 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 211 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 212 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 213 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 214 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 215 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 216 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 217
michael@0 218 movdqu 112(%rdi), %xmm1 /* round key 7 */
michael@0 219 movdqu 128(%rdi), %xmm11 /* round key 8 */
michael@0 220 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 221 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 222 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 223 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 224 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 225 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 226 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 227 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 228 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 229 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 230 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 231 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 232 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 233 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 234 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 235 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 236
michael@0 237 movdqu 144(%rdi), %xmm1 /* round key 9 */
michael@0 238 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 239 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 240 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 241 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 242 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 243 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 244 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 245 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 246 .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */
michael@0 247 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */
michael@0 248 .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */
michael@0 249 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */
michael@0 250 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */
michael@0 251 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */
michael@0 252 .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */
michael@0 253 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */
michael@0 254
michael@0 255 movdqu %xmm3, (%rsi, %rax) /* store the 8 output blocks */
michael@0 256 movdqu %xmm4, 16(%rsi, %rax)
michael@0 257 movdqu %xmm5, 32(%rsi, %rax)
michael@0 258 movdqu %xmm6, 48(%rsi, %rax)
michael@0 259 movdqu %xmm7, 64(%rsi, %rax)
michael@0 260 movdqu %xmm8, 80(%rsi, %rax)
michael@0 261 movdqu %xmm9, 96(%rsi, %rax)
michael@0 262 movdqu %xmm10, 112(%rsi, %rax)
michael@0 263 // addq $8*16, %rax
michael@0 264 addq $128, %rax
michael@0 265 cmpq %r11, %rax
michael@0 266 jbe 2b /* repeat while offset <= inputLen - 128 */
michael@0 267 1: cmpq %rax, %r9
michael@0 268 je 5f /* no input left */
michael@0 269
michael@0 270 movdqu 16(%rdi), %xmm3 /* preload round keys 1..9 into %xmm3-%xmm11 for the single-block loop */
michael@0 271 movdqu 32(%rdi), %xmm4
michael@0 272 movdqu 48(%rdi), %xmm5
michael@0 273 movdqu 64(%rdi), %xmm6
michael@0 274 movdqu 80(%rdi), %xmm7
michael@0 275 movdqu 96(%rdi), %xmm8
michael@0 276 movdqu 112(%rdi), %xmm9
michael@0 277 movdqu 128(%rdi), %xmm10
michael@0 278 movdqu 144(%rdi), %xmm11
michael@0 279
michael@0 280 4: movdqu (%r8, %rax), %xmm1 /* single-block loop */
michael@0 281 pxor %xmm2, %xmm1 /* xor in round key 0 */
michael@0 282 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
michael@0 283 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
michael@0 284 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
michael@0 285 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
michael@0 286 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
michael@0 287 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
michael@0 288 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
michael@0 289 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
michael@0 290 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
michael@0 291 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
michael@0 292 movdqu %xmm1, (%rsi, %rax)
michael@0 293 addq $16, %rax
michael@0 294 cmpq %rax, %r9
michael@0 295 jne 4b /* exits only when offset == inputLen, so inputLen must be a multiple of 16 */
michael@0 296
michael@0 297 5: xor %eax, %eax /* return 0 */
michael@0 298 ret
michael@0 299 .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
michael@0 300
michael@0 301
michael@0 302 /* in %rdi : cx - context
michael@0 303 in %rsi : output - pointer to output buffer
michael@0 304 in %rdx : outputLen - pointer to variable for length of output
michael@0 305 (filled by caller)
michael@0 306 in %rcx : maxOutputLen - length of output buffer
michael@0 307 in %r8 : input - pointer to input buffer
michael@0 308 in %r9 : inputLen - length of input buffer
michael@0 309 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 310 AES-128 ECB decrypt: 8 blocks per iteration while at least 128 bytes remain, then one block at a time; round keys are applied from 10 down to 0; presumably cx holds the schedule built by intel_aes_decrypt_init_128 (confirm against the caller); returns 0 in %eax; %rdx and %rcx are not used by this routine */
michael@0 311 .type intel_aes_decrypt_ecb_128,@function
michael@0 312 .globl intel_aes_decrypt_ecb_128
michael@0 313 .align 16
michael@0 314 intel_aes_decrypt_ecb_128:
michael@0 315 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 316 leaq 48(%rdi), %rdi /* %rdi = cx + 48 (EXPANDED_KEY_OFFSET): start of the round keys */
michael@0 317
michael@0 318 movdqu (%rdi), %xmm2 /* %xmm2 = round key 0 (used last) */
michael@0 319 movdqu 160(%rdi), %xmm12 /* %xmm12 = round key 10 (used first) */
michael@0 320 xorl %eax, %eax /* %rax = byte offset into input and output */
michael@0 321 // cmpq $8*16, %r9
michael@0 322 cmpq $128, %r9
michael@0 323 jb 1f /* fewer than 8 blocks: go straight to the single-block path */
michael@0 324 // leaq -8*16(%r9), %r11
michael@0 325 leaq -128(%r9), %r11 /* %r11 = inputLen - 128: largest offset at which 8 blocks still fit */
michael@0 326 2: movdqu (%r8, %rax), %xmm3 /* main loop: load 8 input blocks into %xmm3-%xmm10 */
michael@0 327 movdqu 16(%r8, %rax), %xmm4
michael@0 328 movdqu 32(%r8, %rax), %xmm5
michael@0 329 movdqu 48(%r8, %rax), %xmm6
michael@0 330 movdqu 64(%r8, %rax), %xmm7
michael@0 331 movdqu 80(%r8, %rax), %xmm8
michael@0 332 movdqu 96(%r8, %rax), %xmm9
michael@0 333 movdqu 112(%r8, %rax), %xmm10
michael@0 334 pxor %xmm12, %xmm3 /* xor round key 10 into every block */
michael@0 335 pxor %xmm12, %xmm4
michael@0 336 pxor %xmm12, %xmm5
michael@0 337 pxor %xmm12, %xmm6
michael@0 338 pxor %xmm12, %xmm7
michael@0 339 pxor %xmm12, %xmm8
michael@0 340 pxor %xmm12, %xmm9
michael@0 341 pxor %xmm12, %xmm10
michael@0 342
michael@0 343 // complete loop unrolling
michael@0 344 movdqu 144(%rdi), %xmm1 /* round key 9 */
michael@0 345 movdqu 128(%rdi), %xmm11 /* round key 8 */
michael@0 346 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 347 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 348 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 349 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 350 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 351 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 352 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 353 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 354 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 355 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 356 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 357 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 358 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 359 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 360 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 361 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 362
michael@0 363 movdqu 112(%rdi), %xmm1 /* round key 7 */
michael@0 364 movdqu 96(%rdi), %xmm11 /* round key 6 */
michael@0 365 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 366 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 367 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 368 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 369 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 370 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 371 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 372 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 373 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 374 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 375 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 376 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 377 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 378 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 379 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 380 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 381
michael@0 382 movdqu 80(%rdi), %xmm1 /* round key 5 */
michael@0 383 movdqu 64(%rdi), %xmm11 /* round key 4 */
michael@0 384 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 385 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 386 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 387 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 388 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 389 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 390 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 391 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 392 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 393 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 394 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 395 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 396 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 397 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 398 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 399 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 400
michael@0 401 movdqu 48(%rdi), %xmm1 /* round key 3 */
michael@0 402 movdqu 32(%rdi), %xmm11 /* round key 2 */
michael@0 403 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 404 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 405 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 406 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 407 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 408 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 409 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 410 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 411 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 412 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 413 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 414 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 415 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 416 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 417 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 418 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 419
michael@0 420 movdqu 16(%rdi), %xmm1 /* round key 1 */
michael@0 421 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 422 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 423 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 424 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 425 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 426 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 427 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 428 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 429 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
michael@0 430 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
michael@0 431 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
michael@0 432 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
michael@0 433 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
michael@0 434 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
michael@0 435 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
michael@0 436 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
michael@0 437
michael@0 438 movdqu %xmm3, (%rsi, %rax) /* store the 8 output blocks */
michael@0 439 movdqu %xmm4, 16(%rsi, %rax)
michael@0 440 movdqu %xmm5, 32(%rsi, %rax)
michael@0 441 movdqu %xmm6, 48(%rsi, %rax)
michael@0 442 movdqu %xmm7, 64(%rsi, %rax)
michael@0 443 movdqu %xmm8, 80(%rsi, %rax)
michael@0 444 movdqu %xmm9, 96(%rsi, %rax)
michael@0 445 movdqu %xmm10, 112(%rsi, %rax)
michael@0 446 // addq $8*16, %rax
michael@0 447 addq $128, %rax
michael@0 448 cmpq %r11, %rax
michael@0 449 jbe 2b /* repeat while offset <= inputLen - 128 */
michael@0 450 1: cmpq %rax, %r9
michael@0 451 je 5f /* no input left */
michael@0 452
michael@0 453 movdqu 16(%rdi), %xmm3 /* preload round keys 1..9 into %xmm3-%xmm11 for the single-block loop */
michael@0 454 movdqu 32(%rdi), %xmm4
michael@0 455 movdqu 48(%rdi), %xmm5
michael@0 456 movdqu 64(%rdi), %xmm6
michael@0 457 movdqu 80(%rdi), %xmm7
michael@0 458 movdqu 96(%rdi), %xmm8
michael@0 459 movdqu 112(%rdi), %xmm9
michael@0 460 movdqu 128(%rdi), %xmm10
michael@0 461 movdqu 144(%rdi), %xmm11
michael@0 462
michael@0 463 4: movdqu (%r8, %rax), %xmm1 /* single-block loop */
michael@0 464 pxor %xmm12, %xmm1 /* xor in round key 10 */
michael@0 465 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
michael@0 466 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
michael@0 467 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
michael@0 468 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
michael@0 469 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
michael@0 470 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
michael@0 471 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
michael@0 472 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
michael@0 473 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
michael@0 474 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
michael@0 475 movdqu %xmm1, (%rsi, %rax)
michael@0 476 addq $16, %rax
michael@0 477 cmpq %rax, %r9
michael@0 478 jne 4b /* exits only when offset == inputLen, so inputLen must be a multiple of 16 */
michael@0 479
michael@0 480 5: xor %eax, %eax /* return 0 */
michael@0 481 ret
michael@0 482 .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
michael@0 483
michael@0 484
michael@0 485 /* in %rdi : cx - context
michael@0 486 in %rsi : output - pointer to output buffer
michael@0 487 in %rdx : outputLen - pointer to variable for length of output
michael@0 488 (filled by caller)
michael@0 489 in %rcx : maxOutputLen - length of output buffer
michael@0 490 in %r8 : input - pointer to input buffer
michael@0 491 in %r9 : inputLen - length of input buffer
michael@0 492 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 493 AES-128 CBC encrypt, one block at a time; the IV is read from cx + 16 and replaced there by the last ciphertext block; returns 0 in %eax; %rcx is not used and the incoming %rdx is overwritten */
michael@0 494 .type intel_aes_encrypt_cbc_128,@function
michael@0 495 .globl intel_aes_encrypt_cbc_128
michael@0 496 .align 16
michael@0 497 intel_aes_encrypt_cbc_128:
michael@0 498 testq %r9, %r9
michael@0 499 je 2f /* inputLen == 0: nothing to do, IV left untouched */
michael@0 500
michael@0 501 // leaq IV_OFFSET(%rdi), %rdx
michael@0 502 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 503 leaq 16(%rdi), %rdx /* %rdx = cx + 16 (IV_OFFSET); the outputLen argument is discarded */
michael@0 504 leaq 48(%rdi), %rdi /* %rdi = cx + 48 (EXPANDED_KEY_OFFSET): start of the round keys */
michael@0 505
michael@0 506 movdqu (%rdx), %xmm0 /* %xmm0 = IV, later the previous ciphertext block */
michael@0 507 movdqu (%rdi), %xmm2 /* %xmm2..%xmm12 = round keys 0..10 */
michael@0 508 movdqu 16(%rdi), %xmm3
michael@0 509 movdqu 32(%rdi), %xmm4
michael@0 510 movdqu 48(%rdi), %xmm5
michael@0 511 movdqu 64(%rdi), %xmm6
michael@0 512 movdqu 80(%rdi), %xmm7
michael@0 513 movdqu 96(%rdi), %xmm8
michael@0 514 movdqu 112(%rdi), %xmm9
michael@0 515 movdqu 128(%rdi), %xmm10
michael@0 516 movdqu 144(%rdi), %xmm11
michael@0 517 movdqu 160(%rdi), %xmm12
michael@0 518
michael@0 519 xorl %eax, %eax /* %rax = byte offset into input and output */
michael@0 520 1: movdqu (%r8, %rax), %xmm1 /* next plaintext block */
michael@0 521 pxor %xmm0, %xmm1 /* xor in the chaining value */
michael@0 522 pxor %xmm2, %xmm1 /* xor in round key 0 */
michael@0 523 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
michael@0 524 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
michael@0 525 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
michael@0 526 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
michael@0 527 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
michael@0 528 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
michael@0 529 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
michael@0 530 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
michael@0 531 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
michael@0 532 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
michael@0 533 movdqu %xmm1, (%rsi, %rax)
michael@0 534 movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
michael@0 535 addq $16, %rax
michael@0 536 cmpq %rax, %r9
michael@0 537 jne 1b /* exits only when offset == inputLen, so inputLen must be a multiple of 16 */
michael@0 538
michael@0 539 movdqu %xmm0, (%rdx) /* save the last ciphertext block as the new IV */
michael@0 540
michael@0 541 2: xor %eax, %eax /* return 0 */
michael@0 542 ret
michael@0 543 .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
michael@0 544
michael@0 545
michael@0 546 /* in %rdi : cx - context
michael@0 547 in %rsi : output - pointer to output buffer
michael@0 548 in %rdx : outputLen - pointer to variable for length of output
michael@0 549 (filled by caller)
michael@0 550 in %rcx : maxOutputLen - length of output buffer
michael@0 551 in %r8 : input - pointer to input buffer
michael@0 552 in %r9 : inputLen - length of input buffer
michael@0 553 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 554 */
michael@0 555 .type intel_aes_decrypt_cbc_128,@function
michael@0 556 .globl intel_aes_decrypt_cbc_128
michael@0 557 .align 16
michael@0 558 intel_aes_decrypt_cbc_128:
michael@0 559 // leaq IV_OFFSET(%rdi), %rdx
michael@0 560 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 561 leaq 16(%rdi), %rdx
michael@0 562 leaq 48(%rdi), %rdi
michael@0 563
michael@0 564 movdqu (%rdx), %xmm0 /* iv */
michael@0 565 movdqu (%rdi), %xmm2 /* first key block */
michael@0 566 movdqu 160(%rdi), %xmm12 /* last key block */
michael@0 567 xorl %eax, %eax
michael@0 568 cmpq $128, %r9
michael@0 569 jb 1f
michael@0 570 leaq -128(%r9), %r11
michael@0 571 2: movdqu (%r8, %rax), %xmm3 /* 1st data block */
michael@0 572 movdqu 16(%r8, %rax), %xmm4 /* 2d data block */
michael@0 573 movdqu 32(%r8, %rax), %xmm5
michael@0 574 movdqu 48(%r8, %rax), %xmm6
michael@0 575 movdqu 64(%r8, %rax), %xmm7
michael@0 576 movdqu 80(%r8, %rax), %xmm8
michael@0 577 movdqu 96(%r8, %rax), %xmm9
michael@0 578 movdqu 112(%r8, %rax), %xmm10
michael@0 579 pxor %xmm12, %xmm3
michael@0 580 pxor %xmm12, %xmm4
michael@0 581 pxor %xmm12, %xmm5
michael@0 582 pxor %xmm12, %xmm6
michael@0 583 pxor %xmm12, %xmm7
michael@0 584 pxor %xmm12, %xmm8
michael@0 585 pxor %xmm12, %xmm9
michael@0 586 pxor %xmm12, %xmm10
michael@0 587
michael@0 588 // complete loop unrolling
michael@0 589 movdqu 144(%rdi), %xmm1
michael@0 590 movdqu 128(%rdi), %xmm11
michael@0 591 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 592 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 593 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 594 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 595 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 596 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 597 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 598 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 599 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 600 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 601 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 602 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 603 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 604 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 605 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 606 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 607
michael@0 608 movdqu 112(%rdi), %xmm1
michael@0 609 movdqu 96(%rdi), %xmm11
michael@0 610 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 611 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 612 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 613 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 614 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 615 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 616 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 617 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 618 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 619 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 620 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 621 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 622 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 623 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 624 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 625 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 626
michael@0 627 movdqu 80(%rdi), %xmm1
michael@0 628 movdqu 64(%rdi), %xmm11
michael@0 629 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 630 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 631 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 632 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 633 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 634 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 635 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 636 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 637 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 638 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 639 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 640 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 641 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 642 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 643 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 644 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 645
michael@0 646 movdqu 48(%rdi), %xmm1
michael@0 647 movdqu 32(%rdi), %xmm11
michael@0 648 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 649 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 650 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 651 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 652 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 653 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 654 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 655 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 656 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 657 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 658 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 659 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 660 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 661 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 662 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 663 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 664
michael@0 665 movdqu 16(%rdi), %xmm1
michael@0 666 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 667 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 668 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 669 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 670 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 671 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 672 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 673 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 674 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
michael@0 675 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
michael@0 676 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
michael@0 677 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
michael@0 678 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
michael@0 679 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
michael@0 680 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
michael@0 681 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
michael@0 682
michael@0 683 pxor %xmm0, %xmm3
michael@0 684 movdqu (%r8, %rax), %xmm0
michael@0 685 pxor %xmm0, %xmm4
michael@0 686 movdqu 16(%r8, %rax), %xmm0
michael@0 687 pxor %xmm0, %xmm5
michael@0 688 movdqu 32(%r8, %rax), %xmm0
michael@0 689 pxor %xmm0, %xmm6
michael@0 690 movdqu 48(%r8, %rax), %xmm0
michael@0 691 pxor %xmm0, %xmm7
michael@0 692 movdqu 64(%r8, %rax), %xmm0
michael@0 693 pxor %xmm0, %xmm8
michael@0 694 movdqu 80(%r8, %rax), %xmm0
michael@0 695 pxor %xmm0, %xmm9
michael@0 696 movdqu 96(%r8, %rax), %xmm0
michael@0 697 pxor %xmm0, %xmm10
michael@0 698 movdqu 112(%r8, %rax), %xmm0
michael@0 699 movdqu %xmm3, (%rsi, %rax)
michael@0 700 movdqu %xmm4, 16(%rsi, %rax)
michael@0 701 movdqu %xmm5, 32(%rsi, %rax)
michael@0 702 movdqu %xmm6, 48(%rsi, %rax)
michael@0 703 movdqu %xmm7, 64(%rsi, %rax)
michael@0 704 movdqu %xmm8, 80(%rsi, %rax)
michael@0 705 movdqu %xmm9, 96(%rsi, %rax)
michael@0 706 movdqu %xmm10, 112(%rsi, %rax)
michael@0 707 addq $128, %rax
michael@0 708 cmpq %r11, %rax
michael@0 709 jbe 2b
michael@0 710 1: cmpq %rax, %r9
michael@0 711 je 5f
michael@0 712
michael@0 713 movdqu 16(%rdi), %xmm3
michael@0 714 movdqu 32(%rdi), %xmm4
michael@0 715 movdqu 48(%rdi), %xmm5
michael@0 716 movdqu 64(%rdi), %xmm6
michael@0 717 movdqu 80(%rdi), %xmm7
michael@0 718 movdqu 96(%rdi), %xmm8
michael@0 719 movdqu 112(%rdi), %xmm9
michael@0 720 movdqu 128(%rdi), %xmm10
michael@0 721 movdqu 144(%rdi), %xmm11
michael@0 722
michael@0 723 4: movdqu (%r8, %rax), %xmm1
michael@0 724 movdqa %xmm1, %xmm13
michael@0 725 pxor %xmm12, %xmm1
michael@0 726 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
michael@0 727 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
michael@0 728 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
michael@0 729 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
michael@0 730 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
michael@0 731 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
michael@0 732 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
michael@0 733 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
michael@0 734 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
michael@0 735 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
michael@0 736 pxor %xmm0, %xmm1
michael@0 737 movdqu %xmm1, (%rsi, %rax)
michael@0 738 movdqa %xmm13, %xmm0
michael@0 739 addq $16, %rax
michael@0 740 cmpq %rax, %r9
michael@0 741 jne 4b
michael@0 742
michael@0 743 5: movdqu %xmm0, (%rdx)
michael@0 744
michael@0 745 xor %eax, %eax
michael@0 746 ret
michael@0 747 .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
michael@0 748
michael@0 749 /* in %rdi : the key (24 bytes are read: 16 by movdqu, 8 by movq)
michael@0 750 in %rsi : buffer for expanded key (24 + 8 * 24 = 216 bytes are stored)
michael@0 751 The raw key is copied to the buffer first; each of the eight aeskeygenassist + key_expansion192 steps below then appends 24 more bytes. */
michael@0 752 .type intel_aes_encrypt_init_192,@function
michael@0 753 .globl intel_aes_encrypt_init_192
michael@0 754 .align 16
michael@0 755 intel_aes_encrypt_init_192:
michael@0 756 movdqu (%rdi), %xmm1 /* xmm1 = key bytes 0..15 */
michael@0 757 movq 16(%rdi), %xmm3 /* low qword of xmm3 = key bytes 16..23 */
michael@0 758 movdqu %xmm1, (%rsi) /* the raw key forms the first 24 bytes of the schedule */
michael@0 759 movq %xmm3, 16(%rsi)
michael@0 760 leaq 24(%rsi), %rsi /* rsi = write cursor; key_expansion192 advances it by 24 per call */
michael@0 761
michael@0 762 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
michael@0 763 call key_expansion192
michael@0 764 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
michael@0 765 call key_expansion192
michael@0 766 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
michael@0 767 call key_expansion192
michael@0 768 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
michael@0 769 call key_expansion192
michael@0 770 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
michael@0 771 call key_expansion192
michael@0 772 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
michael@0 773 call key_expansion192
michael@0 774 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
michael@0 775 call key_expansion192
michael@0 776 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
michael@0 777 call key_expansion192
michael@0 778
michael@0 779 ret /* %eax is zero here: key_expansion192 clears it and nothing after sets it */
michael@0 780 .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
michael@0 781
michael@0 782
michael@0 783 /* in %rdi : the key (24 bytes are read: 16 by movdqu, 8 by movq)
michael@0 784 in %rsi : buffer for expanded key (24 + 8 * 24 = 216 bytes are stored)
michael@0 785 Runs the same eight key_expansion192 steps as intel_aes_encrypt_init_192 and, as soon as each 16-byte slot of the buffer is complete, rewrites slots 1..11 in place with aesimc. Slot 0 (bytes 0..15) and slot 12 (bytes 192..207) are left as generated. The expansion itself continues from %xmm1/%xmm3, which always hold the untransformed words. */
michael@0 786 .type intel_aes_decrypt_init_192,@function
michael@0 787 .globl intel_aes_decrypt_init_192
michael@0 788 .align 16
michael@0 789 intel_aes_decrypt_init_192:
michael@0 790 movdqu (%rdi), %xmm1 /* xmm1 = key bytes 0..15 */
michael@0 791 movq 16(%rdi), %xmm3 /* low qword of xmm3 = key bytes 16..23 */
michael@0 792 movdqu %xmm1, (%rsi)
michael@0 793 movq %xmm3, 16(%rsi)
michael@0 794 leaq 24(%rsi), %rsi /* rsi = write cursor; key_expansion192 advances it by 24 per call */
michael@0 795
michael@0 796 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
michael@0 797 call key_expansion192
michael@0 798 movups -32(%rsi), %xmm2 /* rsi = buffer + 48 here: slot 1 (bytes 16..31) */
michael@0 799 movups -16(%rsi), %xmm4 /* slot 2 (bytes 32..47) */
michael@0 800 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
michael@0 801 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
michael@0 802 movups %xmm2, -32(%rsi)
michael@0 803 movups %xmm4, -16(%rsi) /* xmm2 and xmm4 are free again: both are rewritten before their next use */
michael@0 804 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
michael@0 805 call key_expansion192
michael@0 806 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 807 movups %xmm2, -24(%rsi) /* rsi = buffer + 72: overwrite slot 3 (bytes 48..63); its plain value stays in xmm1 */
michael@0 808 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
michael@0 809 call key_expansion192
michael@0 810 movups -32(%rsi), %xmm2 /* rsi = buffer + 96: slot 4 (bytes 64..79) */
michael@0 811 movups -16(%rsi), %xmm4 /* slot 5 (bytes 80..95) */
michael@0 812 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
michael@0 813 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
michael@0 814 movups %xmm2, -32(%rsi)
michael@0 815 movups %xmm4, -16(%rsi)
michael@0 816 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
michael@0 817 call key_expansion192
michael@0 818 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 819 movups %xmm2, -24(%rsi) /* rsi = buffer + 120: overwrite slot 6 (bytes 96..111) */
michael@0 820 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
michael@0 821 call key_expansion192
michael@0 822 movups -32(%rsi), %xmm2 /* rsi = buffer + 144: slot 7 (bytes 112..127) */
michael@0 823 movups -16(%rsi), %xmm4 /* slot 8 (bytes 128..143) */
michael@0 824 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
michael@0 825 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
michael@0 826 movups %xmm2, -32(%rsi)
michael@0 827 movups %xmm4, -16(%rsi)
michael@0 828 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
michael@0 829 call key_expansion192
michael@0 830 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
michael@0 831 movups %xmm2, -24(%rsi) /* rsi = buffer + 168: overwrite slot 9 (bytes 144..159) */
michael@0 832 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
michael@0 833 call key_expansion192
michael@0 834 movups -32(%rsi), %xmm2 /* rsi = buffer + 192: slot 10 (bytes 160..175) */
michael@0 835 movups -16(%rsi), %xmm4 /* slot 11 (bytes 176..191) */
michael@0 836 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
michael@0 837 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
michael@0 838 movups %xmm2, -32(%rsi)
michael@0 839 movups %xmm4, -16(%rsi)
michael@0 840 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
michael@0 841 call key_expansion192
michael@0 842
michael@0 843 ret /* the last call stored bytes 192..215; slot 12 is deliberately not passed through aesimc */
michael@0 844 .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
michael@0 845
michael@0 846
michael@0 847 .type key_expansion192,@function /* helper without .globl: one 24-byte step of the 192-bit key schedule */
michael@0 848 .align 16
michael@0 849 key_expansion192: /* in: xmm1 = previous 4 words, low qword of xmm3 = previous 2 words, xmm2 = aeskeygenassist result, rsi = write cursor. out: xmm1 and xmm3 updated, 24 bytes stored, rsi += 24, eax = 0. scratch: xmm4, xmm5 */
michael@0 850 pshufd $0x55, %xmm2, %xmm2 /* broadcast dword 1 of the aeskeygenassist result to all four lanes */
michael@0 851 xor %eax, %eax
michael@0 852 movd %eax, %xmm4 /* xmm4 = 0 */
michael@0 853 shufps $0x10, %xmm1, %xmm4 /* xmm4 = (0, 0, w1, w0), listing dwords 0..3 of old xmm1 = (w0, w1, w2, w3) */
michael@0 854 pxor %xmm4, %xmm1 /* xmm1 = (w0, w1, w2^w1, w3^w0) */
michael@0 855 shufps $0x8c, %xmm1, %xmm4 /* xmm4 = (0, w0, w0, w1^w2) */
michael@0 856 pxor %xmm2, %xmm1 /* fold the broadcast word into every lane */
michael@0 857 pxor %xmm4, %xmm1 /* each lane now also carries the XOR of all lower old lanes (running prefix XOR) */
michael@0 858 movdqu %xmm1, (%rsi) /* store the four new words */
michael@0 859 addq $16, %rsi
michael@0 860
michael@0 861 pshufd $0xff, %xmm1, %xmm4 /* broadcast the last new word (dword 3 of xmm1) */
michael@0 862 movd %eax, %xmm5 /* xmm5 = 0 (eax is still zero) */
michael@0 863 shufps $0x00, %xmm3, %xmm5 /* xmm5 = (0, 0, v0, v0) with v0 = dword 0 of old xmm3 */
michael@0 864 shufps $0x08, %xmm3, %xmm5 /* xmm5 = (0, v0, v0, v0) */
michael@0 865 pxor %xmm4, %xmm3 /* both low words ^= last new word of xmm1 */
michael@0 866 pxor %xmm5, %xmm3 /* dword 1 additionally ^= old dword 0 */
michael@0 867 movq %xmm3, (%rsi) /* only the low 8 bytes (two words) are stored */
michael@0 868 addq $8, %rsi
michael@0 869 ret
michael@0 870 .size key_expansion192, .-key_expansion192
michael@0 871
michael@0 872
michael@0 873 /* in %rdi : cx - context (thirteen 16-byte round keys are read from byte offsets 48..255 of it)
michael@0 874 in %rsi : output - pointer to output buffer
michael@0 875 in %rdx : outputLen - pointer to variable for length of output
michael@0 876 (filled by caller)
michael@0 877 in %rcx : maxOutputLen - length of output buffer
michael@0 878 in %r8 : input - pointer to input buffer
michael@0 879 in %r9 : inputLen - length of input buffer
michael@0 880 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 881 %rdx and %rcx are not referenced by this routine; %eax is zero on return. Blocks are processed eight at a time while at least 128 bytes remain, then one at a time. NOTE(review): the single-block loop exits only when %rax equals %r9, so inputLen is presumably always a multiple of 16 - confirm against callers. */
michael@0 882 .type intel_aes_encrypt_ecb_192,@function
michael@0 883 .globl intel_aes_encrypt_ecb_192
michael@0 884 .align 16
michael@0 885 intel_aes_encrypt_ecb_192:
michael@0 886 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 887 leaq 48(%rdi), %rdi /* rdi = address of round key 0 */
michael@0 888
michael@0 889 movdqu (%rdi), %xmm2 /* xmm2 = round key 0, XORed into each block before the first aesenc */
michael@0 890 movdqu 192(%rdi), %xmm14 /* xmm14 = round key 12, used by aesenclast */
michael@0 891 xorl %eax, %eax /* rax = byte offset into input and output */
michael@0 892 // cmpq $8*16, %r9
michael@0 893 cmpq $128, %r9
michael@0 894 jb 1f /* fewer than eight blocks: go straight to the single-block loop */
michael@0 895 // leaq -8*16(%r9), %r11
michael@0 896 leaq -128(%r9), %r11 /* r11 = largest offset at which eight whole blocks still fit */
michael@0 897 2: movdqu (%r8, %rax), %xmm3
michael@0 898 movdqu 16(%r8, %rax), %xmm4
michael@0 899 movdqu 32(%r8, %rax), %xmm5
michael@0 900 movdqu 48(%r8, %rax), %xmm6
michael@0 901 movdqu 64(%r8, %rax), %xmm7
michael@0 902 movdqu 80(%r8, %rax), %xmm8
michael@0 903 movdqu 96(%r8, %rax), %xmm9
michael@0 904 movdqu 112(%r8, %rax), %xmm10
michael@0 905 pxor %xmm2, %xmm3
michael@0 906 pxor %xmm2, %xmm4
michael@0 907 pxor %xmm2, %xmm5
michael@0 908 pxor %xmm2, %xmm6
michael@0 909 pxor %xmm2, %xmm7
michael@0 910 pxor %xmm2, %xmm8
michael@0 911 pxor %xmm2, %xmm9
michael@0 912 pxor %xmm2, %xmm10
michael@0 913
michael@0 914 // complete loop unrolling
michael@0 915 movdqu 16(%rdi), %xmm1
michael@0 916 movdqu 32(%rdi), %xmm11
michael@0 917 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 918 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 919 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 920 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 921 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 922 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 923 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 924 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 925 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 926 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 927 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 928 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 929 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 930 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 931 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 932 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 933
michael@0 934 movdqu 48(%rdi), %xmm1
michael@0 935 movdqu 64(%rdi), %xmm11
michael@0 936 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 937 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 938 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 939 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 940 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 941 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 942 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 943 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 944 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 945 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 946 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 947 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 948 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 949 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 950 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 951 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 952
michael@0 953 movdqu 80(%rdi), %xmm1
michael@0 954 movdqu 96(%rdi), %xmm11
michael@0 955 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 956 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 957 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 958 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 959 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 960 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 961 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 962 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 963 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 964 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 965 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 966 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 967 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 968 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 969 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 970 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 971
michael@0 972 movdqu 112(%rdi), %xmm1
michael@0 973 movdqu 128(%rdi), %xmm11
michael@0 974 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 975 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 976 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 977 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 978 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 979 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 980 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 981 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 982 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 983 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 984 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 985 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 986 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 987 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 988 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 989 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 990
michael@0 991 movdqu 144(%rdi), %xmm1
michael@0 992 movdqu 160(%rdi), %xmm11
michael@0 993 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 994 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 995 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 996 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 997 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 998 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 999 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1000 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1001 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1002 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1003 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1004 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1005 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1006 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1007 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1008 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1009
michael@0 1010 movdqu 176(%rdi), %xmm1
michael@0 1011 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1012 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1013 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1014 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1015 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1016 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1017 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1018 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1019 .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */
michael@0 1020 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */
michael@0 1021 .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */
michael@0 1022 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */
michael@0 1023 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */
michael@0 1024 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */
michael@0 1025 .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */
michael@0 1026 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */
michael@0 1027
michael@0 1028 movdqu %xmm3, (%rsi, %rax)
michael@0 1029 movdqu %xmm4, 16(%rsi, %rax)
michael@0 1030 movdqu %xmm5, 32(%rsi, %rax)
michael@0 1031 movdqu %xmm6, 48(%rsi, %rax)
michael@0 1032 movdqu %xmm7, 64(%rsi, %rax)
michael@0 1033 movdqu %xmm8, 80(%rsi, %rax)
michael@0 1034 movdqu %xmm9, 96(%rsi, %rax)
michael@0 1035 movdqu %xmm10, 112(%rsi, %rax)
michael@0 1036 // addq $8*16, %rax
michael@0 1037 addq $128, %rax
michael@0 1038 cmpq %r11, %rax
michael@0 1039 jbe 2b /* unsigned compare: repeat while another eight blocks fit */
michael@0 1040 1: cmpq %rax, %r9
michael@0 1041 je 5f /* nothing left over */
michael@0 1042
michael@0 1043 movdqu 16(%rdi), %xmm3 /* the block registers are free now: keep round keys 1..11 in xmm3..xmm13 for the single-block loop */
michael@0 1044 movdqu 32(%rdi), %xmm4
michael@0 1045 movdqu 48(%rdi), %xmm5
michael@0 1046 movdqu 64(%rdi), %xmm6
michael@0 1047 movdqu 80(%rdi), %xmm7
michael@0 1048 movdqu 96(%rdi), %xmm8
michael@0 1049 movdqu 112(%rdi), %xmm9
michael@0 1050 movdqu 128(%rdi), %xmm10
michael@0 1051 movdqu 144(%rdi), %xmm11
michael@0 1052 movdqu 160(%rdi), %xmm12
michael@0 1053 movdqu 176(%rdi), %xmm13
michael@0 1054
michael@0 1055 4: movdqu (%r8, %rax), %xmm1
michael@0 1056 pxor %xmm2, %xmm1
michael@0 1057 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
michael@0 1058 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
michael@0 1059 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
michael@0 1060 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
michael@0 1061 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
michael@0 1062 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
michael@0 1063 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
michael@0 1064 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
michael@0 1065 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
michael@0 1066 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
michael@0 1067 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
michael@0 1068 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
michael@0 1069 movdqu %xmm1, (%rsi, %rax)
michael@0 1070 addq $16, %rax
michael@0 1071 cmpq %rax, %r9
michael@0 1072 jne 4b /* exits only on exact equality with inputLen */
michael@0 1073
michael@0 1074 5: xor %eax, %eax /* return value 0 */
michael@0 1075 ret
michael@0 1076 .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
michael@0 1077
michael@0 1078
michael@0 1079 /* in %rdi : cx - context
michael@0 1080 in %rsi : output - pointer to output buffer
michael@0 1081 in %rdx : outputLen - pointer to variable for length of output
michael@0 1082 (filled by caller)
michael@0 1083 in %rcx : maxOutputLen - length of output buffer
michael@0 1084 in %r8 : input - pointer to input buffer
michael@0 1085 in %r9 : inputLen - length of input buffer
michael@0 1086 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 1087 */
michael@0 1088 .type intel_aes_decrypt_ecb_192,@function
michael@0 1089 .globl intel_aes_decrypt_ecb_192
michael@0 1090 .align 16
michael@0 1091 intel_aes_decrypt_ecb_192:
michael@0 1092 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 1093 leaq 48(%rdi), %rdi
michael@0 1094
michael@0 1095 movdqu (%rdi), %xmm2
michael@0 1096 movdqu 192(%rdi), %xmm14
michael@0 1097 xorl %eax, %eax
michael@0 1098 // cmpq $8*16, %r9
michael@0 1099 cmpq $128, %r9
michael@0 1100 jb 1f
michael@0 1101 // leaq -8*16(%r9), %r11
michael@0 1102 leaq -128(%r9), %r11
michael@0 1103 2: movdqu (%r8, %rax), %xmm3
michael@0 1104 movdqu 16(%r8, %rax), %xmm4
michael@0 1105 movdqu 32(%r8, %rax), %xmm5
michael@0 1106 movdqu 48(%r8, %rax), %xmm6
michael@0 1107 movdqu 64(%r8, %rax), %xmm7
michael@0 1108 movdqu 80(%r8, %rax), %xmm8
michael@0 1109 movdqu 96(%r8, %rax), %xmm9
michael@0 1110 movdqu 112(%r8, %rax), %xmm10
michael@0 1111 pxor %xmm14, %xmm3
michael@0 1112 pxor %xmm14, %xmm4
michael@0 1113 pxor %xmm14, %xmm5
michael@0 1114 pxor %xmm14, %xmm6
michael@0 1115 pxor %xmm14, %xmm7
michael@0 1116 pxor %xmm14, %xmm8
michael@0 1117 pxor %xmm14, %xmm9
michael@0 1118 pxor %xmm14, %xmm10
michael@0 1119
michael@0 1120 // complete loop unrolling
michael@0 1121 movdqu 176(%rdi), %xmm1
michael@0 1122 movdqu 160(%rdi), %xmm11
michael@0 1123 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1124 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1125 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1126 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1127 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1128 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1129 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1130 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1131 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1132 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1133 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1134 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1135 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1136 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1137 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1138 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1139
michael@0 1140 movdqu 144(%rdi), %xmm1
michael@0 1141 movdqu 128(%rdi), %xmm11
michael@0 1142 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1143 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1144 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1145 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1146 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1147 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1148 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1149 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1150 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1151 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1152 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1153 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1154 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1155 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1156 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1157 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1158
michael@0 1159 movdqu 112(%rdi), %xmm1
michael@0 1160 movdqu 96(%rdi), %xmm11
michael@0 1161 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1162 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1163 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1164 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1165 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1166 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1167 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1168 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1169 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1170 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1171 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1172 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1173 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1174 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1175 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1176 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1177
michael@0 1178 movdqu 80(%rdi), %xmm1
michael@0 1179 movdqu 64(%rdi), %xmm11
michael@0 1180 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1181 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1182 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1183 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1184 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1185 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1186 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1187 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1188 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1189 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1190 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1191 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1192 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1193 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1194 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1195 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1196
michael@0 1197 movdqu 48(%rdi), %xmm1
michael@0 1198 movdqu 32(%rdi), %xmm11
michael@0 1199 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1200 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1201 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1202 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1203 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1204 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1205 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1206 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1207 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1208 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1209 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1210 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1211 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1212 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1213 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1214 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1215
michael@0 1216 movdqu 16(%rdi), %xmm1
michael@0 1217 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1218 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1219 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1220 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1221 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1222 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1223 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1224 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1225 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
michael@0 1226 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
michael@0 1227 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
michael@0 1228 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
michael@0 1229 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
michael@0 1230 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
michael@0 1231 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
michael@0 1232 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
michael@0 1233
michael@0 1234 movdqu %xmm3, (%rsi, %rax)
michael@0 1235 movdqu %xmm4, 16(%rsi, %rax)
michael@0 1236 movdqu %xmm5, 32(%rsi, %rax)
michael@0 1237 movdqu %xmm6, 48(%rsi, %rax)
michael@0 1238 movdqu %xmm7, 64(%rsi, %rax)
michael@0 1239 movdqu %xmm8, 80(%rsi, %rax)
michael@0 1240 movdqu %xmm9, 96(%rsi, %rax)
michael@0 1241 movdqu %xmm10, 112(%rsi, %rax)
michael@0 1242 // addq $8*16, %rax
michael@0 1243 addq $128, %rax
michael@0 1244 cmpq %r11, %rax
michael@0 1245 jbe 2b
michael@0 1246 1: cmpq %rax, %r9
michael@0 1247 je 5f
michael@0 1248
michael@0 1249 movdqu 16(%rdi), %xmm3
michael@0 1250 movdqu 32(%rdi), %xmm4
michael@0 1251 movdqu 48(%rdi), %xmm5
michael@0 1252 movdqu 64(%rdi), %xmm6
michael@0 1253 movdqu 80(%rdi), %xmm7
michael@0 1254 movdqu 96(%rdi), %xmm8
michael@0 1255 movdqu 112(%rdi), %xmm9
michael@0 1256 movdqu 128(%rdi), %xmm10
michael@0 1257 movdqu 144(%rdi), %xmm11
michael@0 1258 movdqu 160(%rdi), %xmm12
michael@0 1259 movdqu 176(%rdi), %xmm13
michael@0 1260
michael@0 1261 4: movdqu (%r8, %rax), %xmm1
michael@0 1262 pxor %xmm14, %xmm1
michael@0 1263 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
michael@0 1264 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
michael@0 1265 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
michael@0 1266 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
michael@0 1267 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
michael@0 1268 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
michael@0 1269 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
michael@0 1270 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
michael@0 1271 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
michael@0 1272 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
michael@0 1273 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
michael@0 1274 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
michael@0 1275 movdqu %xmm1, (%rsi, %rax)
michael@0 1276 addq $16, %rax
michael@0 1277 cmpq %rax, %r9
michael@0 1278 jne 4b
michael@0 1279
michael@0 1280 5: xor %eax, %eax
michael@0 1281 ret
michael@0 1282 .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
michael@0 1283
michael@0 1284
michael@0 1285 /* in %rdi : cx - context
michael@0 1286 in %rsi : output - pointer to output buffer
michael@0 1287 in %rdx : outputLen - pointer to variable for length of output
michael@0 1288 (filled by caller)
michael@0 1289 in %rcx : maxOutputLen - length of output buffer
michael@0 1290 in %r8 : input - pointer to input buffer
michael@0 1291 in %r9 : inputLen - length of input buffer
michael@0 1292 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 1293 */
/* AES-CBC encryption using 13 round keys (the 192-bit key schedule).
   Reads the IV from cx + 16 and the round keys from cx + 48 .. cx + 240,
   encrypts inputLen bytes from input to output one 16-byte block at a time,
   and writes the last ciphertext block back to cx + 16 as the next IV.
   Returns 0 in eax. When inputLen is 0 nothing is read or written.
   The outputLen and maxOutputLen arguments are not used; rdx is reused
   as a pointer to the IV.
   NOTE(review): the loop only stops when the byte offset equals inputLen
   exactly, so inputLen is assumed to be a multiple of 16; confirm that
   the caller enforces this. */
michael@0 1294 .type intel_aes_encrypt_cbc_192,@function
michael@0 1295 .globl intel_aes_encrypt_cbc_192
michael@0 1296 .align 16
michael@0 1297 intel_aes_encrypt_cbc_192:
/* Empty input: skip everything, including the IV write-back. */
michael@0 1298 testq %r9, %r9
michael@0 1299 je 2f
michael@0 1300
michael@0 1301 // leaq IV_OFFSET(%rdi), %rdx
michael@0 1302 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
/* rdx = address of the IV (cx + 16), rdi = address of round key 0 (cx + 48).
   The literal offsets mirror the IV_OFFSET / EXPANDED_KEY_OFFSET names
   shown in the commented-out lines above. */
michael@0 1303 leaq 16(%rdi), %rdx
michael@0 1304 leaq 48(%rdi), %rdi
michael@0 1305
/* xmm0 = chaining value (IV, later the previous ciphertext block).
   xmm2 .. xmm14 = round keys 0 .. 12, kept in registers for the whole loop. */
michael@0 1306 movdqu (%rdx), %xmm0
michael@0 1307 movdqu (%rdi), %xmm2
michael@0 1308 movdqu 16(%rdi), %xmm3
michael@0 1309 movdqu 32(%rdi), %xmm4
michael@0 1310 movdqu 48(%rdi), %xmm5
michael@0 1311 movdqu 64(%rdi), %xmm6
michael@0 1312 movdqu 80(%rdi), %xmm7
michael@0 1313 movdqu 96(%rdi), %xmm8
michael@0 1314 movdqu 112(%rdi), %xmm9
michael@0 1315 movdqu 128(%rdi), %xmm10
michael@0 1316 movdqu 144(%rdi), %xmm11
michael@0 1317 movdqu 160(%rdi), %xmm12
michael@0 1318 movdqu 176(%rdi), %xmm13
michael@0 1319 movdqu 192(%rdi), %xmm14
michael@0 1320
/* rax = byte offset into input/output, advanced by 16 per block. */
michael@0 1321 xorl %eax, %eax
/* Per block: xor the plaintext with the chaining value, xor in round key 0,
   then 11 aesenc rounds and the final aesenclast with round key 12. */
michael@0 1322 1: movdqu (%r8, %rax), %xmm1
michael@0 1323 pxor %xmm0, %xmm1
michael@0 1324 pxor %xmm2, %xmm1
michael@0 1325 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
michael@0 1326 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
michael@0 1327 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
michael@0 1328 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
michael@0 1329 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
michael@0 1330 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
michael@0 1331 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
michael@0 1332 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
michael@0 1333 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
michael@0 1334 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
michael@0 1335 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
michael@0 1336 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
/* Store the ciphertext block and keep a copy as the next chaining value. */
michael@0 1337 movdqu %xmm1, (%rsi, %rax)
michael@0 1338 movdqa %xmm1, %xmm0
michael@0 1339 addq $16, %rax
michael@0 1340 cmpq %rax, %r9
michael@0 1341 jne 1b
michael@0 1342
/* Persist the last ciphertext block as the IV for a following call. */
michael@0 1343 movdqu %xmm0, (%rdx)
michael@0 1344
/* Return value 0 in eax. */
michael@0 1345 2: xor %eax, %eax
michael@0 1346 ret
michael@0 1347 .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
michael@0 1348
michael@0 1349
michael@0 1350 /* in %rdi : cx - context
michael@0 1351 in %rsi : output - pointer to output buffer
michael@0 1352 in %rdx : outputLen - pointer to variable for length of output
michael@0 1353 (filled by caller)
michael@0 1354 in %rcx : maxOutputLen - length of output buffer
michael@0 1355 in %r8 : input - pointer to input buffer
michael@0 1356 in %r9 : inputLen - length of input buffer
michael@0 1357 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 1358 */
/* AES-CBC decryption using 13 round keys (the 192-bit key schedule).
   Reads the IV from cx + 16 and the round keys from cx + 48 .. cx + 240.
   Processes 128 bytes (8 blocks) per iteration while at least 128 bytes
   remain, then finishes the rest one 16-byte block at a time. The last
   ciphertext block consumed is written back to cx + 16 as the next IV
   (with inputLen == 0 the IV is rewritten with its own value).
   Returns 0 in eax. The outputLen and maxOutputLen arguments are not used;
   rdx is reused as a pointer to the IV.
   Round keys are applied from offset 192 down to offset 0 with aesdec /
   aesdeclast; presumably the schedule was prepared by the matching decrypt
   init routine (inner keys passed through aesimc) - confirm against caller.
   NOTE(review): the single-block loop only stops when the byte offset
   equals inputLen exactly, so inputLen is assumed to be a multiple of 16. */
michael@0 1359 .type intel_aes_decrypt_cbc_192,@function
michael@0 1360 .globl intel_aes_decrypt_cbc_192
michael@0 1361 .align 16
michael@0 1362 intel_aes_decrypt_cbc_192:
/* rdx = address of the IV (cx + 16), rdi = address of round key 0 (cx + 48). */
michael@0 1363 leaq 16(%rdi), %rdx
michael@0 1364 leaq 48(%rdi), %rdi
michael@0 1365
/* xmm0  = chaining value (IV, later the previous ciphertext block)
   xmm2  = round key at offset 0, used by the final aesdeclast
   xmm14 = round key at offset 192, xor-ed into the ciphertext first
   rax   = byte offset into input/output */
michael@0 1366 movdqu (%rdx), %xmm0
michael@0 1367 movdqu (%rdi), %xmm2
michael@0 1368 movdqu 192(%rdi), %xmm14
michael@0 1369 xorl %eax, %eax
/* Fewer than 128 bytes in total: go straight to the single-block path.
   Otherwise r11 = inputLen - 128, the last offset at which a full
   8-block chunk still fits. */
michael@0 1370 cmpq $128, %r9
michael@0 1371 jb 1f
michael@0 1372 leaq -128(%r9), %r11
/* 8-block loop: load eight ciphertext blocks into xmm3 .. xmm10. */
michael@0 1373 2: movdqu (%r8, %rax), %xmm3
michael@0 1374 movdqu 16(%r8, %rax), %xmm4
michael@0 1375 movdqu 32(%r8, %rax), %xmm5
michael@0 1376 movdqu 48(%r8, %rax), %xmm6
michael@0 1377 movdqu 64(%r8, %rax), %xmm7
michael@0 1378 movdqu 80(%r8, %rax), %xmm8
michael@0 1379 movdqu 96(%r8, %rax), %xmm9
michael@0 1380 movdqu 112(%r8, %rax), %xmm10
/* Initial key addition with the round key at offset 192. */
michael@0 1381 pxor %xmm14, %xmm3
michael@0 1382 pxor %xmm14, %xmm4
michael@0 1383 pxor %xmm14, %xmm5
michael@0 1384 pxor %xmm14, %xmm6
michael@0 1385 pxor %xmm14, %xmm7
michael@0 1386 pxor %xmm14, %xmm8
michael@0 1387 pxor %xmm14, %xmm9
michael@0 1388 pxor %xmm14, %xmm10
michael@0 1389
michael@0 1390 // complete loop unrolling
/* Round keys are reloaded two at a time into xmm1 / xmm11 on every
   iteration (offsets 176 down to 16); each key is applied to all
   eight blocks before moving to the next key. */
michael@0 1391 movdqu 176(%rdi), %xmm1
michael@0 1392 movdqu 160(%rdi), %xmm11
michael@0 1393 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1394 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1395 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1396 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1397 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1398 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1399 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1400 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1401 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1402 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1403 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1404 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1405 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1406 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1407 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1408 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1409
michael@0 1410 movdqu 144(%rdi), %xmm1
michael@0 1411 movdqu 128(%rdi), %xmm11
michael@0 1412 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1413 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1414 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1415 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1416 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1417 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1418 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1419 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1420 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1421 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1422 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1423 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1424 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1425 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1426 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1427 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1428
michael@0 1429 movdqu 112(%rdi), %xmm1
michael@0 1430 movdqu 96(%rdi), %xmm11
michael@0 1431 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1432 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1433 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1434 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1435 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1436 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1437 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1438 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1439 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1440 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1441 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1442 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1443 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1444 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1445 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1446 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1447
michael@0 1448 movdqu 80(%rdi), %xmm1
michael@0 1449 movdqu 64(%rdi), %xmm11
michael@0 1450 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1451 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1452 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1453 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1454 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1455 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1456 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1457 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1458 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1459 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1460 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1461 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1462 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1463 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1464 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1465 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1466
michael@0 1467 movdqu 48(%rdi), %xmm1
michael@0 1468 movdqu 32(%rdi), %xmm11
michael@0 1469 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1470 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1471 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1472 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1473 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1474 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1475 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1476 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1477 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1478 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1479 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1480 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1481 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1482 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1483 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1484 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1485
/* Last regular round (key at offset 16), then aesdeclast with the key
   at offset 0 that has been held in xmm2 since function entry. */
michael@0 1486 movdqu 16(%rdi), %xmm1
michael@0 1487 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1488 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1489 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1490 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1491 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1492 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1493 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1494 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1495 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
michael@0 1496 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
michael@0 1497 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
michael@0 1498 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
michael@0 1499 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
michael@0 1500 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
michael@0 1501 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
michael@0 1502 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
michael@0 1503
/* CBC chaining: block 0 is xor-ed with the incoming chaining value in
   xmm0; each later block is xor-ed with the preceding ciphertext block,
   which is re-read from the input buffer. After the last load xmm0 holds
   ciphertext block 7, the chaining value for the next iteration. */
michael@0 1504 pxor %xmm0, %xmm3
michael@0 1505 movdqu (%r8, %rax), %xmm0
michael@0 1506 pxor %xmm0, %xmm4
michael@0 1507 movdqu 16(%r8, %rax), %xmm0
michael@0 1508 pxor %xmm0, %xmm5
michael@0 1509 movdqu 32(%r8, %rax), %xmm0
michael@0 1510 pxor %xmm0, %xmm6
michael@0 1511 movdqu 48(%r8, %rax), %xmm0
michael@0 1512 pxor %xmm0, %xmm7
michael@0 1513 movdqu 64(%r8, %rax), %xmm0
michael@0 1514 pxor %xmm0, %xmm8
michael@0 1515 movdqu 80(%r8, %rax), %xmm0
michael@0 1516 pxor %xmm0, %xmm9
michael@0 1517 movdqu 96(%r8, %rax), %xmm0
michael@0 1518 pxor %xmm0, %xmm10
michael@0 1519 movdqu 112(%r8, %rax), %xmm0
/* All reads of this 128-byte input chunk are complete before the first
   store below, so output may be the same buffer as input. */
michael@0 1520 movdqu %xmm3, (%rsi, %rax)
michael@0 1521 movdqu %xmm4, 16(%rsi, %rax)
michael@0 1522 movdqu %xmm5, 32(%rsi, %rax)
michael@0 1523 movdqu %xmm6, 48(%rsi, %rax)
michael@0 1524 movdqu %xmm7, 64(%rsi, %rax)
michael@0 1525 movdqu %xmm8, 80(%rsi, %rax)
michael@0 1526 movdqu %xmm9, 96(%rsi, %rax)
michael@0 1527 movdqu %xmm10, 112(%rsi, %rax)
/* Advance by 8 blocks; repeat while another full chunk fits
   (unsigned compare of the offset against inputLen - 128). */
michael@0 1528 addq $128, %rax
michael@0 1529 cmpq %r11, %rax
michael@0 1530 jbe 2b
/* Tail: nothing left means only the IV write-back remains. */
michael@0 1531 1: cmpq %rax, %r9
michael@0 1532 je 5f
michael@0 1533
/* Cache the round keys at offsets 16 .. 176 in xmm3 .. xmm13 for the
   single-block loop (xmm2 and xmm14 already hold offsets 0 and 192). */
michael@0 1534 movdqu 16(%rdi), %xmm3
michael@0 1535 movdqu 32(%rdi), %xmm4
michael@0 1536 movdqu 48(%rdi), %xmm5
michael@0 1537 movdqu 64(%rdi), %xmm6
michael@0 1538 movdqu 80(%rdi), %xmm7
michael@0 1539 movdqu 96(%rdi), %xmm8
michael@0 1540 movdqu 112(%rdi), %xmm9
michael@0 1541 movdqu 128(%rdi), %xmm10
michael@0 1542 movdqu 144(%rdi), %xmm11
michael@0 1543 movdqu 160(%rdi), %xmm12
michael@0 1544 movdqu 176(%rdi), %xmm13
michael@0 1545
/* Single-block loop: xmm15 keeps a copy of the ciphertext block so it
   can become the chaining value after the plaintext has been stored. */
michael@0 1546 4: movdqu (%r8, %rax), %xmm1
michael@0 1547 movdqa %xmm1, %xmm15
michael@0 1548 pxor %xmm14, %xmm1
michael@0 1549 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
michael@0 1550 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
michael@0 1551 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
michael@0 1552 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
michael@0 1553 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
michael@0 1554 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
michael@0 1555 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
michael@0 1556 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
michael@0 1557 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
michael@0 1558 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
michael@0 1559 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
michael@0 1560 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
/* Undo the chaining xor, store the plaintext, then promote the saved
   ciphertext block to be the next chaining value. */
michael@0 1561 pxor %xmm0, %xmm1
michael@0 1562 movdqu %xmm1, (%rsi, %rax)
michael@0 1563 movdqa %xmm15, %xmm0
michael@0 1564 addq $16, %rax
michael@0 1565 cmpq %rax, %r9
michael@0 1566 jne 4b
michael@0 1567
/* Write the final chaining value back to the IV slot in the context. */
michael@0 1568 5: movdqu %xmm0, (%rdx)
michael@0 1569
/* Return value 0 in eax. */
michael@0 1570 xor %eax, %eax
michael@0 1571 ret
michael@0 1572 .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
michael@0 1573
michael@0 1574 /* in %rdi : the key
michael@0 1575 in %rsi : buffer for expanded key
michael@0 1576 */
/* Expands a 32-byte AES key into 15 round keys (240 bytes) at rsi.
   Round keys 0 and 1 are the two halves of the key itself; six calls to
   key_expansion256 each append two more round keys, and the last round
   key is computed inline at the end. eax is 0 on return.
   Register contract shared with key_expansion256:
     xmm1 = most recent even-numbered round key
     xmm3 = most recent odd-numbered round key
     xmm2 = aeskeygenassist result computed from xmm3
     rsi  = address where the next round key is written
     eax  = 0 (key_expansion256 copies it into xmm6) */
michael@0 1577 .type intel_aes_encrypt_init_256,@function
michael@0 1578 .globl intel_aes_encrypt_init_256
michael@0 1579 .align 16
michael@0 1580 intel_aes_encrypt_init_256:
/* Copy the raw key into the first two round-key slots and point rsi
   at the third slot. */
michael@0 1581 movdqu (%rdi), %xmm1
michael@0 1582 movdqu 16(%rdi), %xmm3
michael@0 1583 movdqu %xmm1, (%rsi)
michael@0 1584 movdqu %xmm3, 16(%rsi)
michael@0 1585 leaq 32(%rsi), %rsi
michael@0 1586 xor %eax, %eax
michael@0 1587
/* The aeskeygenassist immediate is the round constant; it doubles on
   every step (0x01 .. 0x40). */
michael@0 1588 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
michael@0 1589 call key_expansion256
michael@0 1590 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
michael@0 1591 call key_expansion256
michael@0 1592 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
michael@0 1593 call key_expansion256
michael@0 1594 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
michael@0 1595 call key_expansion256
michael@0 1596 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
michael@0 1597 call key_expansion256
michael@0 1598 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
michael@0 1599 call key_expansion256
/* Final step: only one more round key is needed, so the first half of
   the key_expansion256 sequence is repeated inline (with xmm6 cleared
   by pxor instead of movd) and the result is stored as the last key. */
michael@0 1600 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
michael@0 1601 pxor %xmm6, %xmm6
michael@0 1602 pshufd $0xff, %xmm2, %xmm2
michael@0 1603 shufps $0x10, %xmm1, %xmm6
michael@0 1604 pxor %xmm6, %xmm1
michael@0 1605 shufps $0x8c, %xmm1, %xmm6
michael@0 1606 pxor %xmm2, %xmm1
michael@0 1607 pxor %xmm6, %xmm1
michael@0 1608 movdqu %xmm1, (%rsi)
michael@0 1609
michael@0 1610 ret
michael@0 1611 .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
michael@0 1612
michael@0 1613
michael@0 1614 /* in %rdi : the key
michael@0 1615 in %rsi : buffer for expanded key
michael@0 1616 */
/* Expands a 32-byte AES key into the 15 round keys (240 bytes) used by
   the aesdec-based decrypt routines. The expansion itself is the same as
   in intel_aes_encrypt_init_256, but every round key except the first and
   the last is stored after being transformed with aesimc. The untransformed
   keys stay in xmm1 / xmm3 so the expansion can continue from them.
   eax is 0 on return.
   Register contract shared with key_expansion256:
     xmm1 = most recent even-numbered round key
     xmm3 = most recent odd-numbered round key
     xmm2 = aeskeygenassist result computed from xmm3
     rsi  = address where the next round key is written
     eax  = 0 (key_expansion256 copies it into xmm6) */
michael@0 1617 .type intel_aes_decrypt_init_256,@function
michael@0 1618 .globl intel_aes_decrypt_init_256
michael@0 1619 .align 16
michael@0 1620 intel_aes_decrypt_init_256:
/* Round key 0 is stored unchanged; round key 1 is stored as
   aesimc(second key half). rsi then points at the third slot. */
michael@0 1621 movdqu (%rdi), %xmm1
michael@0 1622 movdqu 16(%rdi), %xmm3
michael@0 1623 movdqu %xmm1, (%rsi)
michael@0 1624 .byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */
michael@0 1625 movdqu %xmm4, 16(%rsi)
michael@0 1626 leaq 32(%rsi), %rsi
michael@0 1627 xor %eax, %eax
michael@0 1628
/* Each step: key_expansion256 writes two plain round keys and advances
   rsi by 32; the two slots just written (-32 and -16 relative to the new
   rsi) are then overwritten with their aesimc-transformed versions. */
michael@0 1629 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
michael@0 1630 call key_expansion256
michael@0 1631 .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
michael@0 1632 .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
michael@0 1633 movdqu %xmm4, -32(%rsi)
michael@0 1634 movdqu %xmm5, -16(%rsi)
michael@0 1635 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
michael@0 1636 call key_expansion256
michael@0 1637 .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
michael@0 1638 .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
michael@0 1639 movdqu %xmm4, -32(%rsi)
michael@0 1640 movdqu %xmm5, -16(%rsi)
michael@0 1641 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
michael@0 1642 call key_expansion256
michael@0 1643 .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
michael@0 1644 .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
michael@0 1645 movdqu %xmm4, -32(%rsi)
michael@0 1646 movdqu %xmm5, -16(%rsi)
michael@0 1647 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
michael@0 1648 call key_expansion256
michael@0 1649 .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
michael@0 1650 .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
michael@0 1651 movdqu %xmm4, -32(%rsi)
michael@0 1652 movdqu %xmm5, -16(%rsi)
michael@0 1653 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
michael@0 1654 call key_expansion256
michael@0 1655 .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
michael@0 1656 .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
michael@0 1657 movdqu %xmm4, -32(%rsi)
michael@0 1658 movdqu %xmm5, -16(%rsi)
michael@0 1659 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
michael@0 1660 call key_expansion256
michael@0 1661 .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
michael@0 1662 .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
michael@0 1663 movdqu %xmm4, -32(%rsi)
michael@0 1664 movdqu %xmm5, -16(%rsi)
/* Final step: compute the last round key inline (first half of the
   key_expansion256 sequence, xmm6 cleared by pxor) and store it
   without aesimc. */
michael@0 1665 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
michael@0 1666 pxor %xmm6, %xmm6
michael@0 1667 pshufd $0xff, %xmm2, %xmm2
michael@0 1668 shufps $0x10, %xmm1, %xmm6
michael@0 1669 pxor %xmm6, %xmm1
michael@0 1670 shufps $0x8c, %xmm1, %xmm6
michael@0 1671 pxor %xmm2, %xmm1
michael@0 1672 pxor %xmm6, %xmm1
michael@0 1673 movdqu %xmm1, (%rsi)
michael@0 1674
michael@0 1675 ret
michael@0 1676 .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
michael@0 1677
michael@0 1678
/* File-local helper (no .globl): one step of the 256-bit key expansion,
   producing two consecutive round keys.
   In:    xmm1 = previous even-numbered round key
          xmm3 = previous odd-numbered round key
          xmm2 = aeskeygenassist result computed from xmm3 by the caller
          rsi  = address at which to store the two new round keys
          eax  = value copied into xmm6; both visible callers pass 0
   Out:   xmm1, xmm3 = the two new round keys, also stored at (rsi) and
          16(rsi); rsi advanced by 32
   Clobb: xmm2, xmm4, xmm6
   Does not use the stack beyond the return address. */
michael@0 1679 .type key_expansion256,@function
michael@0 1680 .align 16
michael@0 1681 key_expansion256:
/* xmm6 = eax in the low dword, upper lanes zeroed by movd. */
michael@0 1682 movd %eax, %xmm6
/* Broadcast dword 3 of the keygenassist result to all four lanes. */
michael@0 1683 pshufd $0xff, %xmm2, %xmm2
/* The shufps / pxor pairs fold shifted copies of the old key into xmm1
   before the broadcast value is xor-ed in.
   NOTE(review): this appears to be the standard AES-NI key-expansion
   idiom and relies on xmm6 starting out as zero; confirm against the
   Intel AES-NI white paper before changing the order of these lines. */
michael@0 1684 shufps $0x10, %xmm1, %xmm6
michael@0 1685 pxor %xmm6, %xmm1
michael@0 1686 shufps $0x8c, %xmm1, %xmm6
michael@0 1687 pxor %xmm2, %xmm1
michael@0 1688 pxor %xmm6, %xmm1
michael@0 1689 movdqu %xmm1, (%rsi)
michael@0 1690
michael@0 1691 addq $16, %rsi
/* Second half: derive the next odd-numbered key from the key just
   produced. Round constant 0 is used and dword 2 of the result is
   broadcast; xmm6 is reused as left by the first half. */
michael@0 1692 .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */
michael@0 1693 pshufd $0xaa, %xmm4, %xmm4
michael@0 1694 shufps $0x10, %xmm3, %xmm6
michael@0 1695 pxor %xmm6, %xmm3
michael@0 1696 shufps $0x8c, %xmm3, %xmm6
michael@0 1697 pxor %xmm4, %xmm3
michael@0 1698 pxor %xmm6, %xmm3
michael@0 1699 movdqu %xmm3, (%rsi)
michael@0 1700 addq $16, %rsi
michael@0 1701 ret
michael@0 1702 .size key_expansion256, .-key_expansion256
michael@0 1703
michael@0 1704
michael@0 1705 /* in %rdi : cx - context
michael@0 1706 in %rsi : output - pointer to output buffer
michael@0 1707 in %rdx : outputLen - pointer to variable for length of output
michael@0 1708 (filled by caller)
michael@0 1709 in %rcx : maxOutputLen - length of output buffer
michael@0 1710 in %r8 : input - pointer to input buffer
michael@0 1711 in %r9 : inputLen - length of input buffer
michael@0 1712 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 1713 */
michael@0 1714 .type intel_aes_encrypt_ecb_256,@function
michael@0 1715 .globl intel_aes_encrypt_ecb_256
michael@0 1716 .align 16
michael@0 1717 intel_aes_encrypt_ecb_256:
michael@0 1718 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 1719 leaq 48(%rdi), %rdi
michael@0 1720
michael@0 1721 movdqu (%rdi), %xmm2
michael@0 1722 movdqu 224(%rdi), %xmm15
michael@0 1723 xorl %eax, %eax
michael@0 1724 // cmpq $8*16, %r9
michael@0 1725 cmpq $128, %r9
michael@0 1726 jb 1f
michael@0 1727 // leaq -8*16(%r9), %r11
michael@0 1728 leaq -128(%r9), %r11
michael@0 1729 2: movdqu (%r8, %rax), %xmm3
michael@0 1730 movdqu 16(%r8, %rax), %xmm4
michael@0 1731 movdqu 32(%r8, %rax), %xmm5
michael@0 1732 movdqu 48(%r8, %rax), %xmm6
michael@0 1733 movdqu 64(%r8, %rax), %xmm7
michael@0 1734 movdqu 80(%r8, %rax), %xmm8
michael@0 1735 movdqu 96(%r8, %rax), %xmm9
michael@0 1736 movdqu 112(%r8, %rax), %xmm10
michael@0 1737 pxor %xmm2, %xmm3
michael@0 1738 pxor %xmm2, %xmm4
michael@0 1739 pxor %xmm2, %xmm5
michael@0 1740 pxor %xmm2, %xmm6
michael@0 1741 pxor %xmm2, %xmm7
michael@0 1742 pxor %xmm2, %xmm8
michael@0 1743 pxor %xmm2, %xmm9
michael@0 1744 pxor %xmm2, %xmm10
michael@0 1745
michael@0 1746 // complete loop unrolling
michael@0 1747 movdqu 16(%rdi), %xmm1
michael@0 1748 movdqu 32(%rdi), %xmm11
michael@0 1749 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1750 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1751 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1752 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1753 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1754 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1755 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1756 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1757 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1758 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1759 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1760 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1761 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1762 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1763 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1764 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1765
michael@0 1766 movdqu 48(%rdi), %xmm1
michael@0 1767 movdqu 64(%rdi), %xmm11
michael@0 1768 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1769 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1770 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1771 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1772 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1773 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1774 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1775 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1776 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1777 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1778 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1779 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1780 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1781 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1782 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1783 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1784
michael@0 1785 movdqu 80(%rdi), %xmm1
michael@0 1786 movdqu 96(%rdi), %xmm11
michael@0 1787 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1788 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1789 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1790 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1791 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1792 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1793 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1794 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1795 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1796 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1797 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1798 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1799 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1800 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1801 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1802 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1803
michael@0 1804 movdqu 112(%rdi), %xmm1
michael@0 1805 movdqu 128(%rdi), %xmm11
michael@0 1806 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1807 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1808 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1809 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1810 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1811 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1812 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1813 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1814 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1815 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1816 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1817 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1818 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1819 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1820 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1821 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1822
michael@0 1823 movdqu 144(%rdi), %xmm1
michael@0 1824 movdqu 160(%rdi), %xmm11
michael@0 1825 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1826 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1827 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1828 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1829 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1830 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1831 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1832 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1833 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1834 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1835 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1836 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1837 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1838 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1839 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1840 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1841
michael@0 1842 movdqu 176(%rdi), %xmm1
michael@0 1843 movdqu 192(%rdi), %xmm11
michael@0 1844 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1845 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1846 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1847 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1848 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1849 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1850 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1851 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1852 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
michael@0 1853 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
michael@0 1854 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
michael@0 1855 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
michael@0 1856 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
michael@0 1857 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
michael@0 1858 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
michael@0 1859 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
michael@0 1860
michael@0 1861 movdqu 208(%rdi), %xmm1
michael@0 1862 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
michael@0 1863 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
michael@0 1864 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
michael@0 1865 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
michael@0 1866 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
michael@0 1867 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
michael@0 1868 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
michael@0 1869 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
michael@0 1870 .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */
michael@0 1871 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */
michael@0 1872 .byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */
michael@0 1873 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */
michael@0 1874 .byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */
michael@0 1875 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */
michael@0 1876 .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */
michael@0 1877 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */
michael@0 1878
michael@0 1879 movdqu %xmm3, (%rsi, %rax)
michael@0 1880 movdqu %xmm4, 16(%rsi, %rax)
michael@0 1881 movdqu %xmm5, 32(%rsi, %rax)
michael@0 1882 movdqu %xmm6, 48(%rsi, %rax)
michael@0 1883 movdqu %xmm7, 64(%rsi, %rax)
michael@0 1884 movdqu %xmm8, 80(%rsi, %rax)
michael@0 1885 movdqu %xmm9, 96(%rsi, %rax)
michael@0 1886 movdqu %xmm10, 112(%rsi, %rax)
michael@0 1887 // addq $8*16, %rax
michael@0 1888 addq $128, %rax
michael@0 1889 cmpq %r11, %rax
michael@0 1890 jbe 2b
michael@0 1891 1: cmpq %rax, %r9
michael@0 1892 je 5f
michael@0 1893
michael@0 1894 movdqu (%rdi), %xmm8
michael@0 1895 movdqu 16(%rdi), %xmm2
michael@0 1896 movdqu 32(%rdi), %xmm3
michael@0 1897 movdqu 48(%rdi), %xmm4
michael@0 1898 movdqu 64(%rdi), %xmm5
michael@0 1899 movdqu 80(%rdi), %xmm6
michael@0 1900 movdqu 96(%rdi), %xmm7
michael@0 1901 movdqu 128(%rdi), %xmm9
michael@0 1902 movdqu 144(%rdi), %xmm10
michael@0 1903 movdqu 160(%rdi), %xmm11
michael@0 1904 movdqu 176(%rdi), %xmm12
michael@0 1905 movdqu 192(%rdi), %xmm13
michael@0 1906 movdqu 208(%rdi), %xmm14
michael@0 1907
michael@0 1908 4: movdqu (%r8, %rax), %xmm1
michael@0 1909 pxor %xmm8, %xmm1
michael@0 1910 movdqu 112(%rdi), %xmm8
michael@0 1911 .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
michael@0 1912 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
michael@0 1913 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
michael@0 1914 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
michael@0 1915 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
michael@0 1916 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
michael@0 1917 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
michael@0 1918 movdqu (%rdi), %xmm8
michael@0 1919 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
michael@0 1920 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
michael@0 1921 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
michael@0 1922 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
michael@0 1923 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
michael@0 1924 .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
michael@0 1925 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
michael@0 1926 movdqu %xmm1, (%rsi, %rax)
michael@0 1927 addq $16, %rax
michael@0 1928 cmpq %rax, %r9
michael@0 1929 jne 4b
michael@0 1930
michael@0 1931 5: xor %eax, %eax
michael@0 1932 ret
michael@0 1933 .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
michael@0 1934
michael@0 1935
michael@0 1936 /* intel_aes_decrypt_ecb_256: AES-256 ECB decryption; AES-NI opcodes are emitted as .byte.
michael@0 1937 in %rdi : cx - context
michael@0 1938 in %rsi : output - pointer to output buffer
michael@0 1939 in %rdx : outputLen - pointer to variable for length of output (filled by caller)
michael@0 1940 in %rcx : maxOutputLen - length of output buffer
michael@0 1941 in %r8 : input - pointer to input buffer
michael@0 1942 in %r9 : inputLen - length of input buffer
michael@0 1943 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 1944 out %eax : always 0; %rdx and %rcx are not read by this routine */
michael@0 1945 .type intel_aes_decrypt_ecb_256,@function
michael@0 1946 .globl intel_aes_decrypt_ecb_256
michael@0 1947 .align 16
michael@0 1948 intel_aes_decrypt_ecb_256:
michael@0 1949 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 1950 leaq 48(%rdi), %rdi /* rdi = cx + 48 (EXPANDED_KEY_OFFSET): base of the round keys */
michael@0 1951 /* Round key i is read from 16*i(%rdi); offsets 0..224 are used (15 keys). */
michael@0 1952 movdqu (%rdi), %xmm2 /* xmm2 = round key 0, consumed by aesdeclast in the 8-block loop */
michael@0 1953 movdqu 224(%rdi), %xmm15 /* xmm15 = round key 14, XORed into the data first */
michael@0 1954 xorl %eax, %eax /* rax = byte offset into input and output */
michael@0 1955 // cmpq $8*16, %r9
michael@0 1956 cmpq $128, %r9
michael@0 1957 jb 1f /* fewer than 8 blocks: go straight to the one-block loop */
michael@0 1958 // leaq -8*16(%r9), %r11
michael@0 1959 leaq -128(%r9), %r11 /* r11 = largest offset at which a full 8-block group still fits */
michael@0 1960 2: movdqu (%r8, %rax), %xmm3 /* load 8 consecutive input blocks into xmm3..xmm10 */
michael@0 1961 movdqu 16(%r8, %rax), %xmm4
michael@0 1962 movdqu 32(%r8, %rax), %xmm5
michael@0 1963 movdqu 48(%r8, %rax), %xmm6
michael@0 1964 movdqu 64(%r8, %rax), %xmm7
michael@0 1965 movdqu 80(%r8, %rax), %xmm8
michael@0 1966 movdqu 96(%r8, %rax), %xmm9
michael@0 1967 movdqu 112(%r8, %rax), %xmm10
michael@0 1968 pxor %xmm15, %xmm3 /* initial XOR of every block with round key 14 */
michael@0 1969 pxor %xmm15, %xmm4
michael@0 1970 pxor %xmm15, %xmm5
michael@0 1971 pxor %xmm15, %xmm6
michael@0 1972 pxor %xmm15, %xmm7
michael@0 1973 pxor %xmm15, %xmm8
michael@0 1974 pxor %xmm15, %xmm9
michael@0 1975 pxor %xmm15, %xmm10
michael@0 1976 /* 13 aesdec rounds follow, keys taken from offset 208 down to 16, two keys per group. */
michael@0 1977 // complete loop unrolling
michael@0 1978 movdqu 208(%rdi), %xmm1
michael@0 1979 movdqu 192(%rdi), %xmm11
michael@0 1980 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 1981 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 1982 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 1983 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 1984 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 1985 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 1986 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 1987 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 1988 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 1989 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 1990 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 1991 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 1992 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 1993 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 1994 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 1995 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 1996 /* rounds using the keys at offsets 176 and 160 */
michael@0 1997 movdqu 176(%rdi), %xmm1
michael@0 1998 movdqu 160(%rdi), %xmm11
michael@0 1999 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2000 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2001 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2002 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2003 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2004 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2005 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2006 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2007 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2008 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2009 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2010 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2011 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2012 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2013 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2014 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2015 /* rounds using the keys at offsets 144 and 128 */
michael@0 2016 movdqu 144(%rdi), %xmm1
michael@0 2017 movdqu 128(%rdi), %xmm11
michael@0 2018 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2019 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2020 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2021 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2022 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2023 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2024 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2025 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2026 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2027 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2028 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2029 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2030 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2031 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2032 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2033 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2034 /* rounds using the keys at offsets 112 and 96 */
michael@0 2035 movdqu 112(%rdi), %xmm1
michael@0 2036 movdqu 96(%rdi), %xmm11
michael@0 2037 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2038 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2039 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2040 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2041 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2042 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2043 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2044 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2045 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2046 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2047 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2048 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2049 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2050 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2051 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2052 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2053 /* rounds using the keys at offsets 80 and 64 */
michael@0 2054 movdqu 80(%rdi), %xmm1
michael@0 2055 movdqu 64(%rdi), %xmm11
michael@0 2056 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2057 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2058 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2059 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2060 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2061 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2062 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2063 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2064 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2065 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2066 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2067 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2068 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2069 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2070 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2071 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2072 /* rounds using the keys at offsets 48 and 32 */
michael@0 2073 movdqu 48(%rdi), %xmm1
michael@0 2074 movdqu 32(%rdi), %xmm11
michael@0 2075 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2076 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2077 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2078 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2079 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2080 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2081 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2082 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2083 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2084 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2085 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2086 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2087 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2088 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2089 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2090 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2091 /* last aesdec round (key at offset 16), then aesdeclast with round key 0 held in xmm2 */
michael@0 2092 movdqu 16(%rdi), %xmm1
michael@0 2093 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2094 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2095 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2096 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2097 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2098 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2099 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2100 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2101 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
michael@0 2102 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
michael@0 2103 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
michael@0 2104 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
michael@0 2105 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
michael@0 2106 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
michael@0 2107 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
michael@0 2108 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
michael@0 2109 /* store the 8 result blocks at the same offset in the output buffer */
michael@0 2110 movdqu %xmm3, (%rsi, %rax)
michael@0 2111 movdqu %xmm4, 16(%rsi, %rax)
michael@0 2112 movdqu %xmm5, 32(%rsi, %rax)
michael@0 2113 movdqu %xmm6, 48(%rsi, %rax)
michael@0 2114 movdqu %xmm7, 64(%rsi, %rax)
michael@0 2115 movdqu %xmm8, 80(%rsi, %rax)
michael@0 2116 movdqu %xmm9, 96(%rsi, %rax)
michael@0 2117 movdqu %xmm10, 112(%rsi, %rax)
michael@0 2118 // addq $8*16, %rax
michael@0 2119 addq $128, %rax
michael@0 2120 cmpq %r11, %rax
michael@0 2121 jbe 2b /* unsigned: repeat while rax <= inputLen - 128 */
michael@0 2122 1: cmpq %rax, %r9
michael@0 2123 je 5f /* nothing left (also covers inputLen == 0) */
michael@0 2124 /* One-block tail: keep the keys at offsets 16..208 in xmm2..xmm14. xmm2 no longer holds round key 0. */
michael@0 2125 movdqu 16(%rdi), %xmm2
michael@0 2126 movdqu 32(%rdi), %xmm3
michael@0 2127 movdqu 48(%rdi), %xmm4
michael@0 2128 movdqu 64(%rdi), %xmm5
michael@0 2129 movdqu 80(%rdi), %xmm6
michael@0 2130 movdqu 96(%rdi), %xmm7
michael@0 2131 movdqu 112(%rdi), %xmm8
michael@0 2132 movdqu 128(%rdi), %xmm9
michael@0 2133 movdqu 144(%rdi), %xmm10
michael@0 2134 movdqu 160(%rdi), %xmm11
michael@0 2135 movdqu 176(%rdi), %xmm12
michael@0 2136 movdqu 192(%rdi), %xmm13
michael@0 2137 movdqu 208(%rdi), %xmm14
michael@0 2138 /* Loop invariant at label 4: xmm8 holds the key at offset 112. */
michael@0 2139 4: movdqu (%r8, %rax), %xmm1
michael@0 2140 pxor %xmm15, %xmm1 /* initial XOR with round key 14 */
michael@0 2141 .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
michael@0 2142 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
michael@0 2143 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
michael@0 2144 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
michael@0 2145 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
michael@0 2146 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
michael@0 2147 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
michael@0 2148 movdqu (%rdi), %xmm8 /* key at 112 is consumed; reuse xmm8 for round key 0 */
michael@0 2149 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
michael@0 2150 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
michael@0 2151 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
michael@0 2152 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
michael@0 2153 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
michael@0 2154 .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
michael@0 2155 .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
michael@0 2156 movdqu 112(%rdi), %xmm8 /* restore the key at 112 for the next iteration */
michael@0 2157 movdqu %xmm1, (%rsi, %rax)
michael@0 2158 addq $16, %rax
michael@0 2159 cmpq %rax, %r9
michael@0 2160 jne 4b /* exit test is equality, so this relies on inputLen being a multiple of 16 */
michael@0 2161
michael@0 2162 5: xor %eax, %eax /* return 0 */
michael@0 2163 ret
michael@0 2164 .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
michael@0 2165
michael@0 2166
michael@0 2167 /* intel_aes_encrypt_cbc_256: AES-256 CBC encryption; AES-NI opcodes are emitted as .byte.
michael@0 2168 in %rdi : cx - context
michael@0 2169 in %rsi : output - pointer to output buffer
michael@0 2170 in %rdx : outputLen - pointer to variable for length of output (filled by caller)
michael@0 2171 in %rcx : maxOutputLen - length of output buffer
michael@0 2172 in %r8 : input - pointer to input buffer
michael@0 2173 in %r9 : inputLen - length of input buffer
michael@0 2174 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 2175 out %eax : always 0; the 16 bytes at cx+16 are updated to the last block written */
michael@0 2176 .type intel_aes_encrypt_cbc_256,@function
michael@0 2177 .globl intel_aes_encrypt_cbc_256
michael@0 2178 .align 16
michael@0 2179 intel_aes_encrypt_cbc_256:
michael@0 2180 testq %r9, %r9
michael@0 2181 je 2f /* inputLen == 0: return 0 without reading or writing the context */
michael@0 2182
michael@0 2183 // leaq IV_OFFSET(%rdi), %rdx
michael@0 2184 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 2185 leaq 16(%rdi), %rdx /* rdx = cx + 16 (IV_OFFSET); the incoming outputLen pointer is overwritten unused */
michael@0 2186 leaq 48(%rdi), %rdi /* rdi = cx + 48 (EXPANDED_KEY_OFFSET): base of the round keys */
michael@0 2187 /* Register plan: xmm0 = chaining value, xmm8 = key at 0 (time-shared with key at 112), others below. */
michael@0 2188 movdqu (%rdx), %xmm0 /* xmm0 = IV stored in the context */
michael@0 2189 movdqu (%rdi), %xmm8 /* round key 0 */
michael@0 2190 movdqu 16(%rdi), %xmm2
michael@0 2191 movdqu 32(%rdi), %xmm3
michael@0 2192 movdqu 48(%rdi), %xmm4
michael@0 2193 movdqu 64(%rdi), %xmm5
michael@0 2194 movdqu 80(%rdi), %xmm6
michael@0 2195 movdqu 96(%rdi), %xmm7
michael@0 2196 movdqu 128(%rdi), %xmm9 /* offset 112 is skipped here; it is loaded into xmm8 inside the loop */
michael@0 2197 movdqu 144(%rdi), %xmm10
michael@0 2198 movdqu 160(%rdi), %xmm11
michael@0 2199 movdqu 176(%rdi), %xmm12
michael@0 2200 movdqu 192(%rdi), %xmm13
michael@0 2201 movdqu 208(%rdi), %xmm14
michael@0 2202 movdqu 224(%rdi), %xmm15 /* round key 14, used by aesenclast */
michael@0 2203
michael@0 2204 xorl %eax, %eax /* rax = byte offset into input and output */
michael@0 2205 1: movdqu (%r8, %rax), %xmm1 /* load one input block */
michael@0 2206 pxor %xmm0, %xmm1 /* CBC chaining: XOR with the previous output block (IV on first pass) */
michael@0 2207 pxor %xmm8, %xmm1 /* initial XOR with round key 0 */
michael@0 2208 movdqu 112(%rdi), %xmm8 /* round key 0 is consumed; reuse xmm8 for the key at 112 */
michael@0 2209 .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
michael@0 2210 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
michael@0 2211 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
michael@0 2212 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
michael@0 2213 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
michael@0 2214 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
michael@0 2215 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
michael@0 2216 movdqu (%rdi), %xmm8 /* restore round key 0 for the next iteration */
michael@0 2217 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
michael@0 2218 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
michael@0 2219 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
michael@0 2220 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
michael@0 2221 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
michael@0 2222 .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
michael@0 2223 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
michael@0 2224 movdqu %xmm1, (%rsi, %rax) /* store the output block */
michael@0 2225 movdqa %xmm1, %xmm0 /* it becomes the chaining value for the next block */
michael@0 2226 addq $16, %rax
michael@0 2227 cmpq %rax, %r9
michael@0 2228 jne 1b /* exit test is equality, so this relies on inputLen being a multiple of 16 */
michael@0 2229
michael@0 2230 movdqu %xmm0, (%rdx) /* write the last output block back to cx+16 as the new IV */
michael@0 2231
michael@0 2232 2: xor %eax, %eax /* return 0 */
michael@0 2233 ret
michael@0 2234 .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
michael@0 2235
michael@0 2236
michael@0 2237 /* in %rdi : cx - context
michael@0 2238 in %rsi : output - pointer to output buffer
michael@0 2239 in %rdx : outputLen - pointer to variable for length of output
michael@0 2240 (filled by caller)
michael@0 2241 in %rcx : maxOutputLen - length of output buffer
michael@0 2242 in %r8 : input - pointer to input buffer
michael@0 2243 in %r9 : inputLen - length of input buffer
michael@0 2244 on stack: blocksize - AES blocksize (always 16, unused)
michael@0 2245 */
michael@0 2246 .type intel_aes_decrypt_cbc_256,@function
michael@0 2247 .globl intel_aes_decrypt_cbc_256
michael@0 2248 .align 16
michael@0 2249 intel_aes_decrypt_cbc_256:
michael@0 2250 // leaq IV_OFFSET(%rdi), %rdx
michael@0 2251 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
michael@0 2252 leaq 16(%rdi), %rdx
michael@0 2253 leaq 48(%rdi), %rdi
michael@0 2254
michael@0 2255 movdqu (%rdx), %xmm0
michael@0 2256 movdqu (%rdi), %xmm2
michael@0 2257 movdqu 224(%rdi), %xmm15
michael@0 2258 xorl %eax, %eax
michael@0 2259 // cmpq $8*16, %r9
michael@0 2260 cmpq $128, %r9
michael@0 2261 jb 1f
michael@0 2262 // leaq -8*16(%r9), %r11
michael@0 2263 leaq -128(%r9), %r11
michael@0 2264 2: movdqu (%r8, %rax), %xmm3
michael@0 2265 movdqu 16(%r8, %rax), %xmm4
michael@0 2266 movdqu 32(%r8, %rax), %xmm5
michael@0 2267 movdqu 48(%r8, %rax), %xmm6
michael@0 2268 movdqu 64(%r8, %rax), %xmm7
michael@0 2269 movdqu 80(%r8, %rax), %xmm8
michael@0 2270 movdqu 96(%r8, %rax), %xmm9
michael@0 2271 movdqu 112(%r8, %rax), %xmm10
michael@0 2272 pxor %xmm15, %xmm3
michael@0 2273 pxor %xmm15, %xmm4
michael@0 2274 pxor %xmm15, %xmm5
michael@0 2275 pxor %xmm15, %xmm6
michael@0 2276 pxor %xmm15, %xmm7
michael@0 2277 pxor %xmm15, %xmm8
michael@0 2278 pxor %xmm15, %xmm9
michael@0 2279 pxor %xmm15, %xmm10
michael@0 2280
michael@0 2281 // complete loop unrolling
michael@0 2282 movdqu 208(%rdi), %xmm1
michael@0 2283 movdqu 192(%rdi), %xmm11
michael@0 2284 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2285 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2286 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2287 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2288 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2289 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2290 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2291 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2292 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2293 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2294 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2295 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2296 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2297 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2298 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2299 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2300
michael@0 2301 movdqu 176(%rdi), %xmm1
michael@0 2302 movdqu 160(%rdi), %xmm11
michael@0 2303 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2304 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2305 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2306 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2307 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2308 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2309 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2310 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2311 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2312 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2313 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2314 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2315 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2316 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2317 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2318 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2319
michael@0 2320 movdqu 144(%rdi), %xmm1
michael@0 2321 movdqu 128(%rdi), %xmm11
michael@0 2322 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2323 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2324 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2325 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2326 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2327 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2328 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2329 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2330 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2331 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2332 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2333 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2334 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2335 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2336 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2337 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2338
michael@0 2339 movdqu 112(%rdi), %xmm1
michael@0 2340 movdqu 96(%rdi), %xmm11
michael@0 2341 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2342 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2343 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2344 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2345 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2346 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2347 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2348 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2349 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2350 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2351 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2352 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2353 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2354 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2355 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2356 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2357
michael@0 2358 movdqu 80(%rdi), %xmm1
michael@0 2359 movdqu 64(%rdi), %xmm11
michael@0 2360 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2361 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2362 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2363 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2364 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2365 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2366 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2367 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2368 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2369 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2370 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2371 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2372 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2373 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2374 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2375 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2376
michael@0 2377 movdqu 48(%rdi), %xmm1
michael@0 2378 movdqu 32(%rdi), %xmm11
michael@0 2379 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2380 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2381 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2382 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2383 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2384 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2385 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2386 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2387 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
michael@0 2388 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
michael@0 2389 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
michael@0 2390 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
michael@0 2391 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
michael@0 2392 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
michael@0 2393 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
michael@0 2394 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
michael@0 2395
michael@0 2396 movdqu 16(%rdi), %xmm1
michael@0 2397 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
michael@0 2398 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
michael@0 2399 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
michael@0 2400 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
michael@0 2401 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
michael@0 2402 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
michael@0 2403 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
michael@0 2404 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
michael@0 2405 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
michael@0 2406 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
michael@0 2407 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
michael@0 2408 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
michael@0 2409 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
michael@0 2410 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
michael@0 2411 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
michael@0 2412 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
michael@0 2413
michael@0 2414 pxor %xmm0, %xmm3
michael@0 2415 movdqu (%r8, %rax), %xmm0
michael@0 2416 pxor %xmm0, %xmm4
michael@0 2417 movdqu 16(%r8, %rax), %xmm0
michael@0 2418 pxor %xmm0, %xmm5
michael@0 2419 movdqu 32(%r8, %rax), %xmm0
michael@0 2420 pxor %xmm0, %xmm6
michael@0 2421 movdqu 48(%r8, %rax), %xmm0
michael@0 2422 pxor %xmm0, %xmm7
michael@0 2423 movdqu 64(%r8, %rax), %xmm0
michael@0 2424 pxor %xmm0, %xmm8
michael@0 2425 movdqu 80(%r8, %rax), %xmm0
michael@0 2426 pxor %xmm0, %xmm9
michael@0 2427 movdqu 96(%r8, %rax), %xmm0
michael@0 2428 pxor %xmm0, %xmm10
michael@0 2429 movdqu 112(%r8, %rax), %xmm0
michael@0 2430 movdqu %xmm3, (%rsi, %rax)
michael@0 2431 movdqu %xmm4, 16(%rsi, %rax)
michael@0 2432 movdqu %xmm5, 32(%rsi, %rax)
michael@0 2433 movdqu %xmm6, 48(%rsi, %rax)
michael@0 2434 movdqu %xmm7, 64(%rsi, %rax)
michael@0 2435 movdqu %xmm8, 80(%rsi, %rax)
michael@0 2436 movdqu %xmm9, 96(%rsi, %rax)
michael@0 2437 movdqu %xmm10, 112(%rsi, %rax)
michael@0 2438 /* advance the byte offset past the 8 blocks just stored: 8*16 = 128 bytes */
michael@0 2439 addq $128, %rax
michael@0 2440 cmpq %r11, %rax
michael@0 2441 jbe 2b
michael@0 2442 1: cmpq %rax, %r9
michael@0 2443 je 5f
michael@0 2444
michael@0 2445 movdqu 16(%rdi), %xmm2
michael@0 2446 movdqu 32(%rdi), %xmm3
michael@0 2447 movdqu 48(%rdi), %xmm4
michael@0 2448 movdqu 64(%rdi), %xmm5
michael@0 2449 movdqu 80(%rdi), %xmm6
michael@0 2450 movdqu 96(%rdi), %xmm7
michael@0 2451 movdqu 112(%rdi), %xmm8
michael@0 2452 movdqu 128(%rdi), %xmm9
michael@0 2453 movdqu 144(%rdi), %xmm10
michael@0 2454 movdqu 160(%rdi), %xmm11
michael@0 2455 movdqu 176(%rdi), %xmm12
michael@0 2456 movdqu 192(%rdi), %xmm13
michael@0 2457 movdqu 208(%rdi), %xmm14
michael@0 2458
michael@0 2459 4: movdqu (%r8, %rax), %xmm1
michael@0 2460 pxor %xmm15, %xmm1
michael@0 2461 .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
michael@0 2462 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
michael@0 2463 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
michael@0 2464 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
michael@0 2465 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
michael@0 2466 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
michael@0 2467 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
michael@0 2468 movdqu (%rdi), %xmm8
michael@0 2469 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
michael@0 2470 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
michael@0 2471 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
michael@0 2472 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
michael@0 2473 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
michael@0 2474 .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
michael@0 2475 .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
michael@0 2476 movdqu 112(%rdi), %xmm8
michael@0 2477 pxor %xmm0, %xmm1
michael@0 2478 movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
michael@0 2479 movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
michael@0 2480 addq $16, %rax
michael@0 2481 cmpq %rax, %r9
michael@0 2482 jne 4b
michael@0 2483
michael@0 2484 5: movdqu %xmm0, (%rdx)
michael@0 2485
michael@0 2486 xor %eax, %eax
michael@0 2487 ret
michael@0 2488 .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256

mercurial