Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* Everything below is emitted into the .text section. */
5 .text
/* Byte offsets, relative to the context pointer passed in %rdi, of the IV
   and of the expanded key schedule.  The routines below hard-code the same
   values (16 and 48) in their leaq instructions and keep the symbolic forms
   only as // comments next to them. */
7 #define IV_OFFSET 16
8 #define EXPANDED_KEY_OFFSET 48
/* intel_aes_encrypt_init_128: build the encryption round-key schedule for a
   16-byte key.
   The key at (%rdi) is copied to (%rsi) as round key 0.  Ten
   aeskeygenassist / key_expansion128 pairs follow, one per round-constant
   immediate (0x01 ... 0x36); each call stores one further 16-byte round key
   at (%rsi) and advances %rsi by 16, so 176 bytes are written in total.
   %eax is cleared once up front and nothing on this path writes it again,
   so the routine returns with %eax == 0.
   Registers written: %rsi, %eax, %xmm1, %xmm2, %xmm3. */
11 /* in %rdi : the key
12 in %rsi : buffer for expanded key
13 */
14 .type intel_aes_encrypt_init_128,@function
15 .globl intel_aes_encrypt_init_128
16 .align 16
17 intel_aes_encrypt_init_128:
18 movups (%rdi), %xmm1 /* xmm1 = key, unaligned load */
19 movups %xmm1, (%rsi) /* round key 0 is the key itself */
20 leaq 16(%rsi), %rsi /* rsi -> slot for round key 1 */
21 xorl %eax, %eax /* key_expansion128 seeds %xmm3 from %eax, so it must be 0 */
23 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
24 call key_expansion128
25 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
26 call key_expansion128
27 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
28 call key_expansion128
29 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
30 call key_expansion128
31 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
32 call key_expansion128
33 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
34 call key_expansion128
35 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
36 call key_expansion128
37 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
38 call key_expansion128
39 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
40 call key_expansion128
41 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
42 call key_expansion128
44 ret
45 .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
/* intel_aes_decrypt_init_128: build the decryption round-key schedule for a
   16-byte key.
   Performs the same ten expansion steps as intel_aes_encrypt_init_128, but
   after each of the first nine steps the round key just produced (still in
   %xmm1) is passed through aesimc and the result overwrites the copy that
   key_expansion128 stored at -16(%rsi).  Round key 0 (the raw key) and the
   final round key are stored without aesimc.  %xmm1 itself always keeps the
   untransformed key, so the next expansion step is unaffected.
   Returns with %eax == 0 (cleared up front, not written afterwards).
   Registers written: %rsi, %eax, %xmm1, %xmm2, %xmm3. */
48 /* in %rdi : the key
49 in %rsi : buffer for expanded key
50 */
51 .type intel_aes_decrypt_init_128,@function
52 .globl intel_aes_decrypt_init_128
53 .align 16
54 intel_aes_decrypt_init_128:
55 movups (%rdi), %xmm1 /* xmm1 = key, unaligned load */
56 movups %xmm1, (%rsi) /* round key 0 stored as-is */
57 leaq 16(%rsi), %rsi /* rsi -> slot for round key 1 */
58 xorl %eax, %eax /* key_expansion128 seeds %xmm3 from %eax, so it must be 0 */
60 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
61 call key_expansion128
62 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
63 movups %xmm2, -16(%rsi) /* replace the stored round key with its aesimc form */
64 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
65 call key_expansion128
66 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
67 movups %xmm2, -16(%rsi)
68 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
69 call key_expansion128
70 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
71 movups %xmm2, -16(%rsi)
72 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
73 call key_expansion128
74 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
75 movups %xmm2, -16(%rsi)
76 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
77 call key_expansion128
78 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
79 movups %xmm2, -16(%rsi)
80 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
81 call key_expansion128
82 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
83 movups %xmm2, -16(%rsi)
84 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
85 call key_expansion128
86 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
87 movups %xmm2, -16(%rsi)
88 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
89 call key_expansion128
90 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
91 movups %xmm2, -16(%rsi)
92 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
93 call key_expansion128
94 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
95 movups %xmm2, -16(%rsi)
96 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
97 call key_expansion128 /* last round key: stored without aesimc */
99 ret
100 .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
/* key_expansion128: derive and store one round key of the 128-bit schedule.
   File-local helper (no .globl directive) called by the two *_init_128
   routines above.
   in  %xmm1 : previous round key
   in  %xmm2 : aeskeygenassist result for the previous round key
   in  %eax  : must be 0; it supplies the zero low dword of %xmm3
   in  %rsi  : where to store the new round key
   out %xmm1 : new round key (also written to (%rsi))
   out %rsi  : advanced by 16
   Overwrites %xmm2 and %xmm3. */
103 .type key_expansion128,@function
104 .align 16
105 key_expansion128:
106 movd %eax, %xmm3 /* xmm3 = {eax,0,0,0}; all zero when eax == 0 */
107 pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the keygenassist result */
108 shufps $0x10, %xmm1, %xmm3 /* xmm3 = {0, 0, k1, k0} (k = previous key dwords) */
109 pxor %xmm3, %xmm1 /* xmm1 = {k0, k1, k2^k1, k3^k0} */
110 shufps $0x8c, %xmm1, %xmm3 /* xmm3 = {0, k0, k0, k2^k1} */
111 pxor %xmm2, %xmm1 /* mix in the broadcast keygenassist dword */
112 pxor %xmm3, %xmm1 /* dword i now also carries k0^...^ki */
113 movdqu %xmm1, (%rsi) /* store the new round key (unaligned store) */
114 addq $16, %rsi /* next slot */
115 ret
116 .size key_expansion128, .-key_expansion128
/* intel_aes_encrypt_ecb_128: encrypt inputLen bytes block by block with the
   11 round keys stored at cx+48.
   Round key 0 is kept in %xmm2 and the last round key (offset 160) in
   %xmm12 for the whole call.  While at least 128 bytes remain, label 2
   processes eight 16-byte blocks per iteration, reloading the middle round
   keys pairwise into %xmm1/%xmm11.  Any remainder is handled one block at a
   time at label 4 with round keys 1-9 preloaded into %xmm3-%xmm11.
   %rax is the running byte offset into both input and output.
   The tail loop exits only when the offset equals inputLen exactly, so
   inputLen must be a multiple of 16 (presumably checked by the caller).
   %rdx, %rcx and the stack argument are never read here.
   Always returns 0 in %eax.
   Registers written: %rdi, %rax, %r11, %xmm1-%xmm12. */
119 /* in %rdi : cx - context
120 in %rsi : output - pointer to output buffer
121 in %rdx : outputLen - pointer to variable for length of output
122 (filled by caller)
123 in %rcx : maxOutputLen - length of output buffer
124 in %r8 : input - pointer to input buffer
125 in %r9 : inputLen - length of input buffer
126 on stack: blocksize - AES blocksize (always 16, unused)
127 */
128 .type intel_aes_encrypt_ecb_128,@function
129 .globl intel_aes_encrypt_ecb_128
130 .align 16
131 intel_aes_encrypt_ecb_128:
132 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
133 leaq 48(%rdi), %rdi /* rdi -> round key 0 */
135 movdqu (%rdi), %xmm2 /* xmm2 = round key 0 */
136 movdqu 160(%rdi), %xmm12 /* xmm12 = round key 10 (last) */
137 xor %eax, %eax /* rax = byte offset, starts at 0 */
138 // cmpq $8*16, %r9
139 cmpq $128, %r9
140 jb 1f /* fewer than 8 blocks: go straight to the tail */
141 // leaq -8*16(%r9), %r11
142 leaq -128(%r9), %r11 /* r11 = largest offset at which 8 blocks still fit */
143 2: movdqu (%r8, %rax), %xmm3
144 movdqu 16(%r8, %rax), %xmm4
145 movdqu 32(%r8, %rax), %xmm5
146 movdqu 48(%r8, %rax), %xmm6
147 movdqu 64(%r8, %rax), %xmm7
148 movdqu 80(%r8, %rax), %xmm8
149 movdqu 96(%r8, %rax), %xmm9
150 movdqu 112(%r8, %rax), %xmm10
/* initial whitening: XOR round key 0 into all eight blocks */
151 pxor %xmm2, %xmm3
152 pxor %xmm2, %xmm4
153 pxor %xmm2, %xmm5
154 pxor %xmm2, %xmm6
155 pxor %xmm2, %xmm7
156 pxor %xmm2, %xmm8
157 pxor %xmm2, %xmm9
158 pxor %xmm2, %xmm10
160 // complete loop unrolling
/* rounds 1 and 2 */
161 movdqu 16(%rdi), %xmm1
162 movdqu 32(%rdi), %xmm11
163 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
164 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
165 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
166 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
167 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
168 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
169 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
170 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
171 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
172 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
173 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
174 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
175 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
176 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
177 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
178 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 3 and 4 */
180 movdqu 48(%rdi), %xmm1
181 movdqu 64(%rdi), %xmm11
182 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
183 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
184 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
185 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
186 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
187 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
188 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
189 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
190 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
191 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
192 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
193 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
194 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
195 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
196 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
197 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 5 and 6 */
199 movdqu 80(%rdi), %xmm1
200 movdqu 96(%rdi), %xmm11
201 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
202 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
203 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
204 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
205 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
206 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
207 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
208 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
209 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
210 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
211 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
212 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
213 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
214 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
215 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
216 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 7 and 8 */
218 movdqu 112(%rdi), %xmm1
219 movdqu 128(%rdi), %xmm11
220 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
221 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
222 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
223 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
224 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
225 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
226 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
227 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
228 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
229 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
230 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
231 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
232 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
233 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
234 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
235 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* round 9, then the final round with the key held in xmm12 */
237 movdqu 144(%rdi), %xmm1
238 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
239 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
240 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
241 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
242 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
243 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
244 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
245 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
246 .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */
247 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */
248 .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */
249 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */
250 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */
251 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */
252 .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */
253 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */
255 movdqu %xmm3, (%rsi, %rax)
256 movdqu %xmm4, 16(%rsi, %rax)
257 movdqu %xmm5, 32(%rsi, %rax)
258 movdqu %xmm6, 48(%rsi, %rax)
259 movdqu %xmm7, 64(%rsi, %rax)
260 movdqu %xmm8, 80(%rsi, %rax)
261 movdqu %xmm9, 96(%rsi, %rax)
262 movdqu %xmm10, 112(%rsi, %rax)
263 // addq $8*16, %rax
264 addq $128, %rax
265 cmpq %r11, %rax
266 jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
267 1: cmpq %rax, %r9
268 je 5f /* nothing left over */
/* tail: keep round keys 1-9 resident in xmm3-xmm11 */
270 movdqu 16(%rdi), %xmm3
271 movdqu 32(%rdi), %xmm4
272 movdqu 48(%rdi), %xmm5
273 movdqu 64(%rdi), %xmm6
274 movdqu 80(%rdi), %xmm7
275 movdqu 96(%rdi), %xmm8
276 movdqu 112(%rdi), %xmm9
277 movdqu 128(%rdi), %xmm10
278 movdqu 144(%rdi), %xmm11
280 4: movdqu (%r8, %rax), %xmm1
281 pxor %xmm2, %xmm1
282 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
283 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
284 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
285 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
286 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
287 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
288 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
289 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
290 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
291 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
292 movdqu %xmm1, (%rsi, %rax)
293 addq $16, %rax
294 cmpq %rax, %r9
295 jne 4b /* exits only on exact equality with inputLen */
297 5: xor %eax, %eax /* return value 0 */
298 ret
299 .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
/* intel_aes_decrypt_ecb_128: decrypt inputLen bytes block by block with the
   11 round keys stored at cx+48, applied in reverse order.
   Each block is first XORed with the key at offset 160 (%xmm12), then run
   through aesdec with the keys at offsets 144 down to 16, and finished with
   aesdeclast using the key at offset 0 (%xmm2).
   While at least 128 bytes remain, label 2 processes eight blocks per
   iteration; the remainder is handled one block at a time at label 4 with
   the keys at offsets 16-144 preloaded into %xmm3-%xmm11.
   %rax is the running byte offset into both input and output.
   The tail loop exits only when the offset equals inputLen exactly, so
   inputLen must be a multiple of 16 (presumably checked by the caller).
   %rdx, %rcx and the stack argument are never read here.
   Always returns 0 in %eax.
   Registers written: %rdi, %rax, %r11, %xmm1-%xmm12. */
302 /* in %rdi : cx - context
303 in %rsi : output - pointer to output buffer
304 in %rdx : outputLen - pointer to variable for length of output
305 (filled by caller)
306 in %rcx : maxOutputLen - length of output buffer
307 in %r8 : input - pointer to input buffer
308 in %r9 : inputLen - length of input buffer
309 on stack: blocksize - AES blocksize (always 16, unused)
310 */
311 .type intel_aes_decrypt_ecb_128,@function
312 .globl intel_aes_decrypt_ecb_128
313 .align 16
314 intel_aes_decrypt_ecb_128:
315 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
316 leaq 48(%rdi), %rdi /* rdi -> start of the key schedule */
318 movdqu (%rdi), %xmm2 /* xmm2 = key at offset 0, used by aesdeclast */
319 movdqu 160(%rdi), %xmm12 /* xmm12 = key at offset 160, used for the first XOR */
320 xorl %eax, %eax /* rax = byte offset, starts at 0 */
321 // cmpq $8*16, %r9
322 cmpq $128, %r9
323 jb 1f /* fewer than 8 blocks: go straight to the tail */
324 // leaq -8*16(%r9), %r11
325 leaq -128(%r9), %r11 /* r11 = largest offset at which 8 blocks still fit */
326 2: movdqu (%r8, %rax), %xmm3
327 movdqu 16(%r8, %rax), %xmm4
328 movdqu 32(%r8, %rax), %xmm5
329 movdqu 48(%r8, %rax), %xmm6
330 movdqu 64(%r8, %rax), %xmm7
331 movdqu 80(%r8, %rax), %xmm8
332 movdqu 96(%r8, %rax), %xmm9
333 movdqu 112(%r8, %rax), %xmm10
/* XOR the key at offset 160 into all eight blocks */
334 pxor %xmm12, %xmm3
335 pxor %xmm12, %xmm4
336 pxor %xmm12, %xmm5
337 pxor %xmm12, %xmm6
338 pxor %xmm12, %xmm7
339 pxor %xmm12, %xmm8
340 pxor %xmm12, %xmm9
341 pxor %xmm12, %xmm10
343 // complete loop unrolling
/* keys at offsets 144 and 128 */
344 movdqu 144(%rdi), %xmm1
345 movdqu 128(%rdi), %xmm11
346 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
347 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
348 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
349 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
350 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
351 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
352 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
353 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
354 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
355 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
356 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
357 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
358 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
359 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
360 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
361 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* keys at offsets 112 and 96 */
363 movdqu 112(%rdi), %xmm1
364 movdqu 96(%rdi), %xmm11
365 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
366 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
367 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
368 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
369 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
370 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
371 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
372 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
373 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
374 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
375 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
376 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
377 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
378 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
379 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
380 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* keys at offsets 80 and 64 */
382 movdqu 80(%rdi), %xmm1
383 movdqu 64(%rdi), %xmm11
384 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
385 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
386 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
387 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
388 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
389 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
390 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
391 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
392 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
393 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
394 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
395 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
396 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
397 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
398 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
399 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* keys at offsets 48 and 32 */
401 movdqu 48(%rdi), %xmm1
402 movdqu 32(%rdi), %xmm11
403 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
404 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
405 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
406 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
407 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
408 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
409 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
410 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
411 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
412 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
413 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
414 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
415 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
416 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
417 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
418 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* key at offset 16, then the final round with the key held in xmm2 */
420 movdqu 16(%rdi), %xmm1
421 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
422 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
423 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
424 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
425 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
426 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
427 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
428 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
429 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
430 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
431 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
432 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
433 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
434 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
435 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
436 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
438 movdqu %xmm3, (%rsi, %rax)
439 movdqu %xmm4, 16(%rsi, %rax)
440 movdqu %xmm5, 32(%rsi, %rax)
441 movdqu %xmm6, 48(%rsi, %rax)
442 movdqu %xmm7, 64(%rsi, %rax)
443 movdqu %xmm8, 80(%rsi, %rax)
444 movdqu %xmm9, 96(%rsi, %rax)
445 movdqu %xmm10, 112(%rsi, %rax)
446 // addq $8*16, %rax
447 addq $128, %rax
448 cmpq %r11, %rax
449 jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
450 1: cmpq %rax, %r9
451 je 5f /* nothing left over */
/* tail: keep the keys at offsets 16-144 resident in xmm3-xmm11 */
453 movdqu 16(%rdi), %xmm3
454 movdqu 32(%rdi), %xmm4
455 movdqu 48(%rdi), %xmm5
456 movdqu 64(%rdi), %xmm6
457 movdqu 80(%rdi), %xmm7
458 movdqu 96(%rdi), %xmm8
459 movdqu 112(%rdi), %xmm9
460 movdqu 128(%rdi), %xmm10
461 movdqu 144(%rdi), %xmm11
463 4: movdqu (%r8, %rax), %xmm1
464 pxor %xmm12, %xmm1
/* keys are applied from xmm11 down to xmm3, i.e. offsets 144 down to 16 */
465 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
466 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
467 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
468 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
469 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
470 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
471 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
472 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
473 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
474 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
475 movdqu %xmm1, (%rsi, %rax)
476 addq $16, %rax
477 cmpq %rax, %r9
478 jne 4b /* exits only on exact equality with inputLen */
480 5: xor %eax, %eax /* return value 0 */
481 ret
482 .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
/* intel_aes_encrypt_cbc_128: encrypt inputLen bytes in CBC fashion with the
   11 round keys stored at cx+48 and the chaining value stored at cx+16.
   If inputLen is 0 nothing is read or written and 0 is returned.
   Otherwise the chaining value is loaded into %xmm0 and all 11 round keys
   into %xmm2-%xmm12.  Each 16-byte input block is XORed with %xmm0 and with
   round key 0, taken through nine aesenc rounds and aesenclast, stored to
   the output, and becomes the new chaining value.  After the last block the
   chaining value is written back to cx+16.
   The incoming %rdx (outputLen) is not read; %rdx is reused as the pointer
   to the chaining value.  %rcx and the stack argument are never read.
   The loop exits only when the offset equals inputLen exactly, so inputLen
   must be a multiple of 16 (presumably checked by the caller).
   Always returns 0 in %eax.
   Registers written: %rdi, %rdx, %rax, %xmm0-%xmm12. */
485 /* in %rdi : cx - context
486 in %rsi : output - pointer to output buffer
487 in %rdx : outputLen - pointer to variable for length of output
488 (filled by caller)
489 in %rcx : maxOutputLen - length of output buffer
490 in %r8 : input - pointer to input buffer
491 in %r9 : inputLen - length of input buffer
492 on stack: blocksize - AES blocksize (always 16, unused)
493 */
494 .type intel_aes_encrypt_cbc_128,@function
495 .globl intel_aes_encrypt_cbc_128
496 .align 16
497 intel_aes_encrypt_cbc_128:
498 testq %r9, %r9
499 je 2f /* empty input: leave the stored chaining value untouched */
501 // leaq IV_OFFSET(%rdi), %rdx
502 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
503 leaq 16(%rdi), %rdx /* rdx -> chaining value inside the context */
504 leaq 48(%rdi), %rdi /* rdi -> round key 0 */
506 movdqu (%rdx), %xmm0 /* xmm0 = current chaining value */
507 movdqu (%rdi), %xmm2
508 movdqu 16(%rdi), %xmm3
509 movdqu 32(%rdi), %xmm4
510 movdqu 48(%rdi), %xmm5
511 movdqu 64(%rdi), %xmm6
512 movdqu 80(%rdi), %xmm7
513 movdqu 96(%rdi), %xmm8
514 movdqu 112(%rdi), %xmm9
515 movdqu 128(%rdi), %xmm10
516 movdqu 144(%rdi), %xmm11
517 movdqu 160(%rdi), %xmm12
519 xorl %eax, %eax /* rax = byte offset, starts at 0 */
520 1: movdqu (%r8, %rax), %xmm1
521 pxor %xmm0, %xmm1 /* chain: XOR with the previous output block (or the stored value) */
522 pxor %xmm2, %xmm1 /* XOR with round key 0 */
523 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
524 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
525 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
526 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
527 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
528 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
529 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
530 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
531 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
532 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
533 movdqu %xmm1, (%rsi, %rax)
534 movdqa %xmm1, %xmm0 /* this output block is the next chaining value */
535 addq $16, %rax
536 cmpq %rax, %r9
537 jne 1b /* exits only on exact equality with inputLen */
539 movdqu %xmm0, (%rdx) /* save the chaining value back into the context */
541 2: xor %eax, %eax /* return value 0 */
542 ret
543 .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
/* intel_aes_decrypt_cbc_128: decrypt inputLen bytes in CBC fashion with the
   11 round keys stored at cx+48 and the chaining value stored at cx+16.
   %xmm0 holds the chaining value: initially the one read from cx+16,
   afterwards the most recent input block.
   While at least 128 bytes remain, label 2 decrypts eight blocks per
   iteration (XOR with the key at offset 160, aesdec with the keys at 144
   down to 16, aesdeclast with the key at offset 0).  It then XORs each
   result with the preceding input block, re-reading those blocks from the
   input buffer; all re-reads happen before the first store to the output
   (presumably so that input and output may be the same buffer - confirm
   with callers).
   The remainder is handled one block at a time at label 4, which saves a
   copy of each input block in %xmm13 to serve as the next chaining value.
   On exit the chaining value is written back to cx+16; unlike the
   encrypt routine there is no early exit, so this write also happens when
   inputLen is 0.
   The incoming %rdx (outputLen) is not read; %rdx is reused as the pointer
   to the chaining value.  %rcx and the stack argument are never read.
   The tail loop exits only when the offset equals inputLen exactly, so
   inputLen must be a multiple of 16 (presumably checked by the caller).
   Always returns 0 in %eax.
   Registers written: %rdi, %rdx, %rax, %r11, %xmm0-%xmm13. */
546 /* in %rdi : cx - context
547 in %rsi : output - pointer to output buffer
548 in %rdx : outputLen - pointer to variable for length of output
549 (filled by caller)
550 in %rcx : maxOutputLen - length of output buffer
551 in %r8 : input - pointer to input buffer
552 in %r9 : inputLen - length of input buffer
553 on stack: blocksize - AES blocksize (always 16, unused)
554 */
555 .type intel_aes_decrypt_cbc_128,@function
556 .globl intel_aes_decrypt_cbc_128
557 .align 16
558 intel_aes_decrypt_cbc_128:
559 // leaq IV_OFFSET(%rdi), %rdx
560 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
561 leaq 16(%rdi), %rdx
562 leaq 48(%rdi), %rdi
564 movdqu (%rdx), %xmm0 /* iv */
565 movdqu (%rdi), %xmm2 /* first key block */
566 movdqu 160(%rdi), %xmm12 /* last key block */
567 xorl %eax, %eax
568 cmpq $128, %r9
569 jb 1f
570 leaq -128(%r9), %r11
571 2: movdqu (%r8, %rax), %xmm3 /* 1st data block */
572 movdqu 16(%r8, %rax), %xmm4 /* 2d data block */
573 movdqu 32(%r8, %rax), %xmm5
574 movdqu 48(%r8, %rax), %xmm6
575 movdqu 64(%r8, %rax), %xmm7
576 movdqu 80(%r8, %rax), %xmm8
577 movdqu 96(%r8, %rax), %xmm9
578 movdqu 112(%r8, %rax), %xmm10
/* XOR the key at offset 160 into all eight blocks */
579 pxor %xmm12, %xmm3
580 pxor %xmm12, %xmm4
581 pxor %xmm12, %xmm5
582 pxor %xmm12, %xmm6
583 pxor %xmm12, %xmm7
584 pxor %xmm12, %xmm8
585 pxor %xmm12, %xmm9
586 pxor %xmm12, %xmm10
588 // complete loop unrolling
/* keys at offsets 144 and 128 */
589 movdqu 144(%rdi), %xmm1
590 movdqu 128(%rdi), %xmm11
591 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
592 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
593 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
594 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
595 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
596 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
597 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
598 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
599 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
600 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
601 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
602 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
603 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
604 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
605 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
606 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* keys at offsets 112 and 96 */
608 movdqu 112(%rdi), %xmm1
609 movdqu 96(%rdi), %xmm11
610 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
611 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
612 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
613 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
614 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
615 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
616 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
617 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
618 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
619 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
620 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
621 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
622 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
623 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
624 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
625 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* keys at offsets 80 and 64 */
627 movdqu 80(%rdi), %xmm1
628 movdqu 64(%rdi), %xmm11
629 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
630 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
631 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
632 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
633 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
634 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
635 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
636 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
637 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
638 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
639 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
640 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
641 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
642 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
643 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
644 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* keys at offsets 48 and 32 */
646 movdqu 48(%rdi), %xmm1
647 movdqu 32(%rdi), %xmm11
648 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
649 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
650 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
651 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
652 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
653 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
654 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
655 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
656 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
657 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
658 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
659 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
660 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
661 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
662 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
663 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* key at offset 16, then the final round with the key held in xmm2 */
665 movdqu 16(%rdi), %xmm1
666 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
667 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
668 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
669 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
670 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
671 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
672 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
673 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
674 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
675 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
676 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
677 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
678 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
679 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
680 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
681 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
/* un-chain: block i is XORed with input block i-1, re-read from memory */
683 pxor %xmm0, %xmm3 /* first block uses the carried-in chaining value */
684 movdqu (%r8, %rax), %xmm0
685 pxor %xmm0, %xmm4
686 movdqu 16(%r8, %rax), %xmm0
687 pxor %xmm0, %xmm5
688 movdqu 32(%r8, %rax), %xmm0
689 pxor %xmm0, %xmm6
690 movdqu 48(%r8, %rax), %xmm0
691 pxor %xmm0, %xmm7
692 movdqu 64(%r8, %rax), %xmm0
693 pxor %xmm0, %xmm8
694 movdqu 80(%r8, %rax), %xmm0
695 pxor %xmm0, %xmm9
696 movdqu 96(%r8, %rax), %xmm0
697 pxor %xmm0, %xmm10
698 movdqu 112(%r8, %rax), %xmm0 /* 8th input block becomes the next chaining value */
699 movdqu %xmm3, (%rsi, %rax)
700 movdqu %xmm4, 16(%rsi, %rax)
701 movdqu %xmm5, 32(%rsi, %rax)
702 movdqu %xmm6, 48(%rsi, %rax)
703 movdqu %xmm7, 64(%rsi, %rax)
704 movdqu %xmm8, 80(%rsi, %rax)
705 movdqu %xmm9, 96(%rsi, %rax)
706 movdqu %xmm10, 112(%rsi, %rax)
707 addq $128, %rax
708 cmpq %r11, %rax
709 jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
710 1: cmpq %rax, %r9
711 je 5f /* nothing left over */
/* tail: keep the keys at offsets 16-144 resident in xmm3-xmm11 */
713 movdqu 16(%rdi), %xmm3
714 movdqu 32(%rdi), %xmm4
715 movdqu 48(%rdi), %xmm5
716 movdqu 64(%rdi), %xmm6
717 movdqu 80(%rdi), %xmm7
718 movdqu 96(%rdi), %xmm8
719 movdqu 112(%rdi), %xmm9
720 movdqu 128(%rdi), %xmm10
721 movdqu 144(%rdi), %xmm11
723 4: movdqu (%r8, %rax), %xmm1
724 movdqa %xmm1, %xmm13 /* keep the input block: it is the next chaining value */
725 pxor %xmm12, %xmm1
726 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
727 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
728 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
729 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
730 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
731 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
732 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
733 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
734 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
735 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
736 pxor %xmm0, %xmm1 /* un-chain with the previous input block */
737 movdqu %xmm1, (%rsi, %rax)
738 movdqa %xmm13, %xmm0
739 addq $16, %rax
740 cmpq %rax, %r9
741 jne 4b /* exits only on exact equality with inputLen */
743 5: movdqu %xmm0, (%rdx) /* save the chaining value back into the context */
745 xor %eax, %eax /* return value 0 */
746 ret
747 .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
/* intel_aes_encrypt_init_192: build the encryption round-key schedule for a
   24-byte key.
   The first 16 key bytes are loaded into %xmm1 and the remaining 8 into the
   low half of %xmm3; both are copied to the start of the buffer at (%rsi),
   and %rsi is advanced by 24.  Eight aeskeygenassist / key_expansion192
   pairs follow, one per round-constant immediate (0x01 ... 0x80).
   key_expansion192 is defined outside this part of the file; it presumably
   appends the next key material at (%rsi) and advances %rsi, like
   key_expansion128 does - confirm against its definition.
   Note that, unlike the *_init_128 routines, %eax is not cleared here. */
749 /* in %rdi : the key
750 in %rsi : buffer for expanded key
751 */
752 .type intel_aes_encrypt_init_192,@function
753 .globl intel_aes_encrypt_init_192
754 .align 16
755 intel_aes_encrypt_init_192:
756 movdqu (%rdi), %xmm1 /* key bytes 0-15 */
757 movq 16(%rdi), %xmm3 /* key bytes 16-23 in the low 64 bits */
758 movdqu %xmm1, (%rsi)
759 movq %xmm3, 16(%rsi)
760 leaq 24(%rsi), %rsi /* rsi -> first byte after the copied key */
762 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
763 call key_expansion192
764 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
765 call key_expansion192
766 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
767 call key_expansion192
768 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
769 call key_expansion192
770 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
771 call key_expansion192
772 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
773 call key_expansion192
774 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
775 call key_expansion192
776 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
777 call key_expansion192
779 ret
780 .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
783 /* in %rdi : the key
784 in %rsi : buffer for expanded key
785 */
/* Expands a 192-bit (24-byte) AES key into the decryption key schedule.
 * The expansion itself is the same eight key_expansion192 steps used by
 * intel_aes_encrypt_init_192 (216 bytes written in total). In addition,
 * round keys 1..11 (offsets 16..176) are rewritten in memory with aesimc
 * so they can be fed to aesdec; round keys 0 and 12 are left as expanded.
 *
 * Round keys are 16 bytes but each step emits 24, so completed round keys
 * alternate in position relative to %rsi:
 *   - after steps 1, 3, 5, 7 two round keys have just been completed, at
 *     -32(%rsi) and -16(%rsi);
 *   - after steps 2, 4, 6 the 16 bytes still held in xmm1 are exactly one
 *     round key, stored at -24(%rsi).
 * The running expansion state lives in xmm1/xmm3 and is never passed
 * through aesimc, so later steps continue from the untransformed words.
 *
 * Clobbers: %rsi (advanced by 216), %xmm1-%xmm5.
 * Returns with %eax = 0, left that way by key_expansion192.
 */
786 .type intel_aes_decrypt_init_192,@function
787 .globl intel_aes_decrypt_init_192
788 .align 16
789 intel_aes_decrypt_init_192:
790 movdqu (%rdi), %xmm1 /* xmm1 = key bytes 0..15 */
791 movq 16(%rdi), %xmm3 /* low 64 bits of xmm3 = key bytes 16..23 */
792 movdqu %xmm1, (%rsi)
793 movq %xmm3, 16(%rsi) /* the schedule begins with the raw key */
794 leaq 24(%rsi), %rsi /* rsi = next free position in the schedule */
/* step 1: schedule bytes 24..47, then transform round keys 1 and 2 */
796 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
797 call key_expansion192
798 movups -32(%rsi), %xmm2
799 movups -16(%rsi), %xmm4
800 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
801 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
802 movups %xmm2, -32(%rsi)
803 movups %xmm4, -16(%rsi)
/* step 2: schedule bytes 48..71, then transform round key 3 (from xmm1) */
804 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
805 call key_expansion192
806 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
807 movups %xmm2, -24(%rsi)
/* step 3: schedule bytes 72..95, then transform round keys 4 and 5 */
808 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
809 call key_expansion192
810 movups -32(%rsi), %xmm2
811 movups -16(%rsi), %xmm4
812 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
813 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
814 movups %xmm2, -32(%rsi)
815 movups %xmm4, -16(%rsi)
/* step 4: schedule bytes 96..119, then transform round key 6 (from xmm1) */
816 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
817 call key_expansion192
818 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
819 movups %xmm2, -24(%rsi)
/* step 5: schedule bytes 120..143, then transform round keys 7 and 8 */
820 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
821 call key_expansion192
822 movups -32(%rsi), %xmm2
823 movups -16(%rsi), %xmm4
824 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
825 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
826 movups %xmm2, -32(%rsi)
827 movups %xmm4, -16(%rsi)
/* step 6: schedule bytes 144..167, then transform round key 9 (from xmm1) */
828 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
829 call key_expansion192
830 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
831 movups %xmm2, -24(%rsi)
/* step 7: schedule bytes 168..191, then transform round keys 10 and 11 */
832 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
833 call key_expansion192
834 movups -32(%rsi), %xmm2
835 movups -16(%rsi), %xmm4
836 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
837 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
838 movups %xmm2, -32(%rsi)
839 movups %xmm4, -16(%rsi)
/* step 8: schedule bytes 192..215; round key 12 is deliberately left
 * untransformed */
840 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
841 call key_expansion192
843 ret
844 .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
/* key_expansion192: one step of the 192-bit key expansion (file-local
 * helper; it is not declared .globl). Emits the next 24 bytes (six 32-bit
 * words) of the key schedule.
 *
 * in  %xmm1 : words a0..a3, the 16 bytes emitted by the previous step
 *             (or key bytes 0..15 on the first step)
 * in  %xmm3 : low 64 bits = words b0,b1, the 8 bytes emitted by the
 *             previous step (or key bytes 16..23 on the first step)
 * in  %xmm2 : result of aeskeygenassist on %xmm3 for this step
 * in  %rsi  : where to store the new 24 bytes
 * out %xmm1, %xmm3 : updated to the words just stored, ready for the
 *             next step; %rsi advanced by 24
 * Clobbers: %eax (set to 0), %xmm2, %xmm4, %xmm5.
 * The upper 64 bits of %xmm3 are left holding by-products and are never
 * stored.
 */
847 .type key_expansion192,@function
848 .align 16
849 key_expansion192:
850 pshufd $0x55, %xmm2, %xmm2 /* broadcast dword 1 of the keygenassist result: t in all lanes */
851 xor %eax, %eax
852 movd %eax, %xmm4 /* xmm4 = 0 */
853 shufps $0x10, %xmm1, %xmm4 /* xmm4 = { 0, 0, a1, a0 } */
854 pxor %xmm4, %xmm1 /* xmm1 = { a0, a1, a2^a1, a3^a0 } */
855 shufps $0x8c, %xmm1, %xmm4 /* xmm4 = { 0, a0, a0, a1^a2 } */
856 pxor %xmm2, %xmm1
857 pxor %xmm4, %xmm1 /* xmm1 = { t^a0, t^a0^a1, t^a0^a1^a2, t^a0^a1^a2^a3 } */
858 movdqu %xmm1, (%rsi) /* store the first four new words */
859 addq $16, %rsi
861 pshufd $0xff, %xmm1, %xmm4 /* broadcast the last new word (call it n3) */
862 movd %eax, %xmm5 /* xmm5 = 0 (eax is still zero) */
863 shufps $0x00, %xmm3, %xmm5 /* xmm5 = { 0, 0, b0, b0 } */
864 shufps $0x08, %xmm3, %xmm5 /* xmm5 = { 0, b0, b0, b0 } */
865 pxor %xmm4, %xmm3
866 pxor %xmm5, %xmm3 /* low dwords of xmm3 = { n3^b0, n3^b0^b1 } */
867 movq %xmm3, (%rsi) /* store the remaining two new words */
868 addq $8, %rsi
869 ret
870 .size key_expansion192, .-key_expansion192
873 /* in %rdi : cx - context
874 in %rsi : output - pointer to output buffer
875 in %rdx : outputLen - pointer to variable for length of output
876 (filled by caller)
877 in %rcx : maxOutputLen - length of output buffer
878 in %r8 : input - pointer to input buffer
879 in %r9 : inputLen - length of input buffer
880 on stack: blocksize - AES blocksize (always 16, unused)
881 */
/* AES-192 ECB encryption of inputLen bytes from input to output.
 * The 13 round keys are read from cx+48 (EXPANDED_KEY_OFFSET) at offsets
 * 0..192. Input is processed eight blocks (128 bytes) at a time while at
 * least 128 bytes remain, then one block at a time.
 * %rdx and %rcx are not read by this routine.
 * Returns 0 in %eax.
 * Clobbers: %rax, %rdi, %r11, %xmm1-%xmm14.
 * NOTE(review): the single-block loop only exits when the offset equals
 * inputLen exactly, so inputLen is assumed to be a multiple of 16 --
 * confirm that callers guarantee this.
 */
882 .type intel_aes_encrypt_ecb_192,@function
883 .globl intel_aes_encrypt_ecb_192
884 .align 16
885 intel_aes_encrypt_ecb_192:
886 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
887 leaq 48(%rdi), %rdi /* rdi = start of the round keys */
889 movdqu (%rdi), %xmm2 /* xmm2 = round key 0 (initial whitening) */
890 movdqu 192(%rdi), %xmm14 /* xmm14 = round key 12 (final round) */
891 xorl %eax, %eax /* rax = byte offset into input/output */
892 // cmpq $8*16, %r9
893 cmpq $128, %r9
894 jb 1f /* fewer than 8 blocks: go straight to the single-block loop */
895 // leaq -8*16(%r9), %r11
896 leaq -128(%r9), %r11 /* r11 = last offset at which 8 full blocks still fit */
/* 8-block loop: load eight blocks into xmm3..xmm10 */
897 2: movdqu (%r8, %rax), %xmm3
898 movdqu 16(%r8, %rax), %xmm4
899 movdqu 32(%r8, %rax), %xmm5
900 movdqu 48(%r8, %rax), %xmm6
901 movdqu 64(%r8, %rax), %xmm7
902 movdqu 80(%r8, %rax), %xmm8
903 movdqu 96(%r8, %rax), %xmm9
904 movdqu 112(%r8, %rax), %xmm10
/* xor every block with round key 0 */
905 pxor %xmm2, %xmm3
906 pxor %xmm2, %xmm4
907 pxor %xmm2, %xmm5
908 pxor %xmm2, %xmm6
909 pxor %xmm2, %xmm7
910 pxor %xmm2, %xmm8
911 pxor %xmm2, %xmm9
912 pxor %xmm2, %xmm10
914 // complete loop unrolling
/* rounds 1 and 2: round keys at offsets 16 and 32 (via xmm1 / xmm11) */
915 movdqu 16(%rdi), %xmm1
916 movdqu 32(%rdi), %xmm11
917 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
918 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
919 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
920 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
921 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
922 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
923 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
924 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
925 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
926 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
927 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
928 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
929 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
930 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
931 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
932 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 3 and 4: round keys at offsets 48 and 64 */
934 movdqu 48(%rdi), %xmm1
935 movdqu 64(%rdi), %xmm11
936 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
937 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
938 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
939 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
940 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
941 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
942 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
943 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
944 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
945 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
946 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
947 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
948 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
949 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
950 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
951 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 5 and 6: round keys at offsets 80 and 96 */
953 movdqu 80(%rdi), %xmm1
954 movdqu 96(%rdi), %xmm11
955 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
956 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
957 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
958 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
959 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
960 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
961 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
962 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
963 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
964 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
965 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
966 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
967 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
968 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
969 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
970 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 7 and 8: round keys at offsets 112 and 128 */
972 movdqu 112(%rdi), %xmm1
973 movdqu 128(%rdi), %xmm11
974 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
975 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
976 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
977 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
978 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
979 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
980 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
981 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
982 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
983 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
984 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
985 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
986 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
987 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
988 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
989 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* rounds 9 and 10: round keys at offsets 144 and 160 */
991 movdqu 144(%rdi), %xmm1
992 movdqu 160(%rdi), %xmm11
993 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
994 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
995 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
996 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
997 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
998 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
999 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
1000 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
1001 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
1002 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
1003 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
1004 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
1005 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
1006 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
1007 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
1008 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
/* round 11 (key at offset 176), then the final round with round key 12 */
1010 movdqu 176(%rdi), %xmm1
1011 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
1012 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
1013 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
1014 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
1015 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
1016 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
1017 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
1018 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
1019 .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */
1020 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */
1021 .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */
1022 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */
1023 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */
1024 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */
1025 .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */
1026 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */
/* write the eight ciphertext blocks */
1028 movdqu %xmm3, (%rsi, %rax)
1029 movdqu %xmm4, 16(%rsi, %rax)
1030 movdqu %xmm5, 32(%rsi, %rax)
1031 movdqu %xmm6, 48(%rsi, %rax)
1032 movdqu %xmm7, 64(%rsi, %rax)
1033 movdqu %xmm8, 80(%rsi, %rax)
1034 movdqu %xmm9, 96(%rsi, %rax)
1035 movdqu %xmm10, 112(%rsi, %rax)
1036 // addq $8*16, %rax
1037 addq $128, %rax
1038 cmpq %r11, %rax
1039 jbe 2b /* unsigned: repeat while another 8 blocks fit */
1040 1: cmpq %rax, %r9
1041 je 5f /* nothing left */
/* single-block loop: keep round keys 1..11 resident in xmm3..xmm13 */
1043 movdqu 16(%rdi), %xmm3
1044 movdqu 32(%rdi), %xmm4
1045 movdqu 48(%rdi), %xmm5
1046 movdqu 64(%rdi), %xmm6
1047 movdqu 80(%rdi), %xmm7
1048 movdqu 96(%rdi), %xmm8
1049 movdqu 112(%rdi), %xmm9
1050 movdqu 128(%rdi), %xmm10
1051 movdqu 144(%rdi), %xmm11
1052 movdqu 160(%rdi), %xmm12
1053 movdqu 176(%rdi), %xmm13
1055 4: movdqu (%r8, %rax), %xmm1
1056 pxor %xmm2, %xmm1 /* xor with round key 0 */
1057 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
1058 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
1059 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
1060 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
1061 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
1062 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
1063 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
1064 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
1065 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
1066 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
1067 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
1068 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
1069 movdqu %xmm1, (%rsi, %rax)
1070 addq $16, %rax
1071 cmpq %rax, %r9
1072 jne 4b /* loop until offset == inputLen */
1074 5: xor %eax, %eax /* return 0 */
1075 ret
1076 .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
1079 /* in %rdi : cx - context
1080 in %rsi : output - pointer to output buffer
1081 in %rdx : outputLen - pointer to variable for length of output
1082 (filled by caller)
1083 in %rcx : maxOutputLen - length of output buffer
1084 in %r8 : input - pointer to input buffer
1085 in %r9 : inputLen - length of input buffer
1086 on stack: blocksize - AES blocksize (always 16, unused)
1087 */
/* AES-192 ECB decryption of inputLen bytes from input to output.
 * The 13 round keys are read from cx+48 (EXPANDED_KEY_OFFSET) and applied
 * in reverse order: xor with round key 12, aesdec with round keys 11..1,
 * aesdeclast with round key 0. Input is processed eight blocks (128
 * bytes) at a time while at least 128 bytes remain, then one block at a
 * time.
 * %rdx and %rcx are not read by this routine.
 * Returns 0 in %eax.
 * Clobbers: %rax, %rdi, %r11, %xmm1-%xmm14.
 * NOTE(review): presumably the schedule was produced by
 * intel_aes_decrypt_init_192 (round keys 1..11 already passed through
 * aesimc) -- confirm against the caller.
 * NOTE(review): the single-block loop only exits when the offset equals
 * inputLen exactly, so inputLen is assumed to be a multiple of 16 --
 * confirm that callers guarantee this.
 */
1088 .type intel_aes_decrypt_ecb_192,@function
1089 .globl intel_aes_decrypt_ecb_192
1090 .align 16
1091 intel_aes_decrypt_ecb_192:
1092 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
1093 leaq 48(%rdi), %rdi /* rdi = start of the round keys */
1095 movdqu (%rdi), %xmm2 /* xmm2 = round key 0 (used last) */
1096 movdqu 192(%rdi), %xmm14 /* xmm14 = round key 12 (used first) */
1097 xorl %eax, %eax /* rax = byte offset into input/output */
1098 // cmpq $8*16, %r9
1099 cmpq $128, %r9
1100 jb 1f /* fewer than 8 blocks: go straight to the single-block loop */
1101 // leaq -8*16(%r9), %r11
1102 leaq -128(%r9), %r11 /* r11 = last offset at which 8 full blocks still fit */
/* 8-block loop: load eight blocks into xmm3..xmm10 */
1103 2: movdqu (%r8, %rax), %xmm3
1104 movdqu 16(%r8, %rax), %xmm4
1105 movdqu 32(%r8, %rax), %xmm5
1106 movdqu 48(%r8, %rax), %xmm6
1107 movdqu 64(%r8, %rax), %xmm7
1108 movdqu 80(%r8, %rax), %xmm8
1109 movdqu 96(%r8, %rax), %xmm9
1110 movdqu 112(%r8, %rax), %xmm10
/* xor every block with round key 12 */
1111 pxor %xmm14, %xmm3
1112 pxor %xmm14, %xmm4
1113 pxor %xmm14, %xmm5
1114 pxor %xmm14, %xmm6
1115 pxor %xmm14, %xmm7
1116 pxor %xmm14, %xmm8
1117 pxor %xmm14, %xmm9
1118 pxor %xmm14, %xmm10
1120 // complete loop unrolling
/* round keys at offsets 176 and 160 (via xmm1 / xmm11) */
1121 movdqu 176(%rdi), %xmm1
1122 movdqu 160(%rdi), %xmm11
1123 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1124 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1125 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1126 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1127 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1128 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1129 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1130 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1131 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1132 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1133 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1134 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1135 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1136 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1137 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1138 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* round keys at offsets 144 and 128 */
1140 movdqu 144(%rdi), %xmm1
1141 movdqu 128(%rdi), %xmm11
1142 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1143 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1144 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1145 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1146 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1147 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1148 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1149 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1150 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1151 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1152 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1153 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1154 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1155 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1156 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1157 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* round keys at offsets 112 and 96 */
1159 movdqu 112(%rdi), %xmm1
1160 movdqu 96(%rdi), %xmm11
1161 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1162 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1163 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1164 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1165 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1166 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1167 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1168 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1169 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1170 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1171 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1172 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1173 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1174 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1175 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1176 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* round keys at offsets 80 and 64 */
1178 movdqu 80(%rdi), %xmm1
1179 movdqu 64(%rdi), %xmm11
1180 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1181 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1182 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1183 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1184 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1185 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1186 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1187 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1188 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1189 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1190 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1191 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1192 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1193 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1194 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1195 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* round keys at offsets 48 and 32 */
1197 movdqu 48(%rdi), %xmm1
1198 movdqu 32(%rdi), %xmm11
1199 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1200 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1201 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1202 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1203 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1204 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1205 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1206 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1207 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1208 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1209 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1210 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1211 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1212 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1213 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1214 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* round key at offset 16, then the final round with round key 0 */
1216 movdqu 16(%rdi), %xmm1
1217 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1218 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1219 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1220 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1221 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1222 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1223 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1224 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1225 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
1226 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
1227 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
1228 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
1229 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
1230 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
1231 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
1232 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
/* write the eight plaintext blocks */
1234 movdqu %xmm3, (%rsi, %rax)
1235 movdqu %xmm4, 16(%rsi, %rax)
1236 movdqu %xmm5, 32(%rsi, %rax)
1237 movdqu %xmm6, 48(%rsi, %rax)
1238 movdqu %xmm7, 64(%rsi, %rax)
1239 movdqu %xmm8, 80(%rsi, %rax)
1240 movdqu %xmm9, 96(%rsi, %rax)
1241 movdqu %xmm10, 112(%rsi, %rax)
1242 // addq $8*16, %rax
1243 addq $128, %rax
1244 cmpq %r11, %rax
1245 jbe 2b /* unsigned: repeat while another 8 blocks fit */
1246 1: cmpq %rax, %r9
1247 je 5f /* nothing left */
/* single-block loop: keep round keys 1..11 resident in xmm3..xmm13 */
1249 movdqu 16(%rdi), %xmm3
1250 movdqu 32(%rdi), %xmm4
1251 movdqu 48(%rdi), %xmm5
1252 movdqu 64(%rdi), %xmm6
1253 movdqu 80(%rdi), %xmm7
1254 movdqu 96(%rdi), %xmm8
1255 movdqu 112(%rdi), %xmm9
1256 movdqu 128(%rdi), %xmm10
1257 movdqu 144(%rdi), %xmm11
1258 movdqu 160(%rdi), %xmm12
1259 movdqu 176(%rdi), %xmm13
1261 4: movdqu (%r8, %rax), %xmm1
1262 pxor %xmm14, %xmm1 /* xor with round key 12 */
1263 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
1264 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
1265 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
1266 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
1267 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
1268 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
1269 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
1270 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
1271 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
1272 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
1273 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
1274 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
1275 movdqu %xmm1, (%rsi, %rax)
1276 addq $16, %rax
1277 cmpq %rax, %r9
1278 jne 4b /* loop until offset == inputLen */
1280 5: xor %eax, %eax /* return 0 */
1281 ret
1282 .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
1285 /* in %rdi : cx - context
1286 in %rsi : output - pointer to output buffer
1287 in %rdx : outputLen - pointer to variable for length of output
1288 (filled by caller)
1289 in %rcx : maxOutputLen - length of output buffer
1290 in %r8 : input - pointer to input buffer
1291 in %r9 : inputLen - length of input buffer
1292 on stack: blocksize - AES blocksize (always 16, unused)
1293 */
/* AES-192 CBC encryption of inputLen bytes from input to output.
 * The chaining value (IV) is read from cx+16 (IV_OFFSET) and the 13 round
 * keys from cx+48 (EXPANDED_KEY_OFFSET). Blocks are processed strictly
 * one at a time, each depending on the previous ciphertext block. The
 * last ciphertext block is written back to cx+16 on exit.
 * If inputLen is 0 the routine returns immediately and leaves the IV
 * untouched.
 * The outputLen argument in %rdx is not read; %rdx is reused as the
 * pointer to the IV. %rcx is not read.
 * Returns 0 in %eax.
 * Clobbers: %rax, %rdx, %rdi, %xmm0-%xmm14.
 * NOTE(review): the loop only exits when the offset equals inputLen
 * exactly, so a non-zero inputLen is assumed to be a multiple of 16 --
 * confirm that callers guarantee this.
 */
1294 .type intel_aes_encrypt_cbc_192,@function
1295 .globl intel_aes_encrypt_cbc_192
1296 .align 16
1297 intel_aes_encrypt_cbc_192:
1298 testq %r9, %r9
1299 je 2f /* empty input: nothing to do */
1301 // leaq IV_OFFSET(%rdi), %rdx
1302 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
1303 leaq 16(%rdi), %rdx /* rdx = address of the IV inside the context */
1304 leaq 48(%rdi), %rdi /* rdi = start of the round keys */
1306 movdqu (%rdx), %xmm0 /* xmm0 = chaining value */
/* round keys 0..12 stay resident in xmm2..xmm14 for the whole loop */
1307 movdqu (%rdi), %xmm2
1308 movdqu 16(%rdi), %xmm3
1309 movdqu 32(%rdi), %xmm4
1310 movdqu 48(%rdi), %xmm5
1311 movdqu 64(%rdi), %xmm6
1312 movdqu 80(%rdi), %xmm7
1313 movdqu 96(%rdi), %xmm8
1314 movdqu 112(%rdi), %xmm9
1315 movdqu 128(%rdi), %xmm10
1316 movdqu 144(%rdi), %xmm11
1317 movdqu 160(%rdi), %xmm12
1318 movdqu 176(%rdi), %xmm13
1319 movdqu 192(%rdi), %xmm14
1321 xorl %eax, %eax /* rax = byte offset into input/output */
1322 1: movdqu (%r8, %rax), %xmm1
1323 pxor %xmm0, %xmm1 /* plaintext ^ previous ciphertext (or IV) */
1324 pxor %xmm2, %xmm1 /* xor with round key 0 */
1325 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
1326 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
1327 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
1328 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
1329 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
1330 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
1331 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
1332 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
1333 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
1334 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
1335 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
1336 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
1337 movdqu %xmm1, (%rsi, %rax)
1338 movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
1339 addq $16, %rax
1340 cmpq %rax, %r9
1341 jne 1b /* loop until offset == inputLen */
1343 movdqu %xmm0, (%rdx) /* save the last ciphertext block as the new IV */
1345 2: xor %eax, %eax /* return 0 */
1346 ret
1347 .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
1350 /* in %rdi : cx - context
1351 in %rsi : output - pointer to output buffer
1352 in %rdx : outputLen - pointer to variable for length of output
1353 (filled by caller)
1354 in %rcx : maxOutputLen - length of output buffer
1355 in %r8 : input - pointer to input buffer
1356 in %r9 : inputLen - length of input buffer
1357 on stack: blocksize - AES blocksize (always 16, unused)
1358 */
1359 .type intel_aes_decrypt_cbc_192,@function
1360 .globl intel_aes_decrypt_cbc_192
1361 .align 16
1362 intel_aes_decrypt_cbc_192:
1363 leaq 16(%rdi), %rdx
1364 leaq 48(%rdi), %rdi
1366 movdqu (%rdx), %xmm0
1367 movdqu (%rdi), %xmm2
1368 movdqu 192(%rdi), %xmm14
1369 xorl %eax, %eax
1370 cmpq $128, %r9
1371 jb 1f
1372 leaq -128(%r9), %r11
1373 2: movdqu (%r8, %rax), %xmm3
1374 movdqu 16(%r8, %rax), %xmm4
1375 movdqu 32(%r8, %rax), %xmm5
1376 movdqu 48(%r8, %rax), %xmm6
1377 movdqu 64(%r8, %rax), %xmm7
1378 movdqu 80(%r8, %rax), %xmm8
1379 movdqu 96(%r8, %rax), %xmm9
1380 movdqu 112(%r8, %rax), %xmm10
1381 pxor %xmm14, %xmm3
1382 pxor %xmm14, %xmm4
1383 pxor %xmm14, %xmm5
1384 pxor %xmm14, %xmm6
1385 pxor %xmm14, %xmm7
1386 pxor %xmm14, %xmm8
1387 pxor %xmm14, %xmm9
1388 pxor %xmm14, %xmm10
1390 // complete loop unrolling
1391 movdqu 176(%rdi), %xmm1
1392 movdqu 160(%rdi), %xmm11
1393 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1394 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1395 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1396 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1397 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1398 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1399 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1400 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1401 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1402 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1403 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1404 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1405 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1406 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1407 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1408 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
1410 movdqu 144(%rdi), %xmm1
1411 movdqu 128(%rdi), %xmm11
1412 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1413 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1414 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1415 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1416 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1417 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1418 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1419 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1420 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1421 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1422 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1423 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1424 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1425 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1426 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1427 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
1429 movdqu 112(%rdi), %xmm1
1430 movdqu 96(%rdi), %xmm11
1431 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1432 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1433 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1434 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1435 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1436 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1437 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1438 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1439 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1440 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1441 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1442 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1443 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1444 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1445 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1446 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
1448 movdqu 80(%rdi), %xmm1
1449 movdqu 64(%rdi), %xmm11
1450 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1451 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1452 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1453 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1454 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1455 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1456 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1457 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1458 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1459 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1460 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1461 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1462 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1463 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1464 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1465 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
1467 movdqu 48(%rdi), %xmm1
1468 movdqu 32(%rdi), %xmm11
1469 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1470 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1471 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1472 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1473 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1474 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1475 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1476 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1477 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
1478 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
1479 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
1480 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
1481 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
1482 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
1483 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
1484 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
1486 movdqu 16(%rdi), %xmm1
1487 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
1488 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
1489 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
1490 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
1491 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
1492 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
1493 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
1494 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
1495 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
1496 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
1497 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
1498 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
1499 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
1500 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
1501 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
1502 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
1504 pxor %xmm0, %xmm3
1505 movdqu (%r8, %rax), %xmm0
1506 pxor %xmm0, %xmm4
1507 movdqu 16(%r8, %rax), %xmm0
1508 pxor %xmm0, %xmm5
1509 movdqu 32(%r8, %rax), %xmm0
1510 pxor %xmm0, %xmm6
1511 movdqu 48(%r8, %rax), %xmm0
1512 pxor %xmm0, %xmm7
1513 movdqu 64(%r8, %rax), %xmm0
1514 pxor %xmm0, %xmm8
1515 movdqu 80(%r8, %rax), %xmm0
1516 pxor %xmm0, %xmm9
1517 movdqu 96(%r8, %rax), %xmm0
1518 pxor %xmm0, %xmm10
1519 movdqu 112(%r8, %rax), %xmm0
1520 movdqu %xmm3, (%rsi, %rax)
1521 movdqu %xmm4, 16(%rsi, %rax)
1522 movdqu %xmm5, 32(%rsi, %rax)
1523 movdqu %xmm6, 48(%rsi, %rax)
1524 movdqu %xmm7, 64(%rsi, %rax)
1525 movdqu %xmm8, 80(%rsi, %rax)
1526 movdqu %xmm9, 96(%rsi, %rax)
1527 movdqu %xmm10, 112(%rsi, %rax)
1528 addq $128, %rax
1529 cmpq %r11, %rax
1530 jbe 2b
1531 1: cmpq %rax, %r9
1532 je 5f
1534 movdqu 16(%rdi), %xmm3
1535 movdqu 32(%rdi), %xmm4
1536 movdqu 48(%rdi), %xmm5
1537 movdqu 64(%rdi), %xmm6
1538 movdqu 80(%rdi), %xmm7
1539 movdqu 96(%rdi), %xmm8
1540 movdqu 112(%rdi), %xmm9
1541 movdqu 128(%rdi), %xmm10
1542 movdqu 144(%rdi), %xmm11
1543 movdqu 160(%rdi), %xmm12
1544 movdqu 176(%rdi), %xmm13
1546 4: movdqu (%r8, %rax), %xmm1
1547 movdqa %xmm1, %xmm15
1548 pxor %xmm14, %xmm1
1549 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
1550 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
1551 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
1552 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
1553 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
1554 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
1555 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
1556 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
1557 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
1558 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
1559 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
1560 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
1561 pxor %xmm0, %xmm1
1562 movdqu %xmm1, (%rsi, %rax)
1563 movdqa %xmm15, %xmm0
1564 addq $16, %rax
1565 cmpq %rax, %r9
1566 jne 4b
1568 5: movdqu %xmm0, (%rdx)
1570 xor %eax, %eax
1571 ret
1572 .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
/*
 * intel_aes_encrypt_init_256 -- build the AES-256 encryption key schedule.
 *
 * in %rdi : the key (32 bytes)
 * in %rsi : buffer for expanded key (15 round keys = 240 bytes are written)
 *
 * Returns 0 in %eax.  Uses %xmm1-%xmm4 and %xmm6 as scratch and advances
 * %rsi past the keys written so far.
 *
 * Register roles while expanding:
 *   %xmm1 : most recent even-numbered round key (0, 2, 4, ...)
 *   %xmm3 : most recent odd-numbered round key  (1, 3, 5, ...)
 *   %xmm2 : aeskeygenassist result consumed by key_expansion256
 */
	.type intel_aes_encrypt_init_256,@function
	.globl intel_aes_encrypt_init_256
	.align 16
intel_aes_encrypt_init_256:
	/* Cleared before the first helper call and never written again, so
	   it is also the value returned to the caller. */
	xorl	%eax, %eax

	/* Round keys 0 and 1 are the raw key itself. */
	movdqu	16(%rdi), %xmm3
	movdqu	(%rdi), %xmm1
	movdqu	%xmm3, 16(%rsi)
	movdqu	%xmm1, (%rsi)
	leaq	32(%rsi), %rsi

	/* Six full steps; each call stores two round keys (2..13) and moves
	   %rsi forward by 32.  The immediate is the round constant. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256

	/* Final half step: only round key 14 is still missing, so the first
	   half of key_expansion256 is repeated inline and nothing is derived
	   for %xmm3. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of the assist result */
	pxor	%xmm6, %xmm6			/* the shufps trick needs dword 0 of %xmm6 == 0 */
	shufps	$0x10, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm6, %xmm1			/* %xmm1 = running XOR of its own dwords */
	pxor	%xmm2, %xmm1			/* fold in the assist word */
	movdqu	%xmm1, (%rsi)			/* round key 14 */

	ret
	.size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
/*
 * intel_aes_decrypt_init_256 -- build the AES-256 decryption key schedule.
 *
 * in %rdi : the key (32 bytes)
 * in %rsi : buffer for expanded key (15 round keys = 240 bytes are written)
 *
 * The schedule is generated exactly as for encryption, but round keys
 * 1..13 are stored after passing through aesimc (InvMixColumns), which is
 * the form the aesdec instruction expects.  Round keys 0 and 14 are stored
 * unchanged.  The untransformed keys stay in %xmm1/%xmm3 so that the
 * expansion itself keeps working on the original values.
 *
 * Returns 0 in %eax.  Uses %xmm1-%xmm6 as scratch and advances %rsi.
 */
	.type intel_aes_decrypt_init_256,@function
	.globl intel_aes_decrypt_init_256
	.align 16
intel_aes_decrypt_init_256:
	/* Cleared before the first helper call and never written again, so
	   it is also the value returned to the caller. */
	xorl	%eax, %eax

	movdqu	(%rdi), %xmm1			/* round key 0 */
	movdqu	16(%rdi), %xmm3			/* round key 1 (untransformed) */
	.byte 0x66,0x0f,0x38,0xdb,0xe3		/* aesimc %xmm3, %xmm4 */
	movdqu	%xmm1, (%rsi)			/* key 0 is stored as-is */
	movdqu	%xmm4, 16(%rsi)			/* key 1 is stored transformed */
	leaq	32(%rsi), %rsi

	/* Each step: key_expansion256 stores the next two plain round keys
	   and advances %rsi by 32; the two slots just written (-32 and -16)
	   are then overwritten with their aesimc images. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm5, -16(%rsi)
	movdqu	%xmm4, -32(%rsi)

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm5, -16(%rsi)
	movdqu	%xmm4, -32(%rsi)

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm5, -16(%rsi)
	movdqu	%xmm4, -32(%rsi)

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm5, -16(%rsi)
	movdqu	%xmm4, -32(%rsi)

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm5, -16(%rsi)
	movdqu	%xmm4, -32(%rsi)

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm5, -16(%rsi)
	movdqu	%xmm4, -32(%rsi)

	/* Final half step: derive round key 14 inline (first half of
	   key_expansion256) and store it untransformed. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of the assist result */
	pxor	%xmm6, %xmm6			/* the shufps trick needs dword 0 of %xmm6 == 0 */
	shufps	$0x10, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm6, %xmm1			/* %xmm1 = running XOR of its own dwords */
	pxor	%xmm2, %xmm1			/* fold in the assist word */
	movdqu	%xmm1, (%rsi)			/* round key 14 */

	ret
	.size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
/*
 * key_expansion256 -- one full AES-256 key-schedule step (file-local helper).
 *
 * in  %xmm1 : previous even-numbered round key
 * in  %xmm3 : previous odd-numbered round key
 * in  %xmm2 : result of aeskeygenassist on %xmm3 with this step's round
 *             constant (computed by the caller)
 * in  %rsi  : where to store the two new round keys
 *
 * out %xmm1 : next even-numbered round key, also stored at  0(%rsi on entry)
 * out %xmm3 : next odd-numbered round key,  also stored at 16(%rsi on entry)
 * out %rsi  : advanced by 32
 *
 * Clobbers %xmm2, %xmm4, %xmm6 and the flags.  No other general-purpose
 * register is read or written; in particular %eax is left alone, so a
 * caller may keep its return value there across the call.
 */
	.type key_expansion256,@function
	.align 16
key_expansion256:
	/* Zero the scratch register locally rather than copying a zero in
	   from %eax, so the helper does not depend on the caller's %eax. */
	pxor	%xmm6, %xmm6

	/* --- even key: %xmm1 = prefix-XOR(%xmm1) ^ broadcast(dword 3 of %xmm2) --- */
	pshufd	$0xff, %xmm2, %xmm2
	/* With dword 0 of %xmm6 equal to zero, the two shufps/pxor pairs
	   turn (k0,k1,k2,k3) into (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3). */
	shufps	$0x10, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm2, %xmm1
	pxor	%xmm6, %xmm1
	movdqu	%xmm1, (%rsi)
	addq	$16, %rsi

	/* --- odd key: same construction on %xmm3.  The assist word is taken
	   from the new %xmm1 with round constant 0, and dword 2 of the result
	   is broadcast instead of dword 3.  %xmm6 is not cleared again: both
	   shufps above copy dword 0 of %xmm6 into dword 0 of the result, so it
	   is still zero here, and that is all the next shufps relies on. --- */
	.byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00	/* aeskeygenassist $0, %xmm1, %xmm4 */
	pshufd	$0xaa, %xmm4, %xmm4
	shufps	$0x10, %xmm3, %xmm6
	pxor	%xmm6, %xmm3
	shufps	$0x8c, %xmm3, %xmm6
	pxor	%xmm4, %xmm3
	pxor	%xmm6, %xmm3
	movdqu	%xmm3, (%rsi)
	addq	$16, %rsi
	ret
	.size key_expansion256, .-key_expansion256
/*
 * intel_aes_encrypt_ecb_256 -- AES-256 ECB encryption with AES-NI.
 *
 * in %rdi : cx - context; the expanded key (15 round keys) starts 48 bytes
 *           in (EXPANDED_KEY_OFFSET at the top of this file)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; not touched here)
 * in %rcx : maxOutputLen - length of output buffer (not read here)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes.  Must be a
 *           multiple of 16: the single-block loop stops only when the
 *           running offset equals inputLen exactly.
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  Clobbers %rdi, %r11, %xmm0-%xmm15 and the flags.
 *
 * Layout: while at least 128 bytes remain, eight blocks are encrypted
 * side by side (%xmm3-%xmm10) with the round keys streamed through
 * %xmm1/%xmm11; whatever is left is handled one block at a time with all
 * fifteen round keys resident in registers.
 */
	.type intel_aes_encrypt_ecb_256,@function
	.globl intel_aes_encrypt_ecb_256
	.align 16
intel_aes_encrypt_ecb_256:
	leaq	48(%rdi), %rdi			/* %rdi = &expanded key */

	movdqu	(%rdi), %xmm2			/* round key 0  (whitening) */
	movdqu	224(%rdi), %xmm15		/* round key 14 (last round) */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */
	cmpq	$128, %r9			/* fewer than 8 blocks? */
	jb	.Lenc_ecb_256_tail
	leaq	-128(%r9), %r11			/* last offset at which 8 blocks still fit */

.Lenc_ecb_256_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm2, %xmm3
	pxor	%xmm2, %xmm4
	pxor	%xmm2, %xmm5
	pxor	%xmm2, %xmm6
	pxor	%xmm2, %xmm7
	pxor	%xmm2, %xmm8
	pxor	%xmm2, %xmm9
	pxor	%xmm2, %xmm10

	/* Rounds fully unrolled, two round keys fetched at a time. */

	/* rounds 1 and 2 */
	movdqu	16(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 3 and 4 */
	movdqu	48(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 5 and 6 */
	movdqu	80(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 7 and 8 */
	movdqu	112(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 9 and 10 */
	movdqu	144(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 11 and 12 */
	movdqu	176(%rdi), %xmm1
	movdqu	192(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* round 13, then the last round with key 14 */
	movdqu	208(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdf	/* aesenclast %xmm15, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe7	/* aesenclast %xmm15, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xef	/* aesenclast %xmm15, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf7	/* aesenclast %xmm15, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xff	/* aesenclast %xmm15, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc7	/* aesenclast %xmm15, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd7	/* aesenclast %xmm15, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax
	cmpq	%r11, %rax			/* offset <= inputLen - 128 ? (unsigned) */
	jbe	.Lenc_ecb_256_bulk

.Lenc_ecb_256_tail:
	cmpq	%rax, %r9			/* nothing left? */
	je	.Lenc_ecb_256_done

	/* Single-block path.  All fifteen round keys are loaded once:
	   key 0 in %xmm0, keys 1..13 in %xmm2..%xmm14, key 14 already in
	   %xmm15.  The loop body therefore reads memory only for the data. */
	movdqu	(%rdi), %xmm0
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

.Lenc_ecb_256_block:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1			/* whitening with round key 0 */
	.byte 0x66,0x0f,0x38,0xdc,0xca		/* aesenc %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Lenc_ecb_256_block

.Lenc_ecb_256_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
/*
 * intel_aes_decrypt_ecb_256 -- AES-256 ECB decryption with AES-NI.
 *
 * in %rdi : cx - context; the expanded decryption key (15 round keys)
 *           starts 48 bytes in (EXPANDED_KEY_OFFSET at the top of this file)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; not touched here)
 * in %rcx : maxOutputLen - length of output buffer (not read here)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes.  Must be a
 *           multiple of 16: the single-block loop stops only when the
 *           running offset equals inputLen exactly.
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  Clobbers %rdi, %r11, %xmm0-%xmm15 and the flags.
 *
 * The round keys are applied in reverse: key 14 is XORed in first,
 * keys 13..1 go through aesdec and key 0 through aesdeclast.
 *
 * Layout: while at least 128 bytes remain, eight blocks are decrypted
 * side by side (%xmm3-%xmm10) with the round keys streamed through
 * %xmm1/%xmm11; whatever is left is handled one block at a time with all
 * fifteen round keys resident in registers.
 */
	.type intel_aes_decrypt_ecb_256,@function
	.globl intel_aes_decrypt_ecb_256
	.align 16
intel_aes_decrypt_ecb_256:
	leaq	48(%rdi), %rdi			/* %rdi = &expanded key */

	movdqu	(%rdi), %xmm2			/* round key 0  (last round) */
	movdqu	224(%rdi), %xmm15		/* round key 14 (whitening) */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */
	cmpq	$128, %r9			/* fewer than 8 blocks? */
	jb	.Ldec_ecb_256_tail
	leaq	-128(%r9), %r11			/* last offset at which 8 blocks still fit */

.Ldec_ecb_256_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm15, %xmm3
	pxor	%xmm15, %xmm4
	pxor	%xmm15, %xmm5
	pxor	%xmm15, %xmm6
	pxor	%xmm15, %xmm7
	pxor	%xmm15, %xmm8
	pxor	%xmm15, %xmm9
	pxor	%xmm15, %xmm10

	/* Rounds fully unrolled, two round keys fetched at a time. */

	/* round keys 13 and 12 */
	movdqu	208(%rdi), %xmm1
	movdqu	192(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 11 and 10 */
	movdqu	176(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 9 and 8 */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 7 and 6 */
	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 5 and 4 */
	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 3 and 2 */
	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round key 1, then the last round with key 0 */
	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax
	cmpq	%r11, %rax			/* offset <= inputLen - 128 ? (unsigned) */
	jbe	.Ldec_ecb_256_bulk

.Ldec_ecb_256_tail:
	cmpq	%rax, %r9			/* nothing left? */
	je	.Ldec_ecb_256_done

	/* Single-block path.  All fifteen round keys are loaded once:
	   key 0 in %xmm0, keys 1..13 in %xmm2..%xmm14, key 14 already in
	   %xmm15.  The loop body therefore reads memory only for the data. */
	movdqu	(%rdi), %xmm0
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

.Ldec_ecb_256_block:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm15, %xmm1			/* whitening with round key 14 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xca		/* aesdec %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xc8		/* aesdeclast %xmm0, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Ldec_ecb_256_block

.Ldec_ecb_256_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
/*
 * intel_aes_encrypt_cbc_256 -- AES-256 CBC encryption with AES-NI.
 *
 * in %rdi : cx - context; the IV / chaining value sits 16 bytes in and the
 *           expanded key 48 bytes in (IV_OFFSET and EXPANDED_KEY_OFFSET
 *           at the top of this file)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller); the incoming value is not read, the
 *           register is reused as a pointer to the IV
 * in %rcx : maxOutputLen - length of output buffer (not read here)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes.  Must be a
 *           multiple of 16: the loop stops only when the running offset
 *           equals inputLen exactly.  With inputLen == 0 nothing is read
 *           or written, the IV included.
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  After the last block the final ciphertext block is
 * written back to the context as the new chaining value.
 *
 * Register roles inside the loop:
 *   %xmm0        : chaining value (IV, then the previous ciphertext block)
 *   %xmm1        : block being encrypted
 *   %xmm2-%xmm7  : round keys 1..6
 *   %xmm8        : round key 0 at the top of each iteration, round key 7
 *                  in the middle -- every xmm register is in use, so this
 *                  one is shared and reloaded twice per block
 *   %xmm9-%xmm15 : round keys 8..14
 */
	.type intel_aes_encrypt_cbc_256,@function
	.globl intel_aes_encrypt_cbc_256
	.align 16
intel_aes_encrypt_cbc_256:
	testq	%r9, %r9
	jz	.Lenc_cbc_256_done		/* empty input: leave the IV untouched */

	leaq	16(%rdi), %rdx			/* %rdx = &IV */
	leaq	48(%rdi), %rdi			/* %rdi = &expanded key */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */

	movdqu	(%rdx), %xmm0
	movdqu	224(%rdi), %xmm15
	movdqu	208(%rdi), %xmm14
	movdqu	192(%rdi), %xmm13
	movdqu	176(%rdi), %xmm12
	movdqu	160(%rdi), %xmm11
	movdqu	144(%rdi), %xmm10
	movdqu	128(%rdi), %xmm9
	movdqu	96(%rdi), %xmm7
	movdqu	80(%rdi), %xmm6
	movdqu	64(%rdi), %xmm5
	movdqu	48(%rdi), %xmm4
	movdqu	32(%rdi), %xmm3
	movdqu	16(%rdi), %xmm2
	movdqu	(%rdi), %xmm8			/* round key 0 for the first block */

.Lenc_cbc_256_block:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm8, %xmm1			/* whitening with round key 0 ... */
	pxor	%xmm0, %xmm1			/* ... and CBC chaining */
	movdqu	112(%rdi), %xmm8		/* %xmm8 now holds round key 7 */
	.byte 0x66,0x0f,0x38,0xdc,0xca		/* aesenc %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	movdqu	(%rdi), %xmm8			/* back to round key 0 for the next block */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
	movdqa	%xmm1, %xmm0			/* ciphertext becomes the chaining value */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Lenc_cbc_256_block

	movdqu	%xmm0, (%rdx)			/* save the chaining value for the next call */

.Lenc_cbc_256_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
2237 /* in %rdi : cx - context
2238 in %rsi : output - pointer to output buffer
2239 in %rdx : outputLen - pointer to variable for length of output
2240 (filled by caller)
2241 in %rcx : maxOutputLen - length of output buffer
2242 in %r8 : input - pointer to input buffer
2243 in %r9 : inputLen - length of input buffer
2244 on stack: blocksize - AES blocksize (always 16, unused)
2245 */
2246 .type intel_aes_decrypt_cbc_256,@function
2247 .globl intel_aes_decrypt_cbc_256
2248 .align 16
2249 intel_aes_decrypt_cbc_256:
/* CBC decryption with a 15-entry round-key schedule (14 AES rounds:
   13 aesdec + 1 aesdeclast), applied from the key at offset 224 down to
   the key at offset 0.
   NOTE(review): the schedule at cx+48 is presumably the one prepared by the
   matching decrypt-init routine for use with aesdec; that code is not
   visible here -- confirm.

   Two phases:
     label 2: while at least 128 bytes remain, decrypt 8 blocks at a time
              (the blocks are independent, so the rounds are interleaved).
     label 4: decrypt any remaining blocks one at a time.
   The last ciphertext block processed is stored back to cx+16 as the IV
   for a following call.  Always returns 0 in %eax.

   Register roles:
     %rax   byte offset of the current block/group in input and output
     %rdx   address of the IV slot (cx + 16); the incoming outputLen
            argument in %rdx is overwritten and never written through
     %rdi   address of the expanded key (cx + 48)
     %r11   inputLen - 128, only set and used when inputLen >= 128
     %xmm0  chaining value (IV, then the previous ciphertext block)
     %xmm15 round key at offset 224
   %rsi (output), %r8 (input) and %r9 (inputLen) are used as passed in. */
/* The literals 16 and 48 below are IV_OFFSET and EXPANDED_KEY_OFFSET, as the
   two commented-out lines show. */
2250 // leaq IV_OFFSET(%rdi), %rdx
2251 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
2252 leaq 16(%rdi), %rdx
2253 leaq 48(%rdi), %rdi
/* xmm0 = IV, xmm2 = key at offset 0 (used by aesdeclast in the 8-block
   loop), xmm15 = key at offset 224 (initial xor in both loops). */
2255 movdqu (%rdx), %xmm0
2256 movdqu (%rdi), %xmm2
2257 movdqu 224(%rdi), %xmm15
/* Writing %eax clears all of %rax, which is then used as a 64-bit offset. */
2258 xorl %eax, %eax
/* Fewer than 8 blocks (128 bytes, unsigned compare): skip the wide loop. */
2259 // cmpq $8*16, %r9
2260 cmpq $128, %r9
2261 jb 1f
/* r11 = largest offset at which a full 8-block group still fits. */
2262 // leaq -8*16(%r9), %r11
2263 leaq -128(%r9), %r11
/* 8-block loop: load eight ciphertext blocks into xmm3..xmm10. */
2264 2: movdqu (%r8, %rax), %xmm3
2265 movdqu 16(%r8, %rax), %xmm4
2266 movdqu 32(%r8, %rax), %xmm5
2267 movdqu 48(%r8, %rax), %xmm6
2268 movdqu 64(%r8, %rax), %xmm7
2269 movdqu 80(%r8, %rax), %xmm8
2270 movdqu 96(%r8, %rax), %xmm9
2271 movdqu 112(%r8, %rax), %xmm10
/* Initial xor of every block with the key at offset 224. */
2272 pxor %xmm15, %xmm3
2273 pxor %xmm15, %xmm4
2274 pxor %xmm15, %xmm5
2275 pxor %xmm15, %xmm6
2276 pxor %xmm15, %xmm7
2277 pxor %xmm15, %xmm8
2278 pxor %xmm15, %xmm9
2279 pxor %xmm15, %xmm10
/* The 13 aesdec rounds follow, fully unrolled.  Round keys are fetched from
   memory two at a time into xmm1/xmm11 (offsets 208/192, 176/160, ... 48/32,
   then 16 alone) and each is applied to all eight blocks before moving on. */
2281 // complete loop unrolling
2282 movdqu 208(%rdi), %xmm1
2283 movdqu 192(%rdi), %xmm11
2284 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2285 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2286 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2287 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2288 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2289 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2290 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2291 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2292 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
2293 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
2294 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
2295 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
2296 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
2297 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
2298 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
2299 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* Round keys at offsets 176 and 160. */
2301 movdqu 176(%rdi), %xmm1
2302 movdqu 160(%rdi), %xmm11
2303 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2304 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2305 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2306 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2307 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2308 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2309 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2310 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2311 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
2312 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
2313 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
2314 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
2315 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
2316 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
2317 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
2318 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* Round keys at offsets 144 and 128. */
2320 movdqu 144(%rdi), %xmm1
2321 movdqu 128(%rdi), %xmm11
2322 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2323 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2324 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2325 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2326 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2327 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2328 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2329 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2330 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
2331 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
2332 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
2333 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
2334 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
2335 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
2336 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
2337 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* Round keys at offsets 112 and 96. */
2339 movdqu 112(%rdi), %xmm1
2340 movdqu 96(%rdi), %xmm11
2341 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2342 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2343 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2344 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2345 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2346 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2347 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2348 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2349 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
2350 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
2351 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
2352 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
2353 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
2354 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
2355 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
2356 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* Round keys at offsets 80 and 64. */
2358 movdqu 80(%rdi), %xmm1
2359 movdqu 64(%rdi), %xmm11
2360 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2361 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2362 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2363 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2364 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2365 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2366 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2367 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2368 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
2369 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
2370 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
2371 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
2372 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
2373 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
2374 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
2375 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* Round keys at offsets 48 and 32. */
2377 movdqu 48(%rdi), %xmm1
2378 movdqu 32(%rdi), %xmm11
2379 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2380 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2381 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2382 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2383 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2384 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2385 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2386 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2387 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
2388 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
2389 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
2390 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
2391 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
2392 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
2393 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
2394 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
/* Last aesdec round (key at offset 16), then aesdeclast with the key at
   offset 0, which has stayed in %xmm2 for the whole loop. */
2396 movdqu 16(%rdi), %xmm1
2397 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
2398 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
2399 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
2400 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
2401 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
2402 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
2403 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
2404 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
2405 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
2406 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
2407 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
2408 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
2409 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
2410 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
2411 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
2412 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
/* CBC unchaining: block 0 is xored with the incoming chaining value, and
   blocks 1..7 with ciphertext blocks 0..6, re-read from the input through
   %xmm0.  The final load leaves ciphertext block 7 in %xmm0 as the chaining
   value for the next group.  Every input read happens before the first
   output store below, so input and output may be the same buffer. */
2414 pxor %xmm0, %xmm3
2415 movdqu (%r8, %rax), %xmm0
2416 pxor %xmm0, %xmm4
2417 movdqu 16(%r8, %rax), %xmm0
2418 pxor %xmm0, %xmm5
2419 movdqu 32(%r8, %rax), %xmm0
2420 pxor %xmm0, %xmm6
2421 movdqu 48(%r8, %rax), %xmm0
2422 pxor %xmm0, %xmm7
2423 movdqu 64(%r8, %rax), %xmm0
2424 pxor %xmm0, %xmm8
2425 movdqu 80(%r8, %rax), %xmm0
2426 pxor %xmm0, %xmm9
2427 movdqu 96(%r8, %rax), %xmm0
2428 pxor %xmm0, %xmm10
2429 movdqu 112(%r8, %rax), %xmm0
/* Store the eight plaintext blocks. */
2430 movdqu %xmm3, (%rsi, %rax)
2431 movdqu %xmm4, 16(%rsi, %rax)
2432 movdqu %xmm5, 32(%rsi, %rax)
2433 movdqu %xmm6, 48(%rsi, %rax)
2434 movdqu %xmm7, 64(%rsi, %rax)
2435 movdqu %xmm8, 80(%rsi, %rax)
2436 movdqu %xmm9, 96(%rsi, %rax)
2437 movdqu %xmm10, 112(%rsi, %rax)
/* Advance one group; loop again while another full group fits
   (unsigned rax <= inputLen - 128). */
2438 // addq $8*16, %rax
2439 addq $128, %rax
2440 cmpq %r11, %rax
2441 jbe 2b
/* Tail: done if everything was consumed (this is also the path taken for
   inputLen == 0, which then just rewrites the unchanged IV at label 5). */
2442 1: cmpq %rax, %r9
2443 je 5f
/* Preload the keys at offsets 16..208 into xmm2..xmm14 for the single-block
   loop.  %xmm2 is repurposed here: it no longer holds the key at offset 0. */
2445 movdqu 16(%rdi), %xmm2
2446 movdqu 32(%rdi), %xmm3
2447 movdqu 48(%rdi), %xmm4
2448 movdqu 64(%rdi), %xmm5
2449 movdqu 80(%rdi), %xmm6
2450 movdqu 96(%rdi), %xmm7
2451 movdqu 112(%rdi), %xmm8
2452 movdqu 128(%rdi), %xmm9
2453 movdqu 144(%rdi), %xmm10
2454 movdqu 160(%rdi), %xmm11
2455 movdqu 176(%rdi), %xmm12
2456 movdqu 192(%rdi), %xmm13
2457 movdqu 208(%rdi), %xmm14
/* Single-block loop: xmm1 = ciphertext ^ key at offset 224, then 13 aesdec
   rounds walking the keys downwards. */
2459 4: movdqu (%r8, %rax), %xmm1
2460 pxor %xmm15, %xmm1
2461 .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
2462 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
2463 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
2464 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
2465 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
2466 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
2467 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
/* The key at offset 112 has been consumed; borrow %xmm8 for the key at
   offset 0, which aesdeclast needs below. */
2468 movdqu (%rdi), %xmm8
2469 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
2470 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
2471 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
2472 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
2473 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
2474 .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
2475 .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
/* Restore the key at offset 112 into %xmm8 for the next iteration. */
2476 movdqu 112(%rdi), %xmm8
/* Undo the chaining, then pick up this ciphertext block as the next
   chaining value. */
2477 pxor %xmm0, %xmm1
2478 movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
2479 movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
/* The loop exits only when %rax equals inputLen exactly, so inputLen is
   expected to be a multiple of 16 (this routine does not check that). */
2480 addq $16, %rax
2481 cmpq %rax, %r9
2482 jne 4b
/* Persist the chaining value as the IV for a following call; return 0. */
2484 5: movdqu %xmm0, (%rdx)
2486 xor %eax, %eax
2487 ret
2488 .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256