|
1 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
4 |
|
5 .text |
|
6 |
|
7 #define IV_OFFSET 16 |
|
8 #define EXPANDED_KEY_OFFSET 48 |
|
9 |
|
10 |
|
/*
 * intel_aes_encrypt_init_128: AES-128 key schedule for encryption.
 *
 *   in  %rdi : the key (16 bytes are read)
 *   in  %rsi : buffer for the expanded key; 11 round keys of 16 bytes
 *              each (176 bytes) are written
 *   out %eax : 0
 *
 * Clobbers %rsi, %xmm1, %xmm2, %xmm3 and the flags.
 *
 * AES-NI instructions are emitted as raw opcode bytes (presumably so the
 * file still assembles with tools that lack the AES-NI mnemonics); the
 * comment next to each .byte line names the instruction it encodes.
 */
	.globl intel_aes_encrypt_init_128
	.type intel_aes_encrypt_init_128,@function
	.align 16
intel_aes_encrypt_init_128:
	xorl	%eax, %eax		/* key_expansion128 reads %eax as zero; also the return value */
	movups	(%rdi), %xmm1		/* round key 0 is the cipher key itself */
	movups	%xmm1, (%rsi)
	leaq	16(%rsi), %rsi		/* %rsi -> slot for round key 1 */

	/* Each aeskeygenassist/call pair derives the next round key from
	   %xmm1, stores it at (%rsi) and advances %rsi by 16.  The immediate
	   is the round constant of that step. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call	key_expansion128

	ret				/* %eax is still 0 */
	.size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
|
46 |
|
47 |
|
/*
 * intel_aes_decrypt_init_128: AES-128 key schedule for decryption.
 *
 *   in  %rdi : the key (16 bytes are read)
 *   in  %rsi : buffer for the expanded key; 11 round keys of 16 bytes
 *              each (176 bytes) are written
 *   out %eax : 0
 *
 * The schedule is generated in the same order as for encryption.  After
 * each of the first nine expansion steps, aesimc (InvMixColumns) is
 * applied to the new round key and the result overwrites the slot that
 * was just written (-16(%rsi)); %xmm1 keeps the untransformed key so the
 * next step can continue from it.  Round keys 0 and 10 are stored
 * untransformed.  This is the key form the aesdec instruction works with.
 *
 * Clobbers %rsi, %xmm1, %xmm2, %xmm3 and the flags.
 */
	.globl intel_aes_decrypt_init_128
	.type intel_aes_decrypt_init_128,@function
	.align 16
intel_aes_decrypt_init_128:
	xorl	%eax, %eax		/* key_expansion128 reads %eax as zero; also the return value */
	movups	(%rdi), %xmm1		/* round key 0 is the cipher key itself */
	movups	%xmm1, (%rsi)
	leaq	16(%rsi), %rsi		/* %rsi -> slot for round key 1 */

	/* round key 1 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 2 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 3 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 4 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 5 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 6 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 7 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 8 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 9 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call	key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups	%xmm2, -16(%rsi)
	/* round key 10: stored as generated, no aesimc */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call	key_expansion128

	ret				/* %eax is still 0 */
	.size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
|
101 |
|
102 |
|
/*
 * key_expansion128: one step of the AES-128 key schedule (file-local
 * helper; it does not follow the C calling convention).
 *
 *   in     %xmm1 : previous round key, dwords (w0, w1, w2, w3)
 *   in     %xmm2 : aeskeygenassist result computed from %xmm1
 *   in     %eax  : must be 0 (both callers in this file zero it); it
 *                  seeds the low dword of the scratch register %xmm3
 *   in/out %rsi  : where the new round key is stored; advanced by 16
 *   out    %xmm1 : the new round key (also written to memory)
 *
 * The two shufps/pxor pairs turn %xmm1 into the running XOR
 * (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3); every dword is then XORed with the
 * top dword of the aeskeygenassist result.
 *
 * Clobbers %xmm2, %xmm3 and the flags.
 */
	.type key_expansion128,@function
	.align 16
key_expansion128:
	pshufd	$0xff, %xmm2, %xmm2	/* broadcast dword 3 of %xmm2 to all four lanes */
	movd	%eax, %xmm3		/* %xmm3 = 0 (given %eax == 0) */
	shufps	$0x10, %xmm1, %xmm3	/* %xmm3 = (0, 0, w1, w0) */
	pxor	%xmm3, %xmm1
	shufps	$0x8c, %xmm1, %xmm3	/* %xmm3 = (0, w0, w0, w1^w2) */
	pxor	%xmm3, %xmm1		/* %xmm1 = running XOR of the old dwords */
	pxor	%xmm2, %xmm1		/* fold in the keygenassist word */
	movdqu	%xmm1, (%rsi)
	addq	$16, %rsi
	ret
	.size key_expansion128, .-key_expansion128
|
117 |
|
118 |
|
/*
 * intel_aes_encrypt_ecb_128: AES-128 encryption in ECB mode.
 *
 *   in  %rdi : cx - context; the expanded key is read at offset 48
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; not referenced here)
 *   in  %rcx : maxOutputLen - length of output buffer (not referenced here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : 0
 *
 * While at least 128 bytes remain, eight blocks are processed per
 * iteration; the rest is handled one block at a time.  inputLen must be
 * a multiple of 16: the single-block loop stops only when the running
 * offset equals inputLen exactly.
 *
 * Clobbers %rax, %rdi, %r11, %xmm1-%xmm12 and the flags.
 */
	.globl intel_aes_encrypt_ecb_128
	.type intel_aes_encrypt_ecb_128,@function
	.align 16
intel_aes_encrypt_ecb_128:
	leaq	48(%rdi), %rdi		/* 48 = EXPANDED_KEY_OFFSET; %rdi -> round key 0 */
	xorl	%eax, %eax		/* %rax = byte offset into input and output */
	movdqu	(%rdi), %xmm2		/* round key 0 */
	movdqu	160(%rdi), %xmm12	/* round key 10 */
	cmpq	$128, %r9		/* 128 = 8*16: fewer than eight blocks in total? */
	jb	1f
	leaq	-128(%r9), %r11		/* largest offset at which eight blocks still fit */

2:
	/* load eight blocks, XOR each with round key 0 */
	movdqu	(%r8, %rax), %xmm3
	pxor	%xmm2, %xmm3
	movdqu	16(%r8, %rax), %xmm4
	pxor	%xmm2, %xmm4
	movdqu	32(%r8, %rax), %xmm5
	pxor	%xmm2, %xmm5
	movdqu	48(%r8, %rax), %xmm6
	pxor	%xmm2, %xmm6
	movdqu	64(%r8, %rax), %xmm7
	pxor	%xmm2, %xmm7
	movdqu	80(%r8, %rax), %xmm8
	pxor	%xmm2, %xmm8
	movdqu	96(%r8, %rax), %xmm9
	pxor	%xmm2, %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm2, %xmm10

	/* rounds 1 and 2 (fully unrolled, two round keys per group) */
	movdqu	16(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 3 and 4 */
	movdqu	48(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 5 and 6 */
	movdqu	80(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* rounds 7 and 8 */
	movdqu	112(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */

	/* round 9, then the final round with round key 10 */
	movdqu	144(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdc	/* aesenclast %xmm12, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe4	/* aesenclast %xmm12, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xec	/* aesenclast %xmm12, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf4	/* aesenclast %xmm12, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfc	/* aesenclast %xmm12, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc4	/* aesenclast %xmm12, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd4	/* aesenclast %xmm12, %xmm10 */

	/* write the eight ciphertext blocks */
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax		/* 128 = 8*16 */
	cmpq	%r11, %rax
	jbe	2b			/* another eight blocks fit */

1:
	cmpq	%rax, %r9
	je	5f			/* nothing left */

	/* keep round keys 1..9 in registers for the single-block loop */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11

4:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm2, %xmm1		/* round key 0 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	4b			/* exits only when offset == inputLen */

5:
	xorl	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
|
300 |
|
301 |
|
/*
 * intel_aes_decrypt_ecb_128: AES-128 decryption in ECB mode.
 *
 *   in  %rdi : cx - context; the expanded key is read at offset 48
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; not referenced here)
 *   in  %rcx : maxOutputLen - length of output buffer (not referenced here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : 0
 *
 * The round keys are applied in reverse order: round key 10 first,
 * round keys 9..1 with aesdec, round key 0 with aesdeclast.
 * While at least 128 bytes remain, eight blocks are processed per
 * iteration; the rest is handled one block at a time.  inputLen must be
 * a multiple of 16: the single-block loop stops only when the running
 * offset equals inputLen exactly.
 *
 * Clobbers %rax, %rdi, %r11, %xmm1-%xmm12 and the flags.
 */
	.globl intel_aes_decrypt_ecb_128
	.type intel_aes_decrypt_ecb_128,@function
	.align 16
intel_aes_decrypt_ecb_128:
	leaq	48(%rdi), %rdi		/* 48 = EXPANDED_KEY_OFFSET; %rdi -> round key 0 */
	xorl	%eax, %eax		/* %rax = byte offset into input and output */
	movdqu	(%rdi), %xmm2		/* round key 0 (used last) */
	movdqu	160(%rdi), %xmm12	/* round key 10 (used first) */
	cmpq	$128, %r9		/* 128 = 8*16: fewer than eight blocks in total? */
	jb	1f
	leaq	-128(%r9), %r11		/* largest offset at which eight blocks still fit */

2:
	/* load eight blocks, XOR each with round key 10 */
	movdqu	(%r8, %rax), %xmm3
	pxor	%xmm12, %xmm3
	movdqu	16(%r8, %rax), %xmm4
	pxor	%xmm12, %xmm4
	movdqu	32(%r8, %rax), %xmm5
	pxor	%xmm12, %xmm5
	movdqu	48(%r8, %rax), %xmm6
	pxor	%xmm12, %xmm6
	movdqu	64(%r8, %rax), %xmm7
	pxor	%xmm12, %xmm7
	movdqu	80(%r8, %rax), %xmm8
	pxor	%xmm12, %xmm8
	movdqu	96(%r8, %rax), %xmm9
	pxor	%xmm12, %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm12, %xmm10

	/* round keys 9 and 8 (fully unrolled, two round keys per group) */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 7 and 6 */
	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 5 and 4 */
	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round keys 3 and 2 */
	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */

	/* round key 1, then the final round with round key 0 */
	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	/* write the eight plaintext blocks */
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax		/* 128 = 8*16 */
	cmpq	%r11, %rax
	jbe	2b			/* another eight blocks fit */

1:
	cmpq	%rax, %r9
	je	5f			/* nothing left */

	/* keep round keys 1..9 in %xmm3..%xmm11 for the single-block loop */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11

4:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm12, %xmm1		/* round key 10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	4b			/* exits only when offset == inputLen */

5:
	xorl	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
|
483 |
|
484 |
|
/*
 * intel_aes_encrypt_cbc_128: AES-128 encryption in CBC mode.
 *
 *   in  %rdi : cx - context; the IV is read and updated at offset 16,
 *              the expanded key is read at offset 48
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller); the incoming value is not used and
 *              the register is reused as the IV pointer
 *   in  %rcx : maxOutputLen - length of output buffer (not referenced here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : 0
 *
 * Blocks are processed strictly one after another because each block is
 * XORed with the previous ciphertext block before encryption.  The last
 * ciphertext block is written back to the context's IV slot.  With
 * inputLen == 0 nothing is read or written.  inputLen must be a multiple
 * of 16: the loop stops only when the running offset equals inputLen.
 *
 * Clobbers %rax, %rdx, %rdi, %xmm0-%xmm12 and the flags.
 */
	.globl intel_aes_encrypt_cbc_128
	.type intel_aes_encrypt_cbc_128,@function
	.align 16
intel_aes_encrypt_cbc_128:
	testq	%r9, %r9
	je	2f			/* empty input: leave the IV untouched */

	leaq	16(%rdi), %rdx		/* 16 = IV_OFFSET; %rdx -> IV */
	leaq	48(%rdi), %rdi		/* 48 = EXPANDED_KEY_OFFSET; %rdi -> round key 0 */
	xorl	%eax, %eax		/* %rax = byte offset into input and output */

	movdqu	(%rdx), %xmm0		/* chaining value, initially the IV */
	/* all eleven round keys stay in %xmm2..%xmm12 */
	movdqu	(%rdi), %xmm2
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12

1:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1		/* chain in the previous ciphertext (or the IV) */
	pxor	%xmm2, %xmm1		/* round key 0 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
	movdqa	%xmm1, %xmm0		/* this ciphertext chains into the next block */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	1b			/* exits only when offset == inputLen */

	movdqu	%xmm0, (%rdx)		/* save the last ciphertext block as the new IV */

2:
	xorl	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
|
544 |
|
545 |
|
546 /* in %rdi : cx - context |
|
547 in %rsi : output - pointer to output buffer |
|
548 in %rdx : outputLen - pointer to variable for length of output |
|
549 (filled by caller) |
|
550 in %rcx : maxOutputLen - length of output buffer |
|
551 in %r8 : input - pointer to input buffer |
|
552 in %r9 : inputLen - length of input buffer |
|
553 on stack: blocksize - AES blocksize (always 16, unused) |
|
554 */ |
|
555 .type intel_aes_decrypt_cbc_128,@function |
|
556 .globl intel_aes_decrypt_cbc_128 |
|
557 .align 16 |
|
558 intel_aes_decrypt_cbc_128: |
|
559 // leaq IV_OFFSET(%rdi), %rdx |
|
560 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
561 leaq 16(%rdi), %rdx |
|
562 leaq 48(%rdi), %rdi |
|
563 |
|
564 movdqu (%rdx), %xmm0 /* iv */ |
|
565 movdqu (%rdi), %xmm2 /* first key block */ |
|
566 movdqu 160(%rdi), %xmm12 /* last key block */ |
|
567 xorl %eax, %eax |
|
568 cmpq $128, %r9 |
|
569 jb 1f |
|
570 leaq -128(%r9), %r11 |
|
571 2: movdqu (%r8, %rax), %xmm3 /* 1st data block */ |
|
572 movdqu 16(%r8, %rax), %xmm4 /* 2d data block */ |
|
573 movdqu 32(%r8, %rax), %xmm5 |
|
574 movdqu 48(%r8, %rax), %xmm6 |
|
575 movdqu 64(%r8, %rax), %xmm7 |
|
576 movdqu 80(%r8, %rax), %xmm8 |
|
577 movdqu 96(%r8, %rax), %xmm9 |
|
578 movdqu 112(%r8, %rax), %xmm10 |
|
579 pxor %xmm12, %xmm3 |
|
580 pxor %xmm12, %xmm4 |
|
581 pxor %xmm12, %xmm5 |
|
582 pxor %xmm12, %xmm6 |
|
583 pxor %xmm12, %xmm7 |
|
584 pxor %xmm12, %xmm8 |
|
585 pxor %xmm12, %xmm9 |
|
586 pxor %xmm12, %xmm10 |
|
587 |
|
588 // complete loop unrolling |
|
589 movdqu 144(%rdi), %xmm1 |
|
590 movdqu 128(%rdi), %xmm11 |
|
591 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
592 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
593 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
594 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
595 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
596 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
597 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
598 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
599 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
600 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
601 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
602 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
603 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
604 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
605 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
606 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
607 |
|
608 movdqu 112(%rdi), %xmm1 |
|
609 movdqu 96(%rdi), %xmm11 |
|
610 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
611 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
612 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
613 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
614 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
615 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
616 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
617 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
618 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
619 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
620 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
621 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
622 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
623 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
624 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
625 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
626 |
|
627 movdqu 80(%rdi), %xmm1 |
|
628 movdqu 64(%rdi), %xmm11 |
|
629 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
630 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
631 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
632 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
633 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
634 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
635 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
636 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
637 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
638 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
639 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
640 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
641 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
642 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
643 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
644 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
645 |
|
646 movdqu 48(%rdi), %xmm1 |
|
647 movdqu 32(%rdi), %xmm11 |
|
648 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
649 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
650 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
651 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
652 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
653 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
654 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
655 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
656 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
657 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
658 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
659 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
660 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
661 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
662 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
663 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
664 |
|
665 movdqu 16(%rdi), %xmm1 |
|
666 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
667 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
668 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
669 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
670 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
671 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
672 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
673 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
674 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ |
|
675 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ |
|
676 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ |
|
677 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ |
|
678 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ |
|
679 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ |
|
680 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ |
|
681 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ |
|
682 |
|
683 pxor %xmm0, %xmm3 |
|
684 movdqu (%r8, %rax), %xmm0 |
|
685 pxor %xmm0, %xmm4 |
|
686 movdqu 16(%r8, %rax), %xmm0 |
|
687 pxor %xmm0, %xmm5 |
|
688 movdqu 32(%r8, %rax), %xmm0 |
|
689 pxor %xmm0, %xmm6 |
|
690 movdqu 48(%r8, %rax), %xmm0 |
|
691 pxor %xmm0, %xmm7 |
|
692 movdqu 64(%r8, %rax), %xmm0 |
|
693 pxor %xmm0, %xmm8 |
|
694 movdqu 80(%r8, %rax), %xmm0 |
|
695 pxor %xmm0, %xmm9 |
|
696 movdqu 96(%r8, %rax), %xmm0 |
|
697 pxor %xmm0, %xmm10 |
|
698 movdqu 112(%r8, %rax), %xmm0 |
|
699 movdqu %xmm3, (%rsi, %rax) |
|
700 movdqu %xmm4, 16(%rsi, %rax) |
|
701 movdqu %xmm5, 32(%rsi, %rax) |
|
702 movdqu %xmm6, 48(%rsi, %rax) |
|
703 movdqu %xmm7, 64(%rsi, %rax) |
|
704 movdqu %xmm8, 80(%rsi, %rax) |
|
705 movdqu %xmm9, 96(%rsi, %rax) |
|
706 movdqu %xmm10, 112(%rsi, %rax) |
|
707 addq $128, %rax |
|
708 cmpq %r11, %rax |
|
709 jbe 2b |
|
710 1: cmpq %rax, %r9 |
|
711 je 5f |
|
712 |
|
713 movdqu 16(%rdi), %xmm3 |
|
714 movdqu 32(%rdi), %xmm4 |
|
715 movdqu 48(%rdi), %xmm5 |
|
716 movdqu 64(%rdi), %xmm6 |
|
717 movdqu 80(%rdi), %xmm7 |
|
718 movdqu 96(%rdi), %xmm8 |
|
719 movdqu 112(%rdi), %xmm9 |
|
720 movdqu 128(%rdi), %xmm10 |
|
721 movdqu 144(%rdi), %xmm11 |
|
722 |
|
723 4: movdqu (%r8, %rax), %xmm1 |
|
724 movdqa %xmm1, %xmm13 |
|
725 pxor %xmm12, %xmm1 |
|
726 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ |
|
727 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ |
|
728 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ |
|
729 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ |
|
730 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ |
|
731 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ |
|
732 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ |
|
733 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ |
|
734 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ |
|
735 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ |
|
736 pxor %xmm0, %xmm1 |
|
737 movdqu %xmm1, (%rsi, %rax) |
|
738 movdqa %xmm13, %xmm0 |
|
739 addq $16, %rax |
|
740 cmpq %rax, %r9 |
|
741 jne 4b |
|
742 |
|
743 5: movdqu %xmm0, (%rdx) |
|
744 |
|
745 xor %eax, %eax |
|
746 ret |
|
747 .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128 |
|
748 |
|
749 /* in %rdi : the key |
|
750 in %rsi : buffer for expanded key |
|
   Expands a 192-bit AES key into the schedule used by the encryption
   routines. The 24 key bytes at (%rdi) are copied to the start of the
   buffer; each of the eight aeskeygenassist / key_expansion192 pairs
   below then appends 24 more bytes, so 216 bytes are written in total
   and %rsi is left pointing just past them.
   The aeskeygenassist instructions are hand-encoded with .byte; only the
   trailing immediate byte changes from one round to the next.
   Clobbers %rsi, %eax (left at 0 by key_expansion192) and %xmm1-%xmm5.
751 */ |
|
752 .type intel_aes_encrypt_init_192,@function |
|
753 .globl intel_aes_encrypt_init_192 |
|
754 .align 16 |
|
755 intel_aes_encrypt_init_192: |
|
756 movdqu (%rdi), %xmm1 /* xmm1 = key bytes 0..15 */ |
|
757 movq 16(%rdi), %xmm3 /* low 64 bits of xmm3 = key bytes 16..23 */ |
|
758 movdqu %xmm1, (%rsi) /* the raw key forms the first 24 schedule bytes */ |
|
759 movq %xmm3, 16(%rsi) |
|
760 leaq 24(%rsi), %rsi /* rsi = next write position */ |
|
761 |
|
762 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ |
|
763 call key_expansion192 /* appends 24 bytes, updates xmm1/xmm3/rsi */ |
|
764 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ |
|
765 call key_expansion192 |
|
766 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ |
|
767 call key_expansion192 |
|
768 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ |
|
769 call key_expansion192 |
|
770 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ |
|
771 call key_expansion192 |
|
772 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ |
|
773 call key_expansion192 |
|
774 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ |
|
775 call key_expansion192 |
|
776 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ |
|
777 call key_expansion192 |
|
778 |
|
779 ret |
|
780 .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192 |
|
781 |
|
782 |
|
783 /* in %rdi : the key |
|
784 in %rsi : buffer for expanded key |
|
   Builds the 192-bit key schedule used by the decryption routines. The
   expansion itself is the same as in intel_aes_encrypt_init_192 (216
   bytes written, %rsi left pointing just past them), but aesimc is
   applied in place to the eleven 16-byte round keys at buffer offsets
   16..176 as soon as each one is complete. The round keys at offsets 0
   and 192 are stored untransformed.
   key_expansion192 emits 24 bytes per call while a round key is 16
   bytes, so after calls 1, 3, 5 and 7 two finished keys are reloaded
   from memory (at -32 and -16 from %rsi), and after calls 2, 4 and 6 the
   single finished key is still held in %xmm1 and is converted from
   there into %xmm2, leaving %xmm1 intact for the next expansion step.
   Clobbers %rsi, %eax (left at 0 by key_expansion192) and %xmm1-%xmm5.
785 */ |
|
786 .type intel_aes_decrypt_init_192,@function |
|
787 .globl intel_aes_decrypt_init_192 |
|
788 .align 16 |
|
789 intel_aes_decrypt_init_192: |
|
790 movdqu (%rdi), %xmm1 /* xmm1 = key bytes 0..15 */ |
|
791 movq 16(%rdi), %xmm3 /* low 64 bits of xmm3 = key bytes 16..23 */ |
|
792 movdqu %xmm1, (%rsi) |
|
793 movq %xmm3, 16(%rsi) |
|
794 leaq 24(%rsi), %rsi /* rsi = next write position */ |
|
795 |
|
796 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ |
|
797 call key_expansion192 |
|
798 movups -32(%rsi), %xmm2 /* round keys at buffer offsets 16 and 32 are now complete */ |
|
799 movups -16(%rsi), %xmm4 |
|
800 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ |
|
801 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ |
|
802 movups %xmm2, -32(%rsi) |
|
803 movups %xmm4, -16(%rsi) |
|
804 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ |
|
805 call key_expansion192 |
|
806 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ |
|
807 movups %xmm2, -24(%rsi) /* replaces the 16 bytes just stored from xmm1 (offset 48) */ |
|
808 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ |
|
809 call key_expansion192 |
|
810 movups -32(%rsi), %xmm2 /* round keys at offsets 64 and 80 */ |
|
811 movups -16(%rsi), %xmm4 |
|
812 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ |
|
813 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ |
|
814 movups %xmm2, -32(%rsi) |
|
815 movups %xmm4, -16(%rsi) |
|
816 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ |
|
817 call key_expansion192 |
|
818 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ |
|
819 movups %xmm2, -24(%rsi) /* round key at offset 96 */ |
|
820 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ |
|
821 call key_expansion192 |
|
822 movups -32(%rsi), %xmm2 /* round keys at offsets 112 and 128 */ |
|
823 movups -16(%rsi), %xmm4 |
|
824 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ |
|
825 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ |
|
826 movups %xmm2, -32(%rsi) |
|
827 movups %xmm4, -16(%rsi) |
|
828 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ |
|
829 call key_expansion192 |
|
830 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ |
|
831 movups %xmm2, -24(%rsi) /* round key at offset 144 */ |
|
832 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ |
|
833 call key_expansion192 |
|
834 movups -32(%rsi), %xmm2 /* round keys at offsets 160 and 176 */ |
|
835 movups -16(%rsi), %xmm4 |
|
836 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ |
|
837 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ |
|
838 movups %xmm2, -32(%rsi) |
|
839 movups %xmm4, -16(%rsi) |
|
840 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ |
|
841 call key_expansion192 /* final call: the key at offset 192 is left untransformed */ |
|
842 |
|
843 ret |
|
844 .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192 |
|
845 |
|
846 |
|
/* key_expansion192: file-local helper that appends the next 24 bytes to a
   192-bit key schedule.
   in %xmm1 : the first 16 bytes of the previous 24-byte group
   in %xmm3 : low 64 bits hold the last 8 bytes of the previous group
   in %xmm2 : aeskeygenassist result computed from %xmm3 by the caller
   in %rsi  : write position in the expanded-key buffer
   On return %xmm1 and the low 64 bits of %xmm3 hold the new group, which
   has also been stored at (%rsi), and %rsi has been advanced by 24.
   Clobbers %eax (left at 0), %xmm2, %xmm4 and %xmm5.
*/
847 .type key_expansion192,@function |
|
848 .align 16 |
|
849 key_expansion192: |
|
850 pshufd $0x55, %xmm2, %xmm2 /* broadcast dword 1 of the keygenassist result to all lanes */ |
|
851 xor %eax, %eax |
|
852 movd %eax, %xmm4 /* xmm4 = 0 */ |
|
853 shufps $0x10, %xmm1, %xmm4 /* xmm4 dwords 0..3 = 0, 0, xmm1[1], xmm1[0] */ |
|
854 pxor %xmm4, %xmm1 |
|
855 shufps $0x8c, %xmm1, %xmm4 /* xmm4 dwords 0..3 = 0, old xmm1[0], xmm1[0], xmm1[2] */ |
|
856 pxor %xmm2, %xmm1 /* fold the broadcast word into every lane */ |
|
857 pxor %xmm4, %xmm1 /* dword i now also holds the XOR of the old dwords 0..i */ |
|
858 movdqu %xmm1, (%rsi) /* store the first 16 new bytes */ |
|
859 addq $16, %rsi |
|
860 |
|
861 pshufd $0xff, %xmm1, %xmm4 /* broadcast the newest dword (dword 3 of xmm1) */ |
|
862 movd %eax, %xmm5 /* xmm5 = 0 (eax is still 0) */ |
|
863 shufps $0x00, %xmm3, %xmm5 /* xmm5 dwords 0..3 = 0, 0, xmm3[0], xmm3[0] */ |
|
864 shufps $0x08, %xmm3, %xmm5 /* xmm5 dwords 0..3 = 0, xmm3[0], xmm3[0], xmm3[0] */ |
|
865 pxor %xmm4, %xmm3 |
|
866 pxor %xmm5, %xmm3 /* dword 1 additionally picks up the old dword 0 */ |
|
867 movq %xmm3, (%rsi) /* only the low 8 bytes of xmm3 are part of the schedule */ |
|
868 addq $8, %rsi |
|
869 ret |
|
870 .size key_expansion192, .-key_expansion192 |
|
871 |
|
872 |
|
873 /* in %rdi : cx - context |
|
874 in %rsi : output - pointer to output buffer |
|
875 in %rdx : outputLen - pointer to variable for length of output |
|
876 (filled by caller) |
|
877 in %rcx : maxOutputLen - length of output buffer |
|
878 in %r8 : input - pointer to input buffer |
|
879 in %r9 : inputLen - length of input buffer |
|
880 on stack: blocksize - AES blocksize (always 16, unused) |
|
   Encrypts inputLen bytes from input to output in ECB mode, using the
   192-bit key schedule stored at offset 48 of the context (round keys
   are read from offsets 0..192 of that schedule).
   %rax is the running byte offset into both buffers. The first loop
   (label 2) handles eight blocks (128 bytes) per iteration for as long
   as at least 128 bytes remain; the second loop (label 4) handles one
   block per iteration and only exits when %rax == %r9, so inputLen must
   be a multiple of 16.
   %rdx and %rcx are not used here. Returns 0 in %eax.
   Clobbers %rdi, %r11 and %xmm1-%xmm14.
   The AES instructions are hand-encoded with .byte; the comment on each
   such line gives the instruction in AT&T operand order.
881 */ |
|
882 .type intel_aes_encrypt_ecb_192,@function |
|
883 .globl intel_aes_encrypt_ecb_192 |
|
884 .align 16 |
|
885 intel_aes_encrypt_ecb_192: |
|
886 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
887 leaq 48(%rdi), %rdi /* rdi = start of the expanded key inside the context */ |
|
888 |
|
889 movdqu (%rdi), %xmm2 /* xmm2 = first round key */ |
|
890 movdqu 192(%rdi), %xmm14 /* xmm14 = last round key */ |
|
891 xorl %eax, %eax /* rax = byte offset processed so far */ |
|
892 // cmpq $8*16, %r9 |
|
893 cmpq $128, %r9 |
|
894 jb 1f /* fewer than eight blocks: use the one-block loop only */ |
|
895 // leaq -8*16(%r9), %r11 |
|
896 leaq -128(%r9), %r11 /* r11 = largest offset at which eight blocks still fit */ |
|
897 2: movdqu (%r8, %rax), %xmm3 |
|
898 movdqu 16(%r8, %rax), %xmm4 |
|
899 movdqu 32(%r8, %rax), %xmm5 |
|
900 movdqu 48(%r8, %rax), %xmm6 |
|
901 movdqu 64(%r8, %rax), %xmm7 |
|
902 movdqu 80(%r8, %rax), %xmm8 |
|
903 movdqu 96(%r8, %rax), %xmm9 |
|
904 movdqu 112(%r8, %rax), %xmm10 |
|
905 pxor %xmm2, %xmm3 /* XOR the first round key into all eight blocks */ |
|
906 pxor %xmm2, %xmm4 |
|
907 pxor %xmm2, %xmm5 |
|
908 pxor %xmm2, %xmm6 |
|
909 pxor %xmm2, %xmm7 |
|
910 pxor %xmm2, %xmm8 |
|
911 pxor %xmm2, %xmm9 |
|
912 pxor %xmm2, %xmm10 |
|
913 |
|
914 // complete loop unrolling |
|
915 movdqu 16(%rdi), %xmm1 |
|
916 movdqu 32(%rdi), %xmm11 |
|
917 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ |
|
918 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ |
|
919 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ |
|
920 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ |
|
921 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ |
|
922 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ |
|
923 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ |
|
924 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ |
|
925 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ |
|
926 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ |
|
927 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ |
|
928 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ |
|
929 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ |
|
930 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ |
|
931 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ |
|
932 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ |
|
933 |
|
934 movdqu 48(%rdi), %xmm1 |
|
935 movdqu 64(%rdi), %xmm11 |
|
936 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ |
|
937 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ |
|
938 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ |
|
939 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ |
|
940 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ |
|
941 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ |
|
942 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ |
|
943 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ |
|
944 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ |
|
945 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ |
|
946 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ |
|
947 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ |
|
948 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ |
|
949 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ |
|
950 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ |
|
951 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ |
|
952 |
|
953 movdqu 80(%rdi), %xmm1 |
|
954 movdqu 96(%rdi), %xmm11 |
|
955 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ |
|
956 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ |
|
957 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ |
|
958 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ |
|
959 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ |
|
960 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ |
|
961 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ |
|
962 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ |
|
963 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ |
|
964 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ |
|
965 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ |
|
966 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ |
|
967 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ |
|
968 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ |
|
969 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ |
|
970 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ |
|
971 |
|
972 movdqu 112(%rdi), %xmm1 |
|
973 movdqu 128(%rdi), %xmm11 |
|
974 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ |
|
975 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ |
|
976 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ |
|
977 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ |
|
978 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ |
|
979 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ |
|
980 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ |
|
981 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ |
|
982 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ |
|
983 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ |
|
984 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ |
|
985 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ |
|
986 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ |
|
987 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ |
|
988 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ |
|
989 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ |
|
990 |
|
991 movdqu 144(%rdi), %xmm1 |
|
992 movdqu 160(%rdi), %xmm11 |
|
993 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ |
|
994 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ |
|
995 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ |
|
996 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ |
|
997 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ |
|
998 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ |
|
999 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ |
|
1000 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ |
|
1001 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ |
|
1002 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ |
|
1003 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ |
|
1004 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ |
|
1005 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ |
|
1006 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ |
|
1007 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ |
|
1008 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ |
|
1009 |
|
1010 movdqu 176(%rdi), %xmm1 |
|
1011 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ |
|
1012 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ |
|
1013 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ |
|
1014 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ |
|
1015 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ |
|
1016 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ |
|
1017 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ |
|
1018 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ |
|
1019 .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */ |
|
1020 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */ |
|
1021 .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */ |
|
1022 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */ |
|
1023 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */ |
|
1024 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */ |
|
1025 .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */ |
|
1026 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */ |
|
1027 |
|
1028 movdqu %xmm3, (%rsi, %rax) |
|
1029 movdqu %xmm4, 16(%rsi, %rax) |
|
1030 movdqu %xmm5, 32(%rsi, %rax) |
|
1031 movdqu %xmm6, 48(%rsi, %rax) |
|
1032 movdqu %xmm7, 64(%rsi, %rax) |
|
1033 movdqu %xmm8, 80(%rsi, %rax) |
|
1034 movdqu %xmm9, 96(%rsi, %rax) |
|
1035 movdqu %xmm10, 112(%rsi, %rax) |
|
1036 // addq $8*16, %rax |
|
1037 addq $128, %rax |
|
1038 cmpq %r11, %rax |
|
1039 jbe 2b /* at least eight more blocks remain */ |
|
1040 1: cmpq %rax, %r9 |
|
1041 je 5f /* nothing left to process */ |
|
1042 |
|
1043 movdqu 16(%rdi), %xmm3 /* keep the remaining round keys in registers for the one-block loop */ |
|
1044 movdqu 32(%rdi), %xmm4 |
|
1045 movdqu 48(%rdi), %xmm5 |
|
1046 movdqu 64(%rdi), %xmm6 |
|
1047 movdqu 80(%rdi), %xmm7 |
|
1048 movdqu 96(%rdi), %xmm8 |
|
1049 movdqu 112(%rdi), %xmm9 |
|
1050 movdqu 128(%rdi), %xmm10 |
|
1051 movdqu 144(%rdi), %xmm11 |
|
1052 movdqu 160(%rdi), %xmm12 |
|
1053 movdqu 176(%rdi), %xmm13 |
|
1054 |
|
1055 4: movdqu (%r8, %rax), %xmm1 |
|
1056 pxor %xmm2, %xmm1 |
|
1057 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ |
|
1058 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ |
|
1059 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ |
|
1060 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ |
|
1061 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ |
|
1062 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ |
|
1063 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ |
|
1064 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ |
|
1065 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ |
|
1066 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ |
|
1067 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ |
|
1068 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ |
|
1069 movdqu %xmm1, (%rsi, %rax) |
|
1070 addq $16, %rax |
|
1071 cmpq %rax, %r9 |
|
1072 jne 4b /* exits only on exact equality */ |
|
1073 |
|
1074 5: xor %eax, %eax /* return 0 */ |
|
1075 ret |
|
1076 .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192 |
|
1077 |
|
1078 |
|
1079 /* in %rdi : cx - context |
|
1080 in %rsi : output - pointer to output buffer |
|
1081 in %rdx : outputLen - pointer to variable for length of output |
|
1082 (filled by caller) |
|
1083 in %rcx : maxOutputLen - length of output buffer |
|
1084 in %r8 : input - pointer to input buffer |
|
1085 in %r9 : inputLen - length of input buffer |
|
1086 on stack: blocksize - AES blocksize (always 16, unused) |
|
   Decrypts inputLen bytes from input to output in ECB mode, using the
   192-bit key schedule stored at offset 48 of the context. The round
   keys are applied in the reverse order of the encrypt routine: the key
   at offset 192 is XORed in first, the keys at offsets 176 down to 16
   are used with aesdec, and the key at offset 0 with aesdeclast.
   %rax is the running byte offset into both buffers. The first loop
   (label 2) handles eight blocks (128 bytes) per iteration for as long
   as at least 128 bytes remain; the second loop (label 4) handles one
   block per iteration and only exits when %rax == %r9, so inputLen must
   be a multiple of 16.
   %rdx and %rcx are not used here. Returns 0 in %eax.
   Clobbers %rdi, %r11 and %xmm1-%xmm14.
1087 */ |
|
1088 .type intel_aes_decrypt_ecb_192,@function |
|
1089 .globl intel_aes_decrypt_ecb_192 |
|
1090 .align 16 |
|
1091 intel_aes_decrypt_ecb_192: |
|
1092 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
1093 leaq 48(%rdi), %rdi /* rdi = start of the expanded key inside the context */ |
|
1094 |
|
1095 movdqu (%rdi), %xmm2 /* xmm2 = key at offset 0, used by aesdeclast */ |
|
1096 movdqu 192(%rdi), %xmm14 /* xmm14 = key at offset 192, XORed in first */ |
|
1097 xorl %eax, %eax /* rax = byte offset processed so far */ |
|
1098 // cmpq $8*16, %r9 |
|
1099 cmpq $128, %r9 |
|
1100 jb 1f /* fewer than eight blocks: use the one-block loop only */ |
|
1101 // leaq -8*16(%r9), %r11 |
|
1102 leaq -128(%r9), %r11 /* r11 = largest offset at which eight blocks still fit */ |
|
1103 2: movdqu (%r8, %rax), %xmm3 |
|
1104 movdqu 16(%r8, %rax), %xmm4 |
|
1105 movdqu 32(%r8, %rax), %xmm5 |
|
1106 movdqu 48(%r8, %rax), %xmm6 |
|
1107 movdqu 64(%r8, %rax), %xmm7 |
|
1108 movdqu 80(%r8, %rax), %xmm8 |
|
1109 movdqu 96(%r8, %rax), %xmm9 |
|
1110 movdqu 112(%r8, %rax), %xmm10 |
|
1111 pxor %xmm14, %xmm3 /* XOR the key at offset 192 into all eight blocks */ |
|
1112 pxor %xmm14, %xmm4 |
|
1113 pxor %xmm14, %xmm5 |
|
1114 pxor %xmm14, %xmm6 |
|
1115 pxor %xmm14, %xmm7 |
|
1116 pxor %xmm14, %xmm8 |
|
1117 pxor %xmm14, %xmm9 |
|
1118 pxor %xmm14, %xmm10 |
|
1119 |
|
1120 // complete loop unrolling |
|
1121 movdqu 176(%rdi), %xmm1 |
|
1122 movdqu 160(%rdi), %xmm11 |
|
1123 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1124 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1125 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1126 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1127 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1128 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1129 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1130 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1131 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
1132 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
1133 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
1134 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
1135 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
1136 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
1137 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
1138 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
1139 |
|
1140 movdqu 144(%rdi), %xmm1 |
|
1141 movdqu 128(%rdi), %xmm11 |
|
1142 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1143 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1144 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1145 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1146 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1147 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1148 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1149 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1150 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
1151 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
1152 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
1153 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
1154 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
1155 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
1156 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
1157 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
1158 |
|
1159 movdqu 112(%rdi), %xmm1 |
|
1160 movdqu 96(%rdi), %xmm11 |
|
1161 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1162 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1163 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1164 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1165 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1166 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1167 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1168 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1169 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
1170 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
1171 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
1172 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
1173 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
1174 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
1175 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
1176 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
1177 |
|
1178 movdqu 80(%rdi), %xmm1 |
|
1179 movdqu 64(%rdi), %xmm11 |
|
1180 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1181 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1182 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1183 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1184 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1185 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1186 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1187 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1188 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
1189 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
1190 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
1191 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
1192 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
1193 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
1194 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
1195 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
1196 |
|
1197 movdqu 48(%rdi), %xmm1 |
|
1198 movdqu 32(%rdi), %xmm11 |
|
1199 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1200 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1201 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1202 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1203 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1204 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1205 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1206 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1207 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
1208 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
1209 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
1210 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
1211 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
1212 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
1213 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
1214 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
1215 |
|
1216 movdqu 16(%rdi), %xmm1 |
|
1217 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1218 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1219 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1220 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1221 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1222 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1223 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1224 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1225 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ |
|
1226 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ |
|
1227 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ |
|
1228 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ |
|
1229 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ |
|
1230 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ |
|
1231 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ |
|
1232 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ |
|
1233 |
|
1234 movdqu %xmm3, (%rsi, %rax) |
|
1235 movdqu %xmm4, 16(%rsi, %rax) |
|
1236 movdqu %xmm5, 32(%rsi, %rax) |
|
1237 movdqu %xmm6, 48(%rsi, %rax) |
|
1238 movdqu %xmm7, 64(%rsi, %rax) |
|
1239 movdqu %xmm8, 80(%rsi, %rax) |
|
1240 movdqu %xmm9, 96(%rsi, %rax) |
|
1241 movdqu %xmm10, 112(%rsi, %rax) |
|
1242 // addq $8*16, %rax |
|
1243 addq $128, %rax |
|
1244 cmpq %r11, %rax |
|
1245 jbe 2b /* at least eight more blocks remain */ |
|
1246 1: cmpq %rax, %r9 |
|
1247 je 5f /* nothing left to process */ |
|
1248 |
|
1249 movdqu 16(%rdi), %xmm3 /* keep the remaining round keys in registers for the one-block loop */ |
|
1250 movdqu 32(%rdi), %xmm4 |
|
1251 movdqu 48(%rdi), %xmm5 |
|
1252 movdqu 64(%rdi), %xmm6 |
|
1253 movdqu 80(%rdi), %xmm7 |
|
1254 movdqu 96(%rdi), %xmm8 |
|
1255 movdqu 112(%rdi), %xmm9 |
|
1256 movdqu 128(%rdi), %xmm10 |
|
1257 movdqu 144(%rdi), %xmm11 |
|
1258 movdqu 160(%rdi), %xmm12 |
|
1259 movdqu 176(%rdi), %xmm13 |
|
1260 |
|
1261 4: movdqu (%r8, %rax), %xmm1 |
|
1262 pxor %xmm14, %xmm1 |
|
1263 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ |
|
1264 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ |
|
1265 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ |
|
1266 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ |
|
1267 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ |
|
1268 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ |
|
1269 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ |
|
1270 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ |
|
1271 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ |
|
1272 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ |
|
1273 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ |
|
1274 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ |
|
1275 movdqu %xmm1, (%rsi, %rax) |
|
1276 addq $16, %rax |
|
1277 cmpq %rax, %r9 |
|
1278 jne 4b /* exits only on exact equality */ |
|
1279 |
|
1280 5: xor %eax, %eax /* return 0 */ |
|
1281 ret |
|
1282 .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192 |
|
1283 |
|
1284 |
|
1285 /* in %rdi : cx - context |
|
1286 in %rsi : output - pointer to output buffer |
|
1287 in %rdx : outputLen - pointer to variable for length of output |
|
1288 (filled by caller) |
|
1289 in %rcx : maxOutputLen - length of output buffer |
|
1290 in %r8 : input - pointer to input buffer |
|
1291 in %r9 : inputLen - length of input buffer |
|
1292 on stack: blocksize - AES blocksize (always 16, unused) |
|
   Encrypts inputLen bytes from input to output in CBC mode. The
   chaining value is read from offset 16 of the context and the 192-bit
   key schedule from offset 48. After the last block, the final
   ciphertext block is written back to offset 16 of the context.
   The incoming %rdx (outputLen) is overwritten with the address of the
   chaining value, and %rcx is not used. Nothing is read or written when
   inputLen is 0; otherwise the loop only exits when %rax == %r9, so
   inputLen must be a multiple of 16.
   Returns 0 in %eax. Clobbers %rdi, %rdx and %xmm0-%xmm14.
1293 */ |
|
1294 .type intel_aes_encrypt_cbc_192,@function |
|
1295 .globl intel_aes_encrypt_cbc_192 |
|
1296 .align 16 |
|
1297 intel_aes_encrypt_cbc_192: |
|
1298 testq %r9, %r9 |
|
1299 je 2f /* empty input: leave the stored chaining value untouched */ |
|
1300 |
|
1301 // leaq IV_OFFSET(%rdi), %rdx |
|
1302 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
1303 leaq 16(%rdi), %rdx /* rdx = address of the chaining value inside the context */ |
|
1304 leaq 48(%rdi), %rdi /* rdi = start of the expanded key inside the context */ |
|
1305 |
|
1306 movdqu (%rdx), %xmm0 /* xmm0 = current chaining value */ |
|
1307 movdqu (%rdi), %xmm2 /* xmm2..xmm14 = the thirteen round keys, in order */ |
|
1308 movdqu 16(%rdi), %xmm3 |
|
1309 movdqu 32(%rdi), %xmm4 |
|
1310 movdqu 48(%rdi), %xmm5 |
|
1311 movdqu 64(%rdi), %xmm6 |
|
1312 movdqu 80(%rdi), %xmm7 |
|
1313 movdqu 96(%rdi), %xmm8 |
|
1314 movdqu 112(%rdi), %xmm9 |
|
1315 movdqu 128(%rdi), %xmm10 |
|
1316 movdqu 144(%rdi), %xmm11 |
|
1317 movdqu 160(%rdi), %xmm12 |
|
1318 movdqu 176(%rdi), %xmm13 |
|
1319 movdqu 192(%rdi), %xmm14 |
|
1320 |
|
1321 xorl %eax, %eax /* rax = byte offset processed so far */ |
|
1322 1: movdqu (%r8, %rax), %xmm1 |
|
1323 pxor %xmm0, %xmm1 /* mix in the chaining value */ |
|
1324 pxor %xmm2, %xmm1 /* then the first round key */ |
|
1325 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ |
|
1326 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ |
|
1327 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ |
|
1328 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ |
|
1329 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ |
|
1330 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ |
|
1331 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ |
|
1332 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ |
|
1333 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ |
|
1334 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ |
|
1335 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ |
|
1336 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ |
|
1337 movdqu %xmm1, (%rsi, %rax) |
|
1338 movdqa %xmm1, %xmm0 /* this ciphertext block is the next chaining value */ |
|
1339 addq $16, %rax |
|
1340 cmpq %rax, %r9 |
|
1341 jne 1b /* exits only on exact equality */ |
|
1342 |
|
1343 movdqu %xmm0, (%rdx) /* save the chaining value back into the context */ |
|
1344 |
|
1345 2: xor %eax, %eax /* return 0 */ |
|
1346 ret |
|
1347 .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192 |
|
1348 |
|
1349 |
|
/*
 * intel_aes_decrypt_cbc_192
 *
 * CBC-mode decryption with a 12-round (AES-192) key schedule, using
 * hand-encoded AES-NI opcodes.
 *
 *  in %rdi : cx - context
 *  in %rsi : output - pointer to output buffer
 *  in %rdx : outputLen - pointer to variable for length of output
 *            (filled by caller; this routine reuses %rdx and never writes it)
 *  in %rcx : maxOutputLen - length of output buffer (not read here)
 *  in %r8  : input - pointer to input buffer
 *  in %r9  : inputLen - length of input buffer
 *  on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Context layout used here: the chaining value lives at cx+16 (IV_OFFSET)
 * and the 13 round keys at cx+48 (EXPANDED_KEY_OFFSET), 16 bytes each.
 * The keys are applied from offset 192 down to offset 0 with aesdec, so
 * they are presumably the schedule written by the matching decrypt-init
 * routine (verify against the caller).
 *
 * Blocks are processed eight at a time while at least 128 bytes remain,
 * then one at a time.  The single-block loop stops only when the byte
 * offset equals inputLen, so inputLen is expected to be a multiple of 16;
 * that is not checked here.
 *
 * On exit the last ciphertext block (or the unchanged chaining value when
 * inputLen is 0) is stored back at cx+16 and %eax is 0.
 * Registers written: %rax, %rdx, %rdi, %r11, %xmm0-%xmm15, flags.
 */
        .type   intel_aes_decrypt_cbc_192,@function
        .globl  intel_aes_decrypt_cbc_192
        .align  16
intel_aes_decrypt_cbc_192:
        leaq    16(%rdi), %rdx          /* %rdx -> chaining value (IV_OFFSET) */
        movdqu  (%rdx), %xmm0           /* %xmm0 = previous ciphertext block / IV */
        leaq    48(%rdi), %rdi          /* %rdi -> round keys (EXPANDED_KEY_OFFSET) */
        movdqu  192(%rdi), %xmm14       /* round key 12: whitening for decryption */
        movdqu  (%rdi), %xmm2           /* round key 0: used by aesdeclast */
        xorl    %eax, %eax              /* %rax = byte offset into input/output */
        cmpq    $128, %r9
        jb      1f                      /* fewer than 8 blocks: single-block loop only */
        leaq    -128(%r9), %r11         /* last offset at which 8 blocks still fit */

        /* ---- eight blocks per iteration ---- */
2:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm14, %xmm3
        pxor    %xmm14, %xmm4
        pxor    %xmm14, %xmm5
        pxor    %xmm14, %xmm6
        pxor    %xmm14, %xmm7
        pxor    %xmm14, %xmm8
        pxor    %xmm14, %xmm9
        pxor    %xmm14, %xmm10

        /* Fully unrolled rounds; round keys are fetched two at a time
           into %xmm1 and %xmm11. */

        /* round keys 11 and 10 */
        movdqu  176(%rdi), %xmm1
        movdqu  160(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb     /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3     /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb     /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3     /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb     /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3     /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3     /* aesdec %xmm11, %xmm10 */

        /* round keys 9 and 8 */
        movdqu  144(%rdi), %xmm1
        movdqu  128(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb     /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3     /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb     /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3     /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb     /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3     /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3     /* aesdec %xmm11, %xmm10 */

        /* round keys 7 and 6 */
        movdqu  112(%rdi), %xmm1
        movdqu  96(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb     /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3     /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb     /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3     /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb     /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3     /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3     /* aesdec %xmm11, %xmm10 */

        /* round keys 5 and 4 */
        movdqu  80(%rdi), %xmm1
        movdqu  64(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb     /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3     /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb     /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3     /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb     /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3     /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3     /* aesdec %xmm11, %xmm10 */

        /* round keys 3 and 2 */
        movdqu  48(%rdi), %xmm1
        movdqu  32(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb     /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3     /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb     /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3     /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb     /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3     /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3     /* aesdec %xmm11, %xmm10 */

        /* round key 1, then the final round with round key 0 (%xmm2) */
        movdqu  16(%rdi), %xmm1
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x0f,0x38,0xdf,0xda          /* aesdeclast %xmm2, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdf,0xe2          /* aesdeclast %xmm2, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdf,0xea          /* aesdeclast %xmm2, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdf,0xf2          /* aesdeclast %xmm2, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdf,0xfa          /* aesdeclast %xmm2, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2     /* aesdeclast %xmm2, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xca     /* aesdeclast %xmm2, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2     /* aesdeclast %xmm2, %xmm10 */

        /* CBC un-chaining: block i is XORed with ciphertext block i-1.
           The ciphertext is re-read from the input buffer here, before
           any output is stored; %xmm0 ends up holding the 8th ciphertext
           block for the next iteration. */
        pxor    %xmm0, %xmm3
        movdqu  (%r8, %rax), %xmm0
        pxor    %xmm0, %xmm4
        movdqu  16(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm5
        movdqu  32(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm6
        movdqu  48(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm7
        movdqu  64(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm8
        movdqu  80(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm9
        movdqu  96(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm10
        movdqu  112(%r8, %rax), %xmm0

        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     2b                      /* unsigned: another 8 blocks still fit */

        /* ---- remaining blocks, one per iteration ---- */
1:
        cmpq    %rax, %r9
        je      5f                      /* nothing left */

        /* Keep round keys 11..1 resident in %xmm13..%xmm3 for the loop. */
        movdqu  176(%rdi), %xmm13
        movdqu  160(%rdi), %xmm12
        movdqu  144(%rdi), %xmm11
        movdqu  128(%rdi), %xmm10
        movdqu  112(%rdi), %xmm9
        movdqu  96(%rdi), %xmm8
        movdqu  80(%rdi), %xmm7
        movdqu  64(%rdi), %xmm6
        movdqu  48(%rdi), %xmm5
        movdqu  32(%rdi), %xmm4
        movdqu  16(%rdi), %xmm3

4:
        movdqu  (%r8, %rax), %xmm15     /* keep the ciphertext for chaining */
        movdqa  %xmm15, %xmm1
        pxor    %xmm14, %xmm1
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcd     /* aesdec %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcc     /* aesdec %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xca     /* aesdec %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc9     /* aesdec %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc8     /* aesdec %xmm8, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcf          /* aesdec %xmm7, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xce          /* aesdec %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcd          /* aesdec %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcc          /* aesdec %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcb          /* aesdec %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdf,0xca          /* aesdeclast %xmm2, %xmm1 */
        pxor    %xmm0, %xmm1            /* XOR with previous ciphertext / IV */
        movdqu  %xmm1, (%rsi, %rax)
        movdqa  %xmm15, %xmm0           /* this ciphertext chains into the next block */
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     4b

5:
        movdqu  %xmm0, (%rdx)           /* save the chaining value back into the context */

        xorl    %eax, %eax              /* return 0 */
        ret
        .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
|
1573 |
|
/*
 * intel_aes_encrypt_init_256
 *
 * Expands a 32-byte key into 15 round keys of 16 bytes each (240 bytes).
 *
 *  in %rdi : the key
 *  in %rsi : buffer for expanded key
 *
 * The two halves of the key are copied out as round keys 0 and 1.  Six
 * calls to key_expansion256 then append two round keys each, and a final
 * inline step (round constant 0x40) appends round key 14.
 *
 * %eax is cleared before the first call and stays 0: key_expansion256
 * reads it to zero %xmm6, and it is also the value returned to the caller.
 * Registers written: %rax, %rsi, %xmm1-%xmm4, %xmm6, flags.
 */
        .type   intel_aes_encrypt_init_256,@function
        .globl  intel_aes_encrypt_init_256
        .align  16
intel_aes_encrypt_init_256:
        movdqu  16(%rdi), %xmm3         /* upper half of the key -> round key 1 */
        movdqu  (%rdi), %xmm1           /* lower half of the key -> round key 0 */
        movdqu  %xmm3, 16(%rsi)
        movdqu  %xmm1, (%rsi)
        leaq    32(%rsi), %rsi          /* %rsi -> slot for round key 2 */
        xorl    %eax, %eax

        /* Each call consumes %xmm2 and leaves the two new round keys in
           %xmm1 / %xmm3, with %rsi advanced by 32. */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01     /* aeskeygenassist $0x01, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02     /* aeskeygenassist $0x02, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04     /* aeskeygenassist $0x04, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08     /* aeskeygenassist $0x08, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10     /* aeskeygenassist $0x10, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20     /* aeskeygenassist $0x20, %xmm3, %xmm2 */
        call    key_expansion256

        /* Last step: only one more round key is needed, so the first half
           of key_expansion256 is repeated inline instead of calling it. */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40     /* aeskeygenassist $0x40, %xmm3, %xmm2 */
        pshufd  $0xff, %xmm2, %xmm2
        pxor    %xmm6, %xmm6
        shufps  $0x10, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        shufps  $0x8c, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        pxor    %xmm2, %xmm1
        movdqu  %xmm1, (%rsi)           /* round key 14 */

        ret
        .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
|
1612 |
|
1613 |
|
/*
 * intel_aes_decrypt_init_256
 *
 * Expands a 32-byte key into 15 round keys of 16 bytes each, with round
 * keys 1..13 passed through aesimc before being stored.  Round keys 0
 * and 14 are stored unmodified.
 *
 *  in %rdi : the key
 *  in %rsi : buffer for expanded key
 *
 * key_expansion256 stores each new pair of round keys untransformed and
 * advances %rsi by 32; the pair is then overwritten at -32(%rsi) and
 * -16(%rsi) with its aesimc form.  The untransformed keys stay in
 * %xmm1 / %xmm3 so the next expansion step continues from them.
 *
 * %eax is cleared before the first call and stays 0: key_expansion256
 * reads it to zero %xmm6, and it is also the value returned to the caller.
 * Registers written: %rax, %rsi, %xmm1-%xmm6, flags.
 */
        .type   intel_aes_decrypt_init_256,@function
        .globl  intel_aes_decrypt_init_256
        .align  16
intel_aes_decrypt_init_256:
        movdqu  16(%rdi), %xmm3         /* upper half of the key */
        movdqu  (%rdi), %xmm1           /* lower half of the key */
        .byte 0x66,0x0f,0x38,0xdb,0xe3          /* aesimc %xmm3, %xmm4 */
        movdqu  %xmm1, (%rsi)           /* round key 0, unmodified */
        movdqu  %xmm4, 16(%rsi)         /* round key 1, after aesimc */
        leaq    32(%rsi), %rsi          /* %rsi -> slot for round key 2 */
        xorl    %eax, %eax

        /* round keys 2 and 3 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01     /* aeskeygenassist $0x01, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1          /* aesimc %xmm1, %xmm4 */
        movdqu  %xmm4, -32(%rsi)
        .byte 0x66,0x0f,0x38,0xdb,0xeb          /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm5, -16(%rsi)

        /* round keys 4 and 5 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02     /* aeskeygenassist $0x02, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1          /* aesimc %xmm1, %xmm4 */
        movdqu  %xmm4, -32(%rsi)
        .byte 0x66,0x0f,0x38,0xdb,0xeb          /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm5, -16(%rsi)

        /* round keys 6 and 7 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04     /* aeskeygenassist $0x04, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1          /* aesimc %xmm1, %xmm4 */
        movdqu  %xmm4, -32(%rsi)
        .byte 0x66,0x0f,0x38,0xdb,0xeb          /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm5, -16(%rsi)

        /* round keys 8 and 9 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08     /* aeskeygenassist $0x08, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1          /* aesimc %xmm1, %xmm4 */
        movdqu  %xmm4, -32(%rsi)
        .byte 0x66,0x0f,0x38,0xdb,0xeb          /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm5, -16(%rsi)

        /* round keys 10 and 11 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10     /* aeskeygenassist $0x10, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1          /* aesimc %xmm1, %xmm4 */
        movdqu  %xmm4, -32(%rsi)
        .byte 0x66,0x0f,0x38,0xdb,0xeb          /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm5, -16(%rsi)

        /* round keys 12 and 13 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20     /* aeskeygenassist $0x20, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1          /* aesimc %xmm1, %xmm4 */
        movdqu  %xmm4, -32(%rsi)
        .byte 0x66,0x0f,0x38,0xdb,0xeb          /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm5, -16(%rsi)

        /* Round key 14: first half of key_expansion256 repeated inline;
           the result is stored without aesimc. */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40     /* aeskeygenassist $0x40, %xmm3, %xmm2 */
        pshufd  $0xff, %xmm2, %xmm2
        pxor    %xmm6, %xmm6
        shufps  $0x10, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        shufps  $0x8c, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        pxor    %xmm2, %xmm1
        movdqu  %xmm1, (%rsi)

        ret
        .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
|
1677 |
|
1678 |
|
/*
 * key_expansion256 (file-local helper, register-based calling convention)
 *
 * Derives the next two 16-byte round keys and appends them at (%rsi).
 *
 *  in  %xmm1 : round key from two slots back (updated in place)
 *  in  %xmm3 : most recent round key (updated in place)
 *  in  %xmm2 : aeskeygenassist result for %xmm3, set up by the caller
 *  in  %eax  : must be 0; it is copied into %xmm6 to clear that register
 *  in  %rsi  : where the two new round keys are written
 *
 *  out %xmm1 : first new round key, also stored at the incoming (%rsi)
 *  out %xmm3 : second new round key, also stored at the incoming 16(%rsi)
 *  out %rsi  : advanced by 32
 *
 * Registers written besides the outputs: %xmm2, %xmm4, %xmm6, flags.
 * %eax is left untouched.
 */
        .type   key_expansion256,@function
        .align  16
key_expansion256:
        /* First new key: broadcast dword 3 of the assist value and fold
           it, together with shifted copies of %xmm1 built in %xmm6, into
           %xmm1. */
        pshufd  $0xff, %xmm2, %xmm2
        movd    %eax, %xmm6             /* %xmm6 = 0 (caller guarantees %eax == 0) */
        shufps  $0x10, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        shufps  $0x8c, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        pxor    %xmm2, %xmm1
        movdqu  %xmm1, (%rsi)

        addq    $16, %rsi

        /* Second new key: same folding applied to %xmm3, using dword 2 of
           an assist value computed from the key just produced.  %xmm6 is
           not re-cleared; its dword 0 is still zero after the shuffles
           above, which is the only part the first shufps below takes
           from it. */
        .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00     /* aeskeygenassist $0, %xmm1, %xmm4 */
        pshufd  $0xaa, %xmm4, %xmm4
        shufps  $0x10, %xmm3, %xmm6
        pxor    %xmm6, %xmm3
        shufps  $0x8c, %xmm3, %xmm6
        pxor    %xmm6, %xmm3
        pxor    %xmm4, %xmm3
        movdqu  %xmm3, (%rsi)
        addq    $16, %rsi
        ret
        .size key_expansion256, .-key_expansion256
|
1703 |
|
1704 |
|
/*
 * intel_aes_encrypt_ecb_256
 *
 * ECB-mode encryption with a 14-round (AES-256) key schedule, using
 * hand-encoded AES-NI opcodes.
 *
 *  in %rdi : cx - context
 *  in %rsi : output - pointer to output buffer
 *  in %rdx : outputLen - pointer to variable for length of output
 *            (filled by caller; not touched here)
 *  in %rcx : maxOutputLen - length of output buffer (not read here)
 *  in %r8  : input - pointer to input buffer
 *  in %r9  : inputLen - length of input buffer
 *  on stack: blocksize - AES blocksize (always 16, unused)
 *
 * The 15 round keys are read from cx+48 (EXPANDED_KEY_OFFSET), 16 bytes
 * each.  Blocks are processed eight at a time while at least 128 bytes
 * remain, then one at a time.  The single-block loop stops only when the
 * byte offset equals inputLen, so inputLen is expected to be a multiple
 * of 16; that is not checked here.
 *
 * Returns 0 in %eax.
 * Registers written: %rax, %rdi, %r11, %xmm1-%xmm15, flags.
 */
        .type   intel_aes_encrypt_ecb_256,@function
        .globl  intel_aes_encrypt_ecb_256
        .align  16
intel_aes_encrypt_ecb_256:
        /* Literal 48 is EXPANDED_KEY_OFFSET; likewise the literal 128s
           below are 8*16.  The symbolic forms are deliberately not used. */
        leaq    48(%rdi), %rdi          /* %rdi -> round keys */

        movdqu  224(%rdi), %xmm15       /* round key 14: used by aesenclast */
        movdqu  (%rdi), %xmm2           /* round key 0: whitening */
        xorl    %eax, %eax              /* %rax = byte offset into input/output */
        cmpq    $128, %r9
        jb      1f                      /* fewer than 8 blocks: single-block loop only */
        leaq    -128(%r9), %r11         /* last offset at which 8 blocks still fit */

        /* ---- eight blocks per iteration ---- */
2:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm2, %xmm3
        pxor    %xmm2, %xmm4
        pxor    %xmm2, %xmm5
        pxor    %xmm2, %xmm6
        pxor    %xmm2, %xmm7
        pxor    %xmm2, %xmm8
        pxor    %xmm2, %xmm9
        pxor    %xmm2, %xmm10

        /* Fully unrolled rounds; round keys are fetched two at a time
           into %xmm1 and %xmm11. */

        /* round keys 1 and 2 */
        movdqu  16(%rdi), %xmm1
        movdqu  32(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb     /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3     /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb     /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3     /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb     /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3     /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3     /* aesenc %xmm11, %xmm10 */

        /* round keys 3 and 4 */
        movdqu  48(%rdi), %xmm1
        movdqu  64(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb     /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3     /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb     /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3     /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb     /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3     /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3     /* aesenc %xmm11, %xmm10 */

        /* round keys 5 and 6 */
        movdqu  80(%rdi), %xmm1
        movdqu  96(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb     /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3     /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb     /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3     /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb     /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3     /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3     /* aesenc %xmm11, %xmm10 */

        /* round keys 7 and 8 */
        movdqu  112(%rdi), %xmm1
        movdqu  128(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb     /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3     /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb     /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3     /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb     /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3     /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3     /* aesenc %xmm11, %xmm10 */

        /* round keys 9 and 10 */
        movdqu  144(%rdi), %xmm1
        movdqu  160(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb     /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3     /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb     /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3     /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb     /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3     /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3     /* aesenc %xmm11, %xmm10 */

        /* round keys 11 and 12 */
        movdqu  176(%rdi), %xmm1
        movdqu  192(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb     /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3     /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb     /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3     /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb     /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3     /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3     /* aesenc %xmm11, %xmm10 */

        /* round key 13, then the final round with round key 14 (%xmm15) */
        movdqu  208(%rdi), %xmm1
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf     /* aesenclast %xmm15, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7     /* aesenclast %xmm15, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xef     /* aesenclast %xmm15, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7     /* aesenclast %xmm15, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xff     /* aesenclast %xmm15, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7     /* aesenclast %xmm15, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf     /* aesenclast %xmm15, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7     /* aesenclast %xmm15, %xmm10 */

        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     2b                      /* unsigned: another 8 blocks still fit */

        /* ---- remaining blocks, one per iteration ---- */
1:
        cmpq    %rax, %r9
        je      5f                      /* nothing left */

        /* Round keys 1..6 and 8..13 stay resident in %xmm2..%xmm7 and
           %xmm9..%xmm14.  With %xmm1 as the data register and %xmm15
           holding key 14, only %xmm8 is left, so it alternates between
           round key 0 and round key 7 inside the loop. */
        movdqu  208(%rdi), %xmm14
        movdqu  192(%rdi), %xmm13
        movdqu  176(%rdi), %xmm12
        movdqu  160(%rdi), %xmm11
        movdqu  144(%rdi), %xmm10
        movdqu  128(%rdi), %xmm9
        movdqu  96(%rdi), %xmm7
        movdqu  80(%rdi), %xmm6
        movdqu  64(%rdi), %xmm5
        movdqu  48(%rdi), %xmm4
        movdqu  32(%rdi), %xmm3
        movdqu  16(%rdi), %xmm2
        movdqu  (%rdi), %xmm8

4:
        movdqu  (%r8, %rax), %xmm1
        pxor    %xmm8, %xmm1            /* whitening with round key 0 */
        movdqu  112(%rdi), %xmm8        /* %xmm8 = round key 7 */
        .byte 0x66,0x0f,0x38,0xdc,0xca          /* aesenc %xmm2, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcb          /* aesenc %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcc          /* aesenc %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcd          /* aesenc %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xce          /* aesenc %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcf          /* aesenc %xmm7, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8     /* aesenc %xmm8, %xmm1 */
        movdqu  (%rdi), %xmm8           /* %xmm8 = round key 0 again, for the next block */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xca     /* aesenc %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc     /* aesenc %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd     /* aesenc %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xce     /* aesenc %xmm14, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf     /* aesenclast %xmm15, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     4b

5:
        xorl    %eax, %eax              /* return 0 */
        ret
        .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
|
1934 |
|
1935 |
|
1936 /* in %rdi : cx - context |
|
1937 in %rsi : output - pointer to output buffer |
|
1938 in %rdx : outputLen - pointer to variable for length of output |
|
1939 (filled by caller) |
|
1940 in %rcx : maxOutputLen - length of output buffer |
|
1941 in %r8 : input - pointer to input buffer |
|
1942 in %r9 : inputLen - length of input buffer |
|
1943 on stack: blocksize - AES blocksize (always 16, unused) |
|
1944 */ |
|
1945 .type intel_aes_decrypt_ecb_256,@function |
|
1946 .globl intel_aes_decrypt_ecb_256 |
|
1947 .align 16 |
|
1948 intel_aes_decrypt_ecb_256: |
|
1949 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
1950 leaq 48(%rdi), %rdi |
|
1951 |
|
1952 movdqu (%rdi), %xmm2 |
|
1953 movdqu 224(%rdi), %xmm15 |
|
1954 xorl %eax, %eax |
|
1955 // cmpq $8*16, %r9 |
|
1956 cmpq $128, %r9 |
|
1957 jb 1f |
|
1958 // leaq -8*16(%r9), %r11 |
|
1959 leaq -128(%r9), %r11 |
|
1960 2: movdqu (%r8, %rax), %xmm3 |
|
1961 movdqu 16(%r8, %rax), %xmm4 |
|
1962 movdqu 32(%r8, %rax), %xmm5 |
|
1963 movdqu 48(%r8, %rax), %xmm6 |
|
1964 movdqu 64(%r8, %rax), %xmm7 |
|
1965 movdqu 80(%r8, %rax), %xmm8 |
|
1966 movdqu 96(%r8, %rax), %xmm9 |
|
1967 movdqu 112(%r8, %rax), %xmm10 |
|
1968 pxor %xmm15, %xmm3 |
|
1969 pxor %xmm15, %xmm4 |
|
1970 pxor %xmm15, %xmm5 |
|
1971 pxor %xmm15, %xmm6 |
|
1972 pxor %xmm15, %xmm7 |
|
1973 pxor %xmm15, %xmm8 |
|
1974 pxor %xmm15, %xmm9 |
|
1975 pxor %xmm15, %xmm10 |
|
1976 |
|
1977 // complete loop unrolling |
|
1978 movdqu 208(%rdi), %xmm1 |
|
1979 movdqu 192(%rdi), %xmm11 |
|
1980 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
1981 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
1982 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
1983 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
1984 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
1985 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
1986 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
1987 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
1988 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
1989 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
1990 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
1991 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
1992 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
1993 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
1994 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
1995 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
1996 |
|
1997 movdqu 176(%rdi), %xmm1 |
|
1998 movdqu 160(%rdi), %xmm11 |
|
1999 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2000 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2001 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2002 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2003 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2004 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2005 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2006 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2007 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2008 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2009 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2010 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2011 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2012 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2013 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2014 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2015 |
|
2016 movdqu 144(%rdi), %xmm1 |
|
2017 movdqu 128(%rdi), %xmm11 |
|
2018 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2019 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2020 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2021 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2022 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2023 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2024 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2025 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2026 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2027 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2028 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2029 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2030 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2031 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2032 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2033 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2034 |
|
2035 movdqu 112(%rdi), %xmm1 |
|
2036 movdqu 96(%rdi), %xmm11 |
|
2037 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2038 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2039 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2040 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2041 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2042 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2043 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2044 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2045 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2046 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2047 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2048 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2049 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2050 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2051 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2052 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2053 |
|
2054 movdqu 80(%rdi), %xmm1 |
|
2055 movdqu 64(%rdi), %xmm11 |
|
2056 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2057 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2058 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2059 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2060 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2061 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2062 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2063 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2064 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2065 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2066 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2067 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2068 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2069 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2070 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2071 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2072 |
|
2073 movdqu 48(%rdi), %xmm1 |
|
2074 movdqu 32(%rdi), %xmm11 |
|
2075 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2076 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2077 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2078 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2079 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2080 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2081 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2082 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2083 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2084 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2085 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2086 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2087 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2088 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2089 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2090 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2091 |
|
2092 movdqu 16(%rdi), %xmm1 |
|
2093 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2094 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2095 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2096 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2097 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2098 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2099 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2100 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2101 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ |
|
2102 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ |
|
2103 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ |
|
2104 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ |
|
2105 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ |
|
2106 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ |
|
2107 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ |
|
2108 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ |
|
2109 |
|
2110 movdqu %xmm3, (%rsi, %rax) |
|
2111 movdqu %xmm4, 16(%rsi, %rax) |
|
2112 movdqu %xmm5, 32(%rsi, %rax) |
|
2113 movdqu %xmm6, 48(%rsi, %rax) |
|
2114 movdqu %xmm7, 64(%rsi, %rax) |
|
2115 movdqu %xmm8, 80(%rsi, %rax) |
|
2116 movdqu %xmm9, 96(%rsi, %rax) |
|
2117 movdqu %xmm10, 112(%rsi, %rax) |
|
2118 // addq $8*16, %rax |
|
2119 addq $128, %rax |
|
2120 cmpq %r11, %rax |
|
2121 jbe 2b |
|
2122 1: cmpq %rax, %r9 |
|
2123 je 5f |
|
2124 |
|
2125 movdqu 16(%rdi), %xmm2 |
|
2126 movdqu 32(%rdi), %xmm3 |
|
2127 movdqu 48(%rdi), %xmm4 |
|
2128 movdqu 64(%rdi), %xmm5 |
|
2129 movdqu 80(%rdi), %xmm6 |
|
2130 movdqu 96(%rdi), %xmm7 |
|
2131 movdqu 112(%rdi), %xmm8 |
|
2132 movdqu 128(%rdi), %xmm9 |
|
2133 movdqu 144(%rdi), %xmm10 |
|
2134 movdqu 160(%rdi), %xmm11 |
|
2135 movdqu 176(%rdi), %xmm12 |
|
2136 movdqu 192(%rdi), %xmm13 |
|
2137 movdqu 208(%rdi), %xmm14 |
|
2138 |
|
2139 4: movdqu (%r8, %rax), %xmm1 |
|
2140 pxor %xmm15, %xmm1 |
|
2141 .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ |
|
2142 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ |
|
2143 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ |
|
2144 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ |
|
2145 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ |
|
2146 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ |
|
2147 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ |
|
2148 movdqu (%rdi), %xmm8 |
|
2149 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ |
|
2150 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ |
|
2151 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ |
|
2152 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ |
|
2153 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ |
|
2154 .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ |
|
2155 .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ |
|
2156 movdqu 112(%rdi), %xmm8 |
|
2157 movdqu %xmm1, (%rsi, %rax) |
|
2158 addq $16, %rax |
|
2159 cmpq %rax, %r9 |
|
2160 jne 4b |
|
2161 |
|
2162 5: xor %eax, %eax |
|
2163 ret |
|
2164 .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256 |
|
2165 |
|
2166 |
|
/* intel_aes_encrypt_cbc_256: CBC-mode encryption, one 16-byte block per
   loop iteration, using 15 round keys (13 aesenc rounds plus aesenclast).
   The chaining value is read from cx+16 and the round keys from cx+48.
   After the loop the last ciphertext block is written back to cx+16.
   The incoming %rdx (outputLen) is overwritten and %rcx (maxOutputLen)
   is never read here.  %eax is zero on return.
   NOTE(review): the loop ends only when the byte offset equals inputLen
   exactly, so inputLen is presumably a multiple of 16 - confirm against
   callers. */
2167 /* in %rdi : cx - context |
|
2168 in %rsi : output - pointer to output buffer |
|
2169 in %rdx : outputLen - pointer to variable for length of output |
|
2170 (filled by caller) |
|
2171 in %rcx : maxOutputLen - length of output buffer |
|
2172 in %r8 : input - pointer to input buffer |
|
2173 in %r9 : inputLen - length of input buffer |
|
2174 on stack: blocksize - AES blocksize (always 16, unused) |
|
2175 */ |
|
2176 .type intel_aes_encrypt_cbc_256,@function |
|
2177 .globl intel_aes_encrypt_cbc_256 |
|
2178 .align 16 |
|
2179 intel_aes_encrypt_cbc_256: |
|
/* Zero-length input: skip straight to the return, context untouched. */
2180 testq %r9, %r9 |
|
2181 je 2f |
|
2182 |
|
/* %rdx = cx + 16 (chaining value), %rdi = cx + 48 (round keys).
   The literals equal IV_OFFSET and EXPANDED_KEY_OFFSET from the top of
   the file, as the commented-out forms below show. */
2183 // leaq IV_OFFSET(%rdi), %rdx |
|
2184 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
2185 leaq 16(%rdi), %rdx |
|
2186 leaq 48(%rdi), %rdi |
|
2187 |
|
/* xmm0 = chaining value.
   xmm2-xmm7 = round keys at offsets 16-96.
   xmm9-xmm15 = round keys at offsets 128-224.
   xmm8 starts as the key at offset 0 and is shared with the key at
   offset 112 inside the loop, since all 16 xmm registers are in use. */
2188 movdqu (%rdx), %xmm0 |
|
2189 movdqu (%rdi), %xmm8 |
|
2190 movdqu 16(%rdi), %xmm2 |
|
2191 movdqu 32(%rdi), %xmm3 |
|
2192 movdqu 48(%rdi), %xmm4 |
|
2193 movdqu 64(%rdi), %xmm5 |
|
2194 movdqu 80(%rdi), %xmm6 |
|
2195 movdqu 96(%rdi), %xmm7 |
|
2196 movdqu 128(%rdi), %xmm9 |
|
2197 movdqu 144(%rdi), %xmm10 |
|
2198 movdqu 160(%rdi), %xmm11 |
|
2199 movdqu 176(%rdi), %xmm12 |
|
2200 movdqu 192(%rdi), %xmm13 |
|
2201 movdqu 208(%rdi), %xmm14 |
|
2202 movdqu 224(%rdi), %xmm15 |
|
2203 |
|
/* %rax = byte offset of the current block within input and output. */
2204 xorl %eax, %eax |
|
/* Load one input block, XOR in the chaining value, then the round key
   at offset 0. */
2205 1: movdqu (%r8, %rax), %xmm1 |
|
2206 pxor %xmm0, %xmm1 |
|
2207 pxor %xmm8, %xmm1 |
|
/* Switch xmm8 to the key at offset 112 for the seventh aesenc below. */
2208 movdqu 112(%rdi), %xmm8 |
|
2209 .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */ |
|
2210 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ |
|
2211 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ |
|
2212 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ |
|
2213 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ |
|
2214 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ |
|
2215 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ |
|
/* Restore xmm8 to the key at offset 0, ready for the next iteration. */
2216 movdqu (%rdi), %xmm8 |
|
2217 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ |
|
2218 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ |
|
2219 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ |
|
2220 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ |
|
2221 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ |
|
2222 .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */ |
|
2223 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */ |
|
/* Store the ciphertext block. It also becomes the next chaining value. */
2224 movdqu %xmm1, (%rsi, %rax) |
|
2225 movdqa %xmm1, %xmm0 |
|
2226 addq $16, %rax |
|
2227 cmpq %rax, %r9 |
|
2228 jne 1b |
|
2229 |
|
/* Save the final chaining value back into the context at cx+16. */
2230 movdqu %xmm0, (%rdx) |
|
2231 |
|
/* Return value in %eax is always 0. */
2232 2: xor %eax, %eax |
|
2233 ret |
|
2234 .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256 |
|
2235 |
|
2236 |
|
/* intel_aes_decrypt_cbc_256: CBC-mode decryption using the round keys at
   cx+48 (offsets 0-224) and the chaining value at cx+16.
   While at least 128 bytes remain, eight blocks are decrypted per
   iteration (label 2).  Any remainder is handled one block at a time
   (label 4).
   On exit the current chaining value (the last ciphertext block consumed,
   or the original value when inputLen is 0) is stored back to cx+16 and
   %eax is zero.
   The incoming %rdx (outputLen) is overwritten and %rcx (maxOutputLen)
   is never read here.
   NOTE(review): the single-block loop ends only when the byte offset
   equals inputLen exactly, so inputLen is presumably a multiple of 16 -
   confirm against callers. */
2237 /* in %rdi : cx - context |
|
2238 in %rsi : output - pointer to output buffer |
|
2239 in %rdx : outputLen - pointer to variable for length of output |
|
2240 (filled by caller) |
|
2241 in %rcx : maxOutputLen - length of output buffer |
|
2242 in %r8 : input - pointer to input buffer |
|
2243 in %r9 : inputLen - length of input buffer |
|
2244 on stack: blocksize - AES blocksize (always 16, unused) |
|
2245 */ |
|
2246 .type intel_aes_decrypt_cbc_256,@function |
|
2247 .globl intel_aes_decrypt_cbc_256 |
|
2248 .align 16 |
|
2249 intel_aes_decrypt_cbc_256: |
|
/* %rdx = cx + 16 (chaining value), %rdi = cx + 48 (round keys).
   The literals equal IV_OFFSET and EXPANDED_KEY_OFFSET from the top of
   the file, as the commented-out forms below show. */
2250 // leaq IV_OFFSET(%rdi), %rdx |
|
2251 // leaq EXPANDED_KEY_OFFSET(%rdi), %rdi |
|
2252 leaq 16(%rdi), %rdx |
|
2253 leaq 48(%rdi), %rdi |
|
2254 |
|
/* xmm0 = chaining value.
   xmm2 = round key at offset 0, used by aesdeclast in the 8-block loop.
   xmm15 = round key at offset 224, XORed into every ciphertext block
   before the aesdec rounds.
   %rax = byte offset of the next block to process. */
2255 movdqu (%rdx), %xmm0 |
|
2256 movdqu (%rdi), %xmm2 |
|
2257 movdqu 224(%rdi), %xmm15 |
|
2258 xorl %eax, %eax |
|
/* Fewer than 128 bytes of input: skip the 8-block loop entirely. */
2259 // cmpq $8*16, %r9 |
|
2260 cmpq $128, %r9 |
|
2261 jb 1f |
|
/* %r11 = inputLen - 128, the largest offset at which 8 blocks still fit. */
2262 // leaq -8*16(%r9), %r11 |
|
2263 leaq -128(%r9), %r11 |
|
/* Load eight ciphertext blocks into xmm3-xmm10 and XOR each with the
   round key at offset 224. */
2264 2: movdqu (%r8, %rax), %xmm3 |
|
2265 movdqu 16(%r8, %rax), %xmm4 |
|
2266 movdqu 32(%r8, %rax), %xmm5 |
|
2267 movdqu 48(%r8, %rax), %xmm6 |
|
2268 movdqu 64(%r8, %rax), %xmm7 |
|
2269 movdqu 80(%r8, %rax), %xmm8 |
|
2270 movdqu 96(%r8, %rax), %xmm9 |
|
2271 movdqu 112(%r8, %rax), %xmm10 |
|
2272 pxor %xmm15, %xmm3 |
|
2273 pxor %xmm15, %xmm4 |
|
2274 pxor %xmm15, %xmm5 |
|
2275 pxor %xmm15, %xmm6 |
|
2276 pxor %xmm15, %xmm7 |
|
2277 pxor %xmm15, %xmm8 |
|
2278 pxor %xmm15, %xmm9 |
|
2279 pxor %xmm15, %xmm10 |
|
2280 |
|
/* Thirteen aesdec rounds with the keys at offsets 208 down to 16.
   Keys are loaded two at a time into xmm1 and xmm11 (the last one alone
   into xmm1) and each is applied to all eight blocks in turn. */
2281 // complete loop unrolling |
|
2282 movdqu 208(%rdi), %xmm1 |
|
2283 movdqu 192(%rdi), %xmm11 |
|
2284 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2285 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2286 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2287 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2288 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2289 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2290 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2291 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2292 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2293 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2294 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2295 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2296 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2297 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2298 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2299 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2300 |
|
2301 movdqu 176(%rdi), %xmm1 |
|
2302 movdqu 160(%rdi), %xmm11 |
|
2303 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2304 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2305 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2306 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2307 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2308 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2309 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2310 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2311 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2312 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2313 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2314 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2315 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2316 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2317 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2318 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2319 |
|
2320 movdqu 144(%rdi), %xmm1 |
|
2321 movdqu 128(%rdi), %xmm11 |
|
2322 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2323 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2324 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2325 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2326 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2327 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2328 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2329 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2330 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2331 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2332 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2333 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2334 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2335 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2336 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2337 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2338 |
|
2339 movdqu 112(%rdi), %xmm1 |
|
2340 movdqu 96(%rdi), %xmm11 |
|
2341 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2342 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2343 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2344 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2345 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2346 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2347 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2348 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2349 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2350 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2351 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2352 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2353 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2354 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2355 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2356 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2357 |
|
2358 movdqu 80(%rdi), %xmm1 |
|
2359 movdqu 64(%rdi), %xmm11 |
|
2360 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2361 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2362 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2363 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2364 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2365 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2366 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2367 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2368 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2369 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2370 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2371 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2372 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2373 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2374 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2375 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2376 |
|
2377 movdqu 48(%rdi), %xmm1 |
|
2378 movdqu 32(%rdi), %xmm11 |
|
2379 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2380 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2381 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2382 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2383 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2384 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2385 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2386 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2387 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ |
|
2388 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ |
|
2389 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ |
|
2390 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ |
|
2391 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ |
|
2392 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ |
|
2393 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ |
|
2394 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ |
|
2395 |
|
/* Final aesdec round with the key at offset 16, then aesdeclast with
   the key at offset 0 that has been held in xmm2 since function entry. */
2396 movdqu 16(%rdi), %xmm1 |
|
2397 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ |
|
2398 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ |
|
2399 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ |
|
2400 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ |
|
2401 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ |
|
2402 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ |
|
2403 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ |
|
2404 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ |
|
2405 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ |
|
2406 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ |
|
2407 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ |
|
2408 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ |
|
2409 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ |
|
2410 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ |
|
2411 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ |
|
2412 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ |
|
2413 |
|
/* CBC chaining: XOR each decrypted block with the ciphertext block that
   preceded it.  The first block uses the chaining value carried in xmm0.
   The other seven use ciphertext reloaded from the input buffer.
   Every reload happens before the first store below, presumably for the
   same input buf = output buf case noted in the single-block loop -
   confirm.  xmm0 ends up holding the eighth ciphertext block, which is
   the chaining value for whatever follows. */
2414 pxor %xmm0, %xmm3 |
|
2415 movdqu (%r8, %rax), %xmm0 |
|
2416 pxor %xmm0, %xmm4 |
|
2417 movdqu 16(%r8, %rax), %xmm0 |
|
2418 pxor %xmm0, %xmm5 |
|
2419 movdqu 32(%r8, %rax), %xmm0 |
|
2420 pxor %xmm0, %xmm6 |
|
2421 movdqu 48(%r8, %rax), %xmm0 |
|
2422 pxor %xmm0, %xmm7 |
|
2423 movdqu 64(%r8, %rax), %xmm0 |
|
2424 pxor %xmm0, %xmm8 |
|
2425 movdqu 80(%r8, %rax), %xmm0 |
|
2426 pxor %xmm0, %xmm9 |
|
2427 movdqu 96(%r8, %rax), %xmm0 |
|
2428 pxor %xmm0, %xmm10 |
|
2429 movdqu 112(%r8, %rax), %xmm0 |
|
2430 movdqu %xmm3, (%rsi, %rax) |
|
2431 movdqu %xmm4, 16(%rsi, %rax) |
|
2432 movdqu %xmm5, 32(%rsi, %rax) |
|
2433 movdqu %xmm6, 48(%rsi, %rax) |
|
2434 movdqu %xmm7, 64(%rsi, %rax) |
|
2435 movdqu %xmm8, 80(%rsi, %rax) |
|
2436 movdqu %xmm9, 96(%rsi, %rax) |
|
2437 movdqu %xmm10, 112(%rsi, %rax) |
|
/* Advance by eight blocks.  The unsigned compare repeats the loop while
   the new offset is at most inputLen - 128. */
2438 // addq $8*16, %rax |
|
2439 addq $128, %rax |
|
2440 cmpq %r11, %rax |
|
2441 jbe 2b |
|
/* Nothing left over: go straight to the exit path. */
2442 1: cmpq %rax, %r9 |
|
2443 je 5f |
|
2444 |
|
/* Set up for the single-block loop: xmm2-xmm14 = round keys at offsets
   16-208.  This repurposes xmm2, which held the key at offset 0 above.
   xmm8 (offset 112) is shared with the key at offset 0 inside the loop. */
2445 movdqu 16(%rdi), %xmm2 |
|
2446 movdqu 32(%rdi), %xmm3 |
|
2447 movdqu 48(%rdi), %xmm4 |
|
2448 movdqu 64(%rdi), %xmm5 |
|
2449 movdqu 80(%rdi), %xmm6 |
|
2450 movdqu 96(%rdi), %xmm7 |
|
2451 movdqu 112(%rdi), %xmm8 |
|
2452 movdqu 128(%rdi), %xmm9 |
|
2453 movdqu 144(%rdi), %xmm10 |
|
2454 movdqu 160(%rdi), %xmm11 |
|
2455 movdqu 176(%rdi), %xmm12 |
|
2456 movdqu 192(%rdi), %xmm13 |
|
2457 movdqu 208(%rdi), %xmm14 |
|
2458 |
|
/* Decrypt one block: XOR with the key at offset 224, then aesdec with
   the keys at offsets 208 down to 16. */
2459 4: movdqu (%r8, %rax), %xmm1 |
|
2460 pxor %xmm15, %xmm1 |
|
2461 .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ |
|
2462 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ |
|
2463 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ |
|
2464 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ |
|
2465 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ |
|
2466 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ |
|
2467 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ |
|
/* xmm8 has just been used as the key at offset 112.  Reload it with the
   key at offset 0, which aesdeclast needs below. */
2468 movdqu (%rdi), %xmm8 |
|
2469 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ |
|
2470 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ |
|
2471 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ |
|
2472 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ |
|
2473 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ |
|
2474 .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ |
|
2475 .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ |
|
/* Put the key at offset 112 back in xmm8 for the next iteration. */
2476 movdqu 112(%rdi), %xmm8 |
|
/* Finish CBC: XOR with the previous ciphertext block held in xmm0. */
2477 pxor %xmm0, %xmm1 |
|
2478 movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */ |
|
2479 movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */ |
|
2480 addq $16, %rax |
|
2481 cmpq %rax, %r9 |
|
2482 jne 4b |
|
2483 |
|
/* Store the current chaining value back into the context at cx+16. */
2484 5: movdqu %xmm0, (%rdx) |
|
2485 |
|
/* Return value in %eax is always 0. */
2486 xor %eax, %eax |
|
2487 ret |
|
2488 .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256 |