security/nss/lib/freebl/intel-aes-x64-masm.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 ; LICENSE:
michael@0 2 ; This submission to NSS is to be made available under the terms of the
michael@0 3 ; Mozilla Public License, v. 2.0. You can obtain one at
michael@0 4 ; http://mozilla.org/MPL/2.0/.
michael@0 5 ;###############################################################################
michael@0 6 ; Copyright(c) 2014, Intel Corp.
michael@0 7 ; Developers and authors:
michael@0 8 ; Shay Gueron and Vlad Krasnov
michael@0 9 ; Intel Corporation, Israel Development Centre, Haifa, Israel
michael@0 10 ; Please send feedback directly to crypto.feedback.alias@intel.com
michael@0 11
michael@0 12
; Constant vectors for the key-expansion procedures.  Every label names one
; 16-byte vector built from four identical dwords; the ALIGN 16 lets the
; 128-bit key schedule fetch them with MOVDQA.
.DATA
ALIGN 16
Lmask       DD 4 DUP (0c0f0e0dh)    ; PSHUFB control for the 128-bit key schedule
Lmask192    DD 4 DUP (004070605h)   ; PSHUFB control for the 192-bit key schedule
Lmask256    DD 4 DUP (00c0f0e0dh)   ; PSHUFB control for the 256-bit key schedule
Lcon1       DD 4 DUP (1)            ; first round constant (doubled with PSLLD per round)
Lcon2       DD 4 DUP (1bh)          ; round constant loaded after the 128-bit loop
michael@0 20
michael@0 21 .CODE
michael@0 22
; Text aliases for the registers used by the ECB/CBC/CTR code generators below.
;   ctx      - rcx; the generators add 48 to it before indexing round keys
;   output   - rdx; destination pointer, advanced as blocks are written
;   input    - r8;  reloaded from the stack at the start of every generator
;   inputLen - r9d; reloaded from the stack at the start of every generator
; NOTE(review): rcx/rdx/r8/r9 plus stack arguments matches the Microsoft x64
; calling convention - confirm against the C prototypes of the callers.
michael@0 23 ctx textequ <rcx>
michael@0 24 output textequ <rdx>
michael@0 25 input textequ <r8>
michael@0 26 inputLen textequ <r9d>
michael@0 27
michael@0 28
; aes_rnd i
;   One AESENC round over the eight blocks in xmm0..xmm7, using the round
;   key stored at [ctx + 16*i].
;   Clobbers: xmm8 (receives the round key).
aes_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
    FOR xr, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7>
        aesenc  xr, xmm8
    ENDM
ENDM
michael@0 40
; aes_last_rnd i
;   Final encryption round (AESENCLAST) over the eight blocks in xmm0..xmm7,
;   using the round key stored at [ctx + 16*i].
;   Clobbers: xmm8 (receives the round key).
aes_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
    FOR xr, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7>
        aesenclast xr, xmm8
    ENDM
ENDM
michael@0 52
; aes_dec_rnd i
;   One AESDEC round over the eight blocks in xmm0..xmm7, using the round
;   key stored at [ctx + 16*i].
;   Clobbers: xmm8 (receives the round key).
aes_dec_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
    FOR xr, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7>
        aesdec  xr, xmm8
    ENDM
ENDM
michael@0 64
; aes_dec_last_rnd i
;   Final decryption round (AESDECLAST) over the eight blocks in xmm0..xmm7,
;   using the round key stored at [ctx + 16*i].
;   Clobbers: xmm8 (receives the round key).
aes_dec_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
    FOR xr, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7>
        aesdeclast xr, xmm8
    ENDM
ENDM
michael@0 76
michael@0 77
; gen_aes_ecb_func enc, rnds
;   Emits the complete body (including RET) of an AES-ECB routine.
;     enc  - 1 selects the encryption instructions, any other value the
;            decryption instructions
;     rnds - number of AES rounds; also the index of the last round key
;   Register/stack use:
;     rcx (ctx)    - context pointer; round keys are read from ctx+48 onward
;     rdx (output) - destination buffer
;     [rsp+40]     - source pointer, loaded into r8 (input)
;     [rsp+48]     - byte count, loaded into r9d (inputLen)
;   Eight blocks are processed per iteration while at least 128 bytes remain,
;   then single blocks while at least 16 bytes remain; a shorter tail is left
;   untouched.  Returns 0 in rax.  xmm6-xmm8 are saved on the stack and
;   restored before returning.
;   Side effect: (re)defines the text macros rnd, lastrnd, aesinst and
;   aeslastinst at every expansion.
gen_aes_ecb_func MACRO enc, rnds

    LOCAL   blocks8, block1, finish

    ; Pick the round helpers and instructions for the requested direction.
    IF enc eq 1
        rnd         textequ <aes_rnd>
        lastrnd     textequ <aes_last_rnd>
        aesinst     textequ <aesenc>
        aeslastinst textequ <aesenclast>
    ELSE
        rnd         textequ <aes_dec_rnd>
        lastrnd     textequ <aes_dec_last_rnd>
        aesinst     textequ <aesdec>
        aeslastinst textequ <aesdeclast>
    ENDIF

        ; Skip the return address (1*8) and four 8-byte slots to reach the
        ; stack arguments.  The 32-bit load into r9d zero-extends into r9,
        ; so the length register needs no separate clearing.
        mov     input, [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        ; Preserve the xmm registers this routine overwrites.
        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]           ; ctx now addresses round key 0

blocks8:
        cmp     inputLen, 8*16
        jb      block1

        ; Load eight blocks into xmm0..xmm7.
    blk = 0
    WHILE blk LT 8
        movdqu  @CatStr(xmm,%blk), [blk*16 + input]
    blk = blk + 1
    ENDM

        ; Initial AddRoundKey with round key 0.
        movdqu  xmm8, [0*16 + ctx]
    blk = 0
    WHILE blk LT 8
        pxor    @CatStr(xmm,%blk), xmm8
    blk = blk + 1
    ENDM

    i = 1
    WHILE i LT rnds
        rnd i
    i = i+1
    ENDM
        lastrnd rnds

    blk = 0
    WHILE blk LT 8
        movdqu  [blk*16 + output], @CatStr(xmm,%blk)
    blk = blk + 1
    ENDM

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     blocks8

block1:
        cmp     inputLen, 1*16
        jb      finish

        movdqu  xmm0, [input]
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

    i = 1
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesinst xmm0, xmm7
    i = i+1
    ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     block1

finish:
        xor     eax, eax                ; return 0 (32-bit write clears all of rax)

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM
michael@0 185
; ECB encryption entry point: body is gen_aes_ecb_func expanded with enc = 1, rnds = 10.
michael@0 186 intel_aes_encrypt_ecb_128 PROC
michael@0 187 gen_aes_ecb_func 1, 10
michael@0 188 intel_aes_encrypt_ecb_128 ENDP
michael@0 189
; ECB encryption entry point: body is gen_aes_ecb_func expanded with enc = 1, rnds = 12.
michael@0 190 intel_aes_encrypt_ecb_192 PROC
michael@0 191 gen_aes_ecb_func 1, 12
michael@0 192 intel_aes_encrypt_ecb_192 ENDP
michael@0 193
; ECB encryption entry point: body is gen_aes_ecb_func expanded with enc = 1, rnds = 14.
michael@0 194 intel_aes_encrypt_ecb_256 PROC
michael@0 195 gen_aes_ecb_func 1, 14
michael@0 196 intel_aes_encrypt_ecb_256 ENDP
michael@0 197
; ECB decryption entry point: body is gen_aes_ecb_func expanded with enc = 0, rnds = 10.
michael@0 198 intel_aes_decrypt_ecb_128 PROC
michael@0 199 gen_aes_ecb_func 0, 10
michael@0 200 intel_aes_decrypt_ecb_128 ENDP
michael@0 201
; ECB decryption entry point: body is gen_aes_ecb_func expanded with enc = 0, rnds = 12.
michael@0 202 intel_aes_decrypt_ecb_192 PROC
michael@0 203 gen_aes_ecb_func 0, 12
michael@0 204 intel_aes_decrypt_ecb_192 ENDP
michael@0 205
; ECB decryption entry point: body is gen_aes_ecb_func expanded with enc = 0, rnds = 14.
michael@0 206 intel_aes_decrypt_ecb_256 PROC
michael@0 207 gen_aes_ecb_func 0, 14
michael@0 208 intel_aes_decrypt_ecb_256 ENDP
michael@0 209
michael@0 210
; Text aliases for the registers used by the key-expansion procedures below.
;   KEY - rcx; address the cipher key is read from
;   KS  - rdx; address the round keys are written to
;   ITR - r8;  scratch: constant addresses, key material and the loop counter
michael@0 211 KEY textequ <rcx>
michael@0 212 KS textequ <rdx>
michael@0 213 ITR textequ <r8>
michael@0 214
; intel_aes_encrypt_init_128
;   Expands a 16-byte cipher key into 11 round keys.
;   In:  KEY (rcx) = address of the 16-byte key
;        KS  (rdx) = destination; round key n is written to KS + 16*n
;   Clobbers: rdx (advanced by 8*16), r8, xmm0-xmm4, flags.
michael@0 215 intel_aes_encrypt_init_128 PROC
michael@0 216
; Round key 0 is the cipher key itself; xmm1 carries the current round key
; and xmm2 a working copy of it.
michael@0 217 movdqu xmm1, [KEY]
michael@0 218 movdqu [KS], xmm1
michael@0 219 movdqa xmm2, xmm1
michael@0 220
; xmm0 = round constant (starts at 1), xmm4 = PSHUFB control that places
; bytes 13,14,15,12 of the source into every dword.
michael@0 221 lea ITR, Lcon1
michael@0 222 movdqa xmm0, [ITR]
michael@0 223 lea ITR, Lmask
michael@0 224 movdqa xmm4, [ITR]
michael@0 225
; Eight loop iterations produce round keys 1..8.
michael@0 226 mov ITR, 8
michael@0 227
michael@0 228 Lenc_128_ks_loop:
michael@0 229 lea KS, [16 + KS]
; The flags set by this DEC are consumed by the JNE at the bottom of the
; loop; only SSE/AES instructions, which leave the flags alone, sit between.
michael@0 230 dec ITR
michael@0 231
; xmm2 = AESENCLAST(shuffled last dword of the previous round key, round constant)
michael@0 232 pshufb xmm2, xmm4
michael@0 233 aesenclast xmm2, xmm0
; Double the round constant for the next round.
michael@0 234 pslld xmm0, 1
; xmm1 ^= (xmm1 << 32) ^ (xmm1 << 64) ^ (xmm1 << 96), then fold in xmm2.
michael@0 235 movdqa xmm3, xmm1
michael@0 236 pslldq xmm3, 4
michael@0 237 pxor xmm1, xmm3
michael@0 238 pslldq xmm3, 4
michael@0 239 pxor xmm1, xmm3
michael@0 240 pslldq xmm3, 4
michael@0 241 pxor xmm1, xmm3
michael@0 242 pxor xmm1, xmm2
michael@0 243 movdqu [KS], xmm1
michael@0 244 movdqa xmm2, xmm1
michael@0 245
michael@0 246 jne Lenc_128_ks_loop
michael@0 247
; The round constant is reloaded as 1bh for round key 9.
michael@0 248 lea ITR, Lcon2
michael@0 249 movdqa xmm0, [ITR]
michael@0 250
; Round key 9 (same step as the loop body, stored at KS + 16).
michael@0 251 pshufb xmm2, xmm4
michael@0 252 aesenclast xmm2, xmm0
; 1bh << 1 = 36h, the constant used for round key 10.
michael@0 253 pslld xmm0, 1
michael@0 254 movdqa xmm3, xmm1
michael@0 255 pslldq xmm3, 4
michael@0 256 pxor xmm1, xmm3
michael@0 257 pslldq xmm3, 4
michael@0 258 pxor xmm1, xmm3
michael@0 259 pslldq xmm3, 4
michael@0 260 pxor xmm1, xmm3
michael@0 261 pxor xmm1, xmm2
michael@0 262 movdqu [16 + KS], xmm1
michael@0 263 movdqa xmm2, xmm1
michael@0 264
; Round key 10 (stored at KS + 32).
michael@0 265 pshufb xmm2, xmm4
michael@0 266 aesenclast xmm2, xmm0
michael@0 267 movdqa xmm3, xmm1
michael@0 268 pslldq xmm3, 4
michael@0 269 pxor xmm1, xmm3
michael@0 270 pslldq xmm3, 4
michael@0 271 pxor xmm1, xmm3
michael@0 272 pslldq xmm3, 4
michael@0 273 pxor xmm1, xmm3
michael@0 274 pxor xmm1, xmm2
michael@0 275 movdqu [32 + KS], xmm1
; NOTE(review): this copy into xmm2 is not read again before RET.
michael@0 276 movdqa xmm2, xmm1
michael@0 277
michael@0 278 ret
michael@0 279 intel_aes_encrypt_init_128 ENDP
michael@0 280
michael@0 281
; intel_aes_decrypt_init_128
;   Builds the decryption key schedule for a 16-byte key: runs
;   intel_aes_encrypt_init_128, reverses the order of the 11 round keys and
;   applies AESIMC to every round key except the first and the last.
;   In:  KEY (rcx) = key address, KS (rdx) = schedule address.
;   Clobbers: whatever the encryption expansion clobbers, plus xmm0 and xmm1.
intel_aes_decrypt_init_128 PROC

        ; KS and KEY are saved across the call (the expansion advances KS).
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_128
        pop     KEY
        pop     KS

    nr = 10                                     ; index of the last round key

        ; Exchange round keys 0 and nr unchanged.
        movdqu  xmm1, [nr*16 + KS]
        movdqu  xmm0, [0*16 + KS]
        movdqu  [0*16 + KS], xmm1
        movdqu  [nr*16 + KS], xmm0

        ; Exchange the remaining pairs, passing each key through AESIMC.
    pairIdx = 1
    WHILE pairIdx LT nr/2
        movdqu  xmm1, [(nr-pairIdx)*16 + KS]
        movdqu  xmm0, [pairIdx*16 + KS]
        aesimc  xmm1, xmm1
        aesimc  xmm0, xmm0
        movdqu  [pairIdx*16 + KS], xmm1
        movdqu  [(nr-pairIdx)*16 + KS], xmm0
    pairIdx = pairIdx + 1
    ENDM

        ; The middle round key stays in place and only needs AESIMC.
        movdqu  xmm0, [(nr/2)*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [(nr/2)*16 + KS], xmm0
        ret
intel_aes_decrypt_init_128 ENDP
michael@0 316
michael@0 317
; intel_aes_encrypt_init_192
;   Expands a 24-byte cipher key into the AES-192 encryption key schedule.
;   In:  KEY (rcx) = address of the key (16 bytes at [KEY], 8 bytes at [KEY+16])
;        KS  (rdx) = destination for the round keys
;   Clobbers: rdx (advanced by 4*48), r8, xmm0-xmm5, flags.
;   xmm6 and xmm7 are saved on the stack and restored before returning.
;   NOTE(review): the stores cover KS+0 .. KS+223 (14 slots of 16 bytes); the
;   last store lands in slot 13 - confirm the destination buffer has room.
michael@0 318 intel_aes_encrypt_init_192 PROC
michael@0 319
michael@0 320 sub rsp, 16*2
michael@0 321 movdqu [16*0 + rsp], xmm6
michael@0 322 movdqu [16*1 + rsp], xmm7
michael@0 323
; xmm1 = key bytes 0..15, xmm3 (low qword) = key bytes 16..23.
michael@0 324 movdqu xmm1, [KEY]
michael@0 325 mov ITR, [16 + KEY]
michael@0 326 movd xmm3, ITR
michael@0 327
michael@0 328 movdqu [KS], xmm1
; xmm5 holds the 8 key bytes still waiting to be stored.
michael@0 329 movdqa xmm5, xmm3
michael@0 330
; xmm0 = round constant (starts at 1), xmm4 = PSHUFB control that places
; bytes 5,6,7,4 of the source into every dword.
michael@0 331 lea ITR, Lcon1
michael@0 332 movdqu xmm0, [ITR]
michael@0 333 lea ITR, Lmask192
michael@0 334 movdqu xmm4, [ITR]
michael@0 335
; Four iterations, each performing two expansion steps and storing 48 bytes.
michael@0 336 mov ITR, 4
michael@0 337
michael@0 338 Lenc_192_ks_loop:
; First expansion step: xmm2 = AESENCLAST(shuffled dword 1 of xmm3, round constant).
michael@0 339 movdqa xmm2, xmm3
michael@0 340 pshufb xmm2, xmm4
michael@0 341 aesenclast xmm2, xmm0
michael@0 342 pslld xmm0, 1
michael@0 343
; xmm1 ^= (xmm1 << 32) ^ (xmm1 << 64) ^ (xmm1 << 96) ^ xmm2;
; xmm3 ^= (xmm3 << 32) ^ broadcast(dword 3 of the new xmm1).
michael@0 344 movdqa xmm6, xmm1
michael@0 345 movdqa xmm7, xmm3
michael@0 346 pslldq xmm6, 4
michael@0 347 pslldq xmm7, 4
michael@0 348 pxor xmm1, xmm6
michael@0 349 pxor xmm3, xmm7
michael@0 350 pslldq xmm6, 4
michael@0 351 pxor xmm1, xmm6
michael@0 352 pslldq xmm6, 4
michael@0 353 pxor xmm1, xmm6
michael@0 354 pxor xmm1, xmm2
michael@0 355 pshufd xmm2, xmm1, 0ffh
michael@0 356 pxor xmm3, xmm2
michael@0 357
; Repack into 16-byte slots:
;   xmm5 = { low qword of xmm5, low qword of xmm1 }
;   xmm6 = { high qword of xmm1, low qword of xmm3 }
michael@0 358 movdqa xmm6, xmm1
michael@0 359 shufpd xmm5, xmm1, 00h
michael@0 360 shufpd xmm6, xmm3, 01h
michael@0 361
michael@0 362 movdqu [16 + KS], xmm5
michael@0 363 movdqu [32 + KS], xmm6
michael@0 364
; Second expansion step (same arithmetic as the first).
michael@0 365 movdqa xmm2, xmm3
michael@0 366 pshufb xmm2, xmm4
michael@0 367 aesenclast xmm2, xmm0
michael@0 368 pslld xmm0, 1
michael@0 369
michael@0 370 movdqa xmm6, xmm1
michael@0 371 movdqa xmm7, xmm3
michael@0 372 pslldq xmm6, 4
michael@0 373 pslldq xmm7, 4
michael@0 374 pxor xmm1, xmm6
michael@0 375 pxor xmm3, xmm7
michael@0 376 pslldq xmm6, 4
michael@0 377 pxor xmm1, xmm6
michael@0 378 pslldq xmm6, 4
michael@0 379 pxor xmm1, xmm6
michael@0 380 pxor xmm1, xmm2
michael@0 381 pshufd xmm2, xmm1, 0ffh
michael@0 382 pxor xmm3, xmm2
michael@0 383
; This time xmm1 fills a whole slot; xmm3 is carried over in xmm5.
michael@0 384 movdqu [48 + KS], xmm1
michael@0 385 movdqa xmm5, xmm3
michael@0 386
michael@0 387 lea KS, [48 + KS]
michael@0 388
michael@0 389 dec ITR
michael@0 390 jnz Lenc_192_ks_loop
michael@0 391
; Store the carried-over xmm5 after the last full slot.
michael@0 392 movdqu [16 + KS], xmm5
michael@0 393
michael@0 394 movdqu xmm7, [16*1 + rsp]
michael@0 395 movdqu xmm6, [16*0 + rsp]
michael@0 396 add rsp, 16*2
michael@0 397 ret
michael@0 398 intel_aes_encrypt_init_192 ENDP
michael@0 399
; intel_aes_decrypt_init_192
;   Builds the decryption key schedule for a 24-byte key: runs
;   intel_aes_encrypt_init_192, reverses the order of the 13 round keys and
;   applies AESIMC to every round key except the first and the last.
;   In:  KEY (rcx) = key address, KS (rdx) = schedule address.
;   Clobbers: whatever the encryption expansion clobbers, plus xmm0 and xmm1.
intel_aes_decrypt_init_192 PROC

        ; KS and KEY are saved across the call (the expansion advances KS).
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_192
        pop     KEY
        pop     KS

    nr = 12                                     ; index of the last round key

        ; Exchange round keys 0 and nr unchanged.
        movdqu  xmm1, [nr*16 + KS]
        movdqu  xmm0, [0*16 + KS]
        movdqu  [0*16 + KS], xmm1
        movdqu  [nr*16 + KS], xmm0

        ; Exchange the remaining pairs, passing each key through AESIMC.
    pairIdx = 1
    WHILE pairIdx LT nr/2
        movdqu  xmm1, [(nr-pairIdx)*16 + KS]
        movdqu  xmm0, [pairIdx*16 + KS]
        aesimc  xmm1, xmm1
        aesimc  xmm0, xmm0
        movdqu  [pairIdx*16 + KS], xmm1
        movdqu  [(nr-pairIdx)*16 + KS], xmm0
    pairIdx = pairIdx + 1
    ENDM

        ; The middle round key stays in place and only needs AESIMC.
        movdqu  xmm0, [(nr/2)*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [(nr/2)*16 + KS], xmm0
        ret
intel_aes_decrypt_init_192 ENDP
michael@0 433
michael@0 434
; intel_aes_encrypt_init_256
;   Expands a 32-byte cipher key into 15 round keys.
;   In:  KEY (rcx) = address of the 32-byte key
;        KS  (rdx) = destination; round key n is written to KS + 16*n
;   Clobbers: rdx (advanced by 6*32), r8, xmm0-xmm5, flags.
;   xmm6 and xmm7 are saved on the stack and restored before returning
;   (xmm7 is not otherwise used in this procedure).
michael@0 435 intel_aes_encrypt_init_256 PROC
michael@0 436 sub rsp, 16*2
michael@0 437 movdqu [16*0 + rsp], xmm6
michael@0 438 movdqu [16*1 + rsp], xmm7
michael@0 439
; Round keys 0 and 1 are the two halves of the cipher key.
michael@0 440 movdqu xmm1, [16*0 + KEY]
michael@0 441 movdqu xmm3, [16*1 + KEY]
michael@0 442
michael@0 443 movdqu [16*0 + KS], xmm1
michael@0 444 movdqu [16*1 + KS], xmm3
michael@0 445
; xmm0 = round constant (starts at 1), xmm5 = PSHUFB control that places
; bytes 13,14,15,12 of the source into every dword.
michael@0 446 lea ITR, Lcon1
michael@0 447 movdqu xmm0, [ITR]
michael@0 448 lea ITR, Lmask256
michael@0 449 movdqu xmm5, [ITR]
michael@0 450
; xmm6 = 0: used as the AESENCLAST key in the odd step, so no constant is added.
michael@0 451 pxor xmm6, xmm6
michael@0 452
; Six iterations, each producing two round keys (2..13).
michael@0 453 mov ITR, 6
michael@0 454
michael@0 455 Lenc_256_ks_loop:
michael@0 456
; Even round key: xmm2 = AESENCLAST(shuffled last dword of xmm3, round constant);
; xmm1 ^= (xmm1 << 32) ^ (xmm1 << 64) ^ (xmm1 << 96) ^ xmm2.
michael@0 457 movdqa xmm2, xmm3
michael@0 458 pshufb xmm2, xmm5
michael@0 459 aesenclast xmm2, xmm0
michael@0 460 pslld xmm0, 1
michael@0 461 movdqa xmm4, xmm1
michael@0 462 pslldq xmm4, 4
michael@0 463 pxor xmm1, xmm4
michael@0 464 pslldq xmm4, 4
michael@0 465 pxor xmm1, xmm4
michael@0 466 pslldq xmm4, 4
michael@0 467 pxor xmm1, xmm4
michael@0 468 pxor xmm1, xmm2
michael@0 469 movdqu [16*2 + KS], xmm1
michael@0 470
; Odd round key: xmm2 = AESENCLAST(broadcast last dword of xmm1, 0);
; xmm3 ^= (xmm3 << 32) ^ (xmm3 << 64) ^ (xmm3 << 96) ^ xmm2.
michael@0 471 pshufd xmm2, xmm1, 0ffh
michael@0 472 aesenclast xmm2, xmm6
michael@0 473 movdqa xmm4, xmm3
michael@0 474 pslldq xmm4, 4
michael@0 475 pxor xmm3, xmm4
michael@0 476 pslldq xmm4, 4
michael@0 477 pxor xmm3, xmm4
michael@0 478 pslldq xmm4, 4
michael@0 479 pxor xmm3, xmm4
michael@0 480 pxor xmm3, xmm2
michael@0 481 movdqu [16*3 + KS], xmm3
michael@0 482
michael@0 483 lea KS, [32 + KS]
michael@0 484 dec ITR
michael@0 485 jnz Lenc_256_ks_loop
michael@0 486
; Final (15th) round key: one more even step, stored at KS + 32.
michael@0 487 movdqa xmm2, xmm3
michael@0 488 pshufb xmm2, xmm5
michael@0 489 aesenclast xmm2, xmm0
michael@0 490 movdqa xmm4, xmm1
michael@0 491 pslldq xmm4, 4
michael@0 492 pxor xmm1, xmm4
michael@0 493 pslldq xmm4, 4
michael@0 494 pxor xmm1, xmm4
michael@0 495 pslldq xmm4, 4
michael@0 496 pxor xmm1, xmm4
michael@0 497 pxor xmm1, xmm2
michael@0 498 movdqu [16*2 + KS], xmm1
michael@0 499
michael@0 500 movdqu xmm7, [16*1 + rsp]
michael@0 501 movdqu xmm6, [16*0 + rsp]
michael@0 502 add rsp, 16*2
michael@0 503 ret
michael@0 504
michael@0 505 intel_aes_encrypt_init_256 ENDP
michael@0 506
michael@0 507
; intel_aes_decrypt_init_256
;   Builds the decryption key schedule for a 32-byte key: runs
;   intel_aes_encrypt_init_256, reverses the order of the 15 round keys and
;   applies AESIMC to every round key except the first and the last.
;   In:  KEY (rcx) = key address, KS (rdx) = schedule address.
;   Clobbers: whatever the encryption expansion clobbers, plus xmm0 and xmm1.
intel_aes_decrypt_init_256 PROC

        ; KS and KEY are saved across the call (the expansion advances KS).
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_256
        pop     KEY
        pop     KS

    nr = 14                                     ; index of the last round key

        ; Exchange round keys 0 and nr unchanged.
        movdqu  xmm1, [nr*16 + KS]
        movdqu  xmm0, [0*16 + KS]
        movdqu  [0*16 + KS], xmm1
        movdqu  [nr*16 + KS], xmm0

        ; Exchange the remaining pairs, passing each key through AESIMC.
    pairIdx = 1
    WHILE pairIdx LT nr/2
        movdqu  xmm1, [(nr-pairIdx)*16 + KS]
        movdqu  xmm0, [pairIdx*16 + KS]
        aesimc  xmm1, xmm1
        aesimc  xmm0, xmm0
        movdqu  [pairIdx*16 + KS], xmm1
        movdqu  [(nr-pairIdx)*16 + KS], xmm0
    pairIdx = pairIdx + 1
    ENDM

        ; The middle round key stays in place and only needs AESIMC.
        movdqu  xmm0, [(nr/2)*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [(nr/2)*16 + KS], xmm0
        ret
intel_aes_decrypt_init_256 ENDP
michael@0 541
michael@0 542
michael@0 543
; gen_aes_cbc_enc_func rnds
;   Emits the complete body (including RET) of an AES-CBC encryption routine.
;     rnds - number of AES rounds; also the index of the last round key
;   Register/stack use:
;     rcx (ctx)    - context pointer; the 16-byte chaining value lives at
;                    ctx+16 and the round keys start at ctx+48
;     rdx (output) - destination buffer
;     [rsp+40]     - source pointer, loaded into r8 (input)
;     [rsp+48]     - byte count, loaded into r9d (inputLen)
;   Blocks are processed one at a time while at least 16 bytes remain.  The
;   last ciphertext block is written back as the new chaining value.
;   Returns 0 in rax.  xmm6-xmm8 are saved on the stack and restored.
gen_aes_cbc_enc_func MACRO rnds

    LOCAL   next_block, finish

        mov     input, [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]           ; ctx now addresses round key 0

        movdqu  xmm0, [-32+ctx]         ; xmm0 = chaining value

        ; Keep round keys 0..5 resident in xmm2..xmm7.
    kreg = 2
    WHILE kreg LT 8
        movdqu  @CatStr(xmm,%kreg), [(kreg-2)*16 + ctx]
    kreg = kreg + 1
    ENDM

next_block:
        cmp     inputLen, 1*16
        jb      finish

        ; xmm0 = chaining value XOR plaintext XOR round key 0
        movdqu  xmm1, [input]
        pxor    xmm1, xmm2
        pxor    xmm0, xmm1

        ; Rounds 1..5 use the cached keys.
    FOR kx, <xmm3,xmm4,xmm5,xmm6,xmm7>
        aesenc  xmm0, kx
    ENDM

        ; The remaining rounds fetch their keys from memory.
    i = 6
    WHILE i LT rnds
        movdqu  xmm8, [i*16 + ctx]
        aesenc  xmm0, xmm8
    i = i+1
    ENDM
        movdqu  xmm8, [rnds*16 + ctx]
        aesenclast xmm0, xmm8

        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     next_block

finish:
        movdqu  [-32+ctx], xmm0         ; store the updated chaining value

        xor     rax, rax

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret

ENDM
michael@0 611
; gen_aes_cbc_dec_func rnds
;   Emits the complete body (including RET) of an AES-CBC decryption routine.
;     rnds - number of AES rounds; also the index of the last round key
;   Register/stack use:
;     rcx (ctx)    - context pointer; the 16-byte chaining value lives at
;                    ctx+16 and the round keys start at ctx+48
;     rdx (output) - destination buffer
;     [rsp+40]     - source pointer, loaded into r8 (input)
;     [rsp+48]     - byte count, loaded into r9d (inputLen)
;   Eight blocks are decrypted per iteration while at least 128 bytes remain,
;   then single blocks while at least 16 bytes remain.  In both paths the
;   ciphertext needed for chaining is read (or copied) before the matching
;   output is stored.  The last ciphertext block becomes the new chaining
;   value.  Returns 0 in rax.  xmm6-xmm8 are saved on the stack and restored.
gen_aes_cbc_dec_func MACRO rnds

    LOCAL   blocks8, single_setup, block1, finish

        mov     input, [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]           ; ctx now addresses round key 0

blocks8:
        cmp     inputLen, 8*16
        jb      single_setup

        ; Load eight ciphertext blocks into xmm0..xmm7.
    blk = 0
    WHILE blk LT 8
        movdqu  @CatStr(xmm,%blk), [blk*16 + input]
    blk = blk + 1
    ENDM

        ; Initial AddRoundKey with round key 0.
        movdqu  xmm8, [0*16 + ctx]
    blk = 0
    WHILE blk LT 8
        pxor    @CatStr(xmm,%blk), xmm8
    blk = blk + 1
    ENDM

    i = 1
    WHILE i LT rnds
        aes_dec_rnd i
    i = i+1
    ENDM
        aes_dec_last_rnd rnds

        ; XOR block n with the ciphertext that preceded it: the stored
        ; chaining value for block 0, input block n-1 otherwise.  After the
        ; loop xmm8 holds input block 7, the next chaining value.
        movdqu  xmm8, [-32 + ctx]
    blk = 0
    WHILE blk LT 8
        pxor    @CatStr(xmm,%blk), xmm8
        movdqu  xmm8, [blk*16 + input]
    blk = blk + 1
    ENDM

    blk = 0
    WHILE blk LT 8
        movdqu  [blk*16 + output], @CatStr(xmm,%blk)
    blk = blk + 1
    ENDM
        movdqu  [-32 + ctx], xmm8

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     blocks8

single_setup:
        movdqu  xmm3, [-32 + ctx]       ; xmm3 = chaining value

block1:
        cmp     inputLen, 1*16
        jb      finish

        movdqu  xmm0, [input]
        movdqa  xmm4, xmm0              ; keep the ciphertext for chaining
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

    i = 1
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesdec  xmm0, xmm7
    i = i+1
    ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7
        pxor    xmm3, xmm0              ; plaintext = decrypted block XOR chaining value

        movdqu  [output], xmm3
        movdqa  xmm3, xmm4              ; this ciphertext chains into the next block

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     block1

finish:
        movdqu  [-32 + ctx], xmm3       ; store the updated chaining value
        xor     rax, rax

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM
michael@0 733
; CBC encryption entry point: body is gen_aes_cbc_enc_func expanded with rnds = 10.
michael@0 734 intel_aes_encrypt_cbc_128 PROC
michael@0 735 gen_aes_cbc_enc_func 10
michael@0 736 intel_aes_encrypt_cbc_128 ENDP
michael@0 737
; CBC encryption entry point: body is gen_aes_cbc_enc_func expanded with rnds = 12.
michael@0 738 intel_aes_encrypt_cbc_192 PROC
michael@0 739 gen_aes_cbc_enc_func 12
michael@0 740 intel_aes_encrypt_cbc_192 ENDP
michael@0 741
; CBC encryption entry point: body is gen_aes_cbc_enc_func expanded with rnds = 14.
michael@0 742 intel_aes_encrypt_cbc_256 PROC
michael@0 743 gen_aes_cbc_enc_func 14
michael@0 744 intel_aes_encrypt_cbc_256 ENDP
michael@0 745
; CBC decryption entry point: body is gen_aes_cbc_dec_func expanded with rnds = 10.
michael@0 746 intel_aes_decrypt_cbc_128 PROC
michael@0 747 gen_aes_cbc_dec_func 10
michael@0 748 intel_aes_decrypt_cbc_128 ENDP
michael@0 749
; CBC decryption entry point: body is gen_aes_cbc_dec_func expanded with rnds = 12.
michael@0 750 intel_aes_decrypt_cbc_192 PROC
michael@0 751 gen_aes_cbc_dec_func 12
michael@0 752 intel_aes_decrypt_cbc_192 ENDP
michael@0 753
; CBC decryption entry point: body is gen_aes_cbc_dec_func expanded with rnds = 14.
michael@0 754 intel_aes_decrypt_cbc_256 PROC
michael@0 755 gen_aes_cbc_dec_func 14
michael@0 756 intel_aes_decrypt_cbc_256 ENDP
michael@0 757
michael@0 758
michael@0 759
; Text aliases for the extra registers used by gen_aes_ctr_func.
;   ctrCtx  - r10;  copy of the incoming rcx (CTR context pointer)
;   CTR     - r11d; scratch for the byte-swapped, key-masked counter dword
;   CTRSave - eax;  running counter value (rax is zeroed again before RET)
michael@0 760 ctrCtx textequ <r10>
michael@0 761 CTR textequ <r11d>
michael@0 762 CTRSave textequ <eax>
michael@0 763
; aes_ctr_bump n
;   Helper for gen_aes_ctr_func.  Increments the 32-bit counter in CTRSave,
;   byte-swaps it, XORs it with the last dword of round key 0 and writes the
;   result into dword 3 of the 16-byte stack slot n.
;   Clobbers: CTR (r11d), flags.
aes_ctr_bump MACRO n
        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + (n)*16 + 3*4], CTR
ENDM

; gen_aes_ctr_func rnds
;   Emits the complete body (including RET) of an AES-CTR routine.
;     rnds - number of AES rounds; also the index of the last round key
;   Register/stack use:
;     rcx          - CTR context; a pointer is read from [rcx+8] and the round
;                    keys are taken from 48 bytes past that pointer; the
;                    16-byte counter block is read from / written to [rcx+16]
;     rdx (output) - destination buffer
;     [rsp+40]     - source pointer, loaded into r8 (input)
;     [rsp+48]     - byte count, loaded into r9d (inputLen)
;   Eight counter blocks, already XORed with round key 0, are kept in a
;   16-byte-aligned stack area.  Only the last dword of the counter block is
;   incremented (as a big-endian 32-bit value, wrapping at 2^32).
;   Eight blocks are processed per iteration while at least 128 bytes remain,
;   then single blocks while at least 16 bytes remain.
;   Returns 0 in rax.  rbp and xmm6-xmm8 are saved and restored; r10 and r11
;   are overwritten.
gen_aes_ctr_func MACRO rnds

    LOCAL   blocks8, block1, finish

        mov     input, [rsp + 8*1 + 4*8]
        mov     inputLen, [rsp + 8*1 + 5*8]

        mov     ctrCtx, ctx
        mov     ctx, [8+ctrCtx]
        lea     ctx, [48+ctx]           ; ctx now addresses round key 0

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        ; Reserve an aligned 8*16-byte area; rbp remembers the old rsp.
        push    rbp
        mov     rbp, rsp
        sub     rsp, 8*16
        and     rsp, -16

        ; xmm0 = counter block XOR round key 0; CTRSave = counter dword in
        ; host byte order.
        movdqu  xmm0, [16+ctrCtx]
        mov     CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
        bswap   CTRSave
        movdqu  xmm1, [ctx + 0*16]

        pxor    xmm0, xmm1

        ; Fill all eight slots with the current block ...
    slot = 0
    WHILE slot LT 8
        movdqa  [rsp + slot*16], xmm0
    slot = slot + 1
    ENDM

        ; ... then give slots 1..7 the next seven counter values.
    slot = 1
    WHILE slot LT 8
        aes_ctr_bump slot
    slot = slot + 1
    ENDM

blocks8:
        cmp     inputLen, 8*16
        jb      block1

    slot = 0
    WHILE slot LT 8
        movdqu  @CatStr(xmm,%slot), [slot*16 + rsp]
    slot = slot + 1
    ENDM

        ; Rounds 1..8 are interleaved with refreshing slots 0..7 for the
        ; next pass.
    i = 1
    WHILE i LE 8
        aes_rnd i
        aes_ctr_bump i-1
    i = i+1
    ENDM
    WHILE i LT rnds
        aes_rnd i
    i = i+1
    ENDM
        aes_last_rnd rnds

        ; XOR the key stream with the input.
    slot = 0
    WHILE slot LT 8
        movdqu  xmm8, [slot*16 + input]
        pxor    @CatStr(xmm,%slot), xmm8
    slot = slot + 1
    ENDM

    slot = 0
    WHILE slot LT 8
        movdqu  [slot*16 + output], @CatStr(xmm,%slot)
    slot = slot + 1
    ENDM

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     blocks8

block1:
        cmp     inputLen, 1*16
        jb      finish

        ; Consume the next prepared counter block by stepping rsp past it.
        movdqu  xmm0, [rsp]
        add     rsp, 16

    i = 1
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesenc  xmm0, xmm7
    i = i+1
    ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     block1

finish:
        ; The first unused slot, with round key 0 XORed back out, is the
        ; counter block to store for the next call.
        movdqu  xmm0, [rsp]
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1
        movdqu  [16+ctrCtx], xmm0

        xor     rax, rax
        mov     rsp, rbp
        pop     rbp

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16

        ret
ENDM
michael@0 956
michael@0 957
; CTR entry point: body is gen_aes_ctr_func expanded with rnds = 10.
michael@0 958 intel_aes_encrypt_ctr_128 PROC
michael@0 959 gen_aes_ctr_func 10
michael@0 960 intel_aes_encrypt_ctr_128 ENDP
michael@0 961
; CTR entry point: body is gen_aes_ctr_func expanded with rnds = 12.
michael@0 962 intel_aes_encrypt_ctr_192 PROC
michael@0 963 gen_aes_ctr_func 12
michael@0 964 intel_aes_encrypt_ctr_192 ENDP
michael@0 965
; CTR entry point: body is gen_aes_ctr_func expanded with rnds = 14.
michael@0 966 intel_aes_encrypt_ctr_256 PROC
michael@0 967 gen_aes_ctr_func 14
michael@0 968 intel_aes_encrypt_ctr_256 ENDP
michael@0 969
michael@0 970
; End of the assembly module.
michael@0 971 END

mercurial