security/nss/lib/freebl/intel-aes-x86-masm.asm

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 ; LICENSE:
michael@0 2 ; This submission to NSS is to be made available under the terms of the
michael@0 3 ; Mozilla Public License, v. 2.0. You can obtain one at
michael@0 4 ; http://mozilla.org/MPL/2.0/.
michael@0 5 ;###############################################################################
michael@0 6 ; Copyright(c) 2014, Intel Corp.
michael@0 7 ; Developers and authors:
michael@0 8 ; Shay Gueron and Vlad Krasnov
michael@0 9 ; Intel Corporation, Israel Development Centre, Haifa, Israel
michael@0 10 ; Please send feedback directly to crypto.feedback.alias@intel.com
michael@0 11
michael@0 12
; Assembler setup and the constant tables used by the key-schedule routines
; (intel_aes_encrypt_init_128/192/256) further down in this file.
michael@0 13 .MODEL FLAT, C ; 32-bit flat memory model, C language conventions
michael@0 14 .XMM ; allow SSE/XMM instructions
michael@0 15
michael@0 16 .DATA
michael@0 17 ALIGN 16 ; intel_aes_encrypt_init_128 reads these tables with movdqa
michael@0 18 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh ; pshufb mask: source bytes 13,14,15,12 copied into every dword (128-bit schedule)
michael@0 19 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h ; pshufb mask: source bytes 5,6,7,4 copied into every dword (192-bit schedule)
michael@0 20 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh ; same byte pattern as Lmask; used by the 256-bit schedule
michael@0 21 Lcon1 dd 1,1,1,1 ; starting round-constant vector; each schedule doubles it with pslld after use
michael@0 22 Lcon2 dd 1bh,1bh,1bh,1bh ; round-constant vector the 128-bit schedule switches to after its 8-iteration loop
michael@0 23
michael@0 24 .CODE
michael@0 25
; Register aliases shared by the ECB, CBC and CTR code generators below.
michael@0 26 ctx textequ <ecx> ; AES context pointer; each generator advances it by 44 so it addresses round key 0
michael@0 27 output textequ <edx> ; destination pointer, advanced as blocks are written
michael@0 28 input textequ <eax> ; source pointer, advanced as blocks are read (eax is zeroed before ret)
michael@0 29 inputLen textequ <edi> ; bytes still to process; edi is pushed on entry and popped on exit by each generator
michael@0 30
michael@0 31
; aes_rnd i -- one middle encryption round applied to seven blocks at once.
;   i : round-key index; the key is read from [i*16 + ctx].
; Runs aesenc on xmm0..xmm6. xmm7 is overwritten with the round key.
michael@0 32 aes_rnd MACRO i
michael@0 33 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
michael@0 34 aesenc xmm0, xmm7
michael@0 35 aesenc xmm1, xmm7
michael@0 36 aesenc xmm2, xmm7
michael@0 37 aesenc xmm3, xmm7
michael@0 38 aesenc xmm4, xmm7
michael@0 39 aesenc xmm5, xmm7
michael@0 40 aesenc xmm6, xmm7
michael@0 41 ENDM
michael@0 42
; aes_last_rnd i -- final encryption round applied to seven blocks at once.
;   i : round-key index; the key is read from [i*16 + ctx].
; Runs aesenclast on xmm0..xmm6. xmm7 is overwritten with the round key.
michael@0 43 aes_last_rnd MACRO i
michael@0 44 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
michael@0 45 aesenclast xmm0, xmm7
michael@0 46 aesenclast xmm1, xmm7
michael@0 47 aesenclast xmm2, xmm7
michael@0 48 aesenclast xmm3, xmm7
michael@0 49 aesenclast xmm4, xmm7
michael@0 50 aesenclast xmm5, xmm7
michael@0 51 aesenclast xmm6, xmm7
michael@0 52 ENDM
michael@0 53
; aes_dec_rnd i -- one middle decryption round applied to seven blocks at once.
;   i : round-key index; the key is read from [i*16 + ctx].
; Runs aesdec on xmm0..xmm6. xmm7 is overwritten with the round key.
michael@0 54 aes_dec_rnd MACRO i
michael@0 55 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
michael@0 56 aesdec xmm0, xmm7
michael@0 57 aesdec xmm1, xmm7
michael@0 58 aesdec xmm2, xmm7
michael@0 59 aesdec xmm3, xmm7
michael@0 60 aesdec xmm4, xmm7
michael@0 61 aesdec xmm5, xmm7
michael@0 62 aesdec xmm6, xmm7
michael@0 63 ENDM
michael@0 64
; aes_dec_last_rnd i -- final decryption round applied to seven blocks at once.
;   i : round-key index; the key is read from [i*16 + ctx].
; Runs aesdeclast on xmm0..xmm6. xmm7 is overwritten with the round key.
michael@0 65 aes_dec_last_rnd MACRO i
michael@0 66 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
michael@0 67 aesdeclast xmm0, xmm7
michael@0 68 aesdeclast xmm1, xmm7
michael@0 69 aesdeclast xmm2, xmm7
michael@0 70 aesdeclast xmm3, xmm7
michael@0 71 aesdeclast xmm4, xmm7
michael@0 72 aesdeclast xmm5, xmm7
michael@0 73 aesdeclast xmm6, xmm7
michael@0 74 ENDM
michael@0 75
michael@0 76
; gen_aes_ecb_func enc, rnds
; Emits the whole body of an ECB routine, including its ret.
;   enc  : 1 selects the aesenc/aesenclast forms; any other value selects
;          the aesdec/aesdeclast forms.
;   rnds : index of the last round key (10, 12 or 14 at the call sites in
;          this file).
; Stack arguments read after the return address: arg0 = context pointer,
; arg1 = output pointer, arg4 = input pointer, arg5 = input length in
; bytes. Args 2 and 3 are not touched here.
; Seven blocks are processed per pass while at least 7*16 bytes remain, then
; one block per pass. A trailing piece shorter than 16 bytes is ignored.
; Returns 0 in eax. edi is preserved; ecx, edx and xmm0-xmm7 are clobbered.
michael@0 77 gen_aes_ecb_func MACRO enc, rnds
michael@0 78
michael@0 79 LOCAL loop7
michael@0 80 LOCAL loop1
michael@0 81 LOCAL bail
michael@0 82
michael@0 83 push inputLen ; save edi; restored at bail
michael@0 84
michael@0 85 mov ctx, [esp + 2*4 + 0*4] ; 2*4 skips the saved edi and the return address
michael@0 86 mov output, [esp + 2*4 + 1*4]
michael@0 87 mov input, [esp + 2*4 + 4*4]
michael@0 88 mov inputLen, [esp + 2*4 + 5*4]
michael@0 89
michael@0 90 lea ctx, [44+ctx] ; ctx now addresses round key 0 (context offset 44)
michael@0 91
michael@0 92 loop7:
michael@0 93 cmp inputLen, 7*16
michael@0 94 jb loop1 ; fewer than seven blocks left: switch to the single-block loop
michael@0 95
michael@0 96 movdqu xmm0, [0*16 + input]
michael@0 97 movdqu xmm1, [1*16 + input]
michael@0 98 movdqu xmm2, [2*16 + input]
michael@0 99 movdqu xmm3, [3*16 + input]
michael@0 100 movdqu xmm4, [4*16 + input]
michael@0 101 movdqu xmm5, [5*16 + input]
michael@0 102 movdqu xmm6, [6*16 + input]
michael@0 103
michael@0 104 movdqu xmm7, [0*16 + ctx] ; round key 0: initial whitening of all seven blocks
michael@0 105 pxor xmm0, xmm7
michael@0 106 pxor xmm1, xmm7
michael@0 107 pxor xmm2, xmm7
michael@0 108 pxor xmm3, xmm7
michael@0 109 pxor xmm4, xmm7
michael@0 110 pxor xmm5, xmm7
michael@0 111 pxor xmm6, xmm7
michael@0 112
michael@0 113 IF enc eq 1 ; assembly-time choice; these aliases are also used by the loop1 section below
michael@0 114 rnd textequ <aes_rnd>
michael@0 115 lastrnd textequ <aes_last_rnd>
michael@0 116 aesinst textequ <aesenc>
michael@0 117 aeslastinst textequ <aesenclast>
michael@0 118 ELSE
michael@0 119 rnd textequ <aes_dec_rnd>
michael@0 120 lastrnd textequ <aes_dec_last_rnd>
michael@0 121 aesinst textequ <aesdec>
michael@0 122 aeslastinst textequ <aesdeclast>
michael@0 123 ENDIF
michael@0 124
michael@0 125 i = 1
michael@0 126 WHILE i LT rnds ; unrolled at assembly time: rounds 1 .. rnds-1
michael@0 127 rnd i
michael@0 128 i = i+1
michael@0 129 ENDM
michael@0 130 lastrnd rnds ; final round with round key rnds
michael@0 131
michael@0 132 movdqu [0*16 + output], xmm0
michael@0 133 movdqu [1*16 + output], xmm1
michael@0 134 movdqu [2*16 + output], xmm2
michael@0 135 movdqu [3*16 + output], xmm3
michael@0 136 movdqu [4*16 + output], xmm4
michael@0 137 movdqu [5*16 + output], xmm5
michael@0 138 movdqu [6*16 + output], xmm6
michael@0 139
michael@0 140 lea input, [7*16 + input]
michael@0 141 lea output, [7*16 + output]
michael@0 142 sub inputLen, 7*16
michael@0 143 jmp loop7
michael@0 144
michael@0 145 loop1:
michael@0 146 cmp inputLen, 1*16
michael@0 147 jb bail ; no complete block left
michael@0 148
michael@0 149 movdqu xmm0, [input]
michael@0 150 movdqu xmm7, [0*16 + ctx]
michael@0 151 pxor xmm0, xmm7 ; initial whitening with round key 0
michael@0 152
michael@0 153 i = 1
michael@0 154 WHILE i LT rnds ; unrolled at assembly time: rounds 1 .. rnds-1 on one block
michael@0 155 movdqu xmm7, [i*16 + ctx]
michael@0 156 aesinst xmm0, xmm7
michael@0 157 i = i+1
michael@0 158 ENDM
michael@0 159 movdqu xmm7, [rnds*16 + ctx]
michael@0 160 aeslastinst xmm0, xmm7
michael@0 161
michael@0 162 movdqu [output], xmm0
michael@0 163
michael@0 164 lea input, [1*16 + input]
michael@0 165 lea output, [1*16 + output]
michael@0 166 sub inputLen, 1*16
michael@0 167 jmp loop1
michael@0 168
michael@0 169 bail:
michael@0 170 xor eax, eax ; return value 0 (eax doubled as the input pointer until here)
michael@0 171 pop inputLen
michael@0 172 ret
michael@0 173
michael@0 174 ENDM
michael@0 175
; ECB entry points. Each procedure is one expansion of gen_aes_ecb_func
; (first argument: 1 = encrypt forms, 0 = decrypt forms; second argument:
; index of the last round key, i.e. 10, 12 or 14).
michael@0 176 ALIGN 16
michael@0 177 intel_aes_encrypt_ecb_128 PROC
michael@0 178 gen_aes_ecb_func 1, 10
michael@0 179 intel_aes_encrypt_ecb_128 ENDP
michael@0 180
michael@0 181 ALIGN 16
michael@0 182 intel_aes_encrypt_ecb_192 PROC
michael@0 183 gen_aes_ecb_func 1, 12
michael@0 184 intel_aes_encrypt_ecb_192 ENDP
michael@0 185
michael@0 186 ALIGN 16
michael@0 187 intel_aes_encrypt_ecb_256 PROC
michael@0 188 gen_aes_ecb_func 1, 14
michael@0 189 intel_aes_encrypt_ecb_256 ENDP
michael@0 190
michael@0 191 ALIGN 16
michael@0 192 intel_aes_decrypt_ecb_128 PROC
michael@0 193 gen_aes_ecb_func 0, 10
michael@0 194 intel_aes_decrypt_ecb_128 ENDP
michael@0 195
michael@0 196 ALIGN 16
michael@0 197 intel_aes_decrypt_ecb_192 PROC
michael@0 198 gen_aes_ecb_func 0, 12
michael@0 199 intel_aes_decrypt_ecb_192 ENDP
michael@0 200
michael@0 201 ALIGN 16
michael@0 202 intel_aes_decrypt_ecb_256 PROC
michael@0 203 gen_aes_ecb_func 0, 14
michael@0 204 intel_aes_decrypt_ecb_256 ENDP
michael@0 205
michael@0 206
; Register aliases used by the key-schedule (init) procedures.
michael@0 207 KEY textequ <ecx> ; pointer to the raw key (arg0)
michael@0 208 KS textequ <edx> ; pointer to the round-key buffer being filled (arg1)
michael@0 209 ITR textequ <eax> ; scratch: table address first, then the loop counter
michael@0 210
; intel_aes_encrypt_init_128
; Expands the 16-byte key at arg0 into 11 round keys (slots 0..10, 16 bytes
; each) written to the buffer at arg1.
; Clobbers eax, ecx, edx (edx is advanced through the buffer) and xmm0-xmm4.
; eax is not set to a status value; it holds a table address on exit.
michael@0 211 ALIGN 16
michael@0 212 intel_aes_encrypt_init_128 PROC
michael@0 213
michael@0 214 mov KEY, [esp + 1*4 + 0*4] ; 1*4 skips the return address
michael@0 215 mov KS, [esp + 1*4 + 1*4]
michael@0 216
michael@0 217
michael@0 218 movdqu xmm1, [KEY]
michael@0 219 movdqu [KS], xmm1 ; round key 0 is the raw key
michael@0 220 movdqa xmm2, xmm1
michael@0 221
michael@0 222 lea ITR, Lcon1
michael@0 223 movdqa xmm0, [ITR] ; xmm0 = current round constant, starts at 1
michael@0 224 lea ITR, Lmask
michael@0 225 movdqa xmm4, [ITR] ; xmm4 = pshufb mask (rotated last dword in every lane)
michael@0 226
michael@0 227 mov ITR, 8 ; eight iterations produce round keys 1..8
michael@0 228
michael@0 229 Lenc_128_ks_loop:
michael@0 230 lea KS, [16 + KS] ; advance to the next round-key slot
michael@0 231 dec ITR ; flags consumed by the jne at the loop end; nothing in between alters EFLAGS
michael@0 232
michael@0 233 pshufb xmm2, xmm4 ; every dword = last dword of the previous key, bytes rotated
michael@0 234 aesenclast xmm2, xmm0 ; byte substitution plus XOR with the round constant (all columns are equal, so the row shift changes nothing)
michael@0 235 pslld xmm0, 1 ; double the round constant for the next iteration
michael@0 236 movdqa xmm3, xmm1
michael@0 237 pslldq xmm3, 4 ; the three shift/xor pairs turn xmm1 into the running XOR of its dwords
michael@0 238 pxor xmm1, xmm3
michael@0 239 pslldq xmm3, 4
michael@0 240 pxor xmm1, xmm3
michael@0 241 pslldq xmm3, 4
michael@0 242 pxor xmm1, xmm3
michael@0 243 pxor xmm1, xmm2 ; xmm1 = new round key
michael@0 244 movdqu [KS], xmm1
michael@0 245 movdqa xmm2, xmm1
michael@0 246
michael@0 247 jne Lenc_128_ks_loop
michael@0 248
michael@0 249 lea ITR, Lcon2
michael@0 250 movdqa xmm0, [ITR] ; switch the round constant to 1bh for round key 9
michael@0 251
michael@0 252 pshufb xmm2, xmm4
michael@0 253 aesenclast xmm2, xmm0
michael@0 254 pslld xmm0, 1 ; 1bh doubled gives 36h for round key 10
michael@0 255 movdqa xmm3, xmm1
michael@0 256 pslldq xmm3, 4
michael@0 257 pxor xmm1, xmm3
michael@0 258 pslldq xmm3, 4
michael@0 259 pxor xmm1, xmm3
michael@0 260 pslldq xmm3, 4
michael@0 261 pxor xmm1, xmm3
michael@0 262 pxor xmm1, xmm2
michael@0 263 movdqu [16 + KS], xmm1 ; KS still addresses slot 8, so this is round key 9
michael@0 264 movdqa xmm2, xmm1
michael@0 265
michael@0 266 pshufb xmm2, xmm4
michael@0 267 aesenclast xmm2, xmm0
michael@0 268 movdqa xmm3, xmm1
michael@0 269 pslldq xmm3, 4
michael@0 270 pxor xmm1, xmm3
michael@0 271 pslldq xmm3, 4
michael@0 272 pxor xmm1, xmm3
michael@0 273 pslldq xmm3, 4
michael@0 274 pxor xmm1, xmm3
michael@0 275 pxor xmm1, xmm2
michael@0 276 movdqu [32 + KS], xmm1 ; round key 10
michael@0 277 movdqa xmm2, xmm1 ; value is not used again before ret
michael@0 278
michael@0 279 ret
michael@0 280 intel_aes_encrypt_init_128 ENDP
michael@0 281
michael@0 282
; intel_aes_decrypt_init_128
; Builds the 11 encryption round keys by calling intel_aes_encrypt_init_128
; with the same two arguments (arg0 = key, arg1 = round-key buffer), then
; converts the buffer in place for the aesdec routines: the slot order is
; reversed and aesimc is applied to slots 1..9. Slots 0 and 10 are only
; swapped.
; Clobbers eax, ecx, edx and the xmm registers used by the callee.
michael@0 283 ALIGN 16
michael@0 284 intel_aes_decrypt_init_128 PROC
michael@0 285
michael@0 286 mov KEY, [esp + 1*4 + 0*4]
michael@0 287 mov KS, [esp + 1*4 + 1*4]
michael@0 288
michael@0 289 push KS ; arguments pushed right to left for the call below
michael@0 290 push KEY
michael@0 291
michael@0 292 call intel_aes_encrypt_init_128
michael@0 293
michael@0 294 pop KEY ; removes the arguments and reloads both registers
michael@0 295 pop KS ; (the callee advances edx, so KS must be restored)
michael@0 296
michael@0 297 movdqu xmm0, [0*16 + KS] ; swap the first and last round keys without aesimc
michael@0 298 movdqu xmm1, [10*16 + KS]
michael@0 299 movdqu [10*16 + KS], xmm0
michael@0 300 movdqu [0*16 + KS], xmm1
michael@0 301
michael@0 302 i = 1
michael@0 303 WHILE i LT 5 ; unrolled at assembly time: pairs (1,9) (2,8) (3,7) (4,6)
michael@0 304 movdqu xmm0, [i*16 + KS]
michael@0 305 movdqu xmm1, [(10-i)*16 + KS]
michael@0 306
michael@0 307 aesimc xmm0, xmm0
michael@0 308 aesimc xmm1, xmm1
michael@0 309
michael@0 310 movdqu [(10-i)*16 + KS], xmm0 ; store each transformed key into its partner's slot
michael@0 311 movdqu [i*16 + KS], xmm1
michael@0 312
michael@0 313 i = i+1
michael@0 314 ENDM
michael@0 315
michael@0 316 movdqu xmm0, [5*16 + KS] ; middle key has no partner: transform in place
michael@0 317 aesimc xmm0, xmm0
michael@0 318 movdqu [5*16 + KS], xmm0
michael@0 319 ret
michael@0 320 intel_aes_decrypt_init_128 ENDP
michael@0 321
michael@0 322
; intel_aes_encrypt_init_192
; Expands the 24-byte key at arg0 into round keys written to the buffer at
; arg1. Slot 0 is the first 16 key bytes; the loop runs four times and
; writes three 16-byte slots per pass (slots 1..12).
; Register roles inside the loop:
;   xmm1 : the most recent four key-schedule dwords
;   xmm3 : the two dwords that follow them (only the low 64 bits matter)
;   xmm5 : two dwords waiting to be paired into the next 16-byte slot
;   xmm0 : round constant, doubled after each use
; Clobbers eax, ecx, edx (edx is advanced through the buffer) and xmm0-xmm7.
michael@0 323 ALIGN 16
michael@0 324 intel_aes_encrypt_init_192 PROC
michael@0 325
michael@0 326 mov KEY, [esp + 1*4 + 0*4] ; 1*4 skips the return address
michael@0 327 mov KS, [esp + 1*4 + 1*4]
michael@0 328
michael@0 329 pxor xmm3, xmm3
michael@0 330 movdqu xmm1, [KEY] ; key bytes 0..15
michael@0 331 pinsrd xmm3, DWORD PTR [16 + KEY], 0 ; key bytes 16..19 into dword 0
michael@0 332 pinsrd xmm3, DWORD PTR [20 + KEY], 1 ; key bytes 20..23 into dword 1
michael@0 333
michael@0 334 movdqu [KS], xmm1 ; slot 0
michael@0 335 movdqa xmm5, xmm3 ; low half of slot 1 is pending
michael@0 336
michael@0 337 lea ITR, Lcon1
michael@0 338 movdqu xmm0, [ITR]
michael@0 339 lea ITR, Lmask192
michael@0 340 movdqu xmm4, [ITR]
michael@0 341
michael@0 342 mov ITR, 4
michael@0 343
michael@0 344 Lenc_192_ks_loop:
michael@0 345 movdqa xmm2, xmm3
michael@0 346 pshufb xmm2, xmm4 ; every dword = dword 1 of xmm3 with its bytes rotated
michael@0 347 aesenclast xmm2, xmm0 ; byte substitution plus XOR with the round constant
michael@0 348 pslld xmm0, 1
michael@0 349
michael@0 350 movdqa xmm6, xmm1
michael@0 351 movdqa xmm7, xmm3
michael@0 352 pslldq xmm6, 4
michael@0 353 pslldq xmm7, 4
michael@0 354 pxor xmm1, xmm6 ; running XOR across the four dwords of xmm1 ...
michael@0 355 pxor xmm3, xmm7 ; ... and across the two low dwords of xmm3
michael@0 356 pslldq xmm6, 4
michael@0 357 pxor xmm1, xmm6
michael@0 358 pslldq xmm6, 4
michael@0 359 pxor xmm1, xmm6
michael@0 360 pxor xmm1, xmm2 ; xmm1 = next four schedule dwords
michael@0 361 pshufd xmm2, xmm1, 0ffh ; broadcast the last new dword
michael@0 362 pxor xmm3, xmm2 ; xmm3 low 64 bits = following two schedule dwords
michael@0 363
michael@0 364 movdqa xmm6, xmm1
michael@0 365 shufpd xmm5, xmm1, 00h ; xmm5 = pending two dwords followed by the low half of xmm1
michael@0 366 shufpd xmm6, xmm3, 01h ; xmm6 = high half of xmm1 followed by the low half of xmm3
michael@0 367
michael@0 368 movdqu [16 + KS], xmm5
michael@0 369 movdqu [32 + KS], xmm6
michael@0 370
michael@0 371 movdqa xmm2, xmm3 ; second expansion step of this pass, same pattern as above
michael@0 372 pshufb xmm2, xmm4
michael@0 373 aesenclast xmm2, xmm0
michael@0 374 pslld xmm0, 1
michael@0 375
michael@0 376 movdqa xmm6, xmm1
michael@0 377 movdqa xmm7, xmm3
michael@0 378 pslldq xmm6, 4
michael@0 379 pslldq xmm7, 4
michael@0 380 pxor xmm1, xmm6
michael@0 381 pxor xmm3, xmm7
michael@0 382 pslldq xmm6, 4
michael@0 383 pxor xmm1, xmm6
michael@0 384 pslldq xmm6, 4
michael@0 385 pxor xmm1, xmm6
michael@0 386 pxor xmm1, xmm2
michael@0 387 pshufd xmm2, xmm1, 0ffh
michael@0 388 pxor xmm3, xmm2
michael@0 389
michael@0 390 movdqu [48 + KS], xmm1 ; this step lands on a slot boundary, so xmm1 is stored whole
michael@0 391 movdqa xmm5, xmm3 ; its trailing two dwords become the new pending half
michael@0 392
michael@0 393 lea KS, [48 + KS] ; three slots consumed per pass
michael@0 394
michael@0 395 dec ITR
michael@0 396 jnz Lenc_192_ks_loop
michael@0 397
michael@0 398 movdqu [16 + KS], xmm5 ; NOTE(review): writes 16 bytes at buffer offset 208 (slot 13); intel_aes_decrypt_init_192 in this file only touches slots 0..12 - confirm the buffer is at least 224 bytes
michael@0 399 ret
michael@0 400 intel_aes_encrypt_init_192 ENDP
michael@0 401
; intel_aes_decrypt_init_192
; Builds the encryption round keys by calling intel_aes_encrypt_init_192
; with the same two arguments (arg0 = key, arg1 = round-key buffer), then
; converts slots 0..12 in place for the aesdec routines: the slot order is
; reversed and aesimc is applied to slots 1..11. Slots 0 and 12 are only
; swapped.
; Clobbers eax, ecx, edx and the xmm registers used by the callee.
michael@0 402 ALIGN 16
michael@0 403 intel_aes_decrypt_init_192 PROC
michael@0 404 mov KEY, [esp + 1*4 + 0*4]
michael@0 405 mov KS, [esp + 1*4 + 1*4]
michael@0 406
michael@0 407 push KS ; arguments pushed right to left for the call below
michael@0 408 push KEY
michael@0 409
michael@0 410 call intel_aes_encrypt_init_192
michael@0 411
michael@0 412 pop KEY ; removes the arguments and reloads both registers
michael@0 413 pop KS ; (the callee advances edx, so KS must be restored)
michael@0 414
michael@0 415 movdqu xmm0, [0*16 + KS] ; swap the first and last round keys without aesimc
michael@0 416 movdqu xmm1, [12*16 + KS]
michael@0 417 movdqu [12*16 + KS], xmm0
michael@0 418 movdqu [0*16 + KS], xmm1
michael@0 419
michael@0 420 i = 1
michael@0 421 WHILE i LT 6 ; unrolled at assembly time: pairs (1,11) (2,10) (3,9) (4,8) (5,7)
michael@0 422 movdqu xmm0, [i*16 + KS]
michael@0 423 movdqu xmm1, [(12-i)*16 + KS]
michael@0 424
michael@0 425 aesimc xmm0, xmm0
michael@0 426 aesimc xmm1, xmm1
michael@0 427
michael@0 428 movdqu [(12-i)*16 + KS], xmm0 ; store each transformed key into its partner's slot
michael@0 429 movdqu [i*16 + KS], xmm1
michael@0 430
michael@0 431 i = i+1
michael@0 432 ENDM
michael@0 433
michael@0 434 movdqu xmm0, [6*16 + KS] ; middle key has no partner: transform in place
michael@0 435 aesimc xmm0, xmm0
michael@0 436 movdqu [6*16 + KS], xmm0
michael@0 437 ret
michael@0 438 intel_aes_decrypt_init_192 ENDP
michael@0 439
; intel_aes_encrypt_init_256
; Expands the 32-byte key at arg0 into 15 round keys (slots 0..14, 16 bytes
; each) written to the buffer at arg1. Slots 0 and 1 are the raw key; the
; loop runs six times and writes two slots per pass (slots 2..13); the tail
; after the loop writes slot 14.
; Register roles: xmm1 = latest even-numbered slot, xmm3 = latest
; odd-numbered slot, xmm0 = round constant, xmm5 = pshufb mask, xmm6 = zero.
; Clobbers eax, ecx, edx (edx is advanced through the buffer) and xmm0-xmm6.
michael@0 440 ALIGN 16
michael@0 441 intel_aes_encrypt_init_256 PROC
michael@0 442
michael@0 443 mov KEY, [esp + 1*4 + 0*4] ; 1*4 skips the return address
michael@0 444 mov KS, [esp + 1*4 + 1*4]
michael@0 445 movdqu xmm1, [16*0 + KEY]
michael@0 446 movdqu xmm3, [16*1 + KEY]
michael@0 447
michael@0 448 movdqu [16*0 + KS], xmm1
michael@0 449 movdqu [16*1 + KS], xmm3
michael@0 450
michael@0 451 lea ITR, Lcon1
michael@0 452 movdqu xmm0, [ITR]
michael@0 453 lea ITR, Lmask256
michael@0 454 movdqu xmm5, [ITR]
michael@0 455
michael@0 456 pxor xmm6, xmm6 ; all-zero operand: aesenclast with it adds no round constant
michael@0 457
michael@0 458 mov ITR, 6
michael@0 459
michael@0 460 Lenc_256_ks_loop:
michael@0 461
michael@0 462 movdqa xmm2, xmm3
michael@0 463 pshufb xmm2, xmm5 ; every dword = last dword of xmm3 with its bytes rotated
michael@0 464 aesenclast xmm2, xmm0 ; byte substitution plus XOR with the round constant
michael@0 465 pslld xmm0, 1 ; double the round constant for the next pass
michael@0 466 movdqa xmm4, xmm1
michael@0 467 pslldq xmm4, 4 ; the three shift/xor pairs form the running XOR of xmm1's dwords
michael@0 468 pxor xmm1, xmm4
michael@0 469 pslldq xmm4, 4
michael@0 470 pxor xmm1, xmm4
michael@0 471 pslldq xmm4, 4
michael@0 472 pxor xmm1, xmm4
michael@0 473 pxor xmm1, xmm2
michael@0 474 movdqu [16*2 + KS], xmm1 ; next even-numbered slot
michael@0 475
michael@0 476 pshufd xmm2, xmm1, 0ffh ; broadcast the last dword of the new key (no rotation)
michael@0 477 aesenclast xmm2, xmm6 ; byte substitution only, because xmm6 is zero
michael@0 478 movdqa xmm4, xmm3
michael@0 479 pslldq xmm4, 4
michael@0 480 pxor xmm3, xmm4
michael@0 481 pslldq xmm4, 4
michael@0 482 pxor xmm3, xmm4
michael@0 483 pslldq xmm4, 4
michael@0 484 pxor xmm3, xmm4
michael@0 485 pxor xmm3, xmm2
michael@0 486 movdqu [16*3 + KS], xmm3 ; next odd-numbered slot
michael@0 487
michael@0 488 lea KS, [32 + KS] ; two slots consumed per pass
michael@0 489 dec ITR
michael@0 490 jnz Lenc_256_ks_loop
michael@0 491
michael@0 492 movdqa xmm2, xmm3 ; tail: one more even-numbered slot (slot 14)
michael@0 493 pshufb xmm2, xmm5
michael@0 494 aesenclast xmm2, xmm0
michael@0 495 movdqa xmm4, xmm1
michael@0 496 pslldq xmm4, 4
michael@0 497 pxor xmm1, xmm4
michael@0 498 pslldq xmm4, 4
michael@0 499 pxor xmm1, xmm4
michael@0 500 pslldq xmm4, 4
michael@0 501 pxor xmm1, xmm4
michael@0 502 pxor xmm1, xmm2
michael@0 503 movdqu [16*2 + KS], xmm1
michael@0 504
michael@0 505 ret
michael@0 506 intel_aes_encrypt_init_256 ENDP
michael@0 507
; intel_aes_decrypt_init_256
; Builds the 15 encryption round keys by calling intel_aes_encrypt_init_256
; with the same two arguments (arg0 = key, arg1 = round-key buffer), then
; converts the buffer in place for the aesdec routines: the slot order is
; reversed and aesimc is applied to slots 1..13. Slots 0 and 14 are only
; swapped.
; Clobbers eax, ecx, edx and the xmm registers used by the callee.
michael@0 508 ALIGN 16
michael@0 509 intel_aes_decrypt_init_256 PROC
michael@0 510 mov KEY, [esp + 1*4 + 0*4]
michael@0 511 mov KS, [esp + 1*4 + 1*4]
michael@0 512
michael@0 513 push KS ; arguments pushed right to left for the call below
michael@0 514 push KEY
michael@0 515
michael@0 516 call intel_aes_encrypt_init_256
michael@0 517
michael@0 518 pop KEY ; removes the arguments and reloads both registers
michael@0 519 pop KS ; (the callee advances edx, so KS must be restored)
michael@0 520
michael@0 521 movdqu xmm0, [0*16 + KS] ; swap the first and last round keys without aesimc
michael@0 522 movdqu xmm1, [14*16 + KS]
michael@0 523 movdqu [14*16 + KS], xmm0
michael@0 524 movdqu [0*16 + KS], xmm1
michael@0 525
michael@0 526 i = 1
michael@0 527 WHILE i LT 7 ; unrolled at assembly time: pairs (1,13) (2,12) ... (6,8)
michael@0 528 movdqu xmm0, [i*16 + KS]
michael@0 529 movdqu xmm1, [(14-i)*16 + KS]
michael@0 530
michael@0 531 aesimc xmm0, xmm0
michael@0 532 aesimc xmm1, xmm1
michael@0 533
michael@0 534 movdqu [(14-i)*16 + KS], xmm0 ; store each transformed key into its partner's slot
michael@0 535 movdqu [i*16 + KS], xmm1
michael@0 536
michael@0 537 i = i+1
michael@0 538 ENDM
michael@0 539
michael@0 540 movdqu xmm0, [7*16 + KS] ; middle key has no partner: transform in place
michael@0 541 aesimc xmm0, xmm0
michael@0 542 movdqu [7*16 + KS], xmm0
michael@0 543 ret
michael@0 544 intel_aes_decrypt_init_256 ENDP
michael@0 545
michael@0 546
michael@0 547
; gen_aes_cbc_enc_func rnds
; Emits the whole body of a CBC encryption routine, including its ret.
;   rnds : index of the last round key (10, 12 or 14 at the call sites in
;          this file).
; Stack arguments read after the return address: arg0 = context pointer,
; arg1 = output pointer, arg4 = input pointer, arg5 = input length in
; bytes. Args 2 and 3 are not touched here.
; The chaining value is read from context offset 12 (44 - 32) on entry and
; written back there on exit. Blocks are processed one at a time; a trailing
; piece shorter than 16 bytes is ignored.
; Returns 0 in eax. edi is preserved; ecx, edx and xmm0-xmm7 are clobbered.
michael@0 548 gen_aes_cbc_enc_func MACRO rnds
michael@0 549
michael@0 550 LOCAL loop1
michael@0 551 LOCAL bail
michael@0 552
michael@0 553 push inputLen ; save edi; restored at bail
michael@0 554
michael@0 555 mov ctx, [esp + 2*4 + 0*4] ; 2*4 skips the saved edi and the return address
michael@0 556 mov output, [esp + 2*4 + 1*4]
michael@0 557 mov input, [esp + 2*4 + 4*4]
michael@0 558 mov inputLen, [esp + 2*4 + 5*4]
michael@0 559
michael@0 560 lea ctx, [44+ctx] ; ctx now addresses round key 0 (context offset 44)
michael@0 561
michael@0 562 movdqu xmm0, [-32+ctx] ; xmm0 = chaining value stored in the context
michael@0 563
michael@0 564 movdqu xmm2, [0*16 + ctx] ; keep round keys 0..4 in registers for the whole loop
michael@0 565 movdqu xmm3, [1*16 + ctx]
michael@0 566 movdqu xmm4, [2*16 + ctx]
michael@0 567 movdqu xmm5, [3*16 + ctx]
michael@0 568 movdqu xmm6, [4*16 + ctx]
michael@0 569
michael@0 570 loop1:
michael@0 571 cmp inputLen, 1*16
michael@0 572 jb bail ; no complete block left
michael@0 573
michael@0 574 movdqu xmm1, [input]
michael@0 575 pxor xmm1, xmm2 ; input block XOR round key 0
michael@0 576 pxor xmm0, xmm1 ; ... XOR previous output block (or the initial chaining value)
michael@0 577
michael@0 578 aesenc xmm0, xmm3 ; rounds 1..4 use the cached keys
michael@0 579 aesenc xmm0, xmm4
michael@0 580 aesenc xmm0, xmm5
michael@0 581 aesenc xmm0, xmm6
michael@0 582
michael@0 583 i = 5
michael@0 584 WHILE i LT rnds ; unrolled at assembly time: rounds 5 .. rnds-1, keys loaded from memory
michael@0 585 movdqu xmm7, [i*16 + ctx]
michael@0 586 aesenc xmm0, xmm7
michael@0 587 i = i+1
michael@0 588 ENDM
michael@0 589 movdqu xmm7, [rnds*16 + ctx]
michael@0 590 aesenclast xmm0, xmm7
michael@0 591
michael@0 592 movdqu [output], xmm0 ; xmm0 also stays live as the next chaining value
michael@0 593
michael@0 594 lea input, [1*16 + input]
michael@0 595 lea output, [1*16 + output]
michael@0 596 sub inputLen, 1*16
michael@0 597 jmp loop1
michael@0 598
michael@0 599 bail:
michael@0 600 movdqu [-32+ctx], xmm0 ; save the chaining value for the next call
michael@0 601
michael@0 602 xor eax, eax ; return value 0
michael@0 603 pop inputLen
michael@0 604 ret
michael@0 605
michael@0 606 ENDM
michael@0 607
; gen_aes_cbc_dec_func rnds
; Emits the whole body of a CBC decryption routine, including its ret.
;   rnds : index of the last round key (10, 12 or 14 at the call sites in
;          this file).
; Stack arguments read after the return address: arg0 = context pointer,
; arg1 = output pointer, arg4 = input pointer, arg5 = input length in
; bytes. Args 2 and 3 are not touched here.
; The chaining value lives at context offset 12 (44 - 32) and is updated to
; the last input block consumed. Seven blocks are processed per pass while
; at least 7*16 bytes remain, then one block per pass; a trailing piece
; shorter than 16 bytes is ignored. In both loops every input block is read
; before the output that could overlap it is written.
; Returns 0 in eax. edi is preserved; ecx, edx and xmm0-xmm7 are clobbered.
michael@0 608 gen_aes_cbc_dec_func MACRO rnds
michael@0 609
michael@0 610 LOCAL loop7
michael@0 611 LOCAL loop1
michael@0 612 LOCAL dec1
michael@0 613 LOCAL bail
michael@0 614
michael@0 615 push inputLen ; save edi; restored at bail
michael@0 616
michael@0 617 mov ctx, [esp + 2*4 + 0*4] ; 2*4 skips the saved edi and the return address
michael@0 618 mov output, [esp + 2*4 + 1*4]
michael@0 619 mov input, [esp + 2*4 + 4*4]
michael@0 620 mov inputLen, [esp + 2*4 + 5*4]
michael@0 621
michael@0 622 lea ctx, [44+ctx] ; ctx now addresses round key 0 (context offset 44)
michael@0 623
michael@0 624 loop7:
michael@0 625 cmp inputLen, 7*16
michael@0 626 jb dec1 ; fewer than seven blocks left: switch to the single-block path
michael@0 627
michael@0 628 movdqu xmm0, [0*16 + input]
michael@0 629 movdqu xmm1, [1*16 + input]
michael@0 630 movdqu xmm2, [2*16 + input]
michael@0 631 movdqu xmm3, [3*16 + input]
michael@0 632 movdqu xmm4, [4*16 + input]
michael@0 633 movdqu xmm5, [5*16 + input]
michael@0 634 movdqu xmm6, [6*16 + input]
michael@0 635
michael@0 636 movdqu xmm7, [0*16 + ctx] ; round key 0: initial whitening of all seven blocks
michael@0 637 pxor xmm0, xmm7
michael@0 638 pxor xmm1, xmm7
michael@0 639 pxor xmm2, xmm7
michael@0 640 pxor xmm3, xmm7
michael@0 641 pxor xmm4, xmm7
michael@0 642 pxor xmm5, xmm7
michael@0 643 pxor xmm6, xmm7
michael@0 644
michael@0 645 i = 1
michael@0 646 WHILE i LT rnds ; unrolled at assembly time: rounds 1 .. rnds-1
michael@0 647 aes_dec_rnd i
michael@0 648 i = i+1
michael@0 649 ENDM
michael@0 650 aes_dec_last_rnd rnds
michael@0 651
michael@0 652 movdqu xmm7, [-32 + ctx] ; block 0 is XORed with the stored chaining value
michael@0 653 pxor xmm0, xmm7
michael@0 654 movdqu xmm7, [0*16 + input] ; block k is XORed with input block k-1, re-read from memory
michael@0 655 pxor xmm1, xmm7
michael@0 656 movdqu xmm7, [1*16 + input]
michael@0 657 pxor xmm2, xmm7
michael@0 658 movdqu xmm7, [2*16 + input]
michael@0 659 pxor xmm3, xmm7
michael@0 660 movdqu xmm7, [3*16 + input]
michael@0 661 pxor xmm4, xmm7
michael@0 662 movdqu xmm7, [4*16 + input]
michael@0 663 pxor xmm5, xmm7
michael@0 664 movdqu xmm7, [5*16 + input]
michael@0 665 pxor xmm6, xmm7
michael@0 666 movdqu xmm7, [6*16 + input] ; last input block of this pass: the next chaining value
michael@0 667
michael@0 668 movdqu [0*16 + output], xmm0
michael@0 669 movdqu [1*16 + output], xmm1
michael@0 670 movdqu [2*16 + output], xmm2
michael@0 671 movdqu [3*16 + output], xmm3
michael@0 672 movdqu [4*16 + output], xmm4
michael@0 673 movdqu [5*16 + output], xmm5
michael@0 674 movdqu [6*16 + output], xmm6
michael@0 675 movdqu [-32 + ctx], xmm7 ; persist the chaining value after every pass
michael@0 676
michael@0 677 lea input, [7*16 + input]
michael@0 678 lea output, [7*16 + output]
michael@0 679 sub inputLen, 7*16
michael@0 680 jmp loop7
michael@0 681 dec1:
michael@0 682
michael@0 683 movdqu xmm3, [-32 + ctx] ; xmm3 carries the chaining value through the single-block loop
michael@0 684
michael@0 685 loop1:
michael@0 686 cmp inputLen, 1*16
michael@0 687 jb bail ; no complete block left
michael@0 688
michael@0 689 movdqu xmm0, [input]
michael@0 690 movdqa xmm4, xmm0 ; keep the untouched input block; it becomes the next chaining value
michael@0 691 movdqu xmm7, [0*16 + ctx]
michael@0 692 pxor xmm0, xmm7 ; initial whitening with round key 0
michael@0 693
michael@0 694 i = 1
michael@0 695 WHILE i LT rnds ; unrolled at assembly time: rounds 1 .. rnds-1 on one block
michael@0 696 movdqu xmm7, [i*16 + ctx]
michael@0 697 aesdec xmm0, xmm7
michael@0 698 i = i+1
michael@0 699 ENDM
michael@0 700 movdqu xmm7, [rnds*16 + ctx]
michael@0 701 aesdeclast xmm0, xmm7
michael@0 702 pxor xmm3, xmm0 ; XOR the block-cipher output with the chaining value
michael@0 703
michael@0 704 movdqu [output], xmm3
michael@0 705 movdqa xmm3, xmm4
michael@0 706
michael@0 707 lea input, [1*16 + input]
michael@0 708 lea output, [1*16 + output]
michael@0 709 sub inputLen, 1*16
michael@0 710 jmp loop1
michael@0 711
michael@0 712 bail:
michael@0 713 movdqu [-32 + ctx], xmm3 ; save the chaining value for the next call
michael@0 714 xor eax, eax ; return value 0
michael@0 715 pop inputLen
michael@0 716 ret
michael@0 717 ENDM
michael@0 718
; CBC entry points. Each procedure is one expansion of gen_aes_cbc_enc_func
; or gen_aes_cbc_dec_func; the argument is the index of the last round key
; (10, 12 or 14).
michael@0 719 ALIGN 16
michael@0 720 intel_aes_encrypt_cbc_128 PROC
michael@0 721 gen_aes_cbc_enc_func 10
michael@0 722 intel_aes_encrypt_cbc_128 ENDP
michael@0 723
michael@0 724 ALIGN 16
michael@0 725 intel_aes_encrypt_cbc_192 PROC
michael@0 726 gen_aes_cbc_enc_func 12
michael@0 727 intel_aes_encrypt_cbc_192 ENDP
michael@0 728
michael@0 729 ALIGN 16
michael@0 730 intel_aes_encrypt_cbc_256 PROC
michael@0 731 gen_aes_cbc_enc_func 14
michael@0 732 intel_aes_encrypt_cbc_256 ENDP
michael@0 733
michael@0 734 ALIGN 16
michael@0 735 intel_aes_decrypt_cbc_128 PROC
michael@0 736 gen_aes_cbc_dec_func 10
michael@0 737 intel_aes_decrypt_cbc_128 ENDP
michael@0 738
michael@0 739 ALIGN 16
michael@0 740 intel_aes_decrypt_cbc_192 PROC
michael@0 741 gen_aes_cbc_dec_func 12
michael@0 742 intel_aes_decrypt_cbc_192 ENDP
michael@0 743
michael@0 744 ALIGN 16
michael@0 745 intel_aes_decrypt_cbc_256 PROC
michael@0 746 gen_aes_cbc_dec_func 14
michael@0 747 intel_aes_decrypt_cbc_256 ENDP
michael@0 748
michael@0 749
michael@0 750
; Extra register aliases used only by gen_aes_ctr_func.
michael@0 751 ctrCtx textequ <esi> ; first the CTR context pointer, later reused as the running 32-bit counter value
michael@0 752 CTR textequ <ebx> ; scratch for one byte-swapped, key-masked counter dword
michael@0 753
; gen_aes_ctr_func rnds
; Emits the whole body of a CTR-mode routine, including its ret.
;   rnds : index of the last round key (10, 12 or 14 at the call sites in
;          this file).
; Stack arguments read after the return address: arg0 = CTR context pointer,
; arg1 = output pointer, arg4 = input pointer, arg5 = input length in
; bytes. Args 2 and 3 are not touched here.
; CTR context fields used: the dword at offset 4 is the AES context pointer
; and the 16 bytes at offset 8 are the counter block.
; Working storage: seven 16-byte stack slots, 16-byte aligned, each holding
; (counter block XOR round key 0) for one upcoming block. Only the last
; dword of each slot differs between slots.
; Seven blocks are processed per pass while at least 7*16 bytes remain, then
; one block per pass; a trailing piece shorter than 16 bytes is ignored.
; On exit the next unused counter block is written back to the CTR context.
; Returns 0 in eax. edi, esi, ebx and ebp are preserved; ecx, edx and
; xmm0-xmm7 are clobbered.
; NOTE(review): only the last 32 bits of the counter block are incremented
; and no carry into the other 96 bits is visible in this macro - confirm
; that callers handle a wrap of that dword.
; NOTE(review): the stack slots are not cleared before returning - confirm
; whether leaving (counter XOR round key 0) on the stack is acceptable.
michael@0 754 gen_aes_ctr_func MACRO rnds
michael@0 755
michael@0 756 LOCAL loop7
michael@0 757 LOCAL loop1
michael@0 758 LOCAL enc1
michael@0 759 LOCAL bail
michael@0 760
michael@0 761 push inputLen ; four registers saved here and popped in reverse order at bail
michael@0 762 push ctrCtx
michael@0 763 push CTR
michael@0 764 push ebp
michael@0 765
michael@0 766 mov ctrCtx, [esp + 4*5 + 0*4] ; 4*5 skips the four saved registers and the return address
michael@0 767 mov output, [esp + 4*5 + 1*4]
michael@0 768 mov input, [esp + 4*5 + 4*4]
michael@0 769 mov inputLen, [esp + 4*5 + 5*4]
michael@0 770
michael@0 771 mov ctx, [4+ctrCtx] ; AES context pointer stored in the CTR context
michael@0 772 lea ctx, [44+ctx] ; ctx now addresses round key 0 (context offset 44)
michael@0 773
michael@0 774 mov ebp, esp ; remember esp so bail can restore it and reload arg0
michael@0 775 sub esp, 7*16 ; room for seven counter slots
michael@0 776 and esp, -16 ; align the slots for the movdqa stores below
michael@0 777
michael@0 778 movdqu xmm0, [8+ctrCtx] ; current counter block
michael@0 779 mov ctrCtx, [ctrCtx + 8 + 3*4] ; from here on esi holds the counter's last dword ...
michael@0 780 bswap ctrCtx ; ... byte-swapped so that inc steps it as a big-endian number
michael@0 781 movdqu xmm1, [ctx + 0*16]
michael@0 782
michael@0 783 pxor xmm0, xmm1 ; fold round key 0 into the counter block up front
michael@0 784
michael@0 785 movdqa [esp + 0*16], xmm0 ; slot 0 is final; slots 1..6 get their last dword patched below
michael@0 786 movdqa [esp + 1*16], xmm0
michael@0 787 movdqa [esp + 2*16], xmm0
michael@0 788 movdqa [esp + 3*16], xmm0
michael@0 789 movdqa [esp + 4*16], xmm0
michael@0 790 movdqa [esp + 5*16], xmm0
michael@0 791 movdqa [esp + 6*16], xmm0
michael@0 792
michael@0 793 inc ctrCtx ; counter + 1 for slot 1
michael@0 794 mov CTR, ctrCtx
michael@0 795 bswap CTR ; back to memory byte order
michael@0 796 xor CTR, [ctx + 3*4] ; XOR with the last dword of round key 0
michael@0 797 mov [esp + 1*16 + 3*4], CTR
michael@0 798
michael@0 799 inc ctrCtx ; same pattern for slot 2
michael@0 800 mov CTR, ctrCtx
michael@0 801 bswap CTR
michael@0 802 xor CTR, [ctx + 3*4]
michael@0 803 mov [esp + 2*16 + 3*4], CTR
michael@0 804
michael@0 805 inc ctrCtx ; slot 3
michael@0 806 mov CTR, ctrCtx
michael@0 807 bswap CTR
michael@0 808 xor CTR, [ctx + 3*4]
michael@0 809 mov [esp + 3*16 + 3*4], CTR
michael@0 810
michael@0 811 inc ctrCtx ; slot 4
michael@0 812 mov CTR, ctrCtx
michael@0 813 bswap CTR
michael@0 814 xor CTR, [ctx + 3*4]
michael@0 815 mov [esp + 4*16 + 3*4], CTR
michael@0 816
michael@0 817 inc ctrCtx ; slot 5
michael@0 818 mov CTR, ctrCtx
michael@0 819 bswap CTR
michael@0 820 xor CTR, [ctx + 3*4]
michael@0 821 mov [esp + 5*16 + 3*4], CTR
michael@0 822
michael@0 823 inc ctrCtx ; slot 6
michael@0 824 mov CTR, ctrCtx
michael@0 825 bswap CTR
michael@0 826 xor CTR, [ctx + 3*4]
michael@0 827 mov [esp + 6*16 + 3*4], CTR
michael@0 828
michael@0 829
michael@0 830 loop7:
michael@0 831 cmp inputLen, 7*16
michael@0 832 jb loop1 ; fewer than seven blocks left: switch to the single-block loop
michael@0 833
michael@0 834 movdqu xmm0, [0*16 + esp] ; load the seven prepared counter blocks (round key 0 already applied)
michael@0 835 movdqu xmm1, [1*16 + esp]
michael@0 836 movdqu xmm2, [2*16 + esp]
michael@0 837 movdqu xmm3, [3*16 + esp]
michael@0 838 movdqu xmm4, [4*16 + esp]
michael@0 839 movdqu xmm5, [5*16 + esp]
michael@0 840 movdqu xmm6, [6*16 + esp]
michael@0 841
michael@0 842 i = 1
michael@0 843 WHILE i LE 7 ; unrolled at assembly time: rounds 1..7, each followed by refreshing slot i-1 with the next counter value
michael@0 844 aes_rnd i
michael@0 845
michael@0 846 inc ctrCtx
michael@0 847 mov CTR, ctrCtx
michael@0 848 bswap CTR
michael@0 849 xor CTR, [ctx + 3*4]
michael@0 850 mov [esp + (i-1)*16 + 3*4], CTR
michael@0 851
michael@0 852 i = i+1
michael@0 853 ENDM
michael@0 854 WHILE i LT rnds ; remaining middle rounds 8 .. rnds-1
michael@0 855 aes_rnd i
michael@0 856 i = i+1
michael@0 857 ENDM
michael@0 858 aes_last_rnd rnds
michael@0 859
michael@0 860 movdqu xmm7, [0*16 + input] ; XOR each keystream block with its input block
michael@0 861 pxor xmm0, xmm7
michael@0 862 movdqu xmm7, [1*16 + input]
michael@0 863 pxor xmm1, xmm7
michael@0 864 movdqu xmm7, [2*16 + input]
michael@0 865 pxor xmm2, xmm7
michael@0 866 movdqu xmm7, [3*16 + input]
michael@0 867 pxor xmm3, xmm7
michael@0 868 movdqu xmm7, [4*16 + input]
michael@0 869 pxor xmm4, xmm7
michael@0 870 movdqu xmm7, [5*16 + input]
michael@0 871 pxor xmm5, xmm7
michael@0 872 movdqu xmm7, [6*16 + input]
michael@0 873 pxor xmm6, xmm7
michael@0 874
michael@0 875 movdqu [0*16 + output], xmm0
michael@0 876 movdqu [1*16 + output], xmm1
michael@0 877 movdqu [2*16 + output], xmm2
michael@0 878 movdqu [3*16 + output], xmm3
michael@0 879 movdqu [4*16 + output], xmm4
michael@0 880 movdqu [5*16 + output], xmm5
michael@0 881 movdqu [6*16 + output], xmm6
michael@0 882
michael@0 883 lea input, [7*16 + input]
michael@0 884 lea output, [7*16 + output]
michael@0 885 sub inputLen, 7*16
michael@0 886 jmp loop7
michael@0 887
michael@0 888
michael@0 889 loop1:
michael@0 890 cmp inputLen, 1*16
michael@0 891 jb bail ; no complete block left
michael@0 892
michael@0 893 movdqu xmm0, [esp] ; take the next prepared counter block ...
michael@0 894 add esp, 16 ; ... and consume its slot; at most six blocks remain here, so one slot is always left for bail
michael@0 895
michael@0 896 i = 1
michael@0 897 WHILE i LT rnds ; unrolled at assembly time: rounds 1 .. rnds-1 on one block
michael@0 898 movdqu xmm7, [i*16 + ctx]
michael@0 899 aesenc xmm0, xmm7
michael@0 900 i = i+1
michael@0 901 ENDM
michael@0 902 movdqu xmm7, [rnds*16 + ctx]
michael@0 903 aesenclast xmm0, xmm7
michael@0 904
michael@0 905 movdqu xmm7, [input]
michael@0 906 pxor xmm0, xmm7 ; keystream XOR input
michael@0 907 movdqu [output], xmm0
michael@0 908
michael@0 909 lea input, [1*16 + input]
michael@0 910 lea output, [1*16 + output]
michael@0 911 sub inputLen, 1*16
michael@0 912 jmp loop1
michael@0 913
michael@0 914 bail:
michael@0 915
michael@0 916 mov ctrCtx, [ebp + 4*5 + 0*4] ; reload the CTR context pointer (arg0) through the saved frame
michael@0 917 movdqu xmm0, [esp] ; next unused counter slot
michael@0 918 movdqu xmm1, [ctx + 0*16]
michael@0 919 pxor xmm0, xmm1 ; undo the round key 0 XOR to recover the plain counter block
michael@0 920 movdqu [8+ctrCtx], xmm0 ; store it as the counter for the next call
michael@0 921
michael@0 922
michael@0 923 xor eax, eax ; return value 0
michael@0 924 mov esp, ebp ; drop the slot area (also undoes the add esp, 16 steps)
michael@0 925 pop ebp
michael@0 926 pop CTR
michael@0 927 pop ctrCtx
michael@0 928 pop inputLen
michael@0 929 ret
michael@0 930 ENDM
michael@0 931
michael@0 932
; CTR entry points. Each procedure is one expansion of gen_aes_ctr_func; the
; argument is the index of the last round key (10, 12 or 14).
michael@0 933 ALIGN 16
michael@0 934 intel_aes_encrypt_ctr_128 PROC
michael@0 935 gen_aes_ctr_func 10
michael@0 936 intel_aes_encrypt_ctr_128 ENDP
michael@0 937
michael@0 938 ALIGN 16
michael@0 939 intel_aes_encrypt_ctr_192 PROC
michael@0 940 gen_aes_ctr_func 12
michael@0 941 intel_aes_encrypt_ctr_192 ENDP
michael@0 942
michael@0 943 ALIGN 16
michael@0 944 intel_aes_encrypt_ctr_256 PROC
michael@0 945 gen_aes_ctr_func 14
michael@0 946 intel_aes_encrypt_ctr_256 ENDP
michael@0 947
michael@0 948
michael@0 949 END

mercurial