security/nss/lib/freebl/intel-aes-x86-masm.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 ; LICENSE:
     2 ; This submission to NSS is to be made available under the terms of the
     3 ; Mozilla Public License, v. 2.0. You can obtain one at
     4 ; http://mozilla.org/MPL/2.0/.
     5 ;###############################################################################
     6 ; Copyright(c) 2014, Intel Corp.
     7 ; Developers and authors:
     8 ; Shay Gueron and Vlad Krasnov
     9 ; Intel Corporation, Israel Development Centre, Haifa, Israel
    10 ; Please send feedback directly to crypto.feedback.alias@intel.com
    13 .MODEL FLAT, C
       ; 32-bit flat memory model with the C language convention: every
       ; procedure in this file fetches its arguments from the stack via esp.
    14 .XMM
       ; .XMM lets the assembler accept the SSE/XMM instructions used below.
    16 .DATA
    17 ALIGN 16
       ; Key-schedule constants. Each table is exactly four dwords (16 bytes),
       ; so after the ALIGN 16 above every label stays on a 16-byte boundary;
       ; the AES-128 key setup loads Lcon1 and Lmask with movdqa and depends
       ; on that.
       ; Lmask / Lmask256: pshufb control that copies source bytes 13,14,15,12
       ; into every dword (the last dword rotated by one byte).
    18 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
       ; Lmask192: pshufb control that copies source bytes 5,6,7,4 into every
       ; dword (the second dword rotated by one byte).
    19 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
    20 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
       ; Lcon1: first round constant (1) in every dword; the key-setup loops
       ; double it with pslld. Lcon2: round constant 1bh, used by the AES-128
       ; setup once the doubled value would no longer fit the byte.
    21 Lcon1 dd 1,1,1,1
    22 Lcon2 dd 1bh,1bh,1bh,1bh
    24 .CODE
       ; Register roles shared by the ECB/CBC/CTR macros below.
    26 ctx     textequ <ecx>
    27 output  textequ <edx>
    28 input   textequ <eax>
    29 inputLen textequ <edi>
    32 aes_rnd MACRO i
       ; One AES encryption round on seven blocks in parallel: loads the
       ; 16-byte round key at [i*16 + ctx] into xmm7 and applies aesenc with
       ; it to each of xmm0..xmm6. Clobbers xmm7. The key is loaded with
       ; movdqu, so ctx does not have to be 16-byte aligned.
    33     movdqu  xmm7, [i*16 + ctx]
    34     aesenc  xmm0, xmm7
    35     aesenc  xmm1, xmm7
    36     aesenc  xmm2, xmm7
    37     aesenc  xmm3, xmm7
    38     aesenc  xmm4, xmm7
    39     aesenc  xmm5, xmm7
    40     aesenc  xmm6, xmm7
    41     ENDM
    43 aes_last_rnd MACRO i
       ; Final AES encryption round on seven blocks in parallel: loads the
       ; round key at [i*16 + ctx] into xmm7 and applies aesenclast with it to
       ; each of xmm0..xmm6. Clobbers xmm7.
    44     movdqu  xmm7, [i*16 + ctx]
    45     aesenclast  xmm0, xmm7
    46     aesenclast  xmm1, xmm7
    47     aesenclast  xmm2, xmm7
    48     aesenclast  xmm3, xmm7
    49     aesenclast  xmm4, xmm7
    50     aesenclast  xmm5, xmm7
    51     aesenclast  xmm6, xmm7
    52     ENDM
    54 aes_dec_rnd MACRO i
       ; One AES decryption round on seven blocks in parallel: loads the
       ; round key at [i*16 + ctx] into xmm7 and applies aesdec with it to
       ; each of xmm0..xmm6. Clobbers xmm7.
    55     movdqu  xmm7, [i*16 + ctx]
    56     aesdec  xmm0, xmm7
    57     aesdec  xmm1, xmm7
    58     aesdec  xmm2, xmm7
    59     aesdec  xmm3, xmm7
    60     aesdec  xmm4, xmm7
    61     aesdec  xmm5, xmm7
    62     aesdec  xmm6, xmm7
    63     ENDM
    65 aes_dec_last_rnd MACRO i
       ; Final AES decryption round on seven blocks in parallel: loads the
       ; round key at [i*16 + ctx] into xmm7 and applies aesdeclast with it to
       ; each of xmm0..xmm6. Clobbers xmm7.
    66     movdqu  xmm7, [i*16 + ctx]
    67     aesdeclast  xmm0, xmm7
    68     aesdeclast  xmm1, xmm7
    69     aesdeclast  xmm2, xmm7
    70     aesdeclast  xmm3, xmm7
    71     aesdeclast  xmm4, xmm7
    72     aesdeclast  xmm5, xmm7
    73     aesdeclast  xmm6, xmm7
    74     ENDM
    77 gen_aes_ecb_func MACRO enc, rnds
    79 LOCAL   loop7
    80 LOCAL   loop1
    81 LOCAL   bail
       ; Emits the body of an ECB routine. Input is processed in whole
       ; 16-byte blocks: seven at a time while at least 7*16 bytes remain,
       ; then one at a time. A trailing partial block is ignored.
       ;   enc  - 1 selects aesenc/aesenclast, any other value selects
       ;          aesdec/aesdeclast
       ;   rnds - number of AES rounds; round keys 0..rnds are read
       ; Stack arguments used (0-based dword index): 0 = context, 1 = output,
       ; 4 = input, 5 = input length in bytes. Arguments 2 and 3 are not read.
       ; NOTE(review): presumably the NSS cipher prototype (cx, output,
       ; outputLen, maxOutputLen, input, inputLen, blocksize) - confirm
       ; against the C caller.
       ; Returns 0 in eax. Clobbers ecx, edx, xmm0-xmm7; edi is saved and
       ; restored.
    83         push    inputLen
       ; 2*4 skips the saved edi and the return address.
    85         mov     ctx,    [esp + 2*4 + 0*4]
    86         mov     output,     [esp + 2*4 + 1*4]
    87         mov     input,      [esp + 2*4 + 4*4]
    88         mov     inputLen,   [esp + 2*4 + 5*4]
       ; From here on ctx points 44 bytes into the context; round key k is
       ; read from [k*16 + ctx].
       ; NOTE(review): 44 is presumably the offset of the expanded key inside
       ; the context structure - confirm against the C definition.
    90         lea     ctx, [44+ctx]
       ; ---- seven blocks per iteration ----
    92 loop7:
    93         cmp     inputLen, 7*16
    94         jb      loop1
    96         movdqu  xmm0, [0*16 + input]
    97         movdqu  xmm1, [1*16 + input]
    98         movdqu  xmm2, [2*16 + input]
    99         movdqu  xmm3, [3*16 + input]
   100         movdqu  xmm4, [4*16 + input]
   101         movdqu  xmm5, [5*16 + input]
   102         movdqu  xmm6, [6*16 + input]
       ; Initial whitening: XOR round key 0 into all seven blocks.
   104         movdqu  xmm7, [0*16 + ctx]
   105         pxor    xmm0, xmm7
   106         pxor    xmm1, xmm7
   107         pxor    xmm2, xmm7
   108         pxor    xmm3, xmm7
   109         pxor    xmm4, xmm7
   110         pxor    xmm5, xmm7
   111         pxor    xmm6, xmm7
       ; Assembly-time choice of the round macros and instructions. These text
       ; macros are redefined by every expansion of gen_aes_ecb_func and are
       ; also used by the single-block loop further down.
   113 IF enc eq 1
   114         rnd textequ <aes_rnd>
   115         lastrnd textequ <aes_last_rnd>
   116         aesinst textequ <aesenc>
   117         aeslastinst textequ <aesenclast>
   118 ELSE
   119         rnd textequ <aes_dec_rnd>
   120         lastrnd textequ <aes_dec_last_rnd>
   121         aesinst textequ <aesdec>
   122         aeslastinst textequ <aesdeclast>
   123 ENDIF
       ; Unrolled at assembly time: rounds 1 to rnds-1, then the final round.
   125         i = 1
   126         WHILE i LT rnds
   127             rnd i
   128             i = i+1
   129             ENDM
   130         lastrnd rnds
   132         movdqu  [0*16 + output], xmm0
   133         movdqu  [1*16 + output], xmm1
   134         movdqu  [2*16 + output], xmm2
   135         movdqu  [3*16 + output], xmm3
   136         movdqu  [4*16 + output], xmm4
   137         movdqu  [5*16 + output], xmm5
   138         movdqu  [6*16 + output], xmm6
   140         lea input, [7*16 + input]
   141         lea output, [7*16 + output]
   142         sub inputLen, 7*16
   143         jmp loop7
       ; ---- one block per iteration (fewer than seven blocks remain) ----
   145 loop1:
   146         cmp     inputLen, 1*16
   147         jb      bail
   149         movdqu  xmm0, [input]
   150         movdqu  xmm7, [0*16 + ctx]
   151         pxor    xmm0, xmm7
   153         i = 1
   154     WHILE i LT rnds
   155             movdqu  xmm7, [i*16 + ctx]
   156             aesinst  xmm0, xmm7
   157             i = i+1
   158         ENDM
   159         movdqu  xmm7, [rnds*16 + ctx]
   160         aeslastinst xmm0, xmm7
   162         movdqu  [output], xmm0
   164         lea input, [1*16 + input]
   165         lea output, [1*16 + output]
   166         sub inputLen, 1*16
   167         jmp loop1
       ; Done: return 0 and restore the caller's edi.
   169 bail:
   170         xor eax, eax
   171         pop     inputLen
   172         ret
   174 ENDM
   176 ALIGN 16
       ; ECB encryption entry point: gen_aes_ecb_func expanded with enc = 1
       ; (aesenc/aesenclast) and 10 rounds, i.e. round keys 0..10.
   177 intel_aes_encrypt_ecb_128 PROC
   178 gen_aes_ecb_func 1, 10
   179 intel_aes_encrypt_ecb_128 ENDP
   181 ALIGN 16
       ; ECB encryption entry point: gen_aes_ecb_func expanded with enc = 1
       ; (aesenc/aesenclast) and 12 rounds, i.e. round keys 0..12.
   182 intel_aes_encrypt_ecb_192 PROC
   183 gen_aes_ecb_func 1, 12
   184 intel_aes_encrypt_ecb_192 ENDP
   186 ALIGN 16
       ; ECB encryption entry point: gen_aes_ecb_func expanded with enc = 1
       ; (aesenc/aesenclast) and 14 rounds, i.e. round keys 0..14.
   187 intel_aes_encrypt_ecb_256 PROC
   188 gen_aes_ecb_func 1, 14
   189 intel_aes_encrypt_ecb_256 ENDP
   191 ALIGN 16
       ; ECB decryption entry point: gen_aes_ecb_func expanded with enc = 0
       ; (aesdec/aesdeclast) and 10 rounds. Round keys are consumed in the
       ; order 0..10.
       ; NOTE(review): presumably expects the schedule laid out by
       ; intel_aes_decrypt_init_128 - confirm against the caller.
   192 intel_aes_decrypt_ecb_128 PROC
   193 gen_aes_ecb_func 0, 10
   194 intel_aes_decrypt_ecb_128 ENDP
   196 ALIGN 16
       ; ECB decryption entry point: gen_aes_ecb_func expanded with enc = 0
       ; (aesdec/aesdeclast) and 12 rounds. Round keys are consumed in the
       ; order 0..12.
       ; NOTE(review): presumably expects the schedule laid out by
       ; intel_aes_decrypt_init_192 - confirm against the caller.
   197 intel_aes_decrypt_ecb_192 PROC
   198 gen_aes_ecb_func 0, 12
   199 intel_aes_decrypt_ecb_192 ENDP
   201 ALIGN 16
       ; ECB decryption entry point: gen_aes_ecb_func expanded with enc = 0
       ; (aesdec/aesdeclast) and 14 rounds. Round keys are consumed in the
       ; order 0..14.
       ; NOTE(review): presumably expects the schedule laid out by
       ; intel_aes_decrypt_init_256 - confirm against the caller.
   202 intel_aes_decrypt_ecb_256 PROC
   203 gen_aes_ecb_func 0, 14
   204 intel_aes_decrypt_ecb_256 ENDP
   207 KEY textequ <ecx>                ; key-setup routines: pointer to the raw key bytes
   208 KS  textequ <edx>                ; key-setup routines: pointer to the round-key array
   209 ITR textequ <eax>                ; key-setup routines: scratch pointer, then loop counter
   211 ALIGN 16
       ; intel_aes_encrypt_init_128
       ; Expands the 16-byte key at the first stack argument into 11 round
       ; keys (176 bytes) written to the buffer given by the second stack
       ; argument (round key k at offset k*16).
       ; Clobbers eax, ecx, edx, xmm0-xmm4. eax is not a meaningful return
       ; value (it is left holding the address of Lcon2).
       ; Lcon1 and Lmask are loaded with movdqa and must therefore be 16-byte
       ; aligned (see ALIGN 16 in .DATA).
   212 intel_aes_encrypt_init_128  PROC
       ; 1*4 skips the return address.
   214     mov     KEY,        [esp + 1*4 + 0*4]
   215     mov     KS,         [esp + 1*4 + 1*4]
       ; Round key 0 is the raw key. xmm1 = current round key, xmm2 = working
       ; copy that feeds the pshufb/aesenclast step.
   218     movdqu  xmm1, [KEY]
   219     movdqu  [KS], xmm1
   220     movdqa  xmm2, xmm1
       ; xmm0 = round constant (starts at 1), xmm4 = byte-rotate shuffle mask.
   222     lea ITR, Lcon1
   223     movdqa  xmm0, [ITR]
   224     lea ITR, Lmask
   225     movdqa  xmm4, [ITR]
       ; Eight iterations produce round keys 1..8 with round constants
       ; 1, 2, 4, ... 80h (pslld doubles the constant each time).
   227     mov ITR, 8
   229 Lenc_128_ks_loop:
   230         lea KS, [16 + KS]
       ; The flags set by this dec are consumed by the jne at the bottom of
       ; the loop; every instruction in between is an SSE/AES instruction
       ; that leaves EFLAGS untouched.
   231         dec ITR
       ; xmm2 = the last dword of the previous round key, rotated by one
       ; byte and replicated into all four dwords; aesenclast then applies
       ; the S-box and XORs in the round constant (its ShiftRows step has no
       ; net effect because all four columns are equal).
   233         pshufb  xmm2, xmm4
   234         aesenclast  xmm2, xmm0
   235         pslld   xmm0, 1
       ; Running XOR of the four dwords of the previous round key:
       ; xmm1 ^= (xmm1 shl 32) ^ (xmm1 shl 64) ^ (xmm1 shl 96).
   236         movdqa  xmm3, xmm1
   237         pslldq  xmm3, 4
   238         pxor    xmm1, xmm3
   239         pslldq  xmm3, 4
   240         pxor    xmm1, xmm3
   241         pslldq  xmm3, 4
   242         pxor    xmm1, xmm3
   243         pxor    xmm1, xmm2
   244         movdqu  [KS], xmm1
   245         movdqa  xmm2, xmm1
   247         jne Lenc_128_ks_loop
       ; Round key 9: same step with round constant 1bh (Lcon2). KS still
       ; points at round key 8, hence the +16 displacement.
   249     lea ITR, Lcon2
   250     movdqa  xmm0, [ITR]
   252     pshufb  xmm2, xmm4
   253     aesenclast  xmm2, xmm0
       ; 1bh doubled gives 36h, the round constant for round key 10.
   254     pslld   xmm0, 1
   255     movdqa  xmm3, xmm1
   256     pslldq  xmm3, 4
   257     pxor    xmm1, xmm3
   258     pslldq  xmm3, 4
   259     pxor    xmm1, xmm3
   260     pslldq  xmm3, 4
   261     pxor    xmm1, xmm3
   262     pxor    xmm1, xmm2
   263     movdqu  [16 + KS], xmm1
   264     movdqa  xmm2, xmm1
       ; Round key 10 (last). No working copy is taken afterwards because
       ; nothing follows that would use it.
   266     pshufb  xmm2, xmm4
   267     aesenclast  xmm2, xmm0
   268     movdqa  xmm3, xmm1
   269     pslldq  xmm3, 4
   270     pxor    xmm1, xmm3
   271     pslldq  xmm3, 4
   272     pxor    xmm1, xmm3
   273     pslldq  xmm3, 4
   274     pxor    xmm1, xmm3
   275     pxor    xmm1, xmm2
   276     movdqu  [32 + KS], xmm1
   279     ret
   280 intel_aes_encrypt_init_128  ENDP
   283 ALIGN 16
       ; intel_aes_decrypt_init_128
       ; Builds the schedule consumed by the aesdec-based routines: runs
       ; intel_aes_encrypt_init_128, then reverses the order of the 11 round
       ; keys and applies aesimc to round keys 1..9. Round keys 0 and 10 are
       ; swapped without aesimc.
       ; Stack arguments: 0 = key pointer, 1 = round-key buffer.
       ; Clobbers eax, ecx, edx and the XMM registers used by the callee.
   284 intel_aes_decrypt_init_128  PROC
   286     mov     KEY,        [esp + 1*4 + 0*4]
   287     mov     KS,         [esp + 1*4 + 1*4]
       ; Pass (KEY, KS) on to the encrypt setup. The pushed copies double as
       ; saved values: the callee advances edx, so both are popped back
       ; afterwards.
   289     push    KS
   290     push    KEY
   292     call    intel_aes_encrypt_init_128
   294     pop     KEY
   295     pop     KS
       ; Swap the first and last round keys unchanged.
   297     movdqu  xmm0, [0*16 + KS]
   298     movdqu  xmm1, [10*16 + KS]
   299     movdqu  [10*16 + KS], xmm0
   300     movdqu  [0*16 + KS], xmm1
       ; Swap round keys k and 10-k for k = 1..4, applying aesimc to both.
   302     i = 1
   303     WHILE i LT 5
   304         movdqu  xmm0, [i*16 + KS]
   305         movdqu  xmm1, [(10-i)*16 + KS]
   307         aesimc  xmm0, xmm0
   308         aesimc  xmm1, xmm1
   310         movdqu  [(10-i)*16 + KS], xmm0
   311         movdqu  [i*16 + KS], xmm1
   313         i = i+1
   314     ENDM
       ; The middle round key (5) stays in place and only gets aesimc.
   316     movdqu  xmm0, [5*16 + KS]
   317     aesimc  xmm0, xmm0
   318     movdqu  [5*16 + KS], xmm0
   319     ret
   320 intel_aes_decrypt_init_128  ENDP
   323 ALIGN 16
       ; intel_aes_encrypt_init_192
       ; Expands the 24-byte key at the first stack argument into 13 round
       ; keys (208 bytes, round key k at offset k*16) in the buffer given by
       ; the second stack argument. Nothing is written beyond offset 207.
       ; Clobbers eax, ecx, edx, xmm0-xmm7. eax is not a meaningful return
       ; value (it is 0 on exit because it served as the loop counter).
       ; Uses pinsrd in addition to pshufb and the AES instructions.
   324 intel_aes_encrypt_init_192  PROC
       ; 1*4 skips the return address.
   326     mov     KEY, [esp + 1*4 + 0*4]
   327     mov     KS,  [esp + 1*4 + 1*4]
       ; xmm1 = key bytes 0..15; xmm3 = key bytes 16..23 in its low half with
       ; the high half zeroed.
   329     pxor    xmm3, xmm3
   330     movdqu  xmm1, [KEY]
   331     pinsrd  xmm3, DWORD PTR [16 + KEY], 0
   332     pinsrd  xmm3, DWORD PTR [20 + KEY], 1
       ; Round key 0 is the first 16 key bytes. xmm5 carries the two key
       ; words that form the low half of the next round key.
   334     movdqu  [KS], xmm1
   335     movdqa  xmm5, xmm3
       ; xmm0 = round constant (starts at 1), xmm4 = byte-rotate shuffle mask.
   337     lea ITR, Lcon1
   338     movdqu  xmm0, [ITR]
   339     lea ITR, Lmask192
   340     movdqu  xmm4, [ITR]
       ; Four iterations, each consuming two round constants and emitting
       ; three round keys (48 bytes): round keys 1..12 in total.
   342     mov ITR, 4
   344 Lenc_192_ks_loop:
       ; First half: xmm2 = the second dword of xmm3, rotated by one byte and
       ; replicated; aesenclast applies the S-box and XORs the round constant.
   345         movdqa  xmm2, xmm3
   346         pshufb  xmm2, xmm4
   347         aesenclast xmm2, xmm0
   348         pslld   xmm0, 1
       ; Running XOR over the four dwords of xmm1 and the two live dwords of
       ; xmm3; only the low 64 bits of xmm3 are meaningful.
   350         movdqa  xmm6, xmm1
   351         movdqa  xmm7, xmm3
   352         pslldq  xmm6, 4
   353         pslldq  xmm7, 4
   354         pxor    xmm1, xmm6
   355         pxor    xmm3, xmm7
   356         pslldq  xmm6, 4
   357         pxor    xmm1, xmm6
   358         pslldq  xmm6, 4
   359         pxor    xmm1, xmm6
   360         pxor    xmm1, xmm2
       ; Fold the last new dword of xmm1 into the two trailing key words.
   361         pshufd  xmm2, xmm1, 0ffh
   362         pxor    xmm3, xmm2
       ; Re-pack six-word groups into 16-byte round keys:
       ; xmm5 = carried low half + low half of xmm1,
       ; xmm6 = high half of xmm1 + low half of xmm3.
   364         movdqa  xmm6, xmm1
   365         shufpd  xmm5, xmm1, 00h
   366         shufpd  xmm6, xmm3, 01h
   368         movdqu  [16 + KS], xmm5
   369         movdqu  [32 + KS], xmm6
       ; Second half: same step with the next round constant; the new xmm1 is
       ; a complete round key on its own.
   371         movdqa  xmm2, xmm3
   372         pshufb  xmm2, xmm4
   373         aesenclast  xmm2, xmm0
   374         pslld   xmm0, 1
   376         movdqa  xmm6, xmm1
   377         movdqa  xmm7, xmm3
   378         pslldq  xmm6, 4
   379         pslldq  xmm7, 4
   380         pxor    xmm1, xmm6
   381         pxor    xmm3, xmm7
   382         pslldq  xmm6, 4
   383         pxor    xmm1, xmm6
   384         pslldq  xmm6, 4
   385         pxor    xmm1, xmm6
   386         pxor    xmm1, xmm2
   387         pshufd  xmm2, xmm1, 0ffh
   388         pxor    xmm3, xmm2
   390         movdqu  [48 + KS], xmm1
       ; Carry the two trailing words into the next iteration.
   391         movdqa  xmm5, xmm3
   393         lea KS, [48 + KS]
   395         dec ITR
   396         jnz Lenc_192_ks_loop
       ; After the last iteration xmm5 holds two words that lie beyond the
       ; 13-round-key schedule; they are deliberately not stored, so the
       ; routine never writes past round key 12.
   399 ret
   400 intel_aes_encrypt_init_192  ENDP
   402 ALIGN 16
       ; intel_aes_decrypt_init_192
       ; Builds the schedule consumed by the aesdec-based routines: runs
       ; intel_aes_encrypt_init_192, then reverses the order of round keys
       ; 0..12 and applies aesimc to round keys 1..11. Round keys 0 and 12
       ; are swapped without aesimc.
       ; Stack arguments: 0 = key pointer, 1 = round-key buffer.
       ; Clobbers eax, ecx, edx and the XMM registers used by the callee.
   403 intel_aes_decrypt_init_192  PROC
   404     mov     KEY,        [esp + 1*4 + 0*4]
   405     mov     KS,         [esp + 1*4 + 1*4]
       ; Pass (KEY, KS) on to the encrypt setup. The pushed copies double as
       ; saved values: the callee advances edx, so both are popped back
       ; afterwards.
   407     push    KS
   408     push    KEY
   410     call    intel_aes_encrypt_init_192
   412     pop     KEY
   413     pop     KS
       ; Swap the first and last round keys unchanged.
   415     movdqu  xmm0, [0*16 + KS]
   416     movdqu  xmm1, [12*16 + KS]
   417     movdqu  [12*16 + KS], xmm0
   418     movdqu  [0*16 + KS], xmm1
       ; Swap round keys k and 12-k for k = 1..5, applying aesimc to both.
   420     i = 1
   421     WHILE i LT 6
   422         movdqu  xmm0, [i*16 + KS]
   423         movdqu  xmm1, [(12-i)*16 + KS]
   425         aesimc  xmm0, xmm0
   426         aesimc  xmm1, xmm1
   428         movdqu  [(12-i)*16 + KS], xmm0
   429         movdqu  [i*16 + KS], xmm1
   431         i = i+1
   432     ENDM
       ; The middle round key (6) stays in place and only gets aesimc.
   434     movdqu  xmm0, [6*16 + KS]
   435     aesimc  xmm0, xmm0
   436     movdqu  [6*16 + KS], xmm0
   437     ret
   438 intel_aes_decrypt_init_192  ENDP
   440 ALIGN 16
       ; intel_aes_encrypt_init_256
       ; Expands the 32-byte key at the first stack argument into 15 round
       ; keys (240 bytes, round key k at offset k*16) in the buffer given by
       ; the second stack argument.
       ; Clobbers eax, ecx, edx, xmm0-xmm6. eax is not a meaningful return
       ; value (it is 0 on exit because it served as the loop counter).
   441 intel_aes_encrypt_init_256  PROC
       ; 1*4 skips the return address.
   443     mov     KEY,    [esp + 1*4 + 0*4]
   444     mov     KS,     [esp + 1*4 + 1*4]
       ; Round keys 0 and 1 are the two halves of the raw key.
       ; xmm1 = even round key, xmm3 = odd round key.
   445     movdqu  xmm1, [16*0 + KEY]
   446     movdqu  xmm3, [16*1 + KEY]
   448     movdqu  [16*0 + KS], xmm1
   449     movdqu  [16*1 + KS], xmm3
       ; xmm0 = round constant (starts at 1), xmm5 = byte-rotate shuffle mask.
   451     lea ITR, Lcon1
   452     movdqu  xmm0, [ITR]
   453     lea ITR, Lmask256
   454     movdqu  xmm5, [ITR]
       ; xmm6 = 0: aesenclast with a zero key applies the S-box only.
   456     pxor    xmm6, xmm6
       ; Six iterations, each emitting one even and one odd round key
       ; (round keys 2..13), using round constants 1, 2, 4, ... 20h.
   458     mov ITR, 6
   460 Lenc_256_ks_loop:
       ; Even round key: rotate the last dword of the odd key, replicate it,
       ; apply the S-box and XOR in the round constant.
   462         movdqa  xmm2, xmm3
   463         pshufb  xmm2, xmm5
   464         aesenclast  xmm2, xmm0
   465         pslld   xmm0, 1
       ; Running XOR of the four dwords of the previous even key.
   466         movdqa  xmm4, xmm1
   467         pslldq  xmm4, 4
   468         pxor    xmm1, xmm4
   469         pslldq  xmm4, 4
   470         pxor    xmm1, xmm4
   471         pslldq  xmm4, 4
   472         pxor    xmm1, xmm4
   473         pxor    xmm1, xmm2
   474         movdqu  [16*2 + KS], xmm1
       ; Odd round key: replicate the last dword of the new even key and
       ; apply the S-box only (no rotation, no round constant).
   476         pshufd  xmm2, xmm1, 0ffh
   477         aesenclast  xmm2, xmm6
   478         movdqa  xmm4, xmm3
   479         pslldq  xmm4, 4
   480         pxor    xmm3, xmm4
   481         pslldq  xmm4, 4
   482         pxor    xmm3, xmm4
   483         pslldq  xmm4, 4
   484         pxor    xmm3, xmm4
   485         pxor    xmm3, xmm2
   486         movdqu  [16*3 + KS], xmm3
   488         lea KS, [32 + KS]
   489         dec ITR
   490         jnz Lenc_256_ks_loop
       ; Final even round key (14), round constant 40h. KS has advanced by
       ; 6*32 bytes, so [16*2 + KS] is offset 224 of the original buffer.
   492     movdqa  xmm2, xmm3
   493     pshufb  xmm2, xmm5
   494     aesenclast  xmm2, xmm0
   495     movdqa  xmm4, xmm1
   496     pslldq  xmm4, 4
   497     pxor    xmm1, xmm4
   498     pslldq  xmm4, 4
   499     pxor    xmm1, xmm4
   500     pslldq  xmm4, 4
   501     pxor    xmm1, xmm4
   502     pxor    xmm1, xmm2
   503     movdqu  [16*2 + KS], xmm1
   505     ret
   506 intel_aes_encrypt_init_256  ENDP
   508 ALIGN 16
       ; intel_aes_decrypt_init_256
       ; Builds the schedule consumed by the aesdec-based routines: runs
       ; intel_aes_encrypt_init_256, then reverses the order of the 15 round
       ; keys and applies aesimc to round keys 1..13. Round keys 0 and 14 are
       ; swapped without aesimc.
       ; Stack arguments: 0 = key pointer, 1 = round-key buffer.
       ; Clobbers eax, ecx, edx and the XMM registers used by the callee.
   509 intel_aes_decrypt_init_256  PROC
   510     mov     KEY,        [esp + 1*4 + 0*4]
   511     mov     KS,         [esp + 1*4 + 1*4]
       ; Pass (KEY, KS) on to the encrypt setup. The pushed copies double as
       ; saved values: the callee advances edx, so both are popped back
       ; afterwards.
   513     push    KS
   514     push    KEY
   516     call    intel_aes_encrypt_init_256
   518     pop     KEY
   519     pop     KS
       ; Swap the first and last round keys unchanged.
   521     movdqu  xmm0, [0*16 + KS]
   522     movdqu  xmm1, [14*16 + KS]
   523     movdqu  [14*16 + KS], xmm0
   524     movdqu  [0*16 + KS], xmm1
       ; Swap round keys k and 14-k for k = 1..6, applying aesimc to both.
   526     i = 1
   527     WHILE i LT 7
   528         movdqu  xmm0, [i*16 + KS]
   529         movdqu  xmm1, [(14-i)*16 + KS]
   531         aesimc  xmm0, xmm0
   532         aesimc  xmm1, xmm1
   534         movdqu  [(14-i)*16 + KS], xmm0
   535         movdqu  [i*16 + KS], xmm1
   537         i = i+1
   538     ENDM
       ; The middle round key (7) stays in place and only gets aesimc.
   540     movdqu  xmm0, [7*16 + KS]
   541     aesimc  xmm0, xmm0
   542     movdqu  [7*16 + KS], xmm0
   543     ret
   544 intel_aes_decrypt_init_256  ENDP
   548 gen_aes_cbc_enc_func MACRO rnds
   550 LOCAL   loop1
   551 LOCAL   bail
       ; Emits the body of a CBC encryption routine. CBC encryption is
       ; serial, so blocks are processed one at a time; a trailing partial
       ; block is ignored.
       ;   rnds - number of AES rounds; round keys 0..rnds are read
       ; Stack arguments used (0-based dword index): 0 = context, 1 = output,
       ; 4 = input, 5 = input length in bytes. Arguments 2 and 3 are not read.
       ; The 16 bytes at context offset 12 (44 - 32) are used as the chaining
       ; value: read on entry and overwritten with the last ciphertext block
       ; on exit.
       ; Returns 0 in eax. Clobbers ecx, edx, xmm0-xmm7; edi is saved and
       ; restored.
   553         push    inputLen
       ; 2*4 skips the saved edi and the return address.
   555         mov     ctx,    [esp + 2*4 + 0*4]
   556         mov     output,     [esp + 2*4 + 1*4]
   557         mov     input,      [esp + 2*4 + 4*4]
   558         mov     inputLen,   [esp + 2*4 + 5*4]
       ; From here on ctx points 44 bytes into the context; round key k is
       ; read from [k*16 + ctx].
       ; NOTE(review): 44 is presumably the offset of the expanded key inside
       ; the context structure - confirm against the C definition.
   560         lea     ctx, [44+ctx]
       ; xmm0 = chaining value (previous ciphertext block).
   562         movdqu  xmm0, [-32+ctx]
       ; Keep round keys 0..4 in xmm2..xmm6 for the whole loop; the remaining
       ; round keys are reloaded through xmm7 for every block.
   564         movdqu  xmm2, [0*16 + ctx]
   565         movdqu  xmm3, [1*16 + ctx]
   566         movdqu  xmm4, [2*16 + ctx]
   567         movdqu  xmm5, [3*16 + ctx]
   568         movdqu  xmm6, [4*16 + ctx]
   570 loop1:
   571         cmp     inputLen, 1*16
   572         jb      bail
       ; xmm0 = previous ciphertext XOR plaintext XOR round key 0.
   574         movdqu  xmm1, [input]
   575         pxor    xmm1, xmm2
   576         pxor    xmm0, xmm1
       ; Rounds 1..4 with the cached keys.
   578         aesenc  xmm0, xmm3
   579         aesenc  xmm0, xmm4
   580         aesenc  xmm0, xmm5
   581         aesenc  xmm0, xmm6
       ; Rounds 5 to rnds-1, unrolled at assembly time, then the final round.
   583         i = 5
   584     WHILE i LT rnds
   585             movdqu  xmm7, [i*16 + ctx]
   586             aesenc  xmm0, xmm7
   587             i = i+1
   588         ENDM
   589         movdqu  xmm7, [rnds*16 + ctx]
   590         aesenclast xmm0, xmm7
       ; xmm0 is both the output block and the next chaining value.
   592         movdqu  [output], xmm0
   594         lea input, [1*16 + input]
   595         lea output, [1*16 + output]
   596         sub inputLen, 1*16
   597         jmp loop1
   599 bail:
       ; Save the chaining value back into the context for the next call.
   600         movdqu  [-32+ctx], xmm0
   602         xor eax, eax
   603         pop inputLen
   604         ret
   606 ENDM
   608 gen_aes_cbc_dec_func MACRO rnds
   610 LOCAL   loop7
   611 LOCAL   loop1
   612 LOCAL   dec1
   613 LOCAL   bail
       ; Emits the body of a CBC decryption routine: seven blocks at a time
       ; while at least 7*16 bytes remain, then one block at a time. A
       ; trailing partial block is ignored.
       ;   rnds - number of AES rounds; round keys 0..rnds are read
       ; Stack arguments used (0-based dword index): 0 = context, 1 = output,
       ; 4 = input, 5 = input length in bytes. Arguments 2 and 3 are not read.
       ; The 16 bytes at context offset 12 (44 - 32) hold the chaining value
       ; and are updated with the last ciphertext block processed.
       ; Returns 0 in eax. Clobbers ecx, edx, xmm0-xmm7; edi is saved and
       ; restored.
   615         push    inputLen
       ; 2*4 skips the saved edi and the return address.
   617         mov     ctx,    [esp + 2*4 + 0*4]
   618         mov     output,     [esp + 2*4 + 1*4]
   619         mov     input,      [esp + 2*4 + 4*4]
   620         mov     inputLen,   [esp + 2*4 + 5*4]
       ; From here on ctx points 44 bytes into the context; round key k is
       ; read from [k*16 + ctx].
       ; NOTE(review): 44 is presumably the offset of the expanded key inside
       ; the context structure - confirm against the C definition.
   622         lea     ctx, [44+ctx]
       ; ---- seven blocks per iteration ----
   624 loop7:
   625         cmp     inputLen, 7*16
   626         jb      dec1
   628         movdqu  xmm0, [0*16 + input]
   629         movdqu  xmm1, [1*16 + input]
   630         movdqu  xmm2, [2*16 + input]
   631         movdqu  xmm3, [3*16 + input]
   632         movdqu  xmm4, [4*16 + input]
   633         movdqu  xmm5, [5*16 + input]
   634         movdqu  xmm6, [6*16 + input]
       ; Initial whitening: XOR round key 0 into all seven blocks.
   636         movdqu  xmm7, [0*16 + ctx]
   637         pxor    xmm0, xmm7
   638         pxor    xmm1, xmm7
   639         pxor    xmm2, xmm7
   640         pxor    xmm3, xmm7
   641         pxor    xmm4, xmm7
   642         pxor    xmm5, xmm7
   643         pxor    xmm6, xmm7
       ; Rounds 1 to rnds-1, unrolled at assembly time, then the final round.
   645         i = 1
   646         WHILE i LT rnds
   647             aes_dec_rnd i
   648             i = i+1
   649             ENDM
   650         aes_dec_last_rnd rnds
       ; CBC chaining: block 0 is XORed with the saved chaining value, block k
       ; with ciphertext block k-1. The ciphertext is re-read from input
       ; before anything is written to output, so these reads still see the
       ; ciphertext when output is the same buffer as input.
   652         movdqu  xmm7, [-32 + ctx]
   653         pxor    xmm0, xmm7
   654         movdqu  xmm7, [0*16 + input]
   655         pxor    xmm1, xmm7
   656         movdqu  xmm7, [1*16 + input]
   657         pxor    xmm2, xmm7
   658         movdqu  xmm7, [2*16 + input]
   659         pxor    xmm3, xmm7
   660         movdqu  xmm7, [3*16 + input]
   661         pxor    xmm4, xmm7
   662         movdqu  xmm7, [4*16 + input]
   663         pxor    xmm5, xmm7
   664         movdqu  xmm7, [5*16 + input]
   665         pxor    xmm6, xmm7
       ; xmm7 = last ciphertext block of this group = next chaining value.
   666         movdqu  xmm7, [6*16 + input]
   668         movdqu  [0*16 + output], xmm0
   669         movdqu  [1*16 + output], xmm1
   670         movdqu  [2*16 + output], xmm2
   671         movdqu  [3*16 + output], xmm3
   672         movdqu  [4*16 + output], xmm4
   673         movdqu  [5*16 + output], xmm5
   674         movdqu  [6*16 + output], xmm6
   675         movdqu  [-32 + ctx], xmm7
   677         lea input, [7*16 + input]
   678         lea output, [7*16 + output]
   679         sub inputLen, 7*16
   680         jmp loop7
       ; ---- one block per iteration; xmm3 carries the chaining value ----
   681 dec1:
   683         movdqu  xmm3, [-32 + ctx]
   685 loop1:
   686         cmp     inputLen, 1*16
   687         jb      bail
       ; xmm4 keeps a copy of the ciphertext block for the next iteration.
   689         movdqu  xmm0, [input]
   690         movdqa  xmm4, xmm0
   691         movdqu  xmm7, [0*16 + ctx]
   692         pxor    xmm0, xmm7
   694         i = 1
   695     WHILE i LT rnds
   696             movdqu  xmm7, [i*16 + ctx]
   697             aesdec  xmm0, xmm7
   698             i = i+1
   699         ENDM
   700         movdqu  xmm7, [rnds*16 + ctx]
   701         aesdeclast xmm0, xmm7
       ; plaintext = decrypted block XOR chaining value.
   702         pxor    xmm3, xmm0
   704         movdqu  [output], xmm3
   705         movdqa  xmm3, xmm4
   707         lea input, [1*16 + input]
   708         lea output, [1*16 + output]
   709         sub inputLen, 1*16
   710         jmp loop1
   712 bail:
       ; Save the chaining value back into the context for the next call.
   713         movdqu  [-32 + ctx], xmm3
   714         xor eax, eax
   715         pop     inputLen
   716         ret
   717 ENDM
   719 ALIGN 16
       ; CBC encryption entry point: gen_aes_cbc_enc_func expanded for
       ; 10 rounds, i.e. round keys 0..10.
   720 intel_aes_encrypt_cbc_128 PROC
   721 gen_aes_cbc_enc_func  10
   722 intel_aes_encrypt_cbc_128 ENDP
   724 ALIGN 16
       ; CBC encryption entry point: gen_aes_cbc_enc_func expanded for
       ; 12 rounds, i.e. round keys 0..12.
   725 intel_aes_encrypt_cbc_192 PROC
   726 gen_aes_cbc_enc_func  12
   727 intel_aes_encrypt_cbc_192 ENDP
   729 ALIGN 16
       ; CBC encryption entry point: gen_aes_cbc_enc_func expanded for
       ; 14 rounds, i.e. round keys 0..14.
   730 intel_aes_encrypt_cbc_256 PROC
   731 gen_aes_cbc_enc_func  14
   732 intel_aes_encrypt_cbc_256 ENDP
   734 ALIGN 16
       ; CBC decryption entry point: gen_aes_cbc_dec_func expanded for
       ; 10 rounds. Round keys are consumed in the order 0..10.
       ; NOTE(review): presumably expects the schedule laid out by
       ; intel_aes_decrypt_init_128 - confirm against the caller.
   735 intel_aes_decrypt_cbc_128 PROC
   736 gen_aes_cbc_dec_func  10
   737 intel_aes_decrypt_cbc_128 ENDP
   739 ALIGN 16
       ; CBC decryption entry point: gen_aes_cbc_dec_func expanded for
       ; 12 rounds. Round keys are consumed in the order 0..12.
       ; NOTE(review): presumably expects the schedule laid out by
       ; intel_aes_decrypt_init_192 - confirm against the caller.
   740 intel_aes_decrypt_cbc_192 PROC
   741 gen_aes_cbc_dec_func  12
   742 intel_aes_decrypt_cbc_192 ENDP
   744 ALIGN 16
       ; CBC decryption entry point: gen_aes_cbc_dec_func expanded for
       ; 14 rounds. Round keys are consumed in the order 0..14.
       ; NOTE(review): presumably expects the schedule laid out by
       ; intel_aes_decrypt_init_256 - confirm against the caller.
   745 intel_aes_decrypt_cbc_256 PROC
   746 gen_aes_cbc_dec_func  14
   747 intel_aes_decrypt_cbc_256 ENDP
   751 ctrCtx textequ <esi>             ; CTR macro: context pointer, later the host-order counter
   752 CTR textequ <ebx>                ; CTR macro: scratch for the byte-swapped counter word
   754 gen_aes_ctr_func MACRO rnds
   756 LOCAL   loop7
   757 LOCAL   loop1
   758 LOCAL   enc1
   759 LOCAL   bail
       ; Emits the body of a CTR-mode routine: seven blocks at a time while at
       ; least 7*16 bytes remain, then one block at a time. A trailing
       ; partial block is ignored.
       ;   rnds - number of AES rounds; round keys 0..rnds are read. The
       ;          seven-block path issues rounds 1..7 unconditionally, so
       ;          rnds must be at least 8.
       ; Stack arguments used (0-based dword index): 0 = CTR context,
       ; 1 = output, 4 = input, 5 = input length in bytes. Arguments 2 and 3
       ; are not read.
       ; CTR context fields as accessed here: dword at offset 4 = pointer to
       ; the AES context, 16 bytes at offset 8 = counter block.
       ; NOTE(review): confirm these offsets against the C structure.
       ; Only the last dword of the counter block is incremented, as a
       ; big-endian 32-bit value; it wraps without carrying into the other
       ; twelve bytes.
       ; Returns 0 in eax. Clobbers ecx, edx, xmm0-xmm7; edi, esi, ebx and
       ; ebp are saved and restored. The label enc1 is declared but unused.
   761         push    inputLen
   762         push    ctrCtx
   763         push    CTR
   764         push    ebp
       ; 4*5 skips the four saved registers and the return address.
   766         mov     ctrCtx, [esp + 4*5 + 0*4]
   767         mov     output, [esp + 4*5 + 1*4]
   768         mov     input,  [esp + 4*5 + 4*4]
   769         mov     inputLen, [esp + 4*5 + 5*4]
       ; ctx = AES context pointer + 44; round key k is read from
       ; [k*16 + ctx].
       ; NOTE(review): 44 is presumably the offset of the expanded key inside
       ; the AES context - confirm against the C definition.
   771         mov     ctx, [4+ctrCtx]
   772         lea     ctx, [44+ctx]
       ; ebp remembers the pre-allocation esp. Below it, a 16-byte-aligned
       ; scratch area of seven 16-byte slots holds upcoming counter blocks,
       ; each already XORed with round key 0.
   774         mov     ebp, esp
   775         sub     esp, 7*16
   776         and     esp, -16
       ; xmm0 = counter block XOR round key 0. ctrCtx is reused from here on
       ; as the host-order value of the last counter dword.
   778         movdqu  xmm0, [8+ctrCtx]
   779         mov     ctrCtx, [ctrCtx + 8 + 3*4]
   780         bswap   ctrCtx
   781         movdqu  xmm1, [ctx + 0*16]
   783         pxor    xmm0, xmm1
       ; Fill all seven slots with the whitened current counter ...
   785         movdqa  [esp + 0*16], xmm0
   786         movdqa  [esp + 1*16], xmm0
   787         movdqa  [esp + 2*16], xmm0
   788         movdqa  [esp + 3*16], xmm0
   789         movdqa  [esp + 4*16], xmm0
   790         movdqa  [esp + 5*16], xmm0
   791         movdqa  [esp + 6*16], xmm0
       ; ... then patch the last dword of slots 1..6 with counter+1 ..
       ; counter+6 (byte-swapped back and XORed with the last dword of round
       ; key 0).
   793         inc     ctrCtx
   794         mov     CTR, ctrCtx
   795         bswap   CTR
   796         xor     CTR, [ctx + 3*4]
   797         mov     [esp + 1*16 + 3*4], CTR
   799         inc     ctrCtx
   800         mov     CTR, ctrCtx
   801         bswap   CTR
   802         xor     CTR, [ctx + 3*4]
   803         mov     [esp + 2*16 + 3*4], CTR
   805         inc     ctrCtx
   806         mov     CTR, ctrCtx
   807         bswap   CTR
   808         xor     CTR, [ctx + 3*4]
   809         mov     [esp + 3*16 + 3*4], CTR
   811         inc     ctrCtx
   812         mov     CTR, ctrCtx
   813         bswap   CTR
   814         xor     CTR, [ctx + 3*4]
   815         mov     [esp + 4*16 + 3*4], CTR
   817         inc     ctrCtx
   818         mov     CTR, ctrCtx
   819         bswap   CTR
   820         xor     CTR, [ctx + 3*4]
   821         mov     [esp + 5*16 + 3*4], CTR
   823         inc     ctrCtx
   824         mov     CTR, ctrCtx
   825         bswap   CTR
   826         xor     CTR, [ctx + 3*4]
   827         mov     [esp + 6*16 + 3*4], CTR
       ; ---- seven blocks per iteration ----
   830 loop7:
   831         cmp     inputLen, 7*16
   832         jb      loop1
       ; Load the seven pre-whitened counter blocks.
   834         movdqu  xmm0, [0*16 + esp]
   835         movdqu  xmm1, [1*16 + esp]
   836         movdqu  xmm2, [2*16 + esp]
   837         movdqu  xmm3, [3*16 + esp]
   838         movdqu  xmm4, [4*16 + esp]
   839         movdqu  xmm5, [5*16 + esp]
   840         movdqu  xmm6, [6*16 + esp]
       ; Rounds 1..7, each interleaved with refreshing one scratch slot with
       ; the next counter value for the following group of blocks.
   842         i = 1
   843         WHILE i LE 7
   844             aes_rnd i
   846             inc     ctrCtx
   847             mov     CTR, ctrCtx
   848             bswap   CTR
   849             xor     CTR, [ctx + 3*4]
   850             mov     [esp + (i-1)*16 + 3*4], CTR
   852             i = i+1
   853         ENDM
       ; Remaining rounds 8 to rnds-1, then the final round.
   854         WHILE i LT rnds
   855             aes_rnd i
   856             i = i+1
   857             ENDM
   858         aes_last_rnd rnds
       ; XOR the key stream with the input. Each input block is read before
       ; any output block is written.
   860         movdqu  xmm7, [0*16 + input]
   861         pxor    xmm0, xmm7
   862         movdqu  xmm7, [1*16 + input]
   863         pxor    xmm1, xmm7
   864         movdqu  xmm7, [2*16 + input]
   865         pxor    xmm2, xmm7
   866         movdqu  xmm7, [3*16 + input]
   867         pxor    xmm3, xmm7
   868         movdqu  xmm7, [4*16 + input]
   869         pxor    xmm4, xmm7
   870         movdqu  xmm7, [5*16 + input]
   871         pxor    xmm5, xmm7
   872         movdqu  xmm7, [6*16 + input]
   873         pxor    xmm6, xmm7
   875         movdqu  [0*16 + output], xmm0
   876         movdqu  [1*16 + output], xmm1
   877         movdqu  [2*16 + output], xmm2
   878         movdqu  [3*16 + output], xmm3
   879         movdqu  [4*16 + output], xmm4
   880         movdqu  [5*16 + output], xmm5
   881         movdqu  [6*16 + output], xmm6
   883         lea input, [7*16 + input]
   884         lea output, [7*16 + output]
   885         sub inputLen, 7*16
   886         jmp loop7
       ; ---- one block per iteration ----
       ; Fewer than seven blocks remain, so at most six scratch slots are
       ; consumed here. esp itself walks the scratch area: each iteration
       ; takes the slot at [esp] and advances esp by 16.
   889 loop1:
   890         cmp     inputLen, 1*16
   891         jb      bail
   893         movdqu  xmm0, [esp]
   894         add     esp, 16
   896         i = 1
   897     WHILE i LT rnds
   898             movdqu  xmm7, [i*16 + ctx]
   899             aesenc  xmm0, xmm7
   900             i = i+1
   901         ENDM
   902         movdqu  xmm7, [rnds*16 + ctx]
   903         aesenclast xmm0, xmm7
   905         movdqu  xmm7, [input]
   906         pxor    xmm0, xmm7
   907         movdqu  [output], xmm0
   909         lea input, [1*16 + input]
   910         lea output, [1*16 + output]
   911         sub inputLen, 1*16
   912         jmp loop1
   914 bail:
       ; Reload the CTR context pointer through ebp (esp has moved). [esp] is
       ; the first unused counter slot; XORing round key 0 back out gives the
       ; plain counter block, which is stored for the next call.
   916         mov     ctrCtx, [ebp + 4*5 + 0*4]
   917         movdqu  xmm0, [esp]
   918         movdqu  xmm1, [ctx + 0*16]
   919         pxor    xmm0, xmm1
   920         movdqu  [8+ctrCtx], xmm0
       ; Return 0, release the scratch area and restore the saved registers.
   923         xor     eax, eax
   924         mov     esp, ebp
   925         pop     ebp
   926         pop     CTR
   927         pop     ctrCtx
   928         pop     inputLen
   929         ret
   930 ENDM
   933 ALIGN 16
       ; CTR-mode entry point: gen_aes_ctr_func expanded for 10 rounds,
       ; i.e. round keys 0..10.
   934 intel_aes_encrypt_ctr_128 PROC
   935 gen_aes_ctr_func  10
   936 intel_aes_encrypt_ctr_128 ENDP
   938 ALIGN 16
       ; CTR-mode entry point: gen_aes_ctr_func expanded for 12 rounds,
       ; i.e. round keys 0..12.
   939 intel_aes_encrypt_ctr_192 PROC
   940 gen_aes_ctr_func  12
   941 intel_aes_encrypt_ctr_192 ENDP
   943 ALIGN 16
       ; CTR-mode entry point: gen_aes_ctr_func expanded for 14 rounds,
       ; i.e. round keys 0..14.
   944 intel_aes_encrypt_ctr_256 PROC
   945 gen_aes_ctr_func  14
   946 intel_aes_encrypt_ctr_256 ENDP
   949 END

mercurial