security/nss/lib/freebl/intel-aes-x64-masm.asm

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 ; LICENSE:
     2 ; This submission to NSS is to be made available under the terms of the
     3 ; Mozilla Public License, v. 2.0. You can obtain one at
     4 ; http://mozilla.org/MPL/2.0/.
     5 ;###############################################################################
     6 ; Copyright(c) 2014, Intel Corp.
     7 ; Developers and authors:
     8 ; Shay Gueron and Vlad Krasnov
     9 ; Intel Corporation, Israel Development Centre, Haifa, Israel
    10 ; Please send feedback directly to crypto.feedback.alias@intel.com
    13 .DATA
       ; Constants for the key-expansion routines below. Each is one 16-byte
       ; vector holding the same dword four times. ALIGN 16 matters because the
       ; AES-128 path loads Lcon1, Lcon2 and Lmask with movdqa.
    14 ALIGN 16
    15 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh              ; pshufb control: source bytes 13,14,15,12 into every dword (AES-128 path)
    16 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h    ; pshufb control: source bytes 5,6,7,4 into every dword (AES-192 path)
    17 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh    ; same value as Lmask; used by the AES-256 path
    18 Lcon1 dd 1,1,1,1                    ; starting round constant; the routines double it with pslld after each use
    19 Lcon2 dd 1bh,1bh,1bh,1bh            ; round constant 1Bh, loaded for the last two AES-128 expansion steps
    21 .CODE
       ; Register aliases shared by the bulk ECB / CBC / CTR macros.
    23 ctx     textequ <rcx>        ; context pointer on entry; each routine re-points it at the round keys
    24 output  textequ <rdx>        ; destination pointer
    25 input   textequ <r8>         ; source pointer; loaded from the stack by each routine
    26 inputLen textequ <r9d>       ; remaining byte count (32-bit); also loaded from the stack
    29 aes_rnd MACRO i
       ; One AESENC round over the eight blocks held in xmm0..xmm7, using the
       ; 16-byte round key at [ctx + i*16]. xmm8 is overwritten with that key.
    30     movdqu  xmm8, [i*16 + ctx]
    31     aesenc  xmm0, xmm8
    32     aesenc  xmm1, xmm8
    33     aesenc  xmm2, xmm8
    34     aesenc  xmm3, xmm8
    35     aesenc  xmm4, xmm8
    36     aesenc  xmm5, xmm8
    37     aesenc  xmm6, xmm8
    38     aesenc  xmm7, xmm8
    39     ENDM
    41 aes_last_rnd MACRO i
       ; Final encryption round (AESENCLAST) over the eight blocks in
       ; xmm0..xmm7 with the round key at [ctx + i*16]. Clobbers xmm8.
    42     movdqu  xmm8, [i*16 + ctx]
    43     aesenclast  xmm0, xmm8
    44     aesenclast  xmm1, xmm8
    45     aesenclast  xmm2, xmm8
    46     aesenclast  xmm3, xmm8
    47     aesenclast  xmm4, xmm8
    48     aesenclast  xmm5, xmm8
    49     aesenclast  xmm6, xmm8
    50     aesenclast  xmm7, xmm8
    51     ENDM
    53 aes_dec_rnd MACRO i
       ; One AESDEC round over the eight blocks in xmm0..xmm7 with the round
       ; key at [ctx + i*16]. Clobbers xmm8.
    54     movdqu  xmm8, [i*16 + ctx]
    55     aesdec  xmm0, xmm8
    56     aesdec  xmm1, xmm8
    57     aesdec  xmm2, xmm8
    58     aesdec  xmm3, xmm8
    59     aesdec  xmm4, xmm8
    60     aesdec  xmm5, xmm8
    61     aesdec  xmm6, xmm8
    62     aesdec  xmm7, xmm8
    63     ENDM
    65 aes_dec_last_rnd MACRO i
       ; Final decryption round (AESDECLAST) over the eight blocks in
       ; xmm0..xmm7 with the round key at [ctx + i*16]. Clobbers xmm8.
    66     movdqu  xmm8, [i*16 + ctx]
    67     aesdeclast  xmm0, xmm8
    68     aesdeclast  xmm1, xmm8
    69     aesdeclast  xmm2, xmm8
    70     aesdeclast  xmm3, xmm8
    71     aesdeclast  xmm4, xmm8
    72     aesdeclast  xmm5, xmm8
    73     aesdeclast  xmm6, xmm8
    74     aesdeclast  xmm7, xmm8
    75     ENDM
    78 gen_aes_ecb_func MACRO enc, rnds
    80 LOCAL   loop8
    81 LOCAL   loop1
    82 LOCAL   bail
       ; Body of an AES-ECB bulk routine.
       ;   enc  : 1 selects the aesenc/aesenclast forms, any other value the
       ;          aesdec/aesdeclast forms.
       ;   rnds : number of rounds (10, 12 or 14 at the instantiations below).
       ; In:  rcx = context (round keys are read from offset 48), rdx = output,
       ;      [rsp+40] = input pointer, [rsp+48] = input length in bytes
       ;      (presumably the 5th and 6th arguments of the Microsoft x64
       ;      convention - confirm against the C prototype).
       ; Out: rax = 0. Only whole 16-byte blocks are processed; a trailing
       ;      partial block is left untouched.
       ; xmm6-xmm8 are spilled to the stack and restored before returning.
       ;
       ; The 32-bit load into inputLen zero-extends r9 on its own, so the
       ; register does not need to be cleared beforehand.
    85         mov     input,      [rsp + 1*8 + 8*4]
    86         mov     inputLen,   [rsp + 1*8 + 8*5]
    88         sub     rsp, 3*16
    90         movdqu  [rsp + 0*16], xmm6
    91         movdqu  [rsp + 1*16], xmm7
    92         movdqu  [rsp + 2*16], xmm8
    94         lea     ctx, [48+ctx]                   ; ctx now points at round key 0
    96 loop8:
       ; Main path: eight independent blocks per iteration.
    97         cmp     inputLen, 8*16
    98         jb      loop1
   100         movdqu  xmm0, [0*16 + input]
   101         movdqu  xmm1, [1*16 + input]
   102         movdqu  xmm2, [2*16 + input]
   103         movdqu  xmm3, [3*16 + input]
   104         movdqu  xmm4, [4*16 + input]
   105         movdqu  xmm5, [5*16 + input]
   106         movdqu  xmm6, [6*16 + input]
   107         movdqu  xmm7, [7*16 + input]
   109         movdqu  xmm8, [0*16 + ctx]              ; initial AddRoundKey with round key 0
   110         pxor    xmm0, xmm8
   111         pxor    xmm1, xmm8
   112         pxor    xmm2, xmm8
   113         pxor    xmm3, xmm8
   114         pxor    xmm4, xmm8
   115         pxor    xmm5, xmm8
   116         pxor    xmm6, xmm8
   117         pxor    xmm7, xmm8
       ; Assembly-time selection of the round macros / instructions. These
       ; text macros are also used by the single-block loop further down.
   119 IF enc eq 1
   120         rnd textequ <aes_rnd>
   121         lastrnd textequ <aes_last_rnd>
   122         aesinst textequ <aesenc>
   123         aeslastinst textequ <aesenclast>
   124 ELSE
   125         rnd textequ <aes_dec_rnd>
   126         lastrnd textequ <aes_dec_last_rnd>
   127         aesinst textequ <aesdec>
   128         aeslastinst textequ <aesdeclast>
   129 ENDIF
   131         i = 1
   132         WHILE i LT rnds
   133             rnd i
   134             i = i+1
   135             ENDM
   136         lastrnd rnds
   138         movdqu  [0*16 + output], xmm0
   139         movdqu  [1*16 + output], xmm1
   140         movdqu  [2*16 + output], xmm2
   141         movdqu  [3*16 + output], xmm3
   142         movdqu  [4*16 + output], xmm4
   143         movdqu  [5*16 + output], xmm5
   144         movdqu  [6*16 + output], xmm6
   145         movdqu  [7*16 + output], xmm7
   147         lea input, [8*16 + input]
   148         lea output, [8*16 + output]
   149         sub inputLen, 8*16
   150         jmp loop8
   152 loop1:
       ; Tail path: fewer than eight blocks remain; handle one block at a time.
   153         cmp     inputLen, 1*16
   154         jb      bail
   156         movdqu  xmm0, [input]
   157         movdqu  xmm7, [0*16 + ctx]
   158         pxor    xmm0, xmm7
   160         i = 1
   161     WHILE i LT rnds
   162             movdqu  xmm7, [i*16 + ctx]
   163             aesinst  xmm0, xmm7
   164             i = i+1
   165         ENDM
   166         movdqu  xmm7, [rnds*16 + ctx]
   167         aeslastinst xmm0, xmm7
   169         movdqu  [output], xmm0
   171         lea input, [1*16 + input]
   172         lea output, [1*16 + output]
   173         sub inputLen, 1*16
   174         jmp loop1
   176 bail:
   177         xor     eax, eax                        ; return 0; the 32-bit form also clears the upper half of rax
   179         movdqu  xmm6, [rsp + 0*16]
   180         movdqu  xmm7, [rsp + 1*16]
   181         movdqu  xmm8, [rsp + 2*16]
   182         add     rsp, 3*16
   183         ret
   184 ENDM
   186 intel_aes_encrypt_ecb_128 PROC
       ; ECB entry points: each PROC body is one expansion of gen_aes_ecb_func
       ; (first argument 1 = encrypt, 0 = decrypt; second = round count).
   187 gen_aes_ecb_func 1, 10
   188 intel_aes_encrypt_ecb_128 ENDP
   190 intel_aes_encrypt_ecb_192 PROC
   191 gen_aes_ecb_func 1, 12
   192 intel_aes_encrypt_ecb_192 ENDP
   194 intel_aes_encrypt_ecb_256 PROC
   195 gen_aes_ecb_func 1, 14
   196 intel_aes_encrypt_ecb_256 ENDP
   198 intel_aes_decrypt_ecb_128 PROC
   199 gen_aes_ecb_func 0, 10
   200 intel_aes_decrypt_ecb_128 ENDP
   202 intel_aes_decrypt_ecb_192 PROC
   203 gen_aes_ecb_func 0, 12
   204 intel_aes_decrypt_ecb_192 ENDP
   206 intel_aes_decrypt_ecb_256 PROC
   207 gen_aes_ecb_func 0, 14
   208 intel_aes_decrypt_ecb_256 ENDP
   211 KEY textequ <rcx>       ; key-setup routines: pointer to the raw key
   212 KS  textequ <rdx>       ; key-setup routines: pointer to the round-key (key schedule) output
   213 ITR textequ <r8>        ; scratch register: constant address, then loop counter
   215 intel_aes_encrypt_init_128  PROC
       ; Expand a 16-byte key into the 11 AES-128 round keys.
       ; In:  rcx (KEY) = 16-byte key, rdx (KS) = output; 176 bytes are written.
       ; Clobbers xmm0-xmm4, r8 and rdx (rdx ends up advanced by 128).
       ;
       ; Each step: pshufb copies the rotated last word into all four dwords,
       ; so the ShiftRows part of aesenclast has no effect and the instruction
       ; yields SubBytes(rotated word) xor round constant in every dword. The
       ; pslldq/pxor chain forms the running XOR of the previous key's words.
   217     movdqu  xmm1, [KEY]
   218     movdqu  [KS], xmm1                  ; round key 0 is the key itself
   219     movdqa  xmm2, xmm1
   221     lea ITR, Lcon1
   222     movdqa  xmm0, [ITR]                 ; xmm0 = current round constant
   223     lea ITR, Lmask
   224     movdqa  xmm4, [ITR]                 ; xmm4 = pshufb control
   226     mov ITR, 8                          ; eight steps use constants 1..80h
   228 Lenc_128_ks_loop:
   229         lea KS, [16 + KS]
   230         dec ITR                         ; sets ZF for the jne below; nothing in between writes flags
   232         pshufb  xmm2, xmm4
   233         aesenclast  xmm2, xmm0
   234         pslld   xmm0, 1                 ; next round constant
   235         movdqa  xmm3, xmm1
   236         pslldq  xmm3, 4
   237         pxor    xmm1, xmm3
   238         pslldq  xmm3, 4
   239         pxor    xmm1, xmm3
   240         pslldq  xmm3, 4
   241         pxor    xmm1, xmm3
   242         pxor    xmm1, xmm2
   243         movdqu  [KS], xmm1
   244         movdqa  xmm2, xmm1
   246         jne Lenc_128_ks_loop
       ; Last two steps use the constants 1Bh and 36h, taken from Lcon2.
   248     lea ITR, Lcon2
   249     movdqa  xmm0, [ITR]
   251     pshufb  xmm2, xmm4
   252     aesenclast  xmm2, xmm0
   253     pslld   xmm0, 1                     ; 1Bh -> 36h
   254     movdqa  xmm3, xmm1
   255     pslldq  xmm3, 4
   256     pxor    xmm1, xmm3
   257     pslldq  xmm3, 4
   258     pxor    xmm1, xmm3
   259     pslldq  xmm3, 4
   260     pxor    xmm1, xmm3
   261     pxor    xmm1, xmm2
   262     movdqu  [16 + KS], xmm1             ; round key 9
   263     movdqa  xmm2, xmm1
   265     pshufb  xmm2, xmm4
   266     aesenclast  xmm2, xmm0
   267     movdqa  xmm3, xmm1
   268     pslldq  xmm3, 4
   269     pxor    xmm1, xmm3
   270     pslldq  xmm3, 4
   271     pxor    xmm1, xmm3
   272     pslldq  xmm3, 4
   273     pxor    xmm1, xmm3
   274     pxor    xmm1, xmm2
   275     movdqu  [32 + KS], xmm1             ; round key 10
   276     movdqa  xmm2, xmm1
   278     ret
   279 intel_aes_encrypt_init_128  ENDP
   282 intel_aes_decrypt_init_128  PROC
       ; Build the AES-128 decryption schedule at KS: run the encryption
       ; expansion, then reverse the order of the 11 round keys and apply
       ; aesimc to every key except the first and the last.
       ; In: rcx (KEY) = 16-byte key, rdx (KS) = output (176 bytes).
       ; Clobbers xmm0-xmm4 and r8 (via the callee).
       ; NOTE(review): the call below is made without reserving shadow space
       ; or realigning rsp; that is only safe because the callee in this file
       ; never touches the stack.
   284     push    KS                          ; the callee advances rdx, so keep the original
   285     push    KEY
   287     call    intel_aes_encrypt_init_128
   289     pop     KEY
   290     pop     KS
       ; Swap round keys 0 and 10 unchanged.
   292     movdqu  xmm0, [0*16 + KS]
   293     movdqu  xmm1, [10*16 + KS]
   294     movdqu  [10*16 + KS], xmm0
   295     movdqu  [0*16 + KS], xmm1
       ; Swap keys i and 10-i (i = 1..4), passing each through aesimc.
   297     i = 1
   298     WHILE i LT 5
   299         movdqu  xmm0, [i*16 + KS]
   300         movdqu  xmm1, [(10-i)*16 + KS]
   302         aesimc  xmm0, xmm0
   303         aesimc  xmm1, xmm1
   305         movdqu  [(10-i)*16 + KS], xmm0
   306         movdqu  [i*16 + KS], xmm1
   308         i = i+1
   309     ENDM
       ; The middle key (5) stays in place and only needs aesimc.
   311     movdqu  xmm0, [5*16 + KS]
   312     aesimc  xmm0, xmm0
   313     movdqu  [5*16 + KS], xmm0
   314     ret
   315 intel_aes_decrypt_init_128  ENDP
   318 intel_aes_encrypt_init_192  PROC
       ; Expand a 24-byte key into the 13 AES-192 round keys.
       ; In:  rcx (KEY) = 24-byte key, rdx (KS) = output; 208 bytes are written.
       ; Clobbers xmm0-xmm5, r8 and rdx (rdx ends up advanced by 192).
       ; xmm6/xmm7 are used as scratch and therefore spilled and restored.
       ;
       ; State: xmm1 = four most recent "wide" key words, low half of xmm3 =
       ; the following two words, xmm5 = two words still waiting to be paired
       ; into a 16-byte round key. Each loop iteration performs two expansion
       ; steps and emits three round keys (48 bytes); four iterations produce
       ; round keys 1..12.
   320     sub     rsp, 16*2
   321     movdqu  [16*0 + rsp], xmm6
   322     movdqu  [16*1 + rsp], xmm7
   324     movdqu  xmm1, [KEY]                 ; key bytes 0..15
   325     mov     ITR, [16 + KEY]             ; key bytes 16..23 via r8
   326     movd    xmm3, ITR
   328     movdqu  [KS], xmm1                  ; round key 0
   329     movdqa  xmm5, xmm3
   331     lea ITR, Lcon1
   332     movdqu  xmm0, [ITR]                 ; xmm0 = current round constant
   333     lea ITR, Lmask192
   334     movdqu  xmm4, [ITR]                 ; xmm4 = pshufb control
   336     mov ITR, 4
   338 Lenc_192_ks_loop:
       ; First expansion step of the iteration.
   339         movdqa  xmm2, xmm3
   340         pshufb  xmm2, xmm4
   341         aesenclast xmm2, xmm0
   342         pslld   xmm0, 1
   344         movdqa  xmm6, xmm1
   345         movdqa  xmm7, xmm3
   346         pslldq  xmm6, 4
   347         pslldq  xmm7, 4
   348         pxor    xmm1, xmm6
   349         pxor    xmm3, xmm7
   350         pslldq  xmm6, 4
   351         pxor    xmm1, xmm6
   352         pslldq  xmm6, 4
   353         pxor    xmm1, xmm6
   354         pxor    xmm1, xmm2
   355         pshufd  xmm2, xmm1, 0ffh        ; broadcast the newest word of xmm1
   356         pxor    xmm3, xmm2
       ; Repack the 6-word groups into 16-byte round keys.
   358         movdqa  xmm6, xmm1
   359         shufpd  xmm5, xmm1, 00h         ; pending two words + low half of xmm1
   360         shufpd  xmm6, xmm3, 01h         ; high half of xmm1 + low half of xmm3
   362         movdqu  [16 + KS], xmm5
   363         movdqu  [32 + KS], xmm6
       ; Second expansion step of the iteration.
   365         movdqa  xmm2, xmm3
   366         pshufb  xmm2, xmm4
   367         aesenclast  xmm2, xmm0
   368         pslld   xmm0, 1
   370         movdqa  xmm6, xmm1
   371         movdqa  xmm7, xmm3
   372         pslldq  xmm6, 4
   373         pslldq  xmm7, 4
   374         pxor    xmm1, xmm6
   375         pxor    xmm3, xmm7
   376         pslldq  xmm6, 4
   377         pxor    xmm1, xmm6
   378         pslldq  xmm6, 4
   379         pxor    xmm1, xmm6
   380         pxor    xmm1, xmm2
   381         pshufd  xmm2, xmm1, 0ffh
   382         pxor    xmm3, xmm2
   384         movdqu  [48 + KS], xmm1
   385         movdqa  xmm5, xmm3              ; carry two words into the next iteration
   387         lea KS, [48 + KS]
   389         dec ITR
   390         jnz Lenc_192_ks_loop
       ; Round key 12 (offset 192) was the last store inside the loop, so the
       ; schedule is complete here. The two leftover words in xmm5 lie beyond
       ; it and are deliberately not written out, which keeps the routine
       ; within the 208-byte schedule.
   394     movdqu  xmm7, [16*1 + rsp]
   395     movdqu  xmm6, [16*0 + rsp]
   396     add rsp, 16*2
   397     ret
   398 intel_aes_encrypt_init_192  ENDP
   400 intel_aes_decrypt_init_192  PROC
       ; Build the AES-192 decryption schedule at KS: run the encryption
       ; expansion, then reverse the order of the 13 round keys and apply
       ; aesimc to every key except the first and the last.
       ; In: rcx (KEY) = 24-byte key, rdx (KS) = output.
       ; NOTE(review): the call below reserves no shadow space and does not
       ; realign rsp; the callee in this file only uses movdqu on its own
       ; stack area, so it tolerates this.
   401     push    KS                          ; the callee advances rdx, so keep the original
   402     push    KEY
   404     call    intel_aes_encrypt_init_192
   406     pop     KEY
   407     pop     KS
       ; Swap round keys 0 and 12 unchanged.
   409     movdqu  xmm0, [0*16 + KS]
   410     movdqu  xmm1, [12*16 + KS]
   411     movdqu  [12*16 + KS], xmm0
   412     movdqu  [0*16 + KS], xmm1
       ; Swap keys i and 12-i (i = 1..5), passing each through aesimc.
   414     i = 1
   415     WHILE i LT 6
   416         movdqu  xmm0, [i*16 + KS]
   417         movdqu  xmm1, [(12-i)*16 + KS]
   419         aesimc  xmm0, xmm0
   420         aesimc  xmm1, xmm1
   422         movdqu  [(12-i)*16 + KS], xmm0
   423         movdqu  [i*16 + KS], xmm1
   425         i = i+1
   426     ENDM
       ; The middle key (6) stays in place and only needs aesimc.
   428     movdqu  xmm0, [6*16 + KS]
   429     aesimc  xmm0, xmm0
   430     movdqu  [6*16 + KS], xmm0
   431     ret
   432 intel_aes_decrypt_init_192  ENDP
   435 intel_aes_encrypt_init_256  PROC
       ; Expand a 32-byte key into the 15 AES-256 round keys.
       ; In:  rcx (KEY) = 32-byte key, rdx (KS) = output; 240 bytes are written.
       ; Clobbers xmm0-xmm5, r8 and rdx (rdx ends up advanced by 192).
       ; Only xmm6 is used beyond the volatile registers, so it is the only
       ; register spilled.
       ;
       ; xmm1 / xmm3 hold the two most recent round keys. Each loop iteration
       ; derives the next even key (rotate + SubBytes + round constant) and the
       ; next odd key (SubBytes only, via aesenclast with an all-zero key).
   436     sub     rsp, 16*1
   437     movdqu  [16*0 + rsp], xmm6
   440     movdqu  xmm1, [16*0 + KEY]
   441     movdqu  xmm3, [16*1 + KEY]
   443     movdqu  [16*0 + KS], xmm1           ; round key 0
   444     movdqu  [16*1 + KS], xmm3           ; round key 1
   446     lea ITR, Lcon1
   447     movdqu  xmm0, [ITR]                 ; xmm0 = current round constant
   448     lea ITR, Lmask256
   449     movdqu  xmm5, [ITR]                 ; xmm5 = pshufb control
   451     pxor    xmm6, xmm6                  ; zero "round key" for the SubBytes-only step
   453     mov ITR, 6
   455 Lenc_256_ks_loop:
       ; Even round key: derived from xmm1 and the rotated last word of xmm3.
   457         movdqa  xmm2, xmm3
   458         pshufb  xmm2, xmm5
   459         aesenclast  xmm2, xmm0
   460         pslld   xmm0, 1                 ; next round constant
   461         movdqa  xmm4, xmm1
   462         pslldq  xmm4, 4
   463         pxor    xmm1, xmm4
   464         pslldq  xmm4, 4
   465         pxor    xmm1, xmm4
   466         pslldq  xmm4, 4
   467         pxor    xmm1, xmm4
   468         pxor    xmm1, xmm2
   469         movdqu  [16*2 + KS], xmm1
       ; Odd round key: derived from xmm3 and the last word of the new xmm1.
   471         pshufd  xmm2, xmm1, 0ffh        ; broadcast without rotation
   472         aesenclast  xmm2, xmm6
   473         movdqa  xmm4, xmm3
   474         pslldq  xmm4, 4
   475         pxor    xmm3, xmm4
   476         pslldq  xmm4, 4
   477         pxor    xmm3, xmm4
   478         pslldq  xmm4, 4
   479         pxor    xmm3, xmm4
   480         pxor    xmm3, xmm2
   481         movdqu  [16*3 + KS], xmm3
   483         lea KS, [32 + KS]
   484         dec ITR
   485         jnz Lenc_256_ks_loop
       ; One more even step produces round key 14.
   487     movdqa  xmm2, xmm3
   488     pshufb  xmm2, xmm5
   489     aesenclast  xmm2, xmm0
   490     movdqa  xmm4, xmm1
   491     pslldq  xmm4, 4
   492     pxor    xmm1, xmm4
   493     pslldq  xmm4, 4
   494     pxor    xmm1, xmm4
   495     pslldq  xmm4, 4
   496     pxor    xmm1, xmm4
   497     pxor    xmm1, xmm2
   498     movdqu  [16*2 + KS], xmm1
   501     movdqu  xmm6, [16*0 + rsp]
   502     add rsp, 16*1
   503     ret
   505 intel_aes_encrypt_init_256  ENDP
   508 intel_aes_decrypt_init_256  PROC
       ; Build the AES-256 decryption schedule at KS: run the encryption
       ; expansion, then reverse the order of the 15 round keys and apply
       ; aesimc to every key except the first and the last.
       ; In: rcx (KEY) = 32-byte key, rdx (KS) = output.
       ; NOTE(review): the call below reserves no shadow space and does not
       ; realign rsp; the callee in this file only uses movdqu on its own
       ; stack area, so it tolerates this.
   509     push    KS                          ; the callee advances rdx, so keep the original
   510     push    KEY
   512     call    intel_aes_encrypt_init_256
   514     pop     KEY
   515     pop     KS
       ; Swap round keys 0 and 14 unchanged.
   517     movdqu  xmm0, [0*16 + KS]
   518     movdqu  xmm1, [14*16 + KS]
   519     movdqu  [14*16 + KS], xmm0
   520     movdqu  [0*16 + KS], xmm1
       ; Swap keys i and 14-i (i = 1..6), passing each through aesimc.
   522     i = 1
   523     WHILE i LT 7
   524         movdqu  xmm0, [i*16 + KS]
   525         movdqu  xmm1, [(14-i)*16 + KS]
   527         aesimc  xmm0, xmm0
   528         aesimc  xmm1, xmm1
   530         movdqu  [(14-i)*16 + KS], xmm0
   531         movdqu  [i*16 + KS], xmm1
   533         i = i+1
   534     ENDM
       ; The middle key (7) stays in place and only needs aesimc.
   536     movdqu  xmm0, [7*16 + KS]
   537     aesimc  xmm0, xmm0
   538     movdqu  [7*16 + KS], xmm0
   539     ret
   540 intel_aes_decrypt_init_256  ENDP
   544 gen_aes_cbc_enc_func MACRO rnds
   546 LOCAL   loop1
   547 LOCAL   bail
       ; Body of an AES-CBC encryption routine; rnds = number of rounds.
       ; In:  rcx = context, rdx = output, [rsp+40] = input pointer,
       ;      [rsp+48] = input length in bytes.
       ; Out: rax = 0; the 16 bytes at context offset 16 (the chaining block)
       ;      are replaced with the last ciphertext block produced.
       ; CBC encryption is inherently serial, so blocks are handled one at a
       ; time. Round keys 0..5 are kept in xmm2..xmm7; the rest are reloaded
       ; through xmm8 on every block. xmm6-xmm8 are spilled and restored.
   549         mov     input,      [rsp + 1*8 + 8*4]
   550         mov     inputLen,   [rsp + 1*8 + 8*5]
   552         sub     rsp, 3*16
   554         movdqu  [rsp + 0*16], xmm6
   555         movdqu  [rsp + 1*16], xmm7
   556         movdqu  [rsp + 2*16], xmm8
   558         lea     ctx, [48+ctx]                   ; ctx now points at round key 0
   560         movdqu  xmm0, [-32+ctx]                 ; xmm0 = chaining block (context offset 16)
   562         movdqu  xmm2, [0*16 + ctx]
   563         movdqu  xmm3, [1*16 + ctx]
   564         movdqu  xmm4, [2*16 + ctx]
   565         movdqu  xmm5, [3*16 + ctx]
   566         movdqu  xmm6, [4*16 + ctx]
   567         movdqu  xmm7, [5*16 + ctx]
   569 loop1:
   570         cmp     inputLen, 1*16
   571         jb      bail
   573         movdqu  xmm1, [input]
   574         pxor    xmm1, xmm2                      ; plaintext xor round key 0
   575         pxor    xmm0, xmm1                      ; ... xor previous ciphertext
   577         aesenc  xmm0, xmm3
   578         aesenc  xmm0, xmm4
   579         aesenc  xmm0, xmm5
   580         aesenc  xmm0, xmm6
   581         aesenc  xmm0, xmm7
   583         i = 6
   584     WHILE i LT rnds
   585             movdqu  xmm8, [i*16 + ctx]
   586             aesenc  xmm0, xmm8
   587             i = i+1
   588         ENDM
   589         movdqu  xmm8, [rnds*16 + ctx]
   590         aesenclast xmm0, xmm8
   592         movdqu  [output], xmm0                  ; xmm0 stays live as the next chaining value
   594         lea input, [1*16 + input]
   595         lea output, [1*16 + output]
   596         sub inputLen, 1*16
   597         jmp loop1
   599 bail:
   600         movdqu  [-32+ctx], xmm0                 ; persist the chaining block for the next call
   602         xor rax, rax
   604         movdqu  xmm6, [rsp + 0*16]
   605         movdqu  xmm7, [rsp + 1*16]
   606         movdqu  xmm8, [rsp + 2*16]
   607         add     rsp, 3*16
   608         ret
   610 ENDM
   612 gen_aes_cbc_dec_func MACRO rnds
   614 LOCAL   loop8
   615 LOCAL   loop1
   616 LOCAL   dec1
   617 LOCAL   bail
       ; Body of an AES-CBC decryption routine; rnds = number of rounds.
       ; In:  rcx = context (decryption round keys read from offset 48),
       ;      rdx = output, [rsp+40] = input pointer, [rsp+48] = input length.
       ; Out: rax = 0; the chaining block at context offset 16 is replaced with
       ;      the last ciphertext block consumed.
       ; Eight blocks are decrypted per iteration while at least 128 bytes
       ; remain, then single blocks. xmm6-xmm8 are spilled and restored.
   619         mov     input,      [rsp + 1*8 + 8*4]
   620         mov     inputLen,   [rsp + 1*8 + 8*5]
   622         sub     rsp, 3*16
   624         movdqu  [rsp + 0*16], xmm6
   625         movdqu  [rsp + 1*16], xmm7
   626         movdqu  [rsp + 2*16], xmm8
   628         lea     ctx, [48+ctx]                   ; ctx now points at round key 0
   630 loop8:
   631         cmp     inputLen, 8*16
   632         jb      dec1
   634         movdqu  xmm0, [0*16 + input]
   635         movdqu  xmm1, [1*16 + input]
   636         movdqu  xmm2, [2*16 + input]
   637         movdqu  xmm3, [3*16 + input]
   638         movdqu  xmm4, [4*16 + input]
   639         movdqu  xmm5, [5*16 + input]
   640         movdqu  xmm6, [6*16 + input]
   641         movdqu  xmm7, [7*16 + input]
   643         movdqu  xmm8, [0*16 + ctx]
   644         pxor    xmm0, xmm8
   645         pxor    xmm1, xmm8
   646         pxor    xmm2, xmm8
   647         pxor    xmm3, xmm8
   648         pxor    xmm4, xmm8
   649         pxor    xmm5, xmm8
   650         pxor    xmm6, xmm8
   651         pxor    xmm7, xmm8
   653         i = 1
   654         WHILE i LT rnds
   655             aes_dec_rnd i
   656             i = i+1
   657             ENDM
   658         aes_dec_last_rnd rnds
       ; XOR each decrypted block with the ciphertext block that preceded it:
       ; the stored chaining block for block 0, re-read input for the rest.
   660         movdqu  xmm8, [-32 + ctx]
   661         pxor    xmm0, xmm8
   662         movdqu  xmm8, [0*16 + input]
   663         pxor    xmm1, xmm8
   664         movdqu  xmm8, [1*16 + input]
   665         pxor    xmm2, xmm8
   666         movdqu  xmm8, [2*16 + input]
   667         pxor    xmm3, xmm8
   668         movdqu  xmm8, [3*16 + input]
   669         pxor    xmm4, xmm8
   670         movdqu  xmm8, [4*16 + input]
   671         pxor    xmm5, xmm8
   672         movdqu  xmm8, [5*16 + input]
   673         pxor    xmm6, xmm8
   674         movdqu  xmm8, [6*16 + input]
   675         pxor    xmm7, xmm8
   676         movdqu  xmm8, [7*16 + input]            ; last ciphertext block, read before any output is written
   678         movdqu  [0*16 + output], xmm0
   679         movdqu  [1*16 + output], xmm1
   680         movdqu  [2*16 + output], xmm2
   681         movdqu  [3*16 + output], xmm3
   682         movdqu  [4*16 + output], xmm4
   683         movdqu  [5*16 + output], xmm5
   684         movdqu  [6*16 + output], xmm6
   685         movdqu  [7*16 + output], xmm7
   686         movdqu  [-32 + ctx], xmm8               ; becomes the chaining block for the next batch
   688         lea input, [8*16 + input]
   689         lea output, [8*16 + output]
   690         sub inputLen, 8*16
   691         jmp loop8
   692 dec1:
   694         movdqu  xmm3, [-32 + ctx]               ; xmm3 = chaining block for the single-block loop
   696 loop1:
   697         cmp     inputLen, 1*16
   698         jb      bail
   700         movdqu  xmm0, [input]
   701         movdqa  xmm4, xmm0                      ; keep the ciphertext: it is the next chaining block
   702         movdqu  xmm7, [0*16 + ctx]
   703         pxor    xmm0, xmm7
   705         i = 1
   706     WHILE i LT rnds
   707             movdqu  xmm7, [i*16 + ctx]
   708             aesdec  xmm0, xmm7
   709             i = i+1
   710         ENDM
   711         movdqu  xmm7, [rnds*16 + ctx]
   712         aesdeclast xmm0, xmm7
   713         pxor    xmm3, xmm0                      ; plaintext = decrypted block xor chaining block
   715         movdqu  [output], xmm3
   716         movdqa  xmm3, xmm4
   718         lea input, [1*16 + input]
   719         lea output, [1*16 + output]
   720         sub inputLen, 1*16
   721         jmp loop1
   723 bail:
   724         movdqu  [-32 + ctx], xmm3               ; persist the chaining block for the next call
   725         xor rax, rax
   727         movdqu  xmm6, [rsp + 0*16]
   728         movdqu  xmm7, [rsp + 1*16]
   729         movdqu  xmm8, [rsp + 2*16]
   730         add     rsp, 3*16
   731         ret
   732 ENDM
   734 intel_aes_encrypt_cbc_128 PROC
       ; CBC entry points: each PROC body is one expansion of the CBC encrypt
       ; or decrypt macro with the round count for that key size.
   735 gen_aes_cbc_enc_func  10
   736 intel_aes_encrypt_cbc_128 ENDP
   738 intel_aes_encrypt_cbc_192 PROC
   739 gen_aes_cbc_enc_func  12
   740 intel_aes_encrypt_cbc_192 ENDP
   742 intel_aes_encrypt_cbc_256 PROC
   743 gen_aes_cbc_enc_func  14
   744 intel_aes_encrypt_cbc_256 ENDP
   746 intel_aes_decrypt_cbc_128 PROC
   747 gen_aes_cbc_dec_func  10
   748 intel_aes_decrypt_cbc_128 ENDP
   750 intel_aes_decrypt_cbc_192 PROC
   751 gen_aes_cbc_dec_func  12
   752 intel_aes_decrypt_cbc_192 ENDP
   754 intel_aes_decrypt_cbc_256 PROC
   755 gen_aes_cbc_dec_func  14
   756 intel_aes_decrypt_cbc_256 ENDP
   760 ctrCtx textequ <r10>        ; CTR routines: copy of the context pointer passed in rcx
   761 CTR textequ <r11d>          ; CTR routines: scratch for the byte-swapped, key-masked counter word
   762 CTRSave textequ <eax>       ; CTR routines: running counter word in native byte order
   764 gen_aes_ctr_func MACRO rnds
   766 LOCAL   loop8
   767 LOCAL   loop1
   768 LOCAL   enc1
   769 LOCAL   bail
       ; Body of an AES-CTR routine; rnds = number of rounds.
       ; In:  rcx = CTR context, rdx = output, [rsp+40] = input pointer,
       ;      [rsp+48] = input length in bytes.
       ;      The qword at CTR-context offset 8 is a pointer whose target holds
       ;      the round keys at offset 48 (presumably the AES context - confirm);
       ;      the 16-byte counter block lives at CTR-context offset 16.
       ; Out: rax = 0; the counter block in the context is advanced by the
       ;      number of blocks processed.
       ; Only the last 4 bytes of the counter block are incremented (as a
       ; big-endian 32-bit value, wrapping without carry into the other bytes).
       ; Only whole 16-byte blocks are processed.
       ;
       ; Eight counter blocks, already XORed with round key 0, are kept in a
       ; 16-byte-aligned stack area so that the first AES step is free.
   771         mov     input,      [rsp + 8*1 + 4*8]
   772         mov     inputLen,   [rsp + 8*1 + 5*8]
   774         mov     ctrCtx, ctx
   775         mov     ctx, [8+ctrCtx]
   776         lea     ctx, [48+ctx]                   ; ctx now points at round key 0
   778         sub     rsp, 3*16
   779         movdqu  [rsp + 0*16], xmm6
   780         movdqu  [rsp + 1*16], xmm7
   781         movdqu  [rsp + 2*16], xmm8
   784         push    rbp
   785         mov     rbp, rsp                        ; rbp remembers rsp; rsp itself moves in the tail loop
   786         sub     rsp, 8*16
   787         and     rsp, -16                        ; align the counter area for the movdqa stores below
   790         movdqu  xmm0, [16+ctrCtx]
   791         mov     CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
   792         bswap   CTRSave                         ; counter word as a native integer
   793         movdqu  xmm1, [ctx + 0*16]
   795         pxor    xmm0, xmm1                      ; counter block xor round key 0
   797         movdqa  [rsp + 0*16], xmm0
   798         movdqa  [rsp + 1*16], xmm0
   799         movdqa  [rsp + 2*16], xmm0
   800         movdqa  [rsp + 3*16], xmm0
   801         movdqa  [rsp + 4*16], xmm0
   802         movdqa  [rsp + 5*16], xmm0
   803         movdqa  [rsp + 6*16], xmm0
   804         movdqa  [rsp + 7*16], xmm0
       ; Patch the last dword of slots 1..7 with counter+1 .. counter+7, each
       ; byte-swapped back and XORed with the matching dword of round key 0.
   806         inc     CTRSave
   807         mov     CTR, CTRSave
   808         bswap   CTR
   809         xor     CTR, DWORD PTR [ctx + 3*4]
   810         mov     DWORD PTR [rsp + 1*16 + 3*4], CTR
   812         inc     CTRSave
   813         mov     CTR, CTRSave
   814         bswap   CTR
   815         xor     CTR, DWORD PTR [ctx + 3*4]
   816         mov     DWORD PTR [rsp + 2*16 + 3*4], CTR
   818         inc     CTRSave
   819         mov     CTR, CTRSave
   820         bswap   CTR
   821         xor     CTR, DWORD PTR [ctx + 3*4]
   822         mov     DWORD PTR [rsp + 3*16 + 3*4], CTR
   824         inc     CTRSave
   825         mov     CTR, CTRSave
   826         bswap   CTR
   827         xor     CTR, DWORD PTR [ctx + 3*4]
   828         mov     DWORD PTR [rsp + 4*16 + 3*4], CTR
   830         inc     CTRSave
   831         mov     CTR, CTRSave
   832         bswap   CTR
   833         xor     CTR, DWORD PTR [ctx + 3*4]
   834         mov     DWORD PTR [rsp + 5*16 + 3*4], CTR
   836         inc     CTRSave
   837         mov     CTR, CTRSave
   838         bswap   CTR
   839         xor     CTR, DWORD PTR [ctx + 3*4]
   840         mov     DWORD PTR [rsp + 6*16 + 3*4], CTR
   842         inc     CTRSave
   843         mov     CTR, CTRSave
   844         bswap   CTR
   845         xor     CTR, DWORD PTR [ctx + 3*4]
   846         mov     DWORD PTR [rsp + 7*16 + 3*4], CTR
   849 loop8:
   850         cmp     inputLen, 8*16
   851         jb      loop1
   853         movdqu  xmm0, [0*16 + rsp]
   854         movdqu  xmm1, [1*16 + rsp]
   855         movdqu  xmm2, [2*16 + rsp]
   856         movdqu  xmm3, [3*16 + rsp]
   857         movdqu  xmm4, [4*16 + rsp]
   858         movdqu  xmm5, [5*16 + rsp]
   859         movdqu  xmm6, [6*16 + rsp]
   860         movdqu  xmm7, [7*16 + rsp]
       ; Rounds 1..8 are interleaved with refilling the eight stack slots with
       ; the next eight counter values (slot i-1 is rewritten during round i).
   862         i = 1
   863         WHILE i LE 8
   864             aes_rnd i
   866             inc     CTRSave
   867             mov     CTR, CTRSave
   868             bswap   CTR
   869             xor     CTR, DWORD PTR [ctx + 3*4]
   870             mov     DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
   872             i = i+1
   873         ENDM
   874         WHILE i LT rnds
   875             aes_rnd i
   876             i = i+1
   877             ENDM
   878         aes_last_rnd rnds
       ; XOR the key stream into the input.
   880         movdqu  xmm8, [0*16 + input]
   881         pxor    xmm0, xmm8
   882         movdqu  xmm8, [1*16 + input]
   883         pxor    xmm1, xmm8
   884         movdqu  xmm8, [2*16 + input]
   885         pxor    xmm2, xmm8
   886         movdqu  xmm8, [3*16 + input]
   887         pxor    xmm3, xmm8
   888         movdqu  xmm8, [4*16 + input]
   889         pxor    xmm4, xmm8
   890         movdqu  xmm8, [5*16 + input]
   891         pxor    xmm5, xmm8
   892         movdqu  xmm8, [6*16 + input]
   893         pxor    xmm6, xmm8
   894         movdqu  xmm8, [7*16 + input]
   895         pxor    xmm7, xmm8
   897         movdqu  [0*16 + output], xmm0
   898         movdqu  [1*16 + output], xmm1
   899         movdqu  [2*16 + output], xmm2
   900         movdqu  [3*16 + output], xmm3
   901         movdqu  [4*16 + output], xmm4
   902         movdqu  [5*16 + output], xmm5
   903         movdqu  [6*16 + output], xmm6
   904         movdqu  [7*16 + output], xmm7
   906         lea input, [8*16 + input]
   907         lea output, [8*16 + output]
   908         sub inputLen, 8*16
   909         jmp loop8
   912 loop1:
       ; Tail: fewer than eight blocks remain. The prepared slots are consumed
       ; in order by advancing rsp; at most seven are used, so [rsp] always
       ; stays inside the eight-slot area.
   913         cmp     inputLen, 1*16
   914         jb      bail
   916         movdqu  xmm0, [rsp]
   917         add     rsp, 16
   919         i = 1
   920     WHILE i LT rnds
   921             movdqu  xmm7, [i*16 + ctx]
   922             aesenc  xmm0, xmm7
   923             i = i+1
   924         ENDM
   925         movdqu  xmm7, [rnds*16 + ctx]
   926         aesenclast xmm0, xmm7
   928         movdqu  xmm7, [input]
   929         pxor    xmm0, xmm7
   930         movdqu  [output], xmm0
   932         lea input, [1*16 + input]
   933         lea output, [1*16 + output]
   934         sub inputLen, 1*16
   935         jmp loop1
   937 bail:
       ; [rsp] is the first unused slot: undo the round-key-0 XOR and store it
       ; back as the counter block for the next call.
   939         movdqu  xmm0, [rsp]
   940         movdqu  xmm1, [ctx + 0*16]
   941         pxor    xmm0, xmm1
   942         movdqu  [16+ctrCtx], xmm0
   945         xor     rax, rax
   946         mov     rsp, rbp                        ; drop the counter area regardless of how far rsp moved
   947         pop     rbp
   949         movdqu  xmm6, [rsp + 0*16]
   950         movdqu  xmm7, [rsp + 1*16]
   951         movdqu  xmm8, [rsp + 2*16]
   952         add     rsp, 3*16
   954         ret
   955 ENDM
   958 intel_aes_encrypt_ctr_128 PROC
       ; CTR entry points: each PROC body is one expansion of gen_aes_ctr_func
       ; with the round count for that key size. Only "encrypt" entry points
       ; are defined for CTR in this file.
   959 gen_aes_ctr_func  10
   960 intel_aes_encrypt_ctr_128 ENDP
   962 intel_aes_encrypt_ctr_192 PROC
   963 gen_aes_ctr_func  12
   964 intel_aes_encrypt_ctr_192 ENDP
   966 intel_aes_encrypt_ctr_256 PROC
   967 gen_aes_ctr_func  14
   968 intel_aes_encrypt_ctr_256 ENDP
   971 END

mercurial