security/nss/lib/freebl/rijndael.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #ifdef FREEBL_NO_DEPEND
     6 #include "stubs.h"
     7 #endif
     9 #include "prinit.h"
    10 #include "prerr.h"
    11 #include "secerr.h"
    13 #include "prtypes.h"
    14 #include "blapi.h"
    15 #include "rijndael.h"
    17 #include "cts.h"
    18 #include "ctr.h"
    19 #include "gcm.h"
    21 #ifdef USE_HW_AES
    22 #include "intel-aes.h"
    23 #include "mpi.h"
    25 static int has_intel_aes = 0;
    26 static PRBool use_hw_aes = PR_FALSE;
    28 #ifdef INTEL_GCM
    29 #include "intel-gcm.h"
    30 static int has_intel_avx = 0;
    31 static int has_intel_clmul = 0;
    32 static PRBool use_hw_gcm = PR_FALSE;
    33 #endif
    34 #endif  /* USE_HW_AES */
    36 /*
    37  * There are currently five ways to build this code, varying in performance
    38  * and code size.
    39  *
    40  * RIJNDAEL_INCLUDE_TABLES         Include all tables from rijndael32.tab
    41  * RIJNDAEL_GENERATE_TABLES        Generate tables on first 
    42  *                                 encryption/decryption, then store them;
    43  *                                 use the function gfm
    44  * RIJNDAEL_GENERATE_TABLES_MACRO  Same as above, but use macros to do
    45  *                                 the generation
    46  * RIJNDAEL_GENERATE_VALUES        Do not store tables, generate the table
    47  *                                 values "on-the-fly", using gfm
    48  * RIJNDAEL_GENERATE_VALUES_MACRO  Same as above, but use macros
    49  *
    50  * The default is RIJNDAEL_INCLUDE_TABLES.
    51  */
    53 /*
    54  * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], 
    55  *                                                 T**-1[0..4], IMXC[0..4]
    56  * When building anything else, includes S, S**-1, Rcon
    57  */
    58 #include "rijndael32.tab"
    60 #if defined(RIJNDAEL_INCLUDE_TABLES)
    61 /*
    62  * RIJNDAEL_INCLUDE_TABLES
    63  */
    64 #define T0(i)    _T0[i]
    65 #define T1(i)    _T1[i]
    66 #define T2(i)    _T2[i]
    67 #define T3(i)    _T3[i]
    68 #define TInv0(i) _TInv0[i]
    69 #define TInv1(i) _TInv1[i]
    70 #define TInv2(i) _TInv2[i]
    71 #define TInv3(i) _TInv3[i]
    72 #define IMXC0(b) _IMXC0[b]
    73 #define IMXC1(b) _IMXC1[b]
    74 #define IMXC2(b) _IMXC2[b]
    75 #define IMXC3(b) _IMXC3[b]
    76 /* The S-box can be recovered from the T-tables */
    77 #ifdef IS_LITTLE_ENDIAN
    78 #define SBOX(b)    ((PRUint8)_T3[b])
    79 #else
    80 #define SBOX(b)    ((PRUint8)_T1[b])
    81 #endif
    82 #define SINV(b) (_SInv[b])
    84 #else /* not RIJNDAEL_INCLUDE_TABLES */
    86 /*
    87  * Code for generating T-table values.
    88  */
    90 #ifdef IS_LITTLE_ENDIAN
    91 #define WORD4(b0, b1, b2, b3) \
    92     (((b3) << 24) | ((b2) << 16) | ((b1) << 8) | (b0))
    93 #else
    94 #define WORD4(b0, b1, b2, b3) \
    95     (((b0) << 24) | ((b1) << 16) | ((b2) << 8) | (b3))
    96 #endif
    98 /*
    99  * Define the S and S**-1 tables (both have been stored)
   100  */
   101 #define SBOX(b)    (_S[b])
   102 #define SINV(b) (_SInv[b])
   104 /*
   105  * The function xtime, used for Galois field multiplication
   106  */
   107 #define XTIME(a) \
   108     ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1))
   110 /* Choose GFM method (macros or function) */
   111 #if defined(RIJNDAEL_GENERATE_TABLES_MACRO) ||  \
   112     defined(RIJNDAEL_GENERATE_VALUES_MACRO)
   114 /*
   115  * Galois field GF(2**8) multipliers, in macro form
   116  */
   117 #define GFM01(a) \
   118     (a)                                 /* a * 01 = a, the identity */
   119 #define GFM02(a) \
   120     (XTIME(a) & 0xff)                   /* a * 02 = xtime(a) */
   121 #define GFM04(a) \
   122     (GFM02(GFM02(a)))                   /* a * 04 = xtime**2(a) */
   123 #define GFM08(a) \
   124     (GFM02(GFM04(a)))                   /* a * 08 = xtime**3(a) */
   125 #define GFM03(a) \
   126     (GFM01(a) ^ GFM02(a))               /* a * 03 = a * (01 + 02) */
   127 #define GFM09(a) \
   128     (GFM01(a) ^ GFM08(a))               /* a * 09 = a * (01 + 08) */
   129 #define GFM0B(a) \
   130     (GFM01(a) ^ GFM02(a) ^ GFM08(a))    /* a * 0B = a * (01 + 02 + 08) */
   131 #define GFM0D(a) \
   132     (GFM01(a) ^ GFM04(a) ^ GFM08(a))    /* a * 0D = a * (01 + 04 + 08) */
   133 #define GFM0E(a) \
   134     (GFM02(a) ^ GFM04(a) ^ GFM08(a))    /* a * 0E = a * (02 + 04 + 08) */
   136 #else  /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */
   138 /* GF_MULTIPLY
   139  *
   140  * multiply two bytes represented in GF(2**8), mod (x**4 + 1)
   141  */
   142 PRUint8 gfm(PRUint8 a, PRUint8 b)
   143 {
   144     PRUint8 res = 0;
   145     while (b > 0) {
   146 	res = (b & 0x01) ? res ^ a : res;
   147 	a = XTIME(a);
   148 	b >>= 1;
   149     }
   150     return res;
   151 }
   153 #define GFM01(a) \
   154     (a)                                 /* a * 01 = a, the identity */
   155 #define GFM02(a) \
   156     (XTIME(a) & 0xff)                   /* a * 02 = xtime(a) */
   157 #define GFM03(a) \
   158     (gfm(a, 0x03))                      /* a * 03 */
   159 #define GFM09(a) \
   160     (gfm(a, 0x09))                      /* a * 09 */
   161 #define GFM0B(a) \
   162     (gfm(a, 0x0B))                      /* a * 0B */
   163 #define GFM0D(a) \
   164     (gfm(a, 0x0D))                      /* a * 0D */
   165 #define GFM0E(a) \
   166     (gfm(a, 0x0E))                      /* a * 0E */
   168 #endif /* choosing GFM function */
   170 /*
   171  * The T-tables
   172  */
   173 #define G_T0(i) \
   174     ( WORD4( GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)) ) )
   175 #define G_T1(i) \
   176     ( WORD4( GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)) ) )
   177 #define G_T2(i) \
   178     ( WORD4( GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)) ) )
   179 #define G_T3(i) \
   180     ( WORD4( GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)) ) )
   182 /*
   183  * The inverse T-tables
   184  */
   185 #define G_TInv0(i) \
   186     ( WORD4( GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)) ) )
   187 #define G_TInv1(i) \
   188     ( WORD4( GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)) ) )
   189 #define G_TInv2(i) \
   190     ( WORD4( GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)) ) )
   191 #define G_TInv3(i) \
   192     ( WORD4( GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)) ) )
   194 /*
   195  * The inverse mix column tables
   196  */
   197 #define G_IMXC0(i) \
   198     ( WORD4( GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i) ) )
   199 #define G_IMXC1(i) \
   200     ( WORD4( GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i) ) )
   201 #define G_IMXC2(i) \
   202     ( WORD4( GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i) ) )
   203 #define G_IMXC3(i) \
   204     ( WORD4( GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i) ) )
   206 /* Now choose the T-table indexing method */
   207 #if defined(RIJNDAEL_GENERATE_VALUES)
   208 /* generate values for the tables with a function*/
   209 static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i)
   210 {
   211     PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
   212     si01 = SINV(i);
   213     si02 = XTIME(si01);
   214     si04 = XTIME(si02);
   215     si08 = XTIME(si04);
   216     si03 = si02 ^ si01;
   217     si09 = si08 ^ si01;
   218     si0B = si08 ^ si03;
   219     si0D = si09 ^ si04;
   220     si0E = si08 ^ si04 ^ si02;
   221     switch (tx) {
   222     case 0:
   223 	return WORD4(si0E, si09, si0D, si0B);
   224     case 1:
   225 	return WORD4(si0B, si0E, si09, si0D);
   226     case 2:
   227 	return WORD4(si0D, si0B, si0E, si09);
   228     case 3:
   229 	return WORD4(si09, si0D, si0B, si0E);
   230     }
   231     return -1;
   232 }
   233 #define T0(i)    G_T0(i)
   234 #define T1(i)    G_T1(i)
   235 #define T2(i)    G_T2(i)
   236 #define T3(i)    G_T3(i)
   237 #define TInv0(i) gen_TInvXi(0, i)
   238 #define TInv1(i) gen_TInvXi(1, i)
   239 #define TInv2(i) gen_TInvXi(2, i)
   240 #define TInv3(i) gen_TInvXi(3, i)
   241 #define IMXC0(b) G_IMXC0(b)
   242 #define IMXC1(b) G_IMXC1(b)
   243 #define IMXC2(b) G_IMXC2(b)
   244 #define IMXC3(b) G_IMXC3(b)
   245 #elif defined(RIJNDAEL_GENERATE_VALUES_MACRO)
   246 /* generate values for the tables with macros */
   247 #define T0(i)    G_T0(i)
   248 #define T1(i)    G_T1(i)
   249 #define T2(i)    G_T2(i)
   250 #define T3(i)    G_T3(i)
   251 #define TInv0(i) G_TInv0(i)
   252 #define TInv1(i) G_TInv1(i)
   253 #define TInv2(i) G_TInv2(i)
   254 #define TInv3(i) G_TInv3(i)
   255 #define IMXC0(b) G_IMXC0(b)
   256 #define IMXC1(b) G_IMXC1(b)
   257 #define IMXC2(b) G_IMXC2(b)
   258 #define IMXC3(b) G_IMXC3(b)
   259 #else  /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */
   260 /* Generate T and T**-1 table values and store, then index */
   261 /* The inverse mix column tables are still generated */
   262 #define T0(i)    rijndaelTables->T0[i]
   263 #define T1(i)    rijndaelTables->T1[i]
   264 #define T2(i)    rijndaelTables->T2[i]
   265 #define T3(i)    rijndaelTables->T3[i]
   266 #define TInv0(i) rijndaelTables->TInv0[i]
   267 #define TInv1(i) rijndaelTables->TInv1[i]
   268 #define TInv2(i) rijndaelTables->TInv2[i]
   269 #define TInv3(i) rijndaelTables->TInv3[i]
   270 #define IMXC0(b) G_IMXC0(b)
   271 #define IMXC1(b) G_IMXC1(b)
   272 #define IMXC2(b) G_IMXC2(b)
   273 #define IMXC3(b) G_IMXC3(b)
   274 #endif /* choose T-table indexing method */
   276 #endif /* not RIJNDAEL_INCLUDE_TABLES */
   278 #if defined(RIJNDAEL_GENERATE_TABLES) ||  \
   279     defined(RIJNDAEL_GENERATE_TABLES_MACRO)
   281 /* Code to generate and store the tables */
   283 struct rijndael_tables_str {
   284     PRUint32 T0[256];
   285     PRUint32 T1[256];
   286     PRUint32 T2[256];
   287     PRUint32 T3[256];
   288     PRUint32 TInv0[256];
   289     PRUint32 TInv1[256];
   290     PRUint32 TInv2[256];
   291     PRUint32 TInv3[256];
   292 };
   294 static struct rijndael_tables_str *rijndaelTables = NULL;
   295 static PRCallOnceType coRTInit = { 0, 0, 0 };
   296 static PRStatus 
   297 init_rijndael_tables(void)
   298 {
   299     PRUint32 i;
   300     PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
   301     struct rijndael_tables_str *rts;
   302     rts = (struct rijndael_tables_str *)
   303                    PORT_Alloc(sizeof(struct rijndael_tables_str));
   304     if (!rts) return PR_FAILURE;
   305     for (i=0; i<256; i++) {
   306 	/* The forward values */
   307 	si01 = SBOX(i);
   308 	si02 = XTIME(si01);
   309 	si03 = si02 ^ si01;
   310 	rts->T0[i] = WORD4(si02, si01, si01, si03);
   311 	rts->T1[i] = WORD4(si03, si02, si01, si01);
   312 	rts->T2[i] = WORD4(si01, si03, si02, si01);
   313 	rts->T3[i] = WORD4(si01, si01, si03, si02);
   314 	/* The inverse values */
   315 	si01 = SINV(i);
   316 	si02 = XTIME(si01);
   317 	si04 = XTIME(si02);
   318 	si08 = XTIME(si04);
   319 	si03 = si02 ^ si01;
   320 	si09 = si08 ^ si01;
   321 	si0B = si08 ^ si03;
   322 	si0D = si09 ^ si04;
   323 	si0E = si08 ^ si04 ^ si02;
   324 	rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B);
   325 	rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D);
   326 	rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09);
   327 	rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E);
   328     }
   329     /* wait until all the values are in to set */
   330     rijndaelTables = rts;
   331     return PR_SUCCESS;
   332 }
   334 #endif /* code to generate tables */
   336 /**************************************************************************
   337  *
   338  * Stuff related to the Rijndael key schedule
   339  *
   340  *************************************************************************/
   342 #define SUBBYTE(w) \
   343     ((SBOX((w >> 24) & 0xff) << 24) | \
   344      (SBOX((w >> 16) & 0xff) << 16) | \
   345      (SBOX((w >>  8) & 0xff) <<  8) | \
   346      (SBOX((w      ) & 0xff)         ))
   348 #ifdef IS_LITTLE_ENDIAN
   349 #define ROTBYTE(b) \
   350     ((b >> 8) | (b << 24))
   351 #else
   352 #define ROTBYTE(b) \
   353     ((b << 8) | (b >> 24))
   354 #endif
   356 /* rijndael_key_expansion7
   357  *
   358  * Generate the expanded key from the key input by the user.
   359  * XXX
   360  * Nk == 7 (224 key bits) is a weird case.  Since Nk > 6, an added SubByte
   361  * transformation is done periodically.  The period is every 4 bytes, and
   362  * since 7%4 != 0 this happens at different times for each key word (unlike
   363  * Nk == 8 where it happens twice in every key word, in the same positions).
   364  * For now, I'm implementing this case "dumbly", w/o any unrolling.
   365  */
   366 static SECStatus
   367 rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
   368 {
   369     unsigned int i;
   370     PRUint32 *W;
   371     PRUint32 *pW;
   372     PRUint32 tmp;
   373     W = cx->expandedKey;
   374     /* 1.  the first Nk words contain the cipher key */
   375     memcpy(W, key, Nk * 4);
   376     i = Nk;
   377     /* 2.  loop until full expanded key is obtained */
   378     pW = W + i - 1;
   379     for (; i < cx->Nb * (cx->Nr + 1); ++i) {
   380 	tmp = *pW++;
   381 	if (i % Nk == 0)
   382 	    tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
   383 	else if (i % Nk == 4)
   384 	    tmp = SUBBYTE(tmp);
   385 	*pW = W[i - Nk] ^ tmp;
   386     }
   387     return SECSuccess;
   388 }
   390 /* rijndael_key_expansion
   391  *
   392  * Generate the expanded key from the key input by the user.
   393  */
   394 static SECStatus
   395 rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
   396 {
   397     unsigned int i;
   398     PRUint32 *W;
   399     PRUint32 *pW;
   400     PRUint32 tmp;
   401     unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
   402     if (Nk == 7)
   403 	return rijndael_key_expansion7(cx, key, Nk);
   404     W = cx->expandedKey;
   405     /* The first Nk words contain the input cipher key */
   406     memcpy(W, key, Nk * 4);
   407     i = Nk;
   408     pW = W + i - 1;
   409     /* Loop over all sets of Nk words, except the last */
   410     while (i < round_key_words - Nk) {
   411 	tmp = *pW++;
   412 	tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
   413 	*pW = W[i++ - Nk] ^ tmp;
   414 	tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
   415 	tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
   416 	tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
   417 	if (Nk == 4)
   418 	    continue;
   419 	switch (Nk) {
   420 	case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp;
   421 	case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
   422 	case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
   423 	case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
   424 	}
   425     }
   426     /* Generate the last word */
   427     tmp = *pW++;
   428     tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
   429     *pW = W[i++ - Nk] ^ tmp;
   430     /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0.  However,
   431      * since the above loop generated all but the last Nk key words, there
   432      * is no more need for the SubByte transformation.
   433      */
   434     if (Nk < 8) {
   435 	for (; i < round_key_words; ++i) {
   436 	    tmp = *pW++; 
   437 	    *pW = W[i - Nk] ^ tmp;
   438 	}
   439     } else {
   440 	/* except in the case when Nk == 8.  Then one more SubByte may have
   441 	 * to be performed, at i % Nk == 4.
   442 	 */
   443 	for (; i < round_key_words; ++i) {
   444 	    tmp = *pW++;
   445 	    if (i % Nk == 4)
   446 		tmp = SUBBYTE(tmp);
   447 	    *pW = W[i - Nk] ^ tmp;
   448 	}
   449     }
   450     return SECSuccess;
   451 }
   453 /* rijndael_invkey_expansion
   454  *
   455  * Generate the expanded key for the inverse cipher from the key input by 
   456  * the user.
   457  */
   458 static SECStatus
   459 rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
   460 {
   461     unsigned int r;
   462     PRUint32 *roundkeyw;
   463     PRUint8 *b;
   464     int Nb = cx->Nb;
   465     /* begins like usual key expansion ... */
   466     if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
   467 	return SECFailure;
   468     /* ... but has the additional step of InvMixColumn,
   469      * excepting the first and last round keys.
   470      */
   471     roundkeyw = cx->expandedKey + cx->Nb;
   472     for (r=1; r<cx->Nr; ++r) {
   473 	/* each key word, roundkeyw, represents a column in the key
   474 	 * matrix.  Each column is multiplied by the InvMixColumn matrix.
   475 	 *   [ 0E 0B 0D 09 ]   [ b0 ]
   476 	 *   [ 09 0E 0B 0D ] * [ b1 ]
   477 	 *   [ 0D 09 0E 0B ]   [ b2 ]
   478 	 *   [ 0B 0D 09 0E ]   [ b3 ]
   479 	 */
   480 	b = (PRUint8 *)roundkeyw;
   481 	*roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
   482 	b = (PRUint8 *)roundkeyw;
   483 	*roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
   484 	b = (PRUint8 *)roundkeyw;
   485 	*roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
   486 	b = (PRUint8 *)roundkeyw;
   487 	*roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
   488 	if (Nb <= 4)
   489 	    continue;
   490 	switch (Nb) {
   491 	case 8: b = (PRUint8 *)roundkeyw;
   492 	        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 
   493 	                       IMXC2(b[2]) ^ IMXC3(b[3]);
   494 	case 7: b = (PRUint8 *)roundkeyw;
   495 	        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 
   496 	                       IMXC2(b[2]) ^ IMXC3(b[3]);
   497 	case 6: b = (PRUint8 *)roundkeyw;
   498 	        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 
   499 	                       IMXC2(b[2]) ^ IMXC3(b[3]);
   500 	case 5: b = (PRUint8 *)roundkeyw;
   501 	        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 
   502 	                       IMXC2(b[2]) ^ IMXC3(b[3]);
   503 	}
   504     }
   505     return SECSuccess;
   506 }
   507 /**************************************************************************
   508  *
   509  * Stuff related to Rijndael encryption/decryption, optimized for
   510  * a 128-bit blocksize.
   511  *
   512  *************************************************************************/
   514 #ifdef IS_LITTLE_ENDIAN
   515 #define BYTE0WORD(w) ((w) & 0x000000ff)
   516 #define BYTE1WORD(w) ((w) & 0x0000ff00)
   517 #define BYTE2WORD(w) ((w) & 0x00ff0000)
   518 #define BYTE3WORD(w) ((w) & 0xff000000)
   519 #else
   520 #define BYTE0WORD(w) ((w) & 0xff000000)
   521 #define BYTE1WORD(w) ((w) & 0x00ff0000)
   522 #define BYTE2WORD(w) ((w) & 0x0000ff00)
   523 #define BYTE3WORD(w) ((w) & 0x000000ff)
   524 #endif
   526 typedef union {
   527     PRUint32 w[4];
   528     PRUint8  b[16];
   529 } rijndael_state;
   531 #define COLUMN_0(state) state.w[0]
   532 #define COLUMN_1(state) state.w[1]
   533 #define COLUMN_2(state) state.w[2]
   534 #define COLUMN_3(state) state.w[3]
   536 #define STATE_BYTE(i) state.b[i]
   538 static SECStatus 
   539 rijndael_encryptBlock128(AESContext *cx, 
   540                          unsigned char *output,
   541                          const unsigned char *input)
   542 {
   543     unsigned int r;
   544     PRUint32 *roundkeyw;
   545     rijndael_state state;
   546     PRUint32 C0, C1, C2, C3;
   547 #if defined(NSS_X86_OR_X64)
   548 #define pIn input
   549 #define pOut output
   550 #else
   551     unsigned char *pIn, *pOut;
   552     PRUint32 inBuf[4], outBuf[4];
   554     if ((ptrdiff_t)input & 0x3) {
   555 	memcpy(inBuf, input, sizeof inBuf);
   556 	pIn = (unsigned char *)inBuf;
   557     } else {
   558 	pIn = (unsigned char *)input;
   559     }
   560     if ((ptrdiff_t)output & 0x3) {
   561 	pOut = (unsigned char *)outBuf;
   562     } else {
   563 	pOut = (unsigned char *)output;
   564     }
   565 #endif
   566     roundkeyw = cx->expandedKey;
   567     /* Step 1: Add Round Key 0 to initial state */
   568     COLUMN_0(state) = *((PRUint32 *)(pIn     )) ^ *roundkeyw++;
   569     COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++;
   570     COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++;
   571     COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
   572     /* Step 2: Loop over rounds [1..NR-1] */
   573     for (r=1; r<cx->Nr; ++r) {
   574         /* Do ShiftRow, ByteSub, and MixColumn all at once */
   575 	C0 = T0(STATE_BYTE(0))  ^
   576 	     T1(STATE_BYTE(5))  ^
   577 	     T2(STATE_BYTE(10)) ^
   578 	     T3(STATE_BYTE(15));
   579 	C1 = T0(STATE_BYTE(4))  ^
   580 	     T1(STATE_BYTE(9))  ^
   581 	     T2(STATE_BYTE(14)) ^
   582 	     T3(STATE_BYTE(3));
   583 	C2 = T0(STATE_BYTE(8))  ^
   584 	     T1(STATE_BYTE(13)) ^
   585 	     T2(STATE_BYTE(2))  ^
   586 	     T3(STATE_BYTE(7));
   587 	C3 = T0(STATE_BYTE(12)) ^
   588 	     T1(STATE_BYTE(1))  ^
   589 	     T2(STATE_BYTE(6))  ^
   590 	     T3(STATE_BYTE(11));
   591 	/* Round key addition */
   592 	COLUMN_0(state) = C0 ^ *roundkeyw++;
   593 	COLUMN_1(state) = C1 ^ *roundkeyw++;
   594 	COLUMN_2(state) = C2 ^ *roundkeyw++;
   595 	COLUMN_3(state) = C3 ^ *roundkeyw++;
   596     }
   597     /* Step 3: Do the last round */
   598     /* Final round does not employ MixColumn */
   599     C0 = ((BYTE0WORD(T2(STATE_BYTE(0))))   |
   600           (BYTE1WORD(T3(STATE_BYTE(5))))   |
   601           (BYTE2WORD(T0(STATE_BYTE(10))))  |
   602           (BYTE3WORD(T1(STATE_BYTE(15)))))  ^
   603           *roundkeyw++;
   604     C1 = ((BYTE0WORD(T2(STATE_BYTE(4))))   |
   605           (BYTE1WORD(T3(STATE_BYTE(9))))   |
   606           (BYTE2WORD(T0(STATE_BYTE(14))))  |
   607           (BYTE3WORD(T1(STATE_BYTE(3)))))   ^
   608           *roundkeyw++;
   609     C2 = ((BYTE0WORD(T2(STATE_BYTE(8))))   |
   610           (BYTE1WORD(T3(STATE_BYTE(13))))  |
   611           (BYTE2WORD(T0(STATE_BYTE(2))))   |
   612           (BYTE3WORD(T1(STATE_BYTE(7)))))   ^
   613           *roundkeyw++;
   614     C3 = ((BYTE0WORD(T2(STATE_BYTE(12))))  |
   615           (BYTE1WORD(T3(STATE_BYTE(1))))   |
   616           (BYTE2WORD(T0(STATE_BYTE(6))))   |
   617           (BYTE3WORD(T1(STATE_BYTE(11)))))  ^
   618           *roundkeyw++;
   619     *((PRUint32 *) pOut     )  = C0;
   620     *((PRUint32 *)(pOut + 4))  = C1;
   621     *((PRUint32 *)(pOut + 8))  = C2;
   622     *((PRUint32 *)(pOut + 12)) = C3;
   623 #if defined(NSS_X86_OR_X64)
   624 #undef pIn
   625 #undef pOut
   626 #else
   627     if ((ptrdiff_t)output & 0x3) {
   628 	memcpy(output, outBuf, sizeof outBuf);
   629     }
   630 #endif
   631     return SECSuccess;
   632 }
   634 static SECStatus 
   635 rijndael_decryptBlock128(AESContext *cx, 
   636                          unsigned char *output,
   637                          const unsigned char *input)
   638 {
   639     int r;
   640     PRUint32 *roundkeyw;
   641     rijndael_state state;
   642     PRUint32 C0, C1, C2, C3;
   643 #if defined(NSS_X86_OR_X64)
   644 #define pIn input
   645 #define pOut output
   646 #else
   647     unsigned char *pIn, *pOut;
   648     PRUint32 inBuf[4], outBuf[4];
   650     if ((ptrdiff_t)input & 0x3) {
   651 	memcpy(inBuf, input, sizeof inBuf);
   652 	pIn = (unsigned char *)inBuf;
   653     } else {
   654 	pIn = (unsigned char *)input;
   655     }
   656     if ((ptrdiff_t)output & 0x3) {
   657 	pOut = (unsigned char *)outBuf;
   658     } else {
   659 	pOut = (unsigned char *)output;
   660     }
   661 #endif
   662     roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
   663     /* reverse the final key addition */
   664     COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
   665     COLUMN_2(state) = *((PRUint32 *)(pIn +  8)) ^ *roundkeyw--;
   666     COLUMN_1(state) = *((PRUint32 *)(pIn +  4)) ^ *roundkeyw--;
   667     COLUMN_0(state) = *((PRUint32 *)(pIn     )) ^ *roundkeyw--;
   668     /* Loop over rounds in reverse [NR..1] */
   669     for (r=cx->Nr; r>1; --r) {
   670 	/* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
   671 	C0 = TInv0(STATE_BYTE(0))  ^
   672 	     TInv1(STATE_BYTE(13)) ^
   673 	     TInv2(STATE_BYTE(10)) ^
   674 	     TInv3(STATE_BYTE(7));
   675 	C1 = TInv0(STATE_BYTE(4))  ^
   676 	     TInv1(STATE_BYTE(1))  ^
   677 	     TInv2(STATE_BYTE(14)) ^
   678 	     TInv3(STATE_BYTE(11));
   679 	C2 = TInv0(STATE_BYTE(8))  ^
   680 	     TInv1(STATE_BYTE(5))  ^
   681 	     TInv2(STATE_BYTE(2))  ^
   682 	     TInv3(STATE_BYTE(15));
   683 	C3 = TInv0(STATE_BYTE(12)) ^
   684 	     TInv1(STATE_BYTE(9))  ^
   685 	     TInv2(STATE_BYTE(6))  ^
   686 	     TInv3(STATE_BYTE(3));
   687 	/* Invert the key addition step */
   688 	COLUMN_3(state) = C3 ^ *roundkeyw--;
   689 	COLUMN_2(state) = C2 ^ *roundkeyw--;
   690 	COLUMN_1(state) = C1 ^ *roundkeyw--;
   691 	COLUMN_0(state) = C0 ^ *roundkeyw--;
   692     }
   693     /* inverse sub */
   694     pOut[ 0] = SINV(STATE_BYTE( 0));
   695     pOut[ 1] = SINV(STATE_BYTE(13));
   696     pOut[ 2] = SINV(STATE_BYTE(10));
   697     pOut[ 3] = SINV(STATE_BYTE( 7));
   698     pOut[ 4] = SINV(STATE_BYTE( 4));
   699     pOut[ 5] = SINV(STATE_BYTE( 1));
   700     pOut[ 6] = SINV(STATE_BYTE(14));
   701     pOut[ 7] = SINV(STATE_BYTE(11));
   702     pOut[ 8] = SINV(STATE_BYTE( 8));
   703     pOut[ 9] = SINV(STATE_BYTE( 5));
   704     pOut[10] = SINV(STATE_BYTE( 2));
   705     pOut[11] = SINV(STATE_BYTE(15));
   706     pOut[12] = SINV(STATE_BYTE(12));
   707     pOut[13] = SINV(STATE_BYTE( 9));
   708     pOut[14] = SINV(STATE_BYTE( 6));
   709     pOut[15] = SINV(STATE_BYTE( 3));
   710     /* final key addition */
   711     *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
   712     *((PRUint32 *)(pOut +  8)) ^= *roundkeyw--;
   713     *((PRUint32 *)(pOut +  4)) ^= *roundkeyw--;
   714     *((PRUint32 *) pOut      ) ^= *roundkeyw--;
   715 #if defined(NSS_X86_OR_X64)
   716 #undef pIn
   717 #undef pOut
   718 #else
   719     if ((ptrdiff_t)output & 0x3) {
   720 	memcpy(output, outBuf, sizeof outBuf);
   721     }
   722 #endif
   723     return SECSuccess;
   724 }
   726 /**************************************************************************
   727  *
   728  * Stuff related to general Rijndael encryption/decryption, for blocksizes
   729  * greater than 128 bits.
   730  *
   731  * XXX This code is currently untested!  So far, AES specs have only been
   732  *     released for 128 bit blocksizes.  This will be tested, but for now
   733  *     only the code above has been tested using known values.
   734  *
   735  *************************************************************************/
   737 #define COLUMN(array, j) *((PRUint32 *)(array + j))
/* rijndael_encryptBlock
 *
 * General-blocksize encryption entry point.  As written it does no work:
 * the first statement returns SECFailure unconditionally, so output is
 * never written and cx/input are never read.
 *
 * The body guarded by rijndael_large_blocks_fixed is a disabled draft.  It
 * sits after the unconditional return, so it would not execute even with
 * the macro defined.
 *
 * NOTE(review): the draft does not look compilable as it stands --
 * STATE_BYTE expands to state.b[i], but only `clone` is declared here, not
 * `state`.  The last-round index expressions are also parenthesized
 * differently from the main rounds (the "+1/+2/+3" ends up outside
 * STATE_BYTE), and COLUMN(output, j) is indexed with j stepping by 1 where
 * the key-addition loops step by 4.  c2 and c3 are never set to anything
 * but 0.  Confirm and fix before enabling.
 */
   739 SECStatus 
   740 rijndael_encryptBlock(AESContext *cx, 
   741                       unsigned char *output,
   742                       const unsigned char *input)
   743 {
   744     return SECFailure;
/* Disabled draft implementation; unreachable because of the return above. */
   745 #ifdef rijndael_large_blocks_fixed
   746     unsigned int j, r, Nb;
   747     unsigned int c2=0, c3=0;
   748     PRUint32 *roundkeyw;
   749     PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
   750     Nb = cx->Nb;
   751     roundkeyw = cx->expandedKey;
   752     /* Step 1: Add Round Key 0 to initial state */
   753     for (j=0; j<4*Nb; j+=4) {
   754 	COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++;
   755     }
   756     /* Step 2: Loop over rounds [1..NR-1] */
   757     for (r=1; r<cx->Nr; ++r) {
   758 	for (j=0; j<Nb; ++j) {
   759 	    COLUMN(output, j) = T0(STATE_BYTE(4*  j          )) ^
   760 	                        T1(STATE_BYTE(4*((j+ 1)%Nb)+1)) ^
   761 	                        T2(STATE_BYTE(4*((j+c2)%Nb)+2)) ^
   762 	                        T3(STATE_BYTE(4*((j+c3)%Nb)+3));
   763 	}
   764 	for (j=0; j<4*Nb; j+=4) {
   765 	    COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++;
   766 	}
   767     }
   768     /* Step 3: Do the last round */
   769     /* Final round does not employ MixColumn */
   770     for (j=0; j<Nb; ++j) {
   771 	COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4* j         ))))  |
   772                              (BYTE1WORD(T3(STATE_BYTE(4*(j+ 1)%Nb)+1)))  |
   773                              (BYTE2WORD(T0(STATE_BYTE(4*(j+c2)%Nb)+2)))  |
   774                              (BYTE3WORD(T1(STATE_BYTE(4*(j+c3)%Nb)+3)))) ^
   775 	                     *roundkeyw++;
   776     }
   777     return SECSuccess;
   778 #endif
   779 }
/* rijndael_decryptBlock
 *
 * General-blocksize decryption entry point.  As written it does no work:
 * the first statement returns SECFailure unconditionally, so output is
 * never written and cx/input are never read.
 *
 * The body guarded by rijndael_large_blocks_fixed is a disabled draft.  It
 * sits after the unconditional return, so it would not execute even with
 * the macro defined.
 *
 * NOTE(review): the draft does not look compilable as it stands --
 * STATE_BYTE expands to state.b[i], but only `clone` is declared here, not
 * `state`.  The key-addition loops start at j = 4*Nb and run down to 0
 * inclusive, which touches Nb+1 words beginning one word past a
 * 4*Nb-byte state, and the TInv1..TInv3 arguments add 1/2/3 to the looked
 * up byte rather than to the byte index.  c2 and c3 are never set to
 * anything but 0.  Confirm and fix before enabling.
 */
   781 SECStatus 
   782 rijndael_decryptBlock(AESContext *cx, 
   783                       unsigned char *output,
   784                       const unsigned char *input)
   785 {
   786     return SECFailure;
/* Disabled draft implementation; unreachable because of the return above. */
   787 #ifdef rijndael_large_blocks_fixed
   788     int j, r, Nb;
   789     int c2=0, c3=0;
   790     PRUint32 *roundkeyw;
   791     PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
   792     Nb = cx->Nb;
   793     roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
   794     /* reverse key addition */
   795     for (j=4*Nb; j>=0; j-=4) {
   796 	COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--;
   797     }
   798     /* Loop over rounds in reverse [NR..1] */
   799     for (r=cx->Nr; r>1; --r) {
   800 	/* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
   801 	for (j=0; j<Nb; ++j) {
   802 	    COLUMN(output, 4*j) = TInv0(STATE_BYTE(4* j            )) ^
   803 	                          TInv1(STATE_BYTE(4*(j+Nb- 1)%Nb)+1) ^
   804 	                          TInv2(STATE_BYTE(4*(j+Nb-c2)%Nb)+2) ^
   805 	                          TInv3(STATE_BYTE(4*(j+Nb-c3)%Nb)+3);
   806 	}
   807 	/* Invert the key addition step */
   808 	for (j=4*Nb; j>=0; j-=4) {
   809 	    COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--;
   810 	}
   811     }
   812     /* inverse sub */
   813     for (j=0; j<4*Nb; ++j) {
   814 	output[j] = SINV(clone[j]);
   815     }
   816     /* final key addition */
   817     for (j=4*Nb; j>=0; j-=4) {
   818 	COLUMN(output, j) ^= *roundkeyw--;
   819     }
   820     return SECSuccess;
   821 #endif
   822 }
   824 /**************************************************************************
   825  *
   826  *  Rijndael modes of operation (ECB and CBC)
   827  *
   828  *************************************************************************/
   830 static SECStatus 
   831 rijndael_encryptECB(AESContext *cx, unsigned char *output,
   832                     unsigned int *outputLen, unsigned int maxOutputLen,
   833                     const unsigned char *input, unsigned int inputLen, 
   834                     unsigned int blocksize)
   835 {
   836     SECStatus rv;
   837     AESBlockFunc *encryptor;
   839     encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 
   840 				  ? &rijndael_encryptBlock128 
   841 				  : &rijndael_encryptBlock;
   842     while (inputLen > 0) {
   843         rv = (*encryptor)(cx, output, input);
   844 	if (rv != SECSuccess)
   845 	    return rv;
   846 	output += blocksize;
   847 	input += blocksize;
   848 	inputLen -= blocksize;
   849     }
   850     return SECSuccess;
   851 }
   853 static SECStatus 
   854 rijndael_encryptCBC(AESContext *cx, unsigned char *output,
   855                     unsigned int *outputLen, unsigned int maxOutputLen,
   856                     const unsigned char *input, unsigned int inputLen, 
   857                     unsigned int blocksize)
   858 {
   859     unsigned int j;
   860     SECStatus rv;
   861     AESBlockFunc *encryptor;
   862     unsigned char *lastblock;
   863     unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8];
   865     if (!inputLen)
   866 	return SECSuccess;
   867     lastblock = cx->iv;
   868     encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 
   869 				  ? &rijndael_encryptBlock128 
   870 				  : &rijndael_encryptBlock;
   871     while (inputLen > 0) {
   872 	/* XOR with the last block (IV if first block) */
   873 	for (j=0; j<blocksize; ++j)
   874 	    inblock[j] = input[j] ^ lastblock[j];
   875 	/* encrypt */
   876         rv = (*encryptor)(cx, output, inblock);
   877 	if (rv != SECSuccess)
   878 	    return rv;
   879 	/* move to the next block */
   880 	lastblock = output;
   881 	output += blocksize;
   882 	input += blocksize;
   883 	inputLen -= blocksize;
   884     }
   885     memcpy(cx->iv, lastblock, blocksize);
   886     return SECSuccess;
   887 }
   889 static SECStatus 
   890 rijndael_decryptECB(AESContext *cx, unsigned char *output,
   891                     unsigned int *outputLen, unsigned int maxOutputLen,
   892                     const unsigned char *input, unsigned int inputLen, 
   893                     unsigned int blocksize)
   894 {
   895     SECStatus rv;
   896     AESBlockFunc *decryptor;
   898     decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 
   899 				  ? &rijndael_decryptBlock128 
   900 				  : &rijndael_decryptBlock;
   901     while (inputLen > 0) {
   902         rv = (*decryptor)(cx, output, input);
   903 	if (rv != SECSuccess)
   904 	    return rv;
   905 	output += blocksize;
   906 	input += blocksize;
   907 	inputLen -= blocksize;
   908     }
   909     return SECSuccess;
   910 }
   912 static SECStatus 
   913 rijndael_decryptCBC(AESContext *cx, unsigned char *output,
   914                     unsigned int *outputLen, unsigned int maxOutputLen,
   915                     const unsigned char *input, unsigned int inputLen, 
   916                     unsigned int blocksize)
   917 {
   918     SECStatus rv;
   919     AESBlockFunc *decryptor;
   920     const unsigned char *in;
   921     unsigned char *out;
   922     unsigned int j;
   923     unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE];
   926     if (!inputLen) 
   927 	return SECSuccess;
   928     PORT_Assert(output - input >= 0 || input - output >= (int)inputLen );
   929     decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 
   930                                   ? &rijndael_decryptBlock128 
   931 				  : &rijndael_decryptBlock;
   932     in  = input  + (inputLen - blocksize);
   933     memcpy(newIV, in, blocksize);
   934     out = output + (inputLen - blocksize);
   935     while (inputLen > blocksize) {
   936         rv = (*decryptor)(cx, out, in);
   937 	if (rv != SECSuccess)
   938 	    return rv;
   939 	for (j=0; j<blocksize; ++j)
   940 	    out[j] ^= in[(int)(j - blocksize)];
   941 	out -= blocksize;
   942 	in -= blocksize;
   943 	inputLen -= blocksize;
   944     }
   945     if (in == input) {
   946         rv = (*decryptor)(cx, out, in);
   947 	if (rv != SECSuccess)
   948 	    return rv;
   949 	for (j=0; j<blocksize; ++j)
   950 	    out[j] ^= cx->iv[j];
   951     }
   952     memcpy(cx->iv, newIV, blocksize);
   953     return SECSuccess;
   954 }
   956 /************************************************************************
   957  *
   958  * BLAPI Interface functions
   959  *
   960  * The following functions implement the encryption routines defined in
   961  * BLAPI for the AES cipher, Rijndael.
   962  *
   963  ***********************************************************************/
   965 AESContext * AES_AllocateContext(void)
   966 {
   967     return PORT_ZNew(AESContext);
   968 }
   971 #ifdef INTEL_GCM
   972 /*
   973  * Adapted from the example code in "How to detect New Instruction support in
   974  * the 4th generation Intel Core processor family" by Max Locktyukhin.
   975  *
   976  * XGETBV:
   977  *   Reads an extended control register (XCR) specified by ECX into EDX:EAX.
   978  */
   979 static PRBool
   980 check_xcr0_ymm()
   981 {
   982     PRUint32 xcr0;
   983 #if defined(_MSC_VER)
   984 #if defined(_M_IX86)
   985     __asm {
   986         mov ecx, 0
   987         xgetbv
   988         mov xcr0, eax
   989     }
   990 #else
   991     xcr0 = (PRUint32)_xgetbv(0);  /* Requires VS2010 SP1 or later. */
   992 #endif
   993 #else
   994     __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
   995 #endif
   996     /* Check if xmm and ymm state are enabled in XCR0. */
   997     return (xcr0 & 6) == 6;
   998 }
   999 #endif
  1001 /*
  1002 ** Initialize a new AES context suitable for AES encryption/decryption in
  1003 ** the ECB or CBC mode.
  1004 ** 	"mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
  1005 */
  1006 static SECStatus   
  1007 aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, 
  1008 	        const unsigned char *iv, int mode, unsigned int encrypt,
  1009 	        unsigned int blocksize)
  1011     unsigned int Nk;
  1012     /* According to Rijndael AES Proposal, section 12.1, block and key
  1013      * lengths between 128 and 256 bits are supported, as long as the
  1014      * length in bytes is divisible by 4.
  1015      */
  1016     if (key == NULL || 
  1017         keysize < RIJNDAEL_MIN_BLOCKSIZE   || 
  1018 	keysize > RIJNDAEL_MAX_BLOCKSIZE   || 
  1019 	keysize % 4 != 0 ||
  1020         blocksize < RIJNDAEL_MIN_BLOCKSIZE || 
  1021 	blocksize > RIJNDAEL_MAX_BLOCKSIZE || 
  1022 	blocksize % 4 != 0) {
  1023 	PORT_SetError(SEC_ERROR_INVALID_ARGS);
  1024 	return SECFailure;
  1026     if (mode != NSS_AES && mode != NSS_AES_CBC) {
  1027 	PORT_SetError(SEC_ERROR_INVALID_ARGS);
  1028 	return SECFailure;
  1030     if (mode == NSS_AES_CBC && iv == NULL) {
  1031 	PORT_SetError(SEC_ERROR_INVALID_ARGS);
  1032 	return SECFailure;
  1034     if (!cx) {
  1035 	PORT_SetError(SEC_ERROR_INVALID_ARGS);
  1036     	return SECFailure;
  1038 #ifdef USE_HW_AES
  1039     if (has_intel_aes == 0) {
  1040 	unsigned long eax, ebx, ecx, edx;
  1041 	char *disable_hw_aes = getenv("NSS_DISABLE_HW_AES");
  1043 	if (disable_hw_aes == NULL) {
  1044 	    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
  1045 	    has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1;
  1046 #ifdef INTEL_GCM
  1047 	    has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1;
  1048 	    if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 &&
  1049 		check_xcr0_ymm()) {
  1050 		has_intel_avx = 1;
  1051 	    } else {
  1052 		has_intel_avx = -1;
  1054 #endif
  1055 	} else {
  1056 	    has_intel_aes = -1;
  1057 #ifdef INTEL_GCM
  1058 	    has_intel_avx = -1;
  1059 	    has_intel_clmul = -1;
  1060 #endif
  1063     use_hw_aes = (PRBool)
  1064 		(has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16);
  1065 #ifdef INTEL_GCM
  1066     use_hw_gcm = (PRBool)
  1067 		(use_hw_aes && has_intel_avx>0 && has_intel_clmul>0);
  1068 #endif
  1069 #endif  /* USE_HW_AES */
  1070     /* Nb = (block size in bits) / 32 */
  1071     cx->Nb = blocksize / 4;
  1072     /* Nk = (key size in bits) / 32 */
  1073     Nk = keysize / 4;
  1074     /* Obtain number of rounds from "table" */
  1075     cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
  1076     /* copy in the iv, if neccessary */
  1077     if (mode == NSS_AES_CBC) {
  1078 	memcpy(cx->iv, iv, blocksize);
  1079 #ifdef USE_HW_AES
  1080 	if (use_hw_aes) {
  1081 	    cx->worker = (freeblCipherFunc)
  1082 				intel_aes_cbc_worker(encrypt, keysize);
  1083 	} else
  1084 #endif
  1086 	    cx->worker = (freeblCipherFunc) (encrypt
  1087 			  ? &rijndael_encryptCBC : &rijndael_decryptCBC);
  1089     } else {
  1090 #ifdef  USE_HW_AES
  1091 	if (use_hw_aes) {
  1092 	    cx->worker = (freeblCipherFunc) 
  1093 				intel_aes_ecb_worker(encrypt, keysize);
  1094 	} else
  1095 #endif
  1097 	    cx->worker = (freeblCipherFunc) (encrypt
  1098 			  ? &rijndael_encryptECB : &rijndael_decryptECB);
  1101     PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
  1102     if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
  1103 	PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
  1104 	goto cleanup;
  1106 #ifdef USE_HW_AES
  1107     if (use_hw_aes) {
  1108 	intel_aes_init(encrypt, keysize);
  1109     } else
  1110 #endif
  1113 #if defined(RIJNDAEL_GENERATE_TABLES) ||  \
  1114 	defined(RIJNDAEL_GENERATE_TABLES_MACRO)
  1115 	if (rijndaelTables == NULL) {
  1116 	    if (PR_CallOnce(&coRTInit, init_rijndael_tables)
  1117 	      != PR_SUCCESS) {
  1118 		return SecFailure;
  1121 #endif
  1122 	/* Generate expanded key */
  1123 	if (encrypt) {
  1124 	    if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
  1125 		goto cleanup;
  1126 	} else {
  1127 	    if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess)
  1128 		goto cleanup;
  1131     cx->worker_cx = cx;
  1132     cx->destroy = NULL;
  1133     cx->isBlock = PR_TRUE;
  1134     return SECSuccess;
  1135 cleanup:
  1136     return SECFailure;
  1139 SECStatus   
  1140 AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, 
  1141 	        const unsigned char *iv, int mode, unsigned int encrypt,
  1142 	        unsigned int blocksize)
  1144     int basemode = mode;
  1145     PRBool baseencrypt = encrypt;
  1146     SECStatus rv;
  1148     switch (mode) {
  1149     case NSS_AES_CTS:
  1150 	basemode = NSS_AES_CBC;
  1151 	break;
  1152     case NSS_AES_GCM:
  1153     case NSS_AES_CTR:
  1154 	basemode = NSS_AES;
  1155 	baseencrypt = PR_TRUE;
  1156 	break;
  1158     /* make sure enough is initializes so we can safely call Destroy */
  1159     cx->worker_cx = NULL;
  1160     cx->destroy = NULL;
  1161     rv = aes_InitContext(cx, key, keysize, iv, basemode, 
  1162 					baseencrypt, blocksize);
  1163     if (rv != SECSuccess) {
  1164 	AES_DestroyContext(cx, PR_FALSE);
  1165 	return rv;
  1168     /* finally, set up any mode specific contexts */
  1169     switch (mode) {
  1170     case NSS_AES_CTS:
  1171 	cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize);
  1172 	cx->worker = (freeblCipherFunc) 
  1173 			(encrypt ?  CTS_EncryptUpdate : CTS_DecryptUpdate);
  1174 	cx->destroy = (freeblDestroyFunc) CTS_DestroyContext;
  1175 	cx->isBlock = PR_FALSE;
  1176 	break;
  1177     case NSS_AES_GCM:
  1178 #ifdef INTEL_GCM
  1179 	if(use_hw_gcm) {
  1180         	cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize);
  1181 		cx->worker = (freeblCipherFunc)
  1182 			(encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate);
  1183 		cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext;
  1184 		cx->isBlock = PR_FALSE;
  1185     	} else
  1186 #endif
  1188 	cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize);
  1189 	cx->worker = (freeblCipherFunc)
  1190 			(encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate);
  1191 	cx->destroy = (freeblDestroyFunc) GCM_DestroyContext;
  1192 	cx->isBlock = PR_FALSE;
  1194 	break;
  1195     case NSS_AES_CTR:
  1196 	cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize);
  1197 #if defined(USE_HW_AES) && defined(_MSC_VER)
  1198 	if (use_hw_aes) {
  1199 	    cx->worker = (freeblCipherFunc) CTR_Update_HW_AES;
  1200 	} else
  1201 #endif
  1203 	    cx->worker = (freeblCipherFunc) CTR_Update;
  1205 	cx->destroy = (freeblDestroyFunc) CTR_DestroyContext;
  1206 	cx->isBlock = PR_FALSE;
  1207 	break;
  1208     default:
  1209 	/* everything has already been set up by aes_InitContext, just
  1210 	 * return */
  1211 	return SECSuccess;
  1213     /* check to see if we succeeded in getting the worker context */
  1214     if (cx->worker_cx == NULL) {
  1215 	/* no, just destroy the existing context */
  1216 	cx->destroy = NULL; /* paranoia, though you can see a dozen lines */
  1217 			    /* below that this isn't necessary */
  1218 	AES_DestroyContext(cx, PR_FALSE);
  1219 	return SECFailure;
  1221     return SECSuccess;
  1224 /* AES_CreateContext
  1226  * create a new context for Rijndael operations
  1227  */
  1228 AESContext *
  1229 AES_CreateContext(const unsigned char *key, const unsigned char *iv, 
  1230                   int mode, int encrypt,
  1231                   unsigned int keysize, unsigned int blocksize)
  1233     AESContext *cx = AES_AllocateContext();
  1234     if (cx) {
  1235 	SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt,
  1236 				       blocksize);
  1237 	if (rv != SECSuccess) {
  1238 	    AES_DestroyContext(cx, PR_TRUE);
  1239 	    cx = NULL;
  1242     return cx;
  1245 /*
  1246  * AES_DestroyContext
  1248  * Zero an AES cipher context.  If freeit is true, also free the pointer
  1249  * to the context.
  1250  */
  1251 void 
  1252 AES_DestroyContext(AESContext *cx, PRBool freeit)
  1254     if (cx->worker_cx && cx->destroy) {
  1255 	(*cx->destroy)(cx->worker_cx, PR_TRUE);
  1256 	cx->worker_cx = NULL;
  1257 	cx->destroy = NULL;
  1259     if (freeit)
  1260 	PORT_Free(cx);
  1263 /*
  1264  * AES_Encrypt
  1266  * Encrypt an arbitrary-length buffer.  The output buffer must already be
  1267  * allocated to at least inputLen.
  1268  */
  1269 SECStatus 
  1270 AES_Encrypt(AESContext *cx, unsigned char *output,
  1271             unsigned int *outputLen, unsigned int maxOutputLen,
  1272             const unsigned char *input, unsigned int inputLen)
  1274     int blocksize;
  1275     /* Check args */
  1276     if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
  1277 	PORT_SetError(SEC_ERROR_INVALID_ARGS);
  1278 	return SECFailure;
  1280     blocksize = 4 * cx->Nb;
  1281     if (cx->isBlock && (inputLen % blocksize != 0)) {
  1282 	PORT_SetError(SEC_ERROR_INPUT_LEN);
  1283 	return SECFailure;
  1285     if (maxOutputLen < inputLen) {
  1286 	PORT_SetError(SEC_ERROR_OUTPUT_LEN);
  1287 	return SECFailure;
  1289     *outputLen = inputLen;
  1290     return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,	
  1291                              input, inputLen, blocksize);
  1294 /*
  1295  * AES_Decrypt
  1297  * Decrypt and arbitrary-length buffer.  The output buffer must already be
  1298  * allocated to at least inputLen.
  1299  */
  1300 SECStatus 
  1301 AES_Decrypt(AESContext *cx, unsigned char *output,
  1302             unsigned int *outputLen, unsigned int maxOutputLen,
  1303             const unsigned char *input, unsigned int inputLen)
  1305     int blocksize;
  1306     /* Check args */
  1307     if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
  1308 	PORT_SetError(SEC_ERROR_INVALID_ARGS);
  1309 	return SECFailure;
  1311     blocksize = 4 * cx->Nb;
  1312     if (cx->isBlock && (inputLen % blocksize != 0)) {
  1313 	PORT_SetError(SEC_ERROR_INPUT_LEN);
  1314 	return SECFailure;
  1316     if (maxOutputLen < inputLen) {
  1317 	PORT_SetError(SEC_ERROR_OUTPUT_LEN);
  1318 	return SECFailure;
  1320     *outputLen = inputLen;
  1321     return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,	
  1322                              input, inputLen, blocksize);

mercurial