michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifdef FREEBL_NO_DEPEND michael@0: #include "stubs.h" michael@0: #endif michael@0: michael@0: #include "prinit.h" michael@0: #include "prerr.h" michael@0: #include "secerr.h" michael@0: michael@0: #include "prtypes.h" michael@0: #include "blapi.h" michael@0: #include "rijndael.h" michael@0: michael@0: #include "cts.h" michael@0: #include "ctr.h" michael@0: #include "gcm.h" michael@0: michael@0: #ifdef USE_HW_AES michael@0: #include "intel-aes.h" michael@0: #include "mpi.h" michael@0: michael@0: static int has_intel_aes = 0; michael@0: static PRBool use_hw_aes = PR_FALSE; michael@0: michael@0: #ifdef INTEL_GCM michael@0: #include "intel-gcm.h" michael@0: static int has_intel_avx = 0; michael@0: static int has_intel_clmul = 0; michael@0: static PRBool use_hw_gcm = PR_FALSE; michael@0: #endif michael@0: #endif /* USE_HW_AES */ michael@0: michael@0: /* michael@0: * There are currently five ways to build this code, varying in performance michael@0: * and code size. michael@0: * michael@0: * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab michael@0: * RIJNDAEL_GENERATE_TABLES Generate tables on first michael@0: * encryption/decryption, then store them; michael@0: * use the function gfm michael@0: * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do michael@0: * the generation michael@0: * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table michael@0: * values "on-the-fly", using gfm michael@0: * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros michael@0: * michael@0: * The default is RIJNDAEL_INCLUDE_TABLES. 
michael@0: */ michael@0: michael@0: /* michael@0: * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], michael@0: * T**-1[0..4], IMXC[0..4] michael@0: * When building anything else, includes S, S**-1, Rcon michael@0: */ michael@0: #include "rijndael32.tab" michael@0: michael@0: #if defined(RIJNDAEL_INCLUDE_TABLES) michael@0: /* michael@0: * RIJNDAEL_INCLUDE_TABLES michael@0: */ michael@0: #define T0(i) _T0[i] michael@0: #define T1(i) _T1[i] michael@0: #define T2(i) _T2[i] michael@0: #define T3(i) _T3[i] michael@0: #define TInv0(i) _TInv0[i] michael@0: #define TInv1(i) _TInv1[i] michael@0: #define TInv2(i) _TInv2[i] michael@0: #define TInv3(i) _TInv3[i] michael@0: #define IMXC0(b) _IMXC0[b] michael@0: #define IMXC1(b) _IMXC1[b] michael@0: #define IMXC2(b) _IMXC2[b] michael@0: #define IMXC3(b) _IMXC3[b] michael@0: /* The S-box can be recovered from the T-tables */ michael@0: #ifdef IS_LITTLE_ENDIAN michael@0: #define SBOX(b) ((PRUint8)_T3[b]) michael@0: #else michael@0: #define SBOX(b) ((PRUint8)_T1[b]) michael@0: #endif michael@0: #define SINV(b) (_SInv[b]) michael@0: michael@0: #else /* not RIJNDAEL_INCLUDE_TABLES */ michael@0: michael@0: /* michael@0: * Code for generating T-table values. michael@0: */ michael@0: michael@0: #ifdef IS_LITTLE_ENDIAN michael@0: #define WORD4(b0, b1, b2, b3) \ michael@0: (((b3) << 24) | ((b2) << 16) | ((b1) << 8) | (b0)) michael@0: #else michael@0: #define WORD4(b0, b1, b2, b3) \ michael@0: (((b0) << 24) | ((b1) << 16) | ((b2) << 8) | (b3)) michael@0: #endif michael@0: michael@0: /* michael@0: * Define the S and S**-1 tables (both have been stored) michael@0: */ michael@0: #define SBOX(b) (_S[b]) michael@0: #define SINV(b) (_SInv[b]) michael@0: michael@0: /* michael@0: * The function xtime, used for Galois field multiplication michael@0: */ michael@0: #define XTIME(a) \ michael@0: ((a & 0x80) ? 
((a << 1) ^ 0x1b) : (a << 1)) michael@0: michael@0: /* Choose GFM method (macros or function) */ michael@0: #if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ michael@0: defined(RIJNDAEL_GENERATE_VALUES_MACRO) michael@0: michael@0: /* michael@0: * Galois field GF(2**8) multipliers, in macro form michael@0: */ michael@0: #define GFM01(a) \ michael@0: (a) /* a * 01 = a, the identity */ michael@0: #define GFM02(a) \ michael@0: (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ michael@0: #define GFM04(a) \ michael@0: (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ michael@0: #define GFM08(a) \ michael@0: (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ michael@0: #define GFM03(a) \ michael@0: (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ michael@0: #define GFM09(a) \ michael@0: (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ michael@0: #define GFM0B(a) \ michael@0: (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ michael@0: #define GFM0D(a) \ michael@0: (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ michael@0: #define GFM0E(a) \ michael@0: (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ michael@0: michael@0: #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ michael@0: michael@0: /* GF_MULTIPLY michael@0: * michael@0: * multiply two bytes represented in GF(2**8), mod (x**4 + 1) michael@0: */ michael@0: PRUint8 gfm(PRUint8 a, PRUint8 b) michael@0: { michael@0: PRUint8 res = 0; michael@0: while (b > 0) { michael@0: res = (b & 0x01) ? 
res ^ a : res; michael@0: a = XTIME(a); michael@0: b >>= 1; michael@0: } michael@0: return res; michael@0: } michael@0: michael@0: #define GFM01(a) \ michael@0: (a) /* a * 01 = a, the identity */ michael@0: #define GFM02(a) \ michael@0: (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ michael@0: #define GFM03(a) \ michael@0: (gfm(a, 0x03)) /* a * 03 */ michael@0: #define GFM09(a) \ michael@0: (gfm(a, 0x09)) /* a * 09 */ michael@0: #define GFM0B(a) \ michael@0: (gfm(a, 0x0B)) /* a * 0B */ michael@0: #define GFM0D(a) \ michael@0: (gfm(a, 0x0D)) /* a * 0D */ michael@0: #define GFM0E(a) \ michael@0: (gfm(a, 0x0E)) /* a * 0E */ michael@0: michael@0: #endif /* choosing GFM function */ michael@0: michael@0: /* michael@0: * The T-tables michael@0: */ michael@0: #define G_T0(i) \ michael@0: ( WORD4( GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)) ) ) michael@0: #define G_T1(i) \ michael@0: ( WORD4( GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)) ) ) michael@0: #define G_T2(i) \ michael@0: ( WORD4( GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)) ) ) michael@0: #define G_T3(i) \ michael@0: ( WORD4( GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)) ) ) michael@0: michael@0: /* michael@0: * The inverse T-tables michael@0: */ michael@0: #define G_TInv0(i) \ michael@0: ( WORD4( GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)) ) ) michael@0: #define G_TInv1(i) \ michael@0: ( WORD4( GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)) ) ) michael@0: #define G_TInv2(i) \ michael@0: ( WORD4( GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)) ) ) michael@0: #define G_TInv3(i) \ michael@0: ( WORD4( GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)) ) ) michael@0: michael@0: /* michael@0: * The inverse mix column tables michael@0: */ michael@0: #define G_IMXC0(i) \ michael@0: ( WORD4( GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i) ) ) michael@0: #define G_IMXC1(i) \ michael@0: ( WORD4( GFM0B(i), 
GFM0E(i), GFM09(i), GFM0D(i) ) ) michael@0: #define G_IMXC2(i) \ michael@0: ( WORD4( GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i) ) ) michael@0: #define G_IMXC3(i) \ michael@0: ( WORD4( GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i) ) ) michael@0: michael@0: /* Now choose the T-table indexing method */ michael@0: #if defined(RIJNDAEL_GENERATE_VALUES) michael@0: /* generate values for the tables with a function*/ michael@0: static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i) michael@0: { michael@0: PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; michael@0: si01 = SINV(i); michael@0: si02 = XTIME(si01); michael@0: si04 = XTIME(si02); michael@0: si08 = XTIME(si04); michael@0: si03 = si02 ^ si01; michael@0: si09 = si08 ^ si01; michael@0: si0B = si08 ^ si03; michael@0: si0D = si09 ^ si04; michael@0: si0E = si08 ^ si04 ^ si02; michael@0: switch (tx) { michael@0: case 0: michael@0: return WORD4(si0E, si09, si0D, si0B); michael@0: case 1: michael@0: return WORD4(si0B, si0E, si09, si0D); michael@0: case 2: michael@0: return WORD4(si0D, si0B, si0E, si09); michael@0: case 3: michael@0: return WORD4(si09, si0D, si0B, si0E); michael@0: } michael@0: return -1; michael@0: } michael@0: #define T0(i) G_T0(i) michael@0: #define T1(i) G_T1(i) michael@0: #define T2(i) G_T2(i) michael@0: #define T3(i) G_T3(i) michael@0: #define TInv0(i) gen_TInvXi(0, i) michael@0: #define TInv1(i) gen_TInvXi(1, i) michael@0: #define TInv2(i) gen_TInvXi(2, i) michael@0: #define TInv3(i) gen_TInvXi(3, i) michael@0: #define IMXC0(b) G_IMXC0(b) michael@0: #define IMXC1(b) G_IMXC1(b) michael@0: #define IMXC2(b) G_IMXC2(b) michael@0: #define IMXC3(b) G_IMXC3(b) michael@0: #elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) michael@0: /* generate values for the tables with macros */ michael@0: #define T0(i) G_T0(i) michael@0: #define T1(i) G_T1(i) michael@0: #define T2(i) G_T2(i) michael@0: #define T3(i) G_T3(i) michael@0: #define TInv0(i) G_TInv0(i) michael@0: #define TInv1(i) G_TInv1(i) michael@0: #define TInv2(i) 
G_TInv2(i) michael@0: #define TInv3(i) G_TInv3(i) michael@0: #define IMXC0(b) G_IMXC0(b) michael@0: #define IMXC1(b) G_IMXC1(b) michael@0: #define IMXC2(b) G_IMXC2(b) michael@0: #define IMXC3(b) G_IMXC3(b) michael@0: #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ michael@0: /* Generate T and T**-1 table values and store, then index */ michael@0: /* The inverse mix column tables are still generated */ michael@0: #define T0(i) rijndaelTables->T0[i] michael@0: #define T1(i) rijndaelTables->T1[i] michael@0: #define T2(i) rijndaelTables->T2[i] michael@0: #define T3(i) rijndaelTables->T3[i] michael@0: #define TInv0(i) rijndaelTables->TInv0[i] michael@0: #define TInv1(i) rijndaelTables->TInv1[i] michael@0: #define TInv2(i) rijndaelTables->TInv2[i] michael@0: #define TInv3(i) rijndaelTables->TInv3[i] michael@0: #define IMXC0(b) G_IMXC0(b) michael@0: #define IMXC1(b) G_IMXC1(b) michael@0: #define IMXC2(b) G_IMXC2(b) michael@0: #define IMXC3(b) G_IMXC3(b) michael@0: #endif /* choose T-table indexing method */ michael@0: michael@0: #endif /* not RIJNDAEL_INCLUDE_TABLES */ michael@0: michael@0: #if defined(RIJNDAEL_GENERATE_TABLES) || \ michael@0: defined(RIJNDAEL_GENERATE_TABLES_MACRO) michael@0: michael@0: /* Code to generate and store the tables */ michael@0: michael@0: struct rijndael_tables_str { michael@0: PRUint32 T0[256]; michael@0: PRUint32 T1[256]; michael@0: PRUint32 T2[256]; michael@0: PRUint32 T3[256]; michael@0: PRUint32 TInv0[256]; michael@0: PRUint32 TInv1[256]; michael@0: PRUint32 TInv2[256]; michael@0: PRUint32 TInv3[256]; michael@0: }; michael@0: michael@0: static struct rijndael_tables_str *rijndaelTables = NULL; michael@0: static PRCallOnceType coRTInit = { 0, 0, 0 }; michael@0: static PRStatus michael@0: init_rijndael_tables(void) michael@0: { michael@0: PRUint32 i; michael@0: PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; michael@0: struct rijndael_tables_str *rts; michael@0: rts = (struct rijndael_tables_str *) 
michael@0: PORT_Alloc(sizeof(struct rijndael_tables_str)); michael@0: if (!rts) return PR_FAILURE; michael@0: for (i=0; i<256; i++) { michael@0: /* The forward values */ michael@0: si01 = SBOX(i); michael@0: si02 = XTIME(si01); michael@0: si03 = si02 ^ si01; michael@0: rts->T0[i] = WORD4(si02, si01, si01, si03); michael@0: rts->T1[i] = WORD4(si03, si02, si01, si01); michael@0: rts->T2[i] = WORD4(si01, si03, si02, si01); michael@0: rts->T3[i] = WORD4(si01, si01, si03, si02); michael@0: /* The inverse values */ michael@0: si01 = SINV(i); michael@0: si02 = XTIME(si01); michael@0: si04 = XTIME(si02); michael@0: si08 = XTIME(si04); michael@0: si03 = si02 ^ si01; michael@0: si09 = si08 ^ si01; michael@0: si0B = si08 ^ si03; michael@0: si0D = si09 ^ si04; michael@0: si0E = si08 ^ si04 ^ si02; michael@0: rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); michael@0: rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); michael@0: rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); michael@0: rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); michael@0: } michael@0: /* wait until all the values are in to set */ michael@0: rijndaelTables = rts; michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: #endif /* code to generate tables */ michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Stuff related to the Rijndael key schedule michael@0: * michael@0: *************************************************************************/ michael@0: michael@0: #define SUBBYTE(w) \ michael@0: ((SBOX((w >> 24) & 0xff) << 24) | \ michael@0: (SBOX((w >> 16) & 0xff) << 16) | \ michael@0: (SBOX((w >> 8) & 0xff) << 8) | \ michael@0: (SBOX((w ) & 0xff) )) michael@0: michael@0: #ifdef IS_LITTLE_ENDIAN michael@0: #define ROTBYTE(b) \ michael@0: ((b >> 8) | (b << 24)) michael@0: #else michael@0: #define ROTBYTE(b) \ michael@0: ((b << 8) | (b >> 24)) michael@0: #endif michael@0: michael@0: /* rijndael_key_expansion7 michael@0: * michael@0: 
* Generate the expanded key from the key input by the user. michael@0: * XXX michael@0: * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte michael@0: * transformation is done periodically. The period is every 4 bytes, and michael@0: * since 7%4 != 0 this happens at different times for each key word (unlike michael@0: * Nk == 8 where it happens twice in every key word, in the same positions). michael@0: * For now, I'm implementing this case "dumbly", w/o any unrolling. michael@0: */ michael@0: static SECStatus michael@0: rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) michael@0: { michael@0: unsigned int i; michael@0: PRUint32 *W; michael@0: PRUint32 *pW; michael@0: PRUint32 tmp; michael@0: W = cx->expandedKey; michael@0: /* 1. the first Nk words contain the cipher key */ michael@0: memcpy(W, key, Nk * 4); michael@0: i = Nk; michael@0: /* 2. loop until full expanded key is obtained */ michael@0: pW = W + i - 1; michael@0: for (; i < cx->Nb * (cx->Nr + 1); ++i) { michael@0: tmp = *pW++; michael@0: if (i % Nk == 0) michael@0: tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; michael@0: else if (i % Nk == 4) michael@0: tmp = SUBBYTE(tmp); michael@0: *pW = W[i - Nk] ^ tmp; michael@0: } michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /* rijndael_key_expansion michael@0: * michael@0: * Generate the expanded key from the key input by the user. 
michael@0: */ michael@0: static SECStatus michael@0: rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) michael@0: { michael@0: unsigned int i; michael@0: PRUint32 *W; michael@0: PRUint32 *pW; michael@0: PRUint32 tmp; michael@0: unsigned int round_key_words = cx->Nb * (cx->Nr + 1); michael@0: if (Nk == 7) michael@0: return rijndael_key_expansion7(cx, key, Nk); michael@0: W = cx->expandedKey; michael@0: /* The first Nk words contain the input cipher key */ michael@0: memcpy(W, key, Nk * 4); michael@0: i = Nk; michael@0: pW = W + i - 1; michael@0: /* Loop over all sets of Nk words, except the last */ michael@0: while (i < round_key_words - Nk) { michael@0: tmp = *pW++; michael@0: tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; michael@0: *pW = W[i++ - Nk] ^ tmp; michael@0: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; michael@0: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; michael@0: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; michael@0: if (Nk == 4) michael@0: continue; michael@0: switch (Nk) { michael@0: case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp; michael@0: case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; michael@0: case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; michael@0: case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; michael@0: } michael@0: } michael@0: /* Generate the last word */ michael@0: tmp = *pW++; michael@0: tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; michael@0: *pW = W[i++ - Nk] ^ tmp; michael@0: /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, michael@0: * since the above loop generated all but the last Nk key words, there michael@0: * is no more need for the SubByte transformation. michael@0: */ michael@0: if (Nk < 8) { michael@0: for (; i < round_key_words; ++i) { michael@0: tmp = *pW++; michael@0: *pW = W[i - Nk] ^ tmp; michael@0: } michael@0: } else { michael@0: /* except in the case when Nk == 8. Then one more SubByte may have michael@0: * to be performed, at i % Nk == 4. 
michael@0: */ michael@0: for (; i < round_key_words; ++i) { michael@0: tmp = *pW++; michael@0: if (i % Nk == 4) michael@0: tmp = SUBBYTE(tmp); michael@0: *pW = W[i - Nk] ^ tmp; michael@0: } michael@0: } michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /* rijndael_invkey_expansion michael@0: * michael@0: * Generate the expanded key for the inverse cipher from the key input by michael@0: * the user. michael@0: */ michael@0: static SECStatus michael@0: rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) michael@0: { michael@0: unsigned int r; michael@0: PRUint32 *roundkeyw; michael@0: PRUint8 *b; michael@0: int Nb = cx->Nb; michael@0: /* begins like usual key expansion ... */ michael@0: if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) michael@0: return SECFailure; michael@0: /* ... but has the additional step of InvMixColumn, michael@0: * excepting the first and last round keys. michael@0: */ michael@0: roundkeyw = cx->expandedKey + cx->Nb; michael@0: for (r=1; rNr; ++r) { michael@0: /* each key word, roundkeyw, represents a column in the key michael@0: * matrix. Each column is multiplied by the InvMixColumn matrix. 
michael@0: * [ 0E 0B 0D 09 ] [ b0 ] michael@0: * [ 09 0E 0B 0D ] * [ b1 ] michael@0: * [ 0D 09 0E 0B ] [ b2 ] michael@0: * [ 0B 0D 09 0E ] [ b3 ] michael@0: */ michael@0: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: if (Nb <= 4) michael@0: continue; michael@0: switch (Nb) { michael@0: case 8: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ michael@0: IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: case 7: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ michael@0: IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: case 6: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ michael@0: IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: case 5: b = (PRUint8 *)roundkeyw; michael@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ michael@0: IMXC2(b[2]) ^ IMXC3(b[3]); michael@0: } michael@0: } michael@0: return SECSuccess; michael@0: } michael@0: /************************************************************************** michael@0: * michael@0: * Stuff related to Rijndael encryption/decryption, optimized for michael@0: * a 128-bit blocksize. 
michael@0: * michael@0: *************************************************************************/ michael@0: michael@0: #ifdef IS_LITTLE_ENDIAN michael@0: #define BYTE0WORD(w) ((w) & 0x000000ff) michael@0: #define BYTE1WORD(w) ((w) & 0x0000ff00) michael@0: #define BYTE2WORD(w) ((w) & 0x00ff0000) michael@0: #define BYTE3WORD(w) ((w) & 0xff000000) michael@0: #else michael@0: #define BYTE0WORD(w) ((w) & 0xff000000) michael@0: #define BYTE1WORD(w) ((w) & 0x00ff0000) michael@0: #define BYTE2WORD(w) ((w) & 0x0000ff00) michael@0: #define BYTE3WORD(w) ((w) & 0x000000ff) michael@0: #endif michael@0: michael@0: typedef union { michael@0: PRUint32 w[4]; michael@0: PRUint8 b[16]; michael@0: } rijndael_state; michael@0: michael@0: #define COLUMN_0(state) state.w[0] michael@0: #define COLUMN_1(state) state.w[1] michael@0: #define COLUMN_2(state) state.w[2] michael@0: #define COLUMN_3(state) state.w[3] michael@0: michael@0: #define STATE_BYTE(i) state.b[i] michael@0: michael@0: static SECStatus michael@0: rijndael_encryptBlock128(AESContext *cx, michael@0: unsigned char *output, michael@0: const unsigned char *input) michael@0: { michael@0: unsigned int r; michael@0: PRUint32 *roundkeyw; michael@0: rijndael_state state; michael@0: PRUint32 C0, C1, C2, C3; michael@0: #if defined(NSS_X86_OR_X64) michael@0: #define pIn input michael@0: #define pOut output michael@0: #else michael@0: unsigned char *pIn, *pOut; michael@0: PRUint32 inBuf[4], outBuf[4]; michael@0: michael@0: if ((ptrdiff_t)input & 0x3) { michael@0: memcpy(inBuf, input, sizeof inBuf); michael@0: pIn = (unsigned char *)inBuf; michael@0: } else { michael@0: pIn = (unsigned char *)input; michael@0: } michael@0: if ((ptrdiff_t)output & 0x3) { michael@0: pOut = (unsigned char *)outBuf; michael@0: } else { michael@0: pOut = (unsigned char *)output; michael@0: } michael@0: #endif michael@0: roundkeyw = cx->expandedKey; michael@0: /* Step 1: Add Round Key 0 to initial state */ michael@0: COLUMN_0(state) = *((PRUint32 *)(pIn )) 
^ *roundkeyw++; michael@0: COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++; michael@0: COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++; michael@0: COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; michael@0: /* Step 2: Loop over rounds [1..NR-1] */ michael@0: for (r=1; rNr; ++r) { michael@0: /* Do ShiftRow, ByteSub, and MixColumn all at once */ michael@0: C0 = T0(STATE_BYTE(0)) ^ michael@0: T1(STATE_BYTE(5)) ^ michael@0: T2(STATE_BYTE(10)) ^ michael@0: T3(STATE_BYTE(15)); michael@0: C1 = T0(STATE_BYTE(4)) ^ michael@0: T1(STATE_BYTE(9)) ^ michael@0: T2(STATE_BYTE(14)) ^ michael@0: T3(STATE_BYTE(3)); michael@0: C2 = T0(STATE_BYTE(8)) ^ michael@0: T1(STATE_BYTE(13)) ^ michael@0: T2(STATE_BYTE(2)) ^ michael@0: T3(STATE_BYTE(7)); michael@0: C3 = T0(STATE_BYTE(12)) ^ michael@0: T1(STATE_BYTE(1)) ^ michael@0: T2(STATE_BYTE(6)) ^ michael@0: T3(STATE_BYTE(11)); michael@0: /* Round key addition */ michael@0: COLUMN_0(state) = C0 ^ *roundkeyw++; michael@0: COLUMN_1(state) = C1 ^ *roundkeyw++; michael@0: COLUMN_2(state) = C2 ^ *roundkeyw++; michael@0: COLUMN_3(state) = C3 ^ *roundkeyw++; michael@0: } michael@0: /* Step 3: Do the last round */ michael@0: /* Final round does not employ MixColumn */ michael@0: C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | michael@0: (BYTE1WORD(T3(STATE_BYTE(5)))) | michael@0: (BYTE2WORD(T0(STATE_BYTE(10)))) | michael@0: (BYTE3WORD(T1(STATE_BYTE(15))))) ^ michael@0: *roundkeyw++; michael@0: C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | michael@0: (BYTE1WORD(T3(STATE_BYTE(9)))) | michael@0: (BYTE2WORD(T0(STATE_BYTE(14)))) | michael@0: (BYTE3WORD(T1(STATE_BYTE(3))))) ^ michael@0: *roundkeyw++; michael@0: C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | michael@0: (BYTE1WORD(T3(STATE_BYTE(13)))) | michael@0: (BYTE2WORD(T0(STATE_BYTE(2)))) | michael@0: (BYTE3WORD(T1(STATE_BYTE(7))))) ^ michael@0: *roundkeyw++; michael@0: C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | michael@0: (BYTE1WORD(T3(STATE_BYTE(1)))) | michael@0: 
(BYTE2WORD(T0(STATE_BYTE(6)))) | michael@0: (BYTE3WORD(T1(STATE_BYTE(11))))) ^ michael@0: *roundkeyw++; michael@0: *((PRUint32 *) pOut ) = C0; michael@0: *((PRUint32 *)(pOut + 4)) = C1; michael@0: *((PRUint32 *)(pOut + 8)) = C2; michael@0: *((PRUint32 *)(pOut + 12)) = C3; michael@0: #if defined(NSS_X86_OR_X64) michael@0: #undef pIn michael@0: #undef pOut michael@0: #else michael@0: if ((ptrdiff_t)output & 0x3) { michael@0: memcpy(output, outBuf, sizeof outBuf); michael@0: } michael@0: #endif michael@0: return SECSuccess; michael@0: } michael@0: michael@0: static SECStatus michael@0: rijndael_decryptBlock128(AESContext *cx, michael@0: unsigned char *output, michael@0: const unsigned char *input) michael@0: { michael@0: int r; michael@0: PRUint32 *roundkeyw; michael@0: rijndael_state state; michael@0: PRUint32 C0, C1, C2, C3; michael@0: #if defined(NSS_X86_OR_X64) michael@0: #define pIn input michael@0: #define pOut output michael@0: #else michael@0: unsigned char *pIn, *pOut; michael@0: PRUint32 inBuf[4], outBuf[4]; michael@0: michael@0: if ((ptrdiff_t)input & 0x3) { michael@0: memcpy(inBuf, input, sizeof inBuf); michael@0: pIn = (unsigned char *)inBuf; michael@0: } else { michael@0: pIn = (unsigned char *)input; michael@0: } michael@0: if ((ptrdiff_t)output & 0x3) { michael@0: pOut = (unsigned char *)outBuf; michael@0: } else { michael@0: pOut = (unsigned char *)output; michael@0: } michael@0: #endif michael@0: roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; michael@0: /* reverse the final key addition */ michael@0: COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; michael@0: COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; michael@0: COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; michael@0: COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw--; michael@0: /* Loop over rounds in reverse [NR..1] */ michael@0: for (r=cx->Nr; r>1; --r) { michael@0: /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ michael@0: C0 = 
TInv0(STATE_BYTE(0)) ^ michael@0: TInv1(STATE_BYTE(13)) ^ michael@0: TInv2(STATE_BYTE(10)) ^ michael@0: TInv3(STATE_BYTE(7)); michael@0: C1 = TInv0(STATE_BYTE(4)) ^ michael@0: TInv1(STATE_BYTE(1)) ^ michael@0: TInv2(STATE_BYTE(14)) ^ michael@0: TInv3(STATE_BYTE(11)); michael@0: C2 = TInv0(STATE_BYTE(8)) ^ michael@0: TInv1(STATE_BYTE(5)) ^ michael@0: TInv2(STATE_BYTE(2)) ^ michael@0: TInv3(STATE_BYTE(15)); michael@0: C3 = TInv0(STATE_BYTE(12)) ^ michael@0: TInv1(STATE_BYTE(9)) ^ michael@0: TInv2(STATE_BYTE(6)) ^ michael@0: TInv3(STATE_BYTE(3)); michael@0: /* Invert the key addition step */ michael@0: COLUMN_3(state) = C3 ^ *roundkeyw--; michael@0: COLUMN_2(state) = C2 ^ *roundkeyw--; michael@0: COLUMN_1(state) = C1 ^ *roundkeyw--; michael@0: COLUMN_0(state) = C0 ^ *roundkeyw--; michael@0: } michael@0: /* inverse sub */ michael@0: pOut[ 0] = SINV(STATE_BYTE( 0)); michael@0: pOut[ 1] = SINV(STATE_BYTE(13)); michael@0: pOut[ 2] = SINV(STATE_BYTE(10)); michael@0: pOut[ 3] = SINV(STATE_BYTE( 7)); michael@0: pOut[ 4] = SINV(STATE_BYTE( 4)); michael@0: pOut[ 5] = SINV(STATE_BYTE( 1)); michael@0: pOut[ 6] = SINV(STATE_BYTE(14)); michael@0: pOut[ 7] = SINV(STATE_BYTE(11)); michael@0: pOut[ 8] = SINV(STATE_BYTE( 8)); michael@0: pOut[ 9] = SINV(STATE_BYTE( 5)); michael@0: pOut[10] = SINV(STATE_BYTE( 2)); michael@0: pOut[11] = SINV(STATE_BYTE(15)); michael@0: pOut[12] = SINV(STATE_BYTE(12)); michael@0: pOut[13] = SINV(STATE_BYTE( 9)); michael@0: pOut[14] = SINV(STATE_BYTE( 6)); michael@0: pOut[15] = SINV(STATE_BYTE( 3)); michael@0: /* final key addition */ michael@0: *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; michael@0: *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; michael@0: *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; michael@0: *((PRUint32 *) pOut ) ^= *roundkeyw--; michael@0: #if defined(NSS_X86_OR_X64) michael@0: #undef pIn michael@0: #undef pOut michael@0: #else michael@0: if ((ptrdiff_t)output & 0x3) { michael@0: memcpy(output, outBuf, sizeof outBuf); michael@0: } michael@0: 
#endif michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Stuff related to general Rijndael encryption/decryption, for blocksizes michael@0: * greater than 128 bits. michael@0: * michael@0: * XXX This code is currently untested! So far, AES specs have only been michael@0: * released for 128 bit blocksizes. This will be tested, but for now michael@0: * only the code above has been tested using known values. michael@0: * michael@0: *************************************************************************/ michael@0: michael@0: #define COLUMN(array, j) *((PRUint32 *)(array + j)) michael@0: michael@0: /* rijndael_encryptBlock: block function selected by the ECB/CBC helpers when blocksize differs from RIJNDAEL_MIN_BLOCKSIZE. It returns SECFailure unconditionally; the statements after the return sit behind #ifdef rijndael_large_blocks_fixed and are unreachable even when that macro is defined. */ SECStatus michael@0: rijndael_encryptBlock(AESContext *cx, michael@0: unsigned char *output, michael@0: const unsigned char *input) michael@0: { michael@0: return SECFailure; michael@0: #ifdef rijndael_large_blocks_fixed michael@0: unsigned int j, r, Nb; michael@0: unsigned int c2=0, c3=0; michael@0: PRUint32 *roundkeyw; michael@0: PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; michael@0: Nb = cx->Nb; michael@0: roundkeyw = cx->expandedKey; michael@0: /* Step 1: Add Round Key 0 to initial state */ michael@0: for (j=0; j<4*Nb; j+=4) { michael@0: COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; michael@0: } michael@0: /* Step 2: Loop over rounds [1..NR-1] */ michael@0: /* NOTE(review): the loop headers that follow (rNr, jNb) look truncated in this copy; the rest of this function and the opening of the matching decrypt routine are not visible here. Confirm against the upstream file before editing. */ for (r=1; rNr; ++r) { michael@0: for (j=0; jNb; michael@0: roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; michael@0: /* reverse key addition */ michael@0: /* NOTE(review): if j is unsigned here, as it is declared in rijndael_encryptBlock, j>=0 can never be false; the first index 4*Nb is also one column past the j<4*Nb range used by the forward loops. Confirm before enabling this code. */ for (j=4*Nb; j>=0; j-=4) { michael@0: COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; michael@0: } michael@0: /* Loop over rounds in reverse [NR..1] */ michael@0: for (r=cx->Nr; r>1; --r) { michael@0: /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ michael@0: for (j=0; j=0; j-=4) { michael@0: COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; michael@0: } michael@0: } michael@0: /* inverse sub */ michael@0: for (j=0; 
j<4*Nb; ++j) { michael@0: output[j] = SINV(clone[j]); michael@0: } michael@0: /* final key addition */ michael@0: for (j=4*Nb; j>=0; j-=4) { michael@0: COLUMN(output, j) ^= *roundkeyw--; michael@0: } michael@0: return SECSuccess; michael@0: #endif michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Rijndael modes of operation (ECB and CBC) michael@0: * michael@0: *************************************************************************/ michael@0: michael@0: /* rijndael_encryptECB: encrypts input to output one block at a time. Uses rijndael_encryptBlock128 when blocksize equals RIJNDAEL_MIN_BLOCKSIZE and rijndael_encryptBlock otherwise. Returns the first non-SECSuccess status from the block function, else SECSuccess. outputLen and maxOutputLen are not read. inputLen is unsigned and drops by blocksize per pass, so the loop relies on inputLen being a multiple of blocksize. */ static SECStatus michael@0: rijndael_encryptECB(AESContext *cx, unsigned char *output, michael@0: unsigned int *outputLen, unsigned int maxOutputLen, michael@0: const unsigned char *input, unsigned int inputLen, michael@0: unsigned int blocksize) michael@0: { michael@0: SECStatus rv; michael@0: AESBlockFunc *encryptor; michael@0: michael@0: encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) michael@0: ? &rijndael_encryptBlock128 michael@0: : &rijndael_encryptBlock; michael@0: while (inputLen > 0) { michael@0: rv = (*encryptor)(cx, output, input); michael@0: if (rv != SECSuccess) michael@0: return rv; michael@0: output += blocksize; michael@0: input += blocksize; michael@0: inputLen -= blocksize; michael@0: } michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /* rijndael_encryptCBC: CBC-mode encryption. Returns SECSuccess at once for empty input; otherwise chaining starts from cx->iv. */ static SECStatus michael@0: rijndael_encryptCBC(AESContext *cx, unsigned char *output, michael@0: unsigned int *outputLen, unsigned int maxOutputLen, michael@0: const unsigned char *input, unsigned int inputLen, michael@0: unsigned int blocksize) michael@0: { michael@0: unsigned int j; michael@0: SECStatus rv; michael@0: AESBlockFunc *encryptor; michael@0: unsigned char *lastblock; michael@0: unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; michael@0: michael@0: if (!inputLen) michael@0: return SECSuccess; michael@0: lastblock = cx->iv; michael@0: encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) michael@0: ? 
&rijndael_encryptBlock128 michael@0: : &rijndael_encryptBlock; michael@0: while (inputLen > 0) { michael@0: /* XOR with the last block (IV if first block) */ michael@0: /* NOTE(review): the statement below (jiv) looks truncated in this copy; the body of this loop is not fully visible. Confirm against the upstream file before editing. */ for (j=0; jiv, lastblock, blocksize); michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /* rijndael_decryptECB: decrypts input to output one block at a time. Uses rijndael_decryptBlock128 when blocksize equals RIJNDAEL_MIN_BLOCKSIZE and rijndael_decryptBlock otherwise. Returns the first non-SECSuccess status from the block function, else SECSuccess. outputLen and maxOutputLen are not read. inputLen is unsigned and drops by blocksize per pass, so the loop relies on inputLen being a multiple of blocksize. */ static SECStatus michael@0: rijndael_decryptECB(AESContext *cx, unsigned char *output, michael@0: unsigned int *outputLen, unsigned int maxOutputLen, michael@0: const unsigned char *input, unsigned int inputLen, michael@0: unsigned int blocksize) michael@0: { michael@0: SECStatus rv; michael@0: AESBlockFunc *decryptor; michael@0: michael@0: decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) michael@0: ? &rijndael_decryptBlock128 michael@0: : &rijndael_decryptBlock; michael@0: while (inputLen > 0) { michael@0: rv = (*decryptor)(cx, output, input); michael@0: if (rv != SECSuccess) michael@0: return rv; michael@0: output += blocksize; michael@0: input += blocksize; michael@0: inputLen -= blocksize; michael@0: } michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /* rijndael_decryptCBC: CBC-mode decryption. Returns SECSuccess at once for empty input. The final ciphertext block is saved in newIV before any output is written and becomes cx->iv at the end; in and out start at the last block of their buffers. The PORT_Assert states the accepted relationship between the input and output pointers. */ static SECStatus michael@0: rijndael_decryptCBC(AESContext *cx, unsigned char *output, michael@0: unsigned int *outputLen, unsigned int maxOutputLen, michael@0: const unsigned char *input, unsigned int inputLen, michael@0: unsigned int blocksize) michael@0: { michael@0: SECStatus rv; michael@0: AESBlockFunc *decryptor; michael@0: const unsigned char *in; michael@0: unsigned char *out; michael@0: unsigned int j; michael@0: unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; michael@0: michael@0: michael@0: if (!inputLen) michael@0: return SECSuccess; michael@0: PORT_Assert(output - input >= 0 || input - output >= (int)inputLen ); michael@0: decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) michael@0: ? 
&rijndael_decryptBlock128 michael@0: : &rijndael_decryptBlock; michael@0: in = input + (inputLen - blocksize); michael@0: memcpy(newIV, in, blocksize); michael@0: out = output + (inputLen - blocksize); michael@0: while (inputLen > blocksize) { michael@0: rv = (*decryptor)(cx, out, in); michael@0: if (rv != SECSuccess) michael@0: return rv; michael@0: /* NOTE(review): the loop below (jiv) looks truncated in this copy; confirm against the upstream file before editing. */ for (j=0; jiv[j]; michael@0: } michael@0: memcpy(cx->iv, newIV, blocksize); michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /************************************************************************ michael@0: * michael@0: * BLAPI Interface functions michael@0: * michael@0: * The following functions implement the encryption routines defined in michael@0: * BLAPI for the AES cipher, Rijndael. michael@0: * michael@0: ***********************************************************************/ michael@0: michael@0: /* AES_AllocateContext: returns a new AESContext obtained from PORT_ZNew. No key, IV or mode is set here; AES_CreateContext pairs this call with AES_InitContext. */ AESContext * AES_AllocateContext(void) michael@0: { michael@0: return PORT_ZNew(AESContext); michael@0: } michael@0: michael@0: michael@0: #ifdef INTEL_GCM michael@0: /* michael@0: * Adapted from the example code in "How to detect New Instruction support in michael@0: * the 4th generation Intel Core processor family" by Max Locktyukhin. michael@0: * michael@0: * XGETBV: michael@0: * Reads an extended control register (XCR) specified by ECX into EDX:EAX. michael@0: */ michael@0: static PRBool michael@0: check_xcr0_ymm() michael@0: { michael@0: PRUint32 xcr0; michael@0: #if defined(_MSC_VER) michael@0: #if defined(_M_IX86) michael@0: __asm { michael@0: mov ecx, 0 michael@0: xgetbv michael@0: mov xcr0, eax michael@0: } michael@0: #else michael@0: xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */ michael@0: #endif michael@0: #else michael@0: __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); michael@0: #endif michael@0: /* Check if xmm and ymm state are enabled in XCR0. 
*/ michael@0: return (xcr0 & 6) == 6; michael@0: } michael@0: #endif michael@0: michael@0: /* michael@0: ** Initialize a new AES context suitable for AES encryption/decryption in michael@0: ** the ECB or CBC mode. michael@0: ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC michael@0: */ michael@0: static SECStatus michael@0: aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, michael@0: const unsigned char *iv, int mode, unsigned int encrypt, michael@0: unsigned int blocksize) michael@0: { michael@0: unsigned int Nk; michael@0: /* According to Rijndael AES Proposal, section 12.1, block and key michael@0: * lengths between 128 and 256 bits are supported, as long as the michael@0: * length in bytes is divisible by 4. michael@0: */ michael@0: if (key == NULL || michael@0: keysize < RIJNDAEL_MIN_BLOCKSIZE || michael@0: keysize > RIJNDAEL_MAX_BLOCKSIZE || michael@0: keysize % 4 != 0 || michael@0: blocksize < RIJNDAEL_MIN_BLOCKSIZE || michael@0: blocksize > RIJNDAEL_MAX_BLOCKSIZE || michael@0: blocksize % 4 != 0) { michael@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); michael@0: return SECFailure; michael@0: } michael@0: if (mode != NSS_AES && mode != NSS_AES_CBC) { michael@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); michael@0: return SECFailure; michael@0: } michael@0: if (mode == NSS_AES_CBC && iv == NULL) { michael@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); michael@0: return SECFailure; michael@0: } michael@0: if (!cx) { michael@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); michael@0: return SECFailure; michael@0: } michael@0: #ifdef USE_HW_AES michael@0: if (has_intel_aes == 0) { michael@0: unsigned long eax, ebx, ecx, edx; michael@0: char *disable_hw_aes = getenv("NSS_DISABLE_HW_AES"); michael@0: michael@0: if (disable_hw_aes == NULL) { michael@0: freebl_cpuid(1, &eax, &ebx, &ecx, &edx); michael@0: has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; michael@0: #ifdef INTEL_GCM michael@0: has_intel_clmul = (ecx & (1 << 1)) != 0 ? 
1 : -1; michael@0: if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && michael@0: check_xcr0_ymm()) { michael@0: has_intel_avx = 1; michael@0: } else { michael@0: has_intel_avx = -1; michael@0: } michael@0: #endif michael@0: } else { michael@0: has_intel_aes = -1; michael@0: #ifdef INTEL_GCM michael@0: has_intel_avx = -1; michael@0: has_intel_clmul = -1; michael@0: #endif michael@0: } michael@0: } michael@0: use_hw_aes = (PRBool) michael@0: (has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); michael@0: #ifdef INTEL_GCM michael@0: use_hw_gcm = (PRBool) michael@0: (use_hw_aes && has_intel_avx>0 && has_intel_clmul>0); michael@0: #endif michael@0: #endif /* USE_HW_AES */ michael@0: /* Nb = (block size in bits) / 32 */ michael@0: cx->Nb = blocksize / 4; michael@0: /* Nk = (key size in bits) / 32 */ michael@0: Nk = keysize / 4; michael@0: /* Obtain number of rounds from "table" */ michael@0: cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); michael@0: /* copy in the iv, if neccessary */ michael@0: if (mode == NSS_AES_CBC) { michael@0: memcpy(cx->iv, iv, blocksize); michael@0: #ifdef USE_HW_AES michael@0: if (use_hw_aes) { michael@0: cx->worker = (freeblCipherFunc) michael@0: intel_aes_cbc_worker(encrypt, keysize); michael@0: } else michael@0: #endif michael@0: { michael@0: cx->worker = (freeblCipherFunc) (encrypt michael@0: ? &rijndael_encryptCBC : &rijndael_decryptCBC); michael@0: } michael@0: } else { michael@0: #ifdef USE_HW_AES michael@0: if (use_hw_aes) { michael@0: cx->worker = (freeblCipherFunc) michael@0: intel_aes_ecb_worker(encrypt, keysize); michael@0: } else michael@0: #endif michael@0: { michael@0: cx->worker = (freeblCipherFunc) (encrypt michael@0: ? 
&rijndael_encryptECB : &rijndael_decryptECB); michael@0: } michael@0: } michael@0: PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); michael@0: if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { michael@0: PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); michael@0: goto cleanup; michael@0: } michael@0: #ifdef USE_HW_AES michael@0: if (use_hw_aes) { michael@0: intel_aes_init(encrypt, keysize); michael@0: } else michael@0: #endif michael@0: { michael@0: michael@0: #if defined(RIJNDAEL_GENERATE_TABLES) || \ michael@0: defined(RIJNDAEL_GENERATE_TABLES_MACRO) michael@0: if (rijndaelTables == NULL) { michael@0: if (PR_CallOnce(&coRTInit, init_rijndael_tables) michael@0: != PR_SUCCESS) { michael@0: return SecFailure; michael@0: } michael@0: } michael@0: #endif michael@0: /* Generate expanded key */ michael@0: if (encrypt) { michael@0: if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) michael@0: goto cleanup; michael@0: } else { michael@0: if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) michael@0: goto cleanup; michael@0: } michael@0: } michael@0: cx->worker_cx = cx; michael@0: cx->destroy = NULL; michael@0: cx->isBlock = PR_TRUE; michael@0: return SECSuccess; michael@0: cleanup: michael@0: return SECFailure; michael@0: } michael@0: michael@0: SECStatus michael@0: AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, michael@0: const unsigned char *iv, int mode, unsigned int encrypt, michael@0: unsigned int blocksize) michael@0: { michael@0: int basemode = mode; michael@0: PRBool baseencrypt = encrypt; michael@0: SECStatus rv; michael@0: michael@0: switch (mode) { michael@0: case NSS_AES_CTS: michael@0: basemode = NSS_AES_CBC; michael@0: break; michael@0: case NSS_AES_GCM: michael@0: case NSS_AES_CTR: michael@0: basemode = NSS_AES; michael@0: baseencrypt = PR_TRUE; michael@0: break; michael@0: } michael@0: /* make sure enough is initializes so we can safely call Destroy */ michael@0: cx->worker_cx = NULL; 
michael@0: cx->destroy = NULL; michael@0: rv = aes_InitContext(cx, key, keysize, iv, basemode, michael@0: baseencrypt, blocksize); michael@0: if (rv != SECSuccess) { michael@0: AES_DestroyContext(cx, PR_FALSE); michael@0: return rv; michael@0: } michael@0: michael@0: /* finally, set up any mode specific contexts */ michael@0: switch (mode) { michael@0: case NSS_AES_CTS: michael@0: cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); michael@0: cx->worker = (freeblCipherFunc) michael@0: (encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); michael@0: cx->destroy = (freeblDestroyFunc) CTS_DestroyContext; michael@0: cx->isBlock = PR_FALSE; michael@0: break; michael@0: case NSS_AES_GCM: michael@0: #ifdef INTEL_GCM michael@0: if(use_hw_gcm) { michael@0: cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); michael@0: cx->worker = (freeblCipherFunc) michael@0: (encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); michael@0: cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext; michael@0: cx->isBlock = PR_FALSE; michael@0: } else michael@0: #endif michael@0: { michael@0: cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); michael@0: cx->worker = (freeblCipherFunc) michael@0: (encrypt ? 
GCM_EncryptUpdate : GCM_DecryptUpdate); michael@0: cx->destroy = (freeblDestroyFunc) GCM_DestroyContext; michael@0: cx->isBlock = PR_FALSE; michael@0: } michael@0: break; michael@0: case NSS_AES_CTR: michael@0: cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); michael@0: #if defined(USE_HW_AES) && defined(_MSC_VER) michael@0: if (use_hw_aes) { michael@0: cx->worker = (freeblCipherFunc) CTR_Update_HW_AES; michael@0: } else michael@0: #endif michael@0: { michael@0: cx->worker = (freeblCipherFunc) CTR_Update; michael@0: } michael@0: cx->destroy = (freeblDestroyFunc) CTR_DestroyContext; michael@0: cx->isBlock = PR_FALSE; michael@0: break; michael@0: default: michael@0: /* everything has already been set up by aes_InitContext, just michael@0: * return */ michael@0: return SECSuccess; michael@0: } michael@0: /* check to see if we succeeded in getting the worker context */ michael@0: if (cx->worker_cx == NULL) { michael@0: /* no, just destroy the existing context */ michael@0: cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ michael@0: /* below that this isn't necessary */ michael@0: AES_DestroyContext(cx, PR_FALSE); michael@0: return SECFailure; michael@0: } michael@0: return SECSuccess; michael@0: } michael@0: michael@0: /* AES_CreateContext michael@0: * michael@0: * create a new context for Rijndael operations michael@0: */ michael@0: AESContext * michael@0: AES_CreateContext(const unsigned char *key, const unsigned char *iv, michael@0: int mode, int encrypt, michael@0: unsigned int keysize, unsigned int blocksize) michael@0: { michael@0: AESContext *cx = AES_AllocateContext(); michael@0: if (cx) { michael@0: SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, michael@0: blocksize); michael@0: if (rv != SECSuccess) { michael@0: AES_DestroyContext(cx, PR_TRUE); michael@0: cx = NULL; michael@0: } michael@0: } michael@0: return cx; michael@0: } michael@0: michael@0: /* michael@0: * AES_DestroyContext michael@0: * 
michael@0: * Zero an AES cipher context. If freeit is true, also free the pointer michael@0: * to the context. michael@0: */ michael@0: void michael@0: AES_DestroyContext(AESContext *cx, PRBool freeit) michael@0: { michael@0: if (cx->worker_cx && cx->destroy) { michael@0: (*cx->destroy)(cx->worker_cx, PR_TRUE); michael@0: cx->worker_cx = NULL; michael@0: cx->destroy = NULL; michael@0: } michael@0: if (freeit) michael@0: PORT_Free(cx); michael@0: } michael@0: michael@0: /* michael@0: * AES_Encrypt michael@0: * michael@0: * Encrypt an arbitrary-length buffer. The output buffer must already be michael@0: * allocated to at least inputLen. michael@0: */ michael@0: SECStatus michael@0: AES_Encrypt(AESContext *cx, unsigned char *output, michael@0: unsigned int *outputLen, unsigned int maxOutputLen, michael@0: const unsigned char *input, unsigned int inputLen) michael@0: { michael@0: int blocksize; michael@0: /* Check args */ michael@0: if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { michael@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); michael@0: return SECFailure; michael@0: } michael@0: blocksize = 4 * cx->Nb; michael@0: if (cx->isBlock && (inputLen % blocksize != 0)) { michael@0: PORT_SetError(SEC_ERROR_INPUT_LEN); michael@0: return SECFailure; michael@0: } michael@0: if (maxOutputLen < inputLen) { michael@0: PORT_SetError(SEC_ERROR_OUTPUT_LEN); michael@0: return SECFailure; michael@0: } michael@0: *outputLen = inputLen; michael@0: return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, michael@0: input, inputLen, blocksize); michael@0: } michael@0: michael@0: /* michael@0: * AES_Decrypt michael@0: * michael@0: * Decrypt and arbitrary-length buffer. The output buffer must already be michael@0: * allocated to at least inputLen. 
michael@0: */ michael@0: SECStatus michael@0: AES_Decrypt(AESContext *cx, unsigned char *output, michael@0: unsigned int *outputLen, unsigned int maxOutputLen, michael@0: const unsigned char *input, unsigned int inputLen) michael@0: { michael@0: int blocksize; michael@0: /* Check args */ michael@0: if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { michael@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); michael@0: return SECFailure; michael@0: } michael@0: blocksize = 4 * cx->Nb; michael@0: if (cx->isBlock && (inputLen % blocksize != 0)) { michael@0: PORT_SetError(SEC_ERROR_INPUT_LEN); michael@0: return SECFailure; michael@0: } michael@0: if (maxOutputLen < inputLen) { michael@0: PORT_SetError(SEC_ERROR_OUTPUT_LEN); michael@0: return SECFailure; michael@0: } michael@0: *outputLen = inputLen; michael@0: return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, michael@0: input, inputLen, blocksize); michael@0: }