1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/freebl/rijndael.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1323 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#ifdef FREEBL_NO_DEPEND 1.9 +#include "stubs.h" 1.10 +#endif 1.11 + 1.12 +#include "prinit.h" 1.13 +#include "prerr.h" 1.14 +#include "secerr.h" 1.15 + 1.16 +#include "prtypes.h" 1.17 +#include "blapi.h" 1.18 +#include "rijndael.h" 1.19 + 1.20 +#include "cts.h" 1.21 +#include "ctr.h" 1.22 +#include "gcm.h" 1.23 + 1.24 +#ifdef USE_HW_AES 1.25 +#include "intel-aes.h" 1.26 +#include "mpi.h" 1.27 + 1.28 +static int has_intel_aes = 0; 1.29 +static PRBool use_hw_aes = PR_FALSE; 1.30 + 1.31 +#ifdef INTEL_GCM 1.32 +#include "intel-gcm.h" 1.33 +static int has_intel_avx = 0; 1.34 +static int has_intel_clmul = 0; 1.35 +static PRBool use_hw_gcm = PR_FALSE; 1.36 +#endif 1.37 +#endif /* USE_HW_AES */ 1.38 + 1.39 +/* 1.40 + * There are currently five ways to build this code, varying in performance 1.41 + * and code size. 1.42 + * 1.43 + * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab 1.44 + * RIJNDAEL_GENERATE_TABLES Generate tables on first 1.45 + * encryption/decryption, then store them; 1.46 + * use the function gfm 1.47 + * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do 1.48 + * the generation 1.49 + * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table 1.50 + * values "on-the-fly", using gfm 1.51 + * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros 1.52 + * 1.53 + * The default is RIJNDAEL_INCLUDE_TABLES. 1.54 + */ 1.55 + 1.56 +/* 1.57 + * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], 1.58 + * T**-1[0..4], IMXC[0..4] 1.59 + * When building anything else, includes S, S**-1, Rcon 1.60 + */ 1.61 +#include "rijndael32.tab" 1.62 + 1.63 +#if defined(RIJNDAEL_INCLUDE_TABLES) 1.64 +/* 1.65 + * RIJNDAEL_INCLUDE_TABLES 1.66 + */ 1.67 +#define T0(i) _T0[i] 1.68 +#define T1(i) _T1[i] 1.69 +#define T2(i) _T2[i] 1.70 +#define T3(i) _T3[i] 1.71 +#define TInv0(i) _TInv0[i] 1.72 +#define TInv1(i) _TInv1[i] 1.73 +#define TInv2(i) _TInv2[i] 1.74 +#define TInv3(i) _TInv3[i] 1.75 +#define IMXC0(b) _IMXC0[b] 1.76 +#define IMXC1(b) _IMXC1[b] 1.77 +#define IMXC2(b) _IMXC2[b] 1.78 +#define IMXC3(b) _IMXC3[b] 1.79 +/* The S-box can be recovered from the T-tables */ 1.80 +#ifdef IS_LITTLE_ENDIAN 1.81 +#define SBOX(b) ((PRUint8)_T3[b]) 1.82 +#else 1.83 +#define SBOX(b) ((PRUint8)_T1[b]) 1.84 +#endif 1.85 +#define SINV(b) (_SInv[b]) 1.86 + 1.87 +#else /* not RIJNDAEL_INCLUDE_TABLES */ 1.88 + 1.89 +/* 1.90 + * Code for generating T-table values. 1.91 + */ 1.92 + 1.93 +#ifdef IS_LITTLE_ENDIAN 1.94 +#define WORD4(b0, b1, b2, b3) \ 1.95 + (((b3) << 24) | ((b2) << 16) | ((b1) << 8) | (b0)) 1.96 +#else 1.97 +#define WORD4(b0, b1, b2, b3) \ 1.98 + (((b0) << 24) | ((b1) << 16) | ((b2) << 8) | (b3)) 1.99 +#endif 1.100 + 1.101 +/* 1.102 + * Define the S and S**-1 tables (both have been stored) 1.103 + */ 1.104 +#define SBOX(b) (_S[b]) 1.105 +#define SINV(b) (_SInv[b]) 1.106 + 1.107 +/* 1.108 + * The function xtime, used for Galois field multiplication 1.109 + */ 1.110 +#define XTIME(a) \ 1.111 + ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1)) 1.112 + 1.113 +/* Choose GFM method (macros or function) */ 1.114 +#if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ 1.115 + defined(RIJNDAEL_GENERATE_VALUES_MACRO) 1.116 + 1.117 +/* 1.118 + * Galois field GF(2**8) multipliers, in macro form 1.119 + */ 1.120 +#define GFM01(a) \ 1.121 + (a) /* a * 01 = a, the identity */ 1.122 +#define GFM02(a) \ 1.123 + (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ 1.124 +#define GFM04(a) \ 1.125 + (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ 1.126 +#define GFM08(a) \ 1.127 + (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ 1.128 +#define GFM03(a) \ 1.129 + (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ 1.130 +#define GFM09(a) \ 1.131 + (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ 1.132 +#define GFM0B(a) \ 1.133 + (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ 1.134 +#define GFM0D(a) \ 1.135 + (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ 1.136 +#define GFM0E(a) \ 1.137 + (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ 1.138 + 1.139 +#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ 1.140 + 1.141 +/* GF_MULTIPLY 1.142 + * 1.143 + * multiply two bytes represented in GF(2**8), mod (x**4 + 1) 1.144 + */ 1.145 +PRUint8 gfm(PRUint8 a, PRUint8 b) 1.146 +{ 1.147 + PRUint8 res = 0; 1.148 + while (b > 0) { 1.149 + res = (b & 0x01) ? res ^ a : res; 1.150 + a = XTIME(a); 1.151 + b >>= 1; 1.152 + } 1.153 + return res; 1.154 +} 1.155 + 1.156 +#define GFM01(a) \ 1.157 + (a) /* a * 01 = a, the identity */ 1.158 +#define GFM02(a) \ 1.159 + (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ 1.160 +#define GFM03(a) \ 1.161 + (gfm(a, 0x03)) /* a * 03 */ 1.162 +#define GFM09(a) \ 1.163 + (gfm(a, 0x09)) /* a * 09 */ 1.164 +#define GFM0B(a) \ 1.165 + (gfm(a, 0x0B)) /* a * 0B */ 1.166 +#define GFM0D(a) \ 1.167 + (gfm(a, 0x0D)) /* a * 0D */ 1.168 +#define GFM0E(a) \ 1.169 + (gfm(a, 0x0E)) /* a * 0E */ 1.170 + 1.171 +#endif /* choosing GFM function */ 1.172 + 1.173 +/* 1.174 + * The T-tables 1.175 + */ 1.176 +#define G_T0(i) \ 1.177 + ( WORD4( GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)) ) ) 1.178 +#define G_T1(i) \ 1.179 + ( WORD4( GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)) ) ) 1.180 +#define G_T2(i) \ 1.181 + ( WORD4( GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)) ) ) 1.182 +#define G_T3(i) \ 1.183 + ( WORD4( GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)) ) ) 1.184 + 1.185 +/* 1.186 + * The inverse T-tables 1.187 + */ 1.188 +#define G_TInv0(i) \ 1.189 + ( WORD4( GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)) ) ) 1.190 +#define G_TInv1(i) \ 1.191 + ( WORD4( GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)) ) ) 1.192 +#define G_TInv2(i) \ 1.193 + ( WORD4( GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)) ) ) 1.194 +#define G_TInv3(i) \ 1.195 + ( WORD4( GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)) ) ) 1.196 + 1.197 +/* 1.198 + * The inverse mix column tables 1.199 + */ 1.200 +#define G_IMXC0(i) \ 1.201 + ( WORD4( GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i) ) ) 1.202 +#define G_IMXC1(i) \ 1.203 + ( WORD4( GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i) ) ) 1.204 +#define G_IMXC2(i) \ 1.205 + ( WORD4( GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i) ) ) 1.206 +#define G_IMXC3(i) \ 1.207 + ( WORD4( GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i) ) ) 1.208 + 1.209 +/* Now choose the T-table indexing method */ 1.210 +#if defined(RIJNDAEL_GENERATE_VALUES) 1.211 +/* generate values for the tables with a function*/ 1.212 +static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i) 1.213 +{ 1.214 + PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; 1.215 + si01 = SINV(i); 1.216 + si02 = XTIME(si01); 1.217 + si04 = XTIME(si02); 1.218 + si08 = XTIME(si04); 1.219 + si03 = si02 ^ si01; 1.220 + si09 = si08 ^ si01; 1.221 + si0B = si08 ^ si03; 1.222 + si0D = si09 ^ si04; 1.223 + si0E = si08 ^ si04 ^ si02; 1.224 + switch (tx) { 1.225 + case 0: 1.226 + return WORD4(si0E, si09, si0D, si0B); 1.227 + case 1: 1.228 + return WORD4(si0B, si0E, si09, si0D); 1.229 + case 2: 1.230 + return WORD4(si0D, si0B, si0E, si09); 1.231 + case 3: 1.232 + return WORD4(si09, si0D, si0B, si0E); 1.233 + } 1.234 + return -1; 1.235 +} 1.236 +#define T0(i) G_T0(i) 1.237 +#define T1(i) G_T1(i) 1.238 +#define T2(i) G_T2(i) 1.239 +#define T3(i) G_T3(i) 1.240 +#define TInv0(i) gen_TInvXi(0, i) 1.241 +#define TInv1(i) gen_TInvXi(1, i) 1.242 +#define TInv2(i) gen_TInvXi(2, i) 1.243 +#define TInv3(i) gen_TInvXi(3, i) 1.244 +#define IMXC0(b) G_IMXC0(b) 1.245 +#define IMXC1(b) G_IMXC1(b) 1.246 +#define IMXC2(b) G_IMXC2(b) 1.247 +#define IMXC3(b) G_IMXC3(b) 1.248 +#elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) 1.249 +/* generate values for the tables with macros */ 1.250 +#define T0(i) G_T0(i) 1.251 +#define T1(i) G_T1(i) 1.252 +#define T2(i) G_T2(i) 1.253 +#define T3(i) G_T3(i) 1.254 +#define TInv0(i) G_TInv0(i) 1.255 +#define TInv1(i) G_TInv1(i) 1.256 +#define TInv2(i) G_TInv2(i) 1.257 +#define TInv3(i) G_TInv3(i) 1.258 +#define IMXC0(b) G_IMXC0(b) 1.259 +#define IMXC1(b) G_IMXC1(b) 1.260 +#define IMXC2(b) G_IMXC2(b) 1.261 +#define IMXC3(b) G_IMXC3(b) 1.262 +#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ 1.263 +/* Generate T and T**-1 table values and store, then index */ 1.264 +/* The inverse mix column tables are still generated */ 1.265 +#define T0(i) rijndaelTables->T0[i] 1.266 +#define T1(i) rijndaelTables->T1[i] 1.267 +#define T2(i) rijndaelTables->T2[i] 1.268 +#define T3(i) rijndaelTables->T3[i] 1.269 +#define TInv0(i) rijndaelTables->TInv0[i] 1.270 +#define TInv1(i) rijndaelTables->TInv1[i] 1.271 +#define TInv2(i) rijndaelTables->TInv2[i] 1.272 +#define TInv3(i) rijndaelTables->TInv3[i] 1.273 +#define IMXC0(b) G_IMXC0(b) 1.274 +#define IMXC1(b) G_IMXC1(b) 1.275 +#define IMXC2(b) G_IMXC2(b) 1.276 +#define IMXC3(b) G_IMXC3(b) 1.277 +#endif /* choose T-table indexing method */ 1.278 + 1.279 +#endif /* not RIJNDAEL_INCLUDE_TABLES */ 1.280 + 1.281 +#if defined(RIJNDAEL_GENERATE_TABLES) || \ 1.282 + defined(RIJNDAEL_GENERATE_TABLES_MACRO) 1.283 + 1.284 +/* Code to generate and store the tables */ 1.285 + 1.286 +struct rijndael_tables_str { 1.287 + PRUint32 T0[256]; 1.288 + PRUint32 T1[256]; 1.289 + PRUint32 T2[256]; 1.290 + PRUint32 T3[256]; 1.291 + PRUint32 TInv0[256]; 1.292 + PRUint32 TInv1[256]; 1.293 + PRUint32 TInv2[256]; 1.294 + PRUint32 TInv3[256]; 1.295 +}; 1.296 + 1.297 +static struct rijndael_tables_str *rijndaelTables = NULL; 1.298 +static PRCallOnceType coRTInit = { 0, 0, 0 }; 1.299 +static PRStatus 1.300 +init_rijndael_tables(void) 1.301 +{ 1.302 + PRUint32 i; 1.303 + PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; 1.304 + struct rijndael_tables_str *rts; 1.305 + rts = (struct rijndael_tables_str *) 1.306 + PORT_Alloc(sizeof(struct rijndael_tables_str)); 1.307 + if (!rts) return PR_FAILURE; 1.308 + for (i=0; i<256; i++) { 1.309 + /* The forward values */ 1.310 + si01 = SBOX(i); 1.311 + si02 = XTIME(si01); 1.312 + si03 = si02 ^ si01; 1.313 + rts->T0[i] = WORD4(si02, si01, si01, si03); 1.314 + rts->T1[i] = WORD4(si03, si02, si01, si01); 1.315 + rts->T2[i] = WORD4(si01, si03, si02, si01); 1.316 + rts->T3[i] = WORD4(si01, si01, si03, si02); 1.317 + /* The inverse values */ 1.318 + si01 = SINV(i); 1.319 + si02 = XTIME(si01); 1.320 + si04 = XTIME(si02); 1.321 + si08 = XTIME(si04); 1.322 + si03 = si02 ^ si01; 1.323 + si09 = si08 ^ si01; 1.324 + si0B = si08 ^ si03; 1.325 + si0D = si09 ^ si04; 1.326 + si0E = si08 ^ si04 ^ si02; 1.327 + rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); 1.328 + rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); 1.329 + rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); 1.330 + rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); 1.331 + } 1.332 + /* wait until all the values are in to set */ 1.333 + rijndaelTables = rts; 1.334 + return PR_SUCCESS; 1.335 +} 1.336 + 1.337 +#endif /* code to generate tables */ 1.338 + 1.339 +/************************************************************************** 1.340 + * 1.341 + * Stuff related to the Rijndael key schedule 1.342 + * 1.343 + *************************************************************************/ 1.344 + 1.345 +#define SUBBYTE(w) \ 1.346 + ((SBOX((w >> 24) & 0xff) << 24) | \ 1.347 + (SBOX((w >> 16) & 0xff) << 16) | \ 1.348 + (SBOX((w >> 8) & 0xff) << 8) | \ 1.349 + (SBOX((w ) & 0xff) )) 1.350 + 1.351 +#ifdef IS_LITTLE_ENDIAN 1.352 +#define ROTBYTE(b) \ 1.353 + ((b >> 8) | (b << 24)) 1.354 +#else 1.355 +#define ROTBYTE(b) \ 1.356 + ((b << 8) | (b >> 24)) 1.357 +#endif 1.358 + 1.359 +/* rijndael_key_expansion7 1.360 + * 1.361 + * Generate the expanded key from the key input by the user. 1.362 + * XXX 1.363 + * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte 1.364 + * transformation is done periodically. The period is every 4 bytes, and 1.365 + * since 7%4 != 0 this happens at different times for each key word (unlike 1.366 + * Nk == 8 where it happens twice in every key word, in the same positions). 1.367 + * For now, I'm implementing this case "dumbly", w/o any unrolling. 1.368 + */ 1.369 +static SECStatus 1.370 +rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) 1.371 +{ 1.372 + unsigned int i; 1.373 + PRUint32 *W; 1.374 + PRUint32 *pW; 1.375 + PRUint32 tmp; 1.376 + W = cx->expandedKey; 1.377 + /* 1. the first Nk words contain the cipher key */ 1.378 + memcpy(W, key, Nk * 4); 1.379 + i = Nk; 1.380 + /* 2. loop until full expanded key is obtained */ 1.381 + pW = W + i - 1; 1.382 + for (; i < cx->Nb * (cx->Nr + 1); ++i) { 1.383 + tmp = *pW++; 1.384 + if (i % Nk == 0) 1.385 + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; 1.386 + else if (i % Nk == 4) 1.387 + tmp = SUBBYTE(tmp); 1.388 + *pW = W[i - Nk] ^ tmp; 1.389 + } 1.390 + return SECSuccess; 1.391 +} 1.392 + 1.393 +/* rijndael_key_expansion 1.394 + * 1.395 + * Generate the expanded key from the key input by the user. 1.396 + */ 1.397 +static SECStatus 1.398 +rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) 1.399 +{ 1.400 + unsigned int i; 1.401 + PRUint32 *W; 1.402 + PRUint32 *pW; 1.403 + PRUint32 tmp; 1.404 + unsigned int round_key_words = cx->Nb * (cx->Nr + 1); 1.405 + if (Nk == 7) 1.406 + return rijndael_key_expansion7(cx, key, Nk); 1.407 + W = cx->expandedKey; 1.408 + /* The first Nk words contain the input cipher key */ 1.409 + memcpy(W, key, Nk * 4); 1.410 + i = Nk; 1.411 + pW = W + i - 1; 1.412 + /* Loop over all sets of Nk words, except the last */ 1.413 + while (i < round_key_words - Nk) { 1.414 + tmp = *pW++; 1.415 + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; 1.416 + *pW = W[i++ - Nk] ^ tmp; 1.417 + tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; 1.418 + tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; 1.419 + tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; 1.420 + if (Nk == 4) 1.421 + continue; 1.422 + switch (Nk) { 1.423 + case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp; 1.424 + case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; 1.425 + case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; 1.426 + case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; 1.427 + } 1.428 + } 1.429 + /* Generate the last word */ 1.430 + tmp = *pW++; 1.431 + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; 1.432 + *pW = W[i++ - Nk] ^ tmp; 1.433 + /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, 1.434 + * since the above loop generated all but the last Nk key words, there 1.435 + * is no more need for the SubByte transformation. 1.436 + */ 1.437 + if (Nk < 8) { 1.438 + for (; i < round_key_words; ++i) { 1.439 + tmp = *pW++; 1.440 + *pW = W[i - Nk] ^ tmp; 1.441 + } 1.442 + } else { 1.443 + /* except in the case when Nk == 8. Then one more SubByte may have 1.444 + * to be performed, at i % Nk == 4. 1.445 + */ 1.446 + for (; i < round_key_words; ++i) { 1.447 + tmp = *pW++; 1.448 + if (i % Nk == 4) 1.449 + tmp = SUBBYTE(tmp); 1.450 + *pW = W[i - Nk] ^ tmp; 1.451 + } 1.452 + } 1.453 + return SECSuccess; 1.454 +} 1.455 + 1.456 +/* rijndael_invkey_expansion 1.457 + * 1.458 + * Generate the expanded key for the inverse cipher from the key input by 1.459 + * the user. 1.460 + */ 1.461 +static SECStatus 1.462 +rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) 1.463 +{ 1.464 + unsigned int r; 1.465 + PRUint32 *roundkeyw; 1.466 + PRUint8 *b; 1.467 + int Nb = cx->Nb; 1.468 + /* begins like usual key expansion ... */ 1.469 + if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) 1.470 + return SECFailure; 1.471 + /* ... but has the additional step of InvMixColumn, 1.472 + * excepting the first and last round keys. 1.473 + */ 1.474 + roundkeyw = cx->expandedKey + cx->Nb; 1.475 + for (r=1; r<cx->Nr; ++r) { 1.476 + /* each key word, roundkeyw, represents a column in the key 1.477 + * matrix. Each column is multiplied by the InvMixColumn matrix. 1.478 + * [ 0E 0B 0D 09 ] [ b0 ] 1.479 + * [ 09 0E 0B 0D ] * [ b1 ] 1.480 + * [ 0D 09 0E 0B ] [ b2 ] 1.481 + * [ 0B 0D 09 0E ] [ b3 ] 1.482 + */ 1.483 + b = (PRUint8 *)roundkeyw; 1.484 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); 1.485 + b = (PRUint8 *)roundkeyw; 1.486 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); 1.487 + b = (PRUint8 *)roundkeyw; 1.488 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); 1.489 + b = (PRUint8 *)roundkeyw; 1.490 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); 1.491 + if (Nb <= 4) 1.492 + continue; 1.493 + switch (Nb) { 1.494 + case 8: b = (PRUint8 *)roundkeyw; 1.495 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 1.496 + IMXC2(b[2]) ^ IMXC3(b[3]); 1.497 + case 7: b = (PRUint8 *)roundkeyw; 1.498 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 1.499 + IMXC2(b[2]) ^ IMXC3(b[3]); 1.500 + case 6: b = (PRUint8 *)roundkeyw; 1.501 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 1.502 + IMXC2(b[2]) ^ IMXC3(b[3]); 1.503 + case 5: b = (PRUint8 *)roundkeyw; 1.504 + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ 1.505 + IMXC2(b[2]) ^ IMXC3(b[3]); 1.506 + } 1.507 + } 1.508 + return SECSuccess; 1.509 +} 1.510 +/************************************************************************** 1.511 + * 1.512 + * Stuff related to Rijndael encryption/decryption, optimized for 1.513 + * a 128-bit blocksize. 1.514 + * 1.515 + *************************************************************************/ 1.516 + 1.517 +#ifdef IS_LITTLE_ENDIAN 1.518 +#define BYTE0WORD(w) ((w) & 0x000000ff) 1.519 +#define BYTE1WORD(w) ((w) & 0x0000ff00) 1.520 +#define BYTE2WORD(w) ((w) & 0x00ff0000) 1.521 +#define BYTE3WORD(w) ((w) & 0xff000000) 1.522 +#else 1.523 +#define BYTE0WORD(w) ((w) & 0xff000000) 1.524 +#define BYTE1WORD(w) ((w) & 0x00ff0000) 1.525 +#define BYTE2WORD(w) ((w) & 0x0000ff00) 1.526 +#define BYTE3WORD(w) ((w) & 0x000000ff) 1.527 +#endif 1.528 + 1.529 +typedef union { 1.530 + PRUint32 w[4]; 1.531 + PRUint8 b[16]; 1.532 +} rijndael_state; 1.533 + 1.534 +#define COLUMN_0(state) state.w[0] 1.535 +#define COLUMN_1(state) state.w[1] 1.536 +#define COLUMN_2(state) state.w[2] 1.537 +#define COLUMN_3(state) state.w[3] 1.538 + 1.539 +#define STATE_BYTE(i) state.b[i] 1.540 + 1.541 +static SECStatus 1.542 +rijndael_encryptBlock128(AESContext *cx, 1.543 + unsigned char *output, 1.544 + const unsigned char *input) 1.545 +{ 1.546 + unsigned int r; 1.547 + PRUint32 *roundkeyw; 1.548 + rijndael_state state; 1.549 + PRUint32 C0, C1, C2, C3; 1.550 +#if defined(NSS_X86_OR_X64) 1.551 +#define pIn input 1.552 +#define pOut output 1.553 +#else 1.554 + unsigned char *pIn, *pOut; 1.555 + PRUint32 inBuf[4], outBuf[4]; 1.556 + 1.557 + if ((ptrdiff_t)input & 0x3) { 1.558 + memcpy(inBuf, input, sizeof inBuf); 1.559 + pIn = (unsigned char *)inBuf; 1.560 + } else { 1.561 + pIn = (unsigned char *)input; 1.562 + } 1.563 + if ((ptrdiff_t)output & 0x3) { 1.564 + pOut = (unsigned char *)outBuf; 1.565 + } else { 1.566 + pOut = (unsigned char *)output; 1.567 + } 1.568 +#endif 1.569 + roundkeyw = cx->expandedKey; 1.570 + /* Step 1: Add Round Key 0 to initial state */ 1.571 + COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw++; 1.572 + COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++; 1.573 + COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++; 1.574 + COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; 1.575 + /* Step 2: Loop over rounds [1..NR-1] */ 1.576 + for (r=1; r<cx->Nr; ++r) { 1.577 + /* Do ShiftRow, ByteSub, and MixColumn all at once */ 1.578 + C0 = T0(STATE_BYTE(0)) ^ 1.579 + T1(STATE_BYTE(5)) ^ 1.580 + T2(STATE_BYTE(10)) ^ 1.581 + T3(STATE_BYTE(15)); 1.582 + C1 = T0(STATE_BYTE(4)) ^ 1.583 + T1(STATE_BYTE(9)) ^ 1.584 + T2(STATE_BYTE(14)) ^ 1.585 + T3(STATE_BYTE(3)); 1.586 + C2 = T0(STATE_BYTE(8)) ^ 1.587 + T1(STATE_BYTE(13)) ^ 1.588 + T2(STATE_BYTE(2)) ^ 1.589 + T3(STATE_BYTE(7)); 1.590 + C3 = T0(STATE_BYTE(12)) ^ 1.591 + T1(STATE_BYTE(1)) ^ 1.592 + T2(STATE_BYTE(6)) ^ 1.593 + T3(STATE_BYTE(11)); 1.594 + /* Round key addition */ 1.595 + COLUMN_0(state) = C0 ^ *roundkeyw++; 1.596 + COLUMN_1(state) = C1 ^ *roundkeyw++; 1.597 + COLUMN_2(state) = C2 ^ *roundkeyw++; 1.598 + COLUMN_3(state) = C3 ^ *roundkeyw++; 1.599 + } 1.600 + /* Step 3: Do the last round */ 1.601 + /* Final round does not employ MixColumn */ 1.602 + C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | 1.603 + (BYTE1WORD(T3(STATE_BYTE(5)))) | 1.604 + (BYTE2WORD(T0(STATE_BYTE(10)))) | 1.605 + (BYTE3WORD(T1(STATE_BYTE(15))))) ^ 1.606 + *roundkeyw++; 1.607 + C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | 1.608 + (BYTE1WORD(T3(STATE_BYTE(9)))) | 1.609 + (BYTE2WORD(T0(STATE_BYTE(14)))) | 1.610 + (BYTE3WORD(T1(STATE_BYTE(3))))) ^ 1.611 + *roundkeyw++; 1.612 + C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | 1.613 + (BYTE1WORD(T3(STATE_BYTE(13)))) | 1.614 + (BYTE2WORD(T0(STATE_BYTE(2)))) | 1.615 + (BYTE3WORD(T1(STATE_BYTE(7))))) ^ 1.616 + *roundkeyw++; 1.617 + C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | 1.618 + (BYTE1WORD(T3(STATE_BYTE(1)))) | 1.619 + (BYTE2WORD(T0(STATE_BYTE(6)))) | 1.620 + (BYTE3WORD(T1(STATE_BYTE(11))))) ^ 1.621 + *roundkeyw++; 1.622 + *((PRUint32 *) pOut ) = C0; 1.623 + *((PRUint32 *)(pOut + 4)) = C1; 1.624 + *((PRUint32 *)(pOut + 8)) = C2; 1.625 + *((PRUint32 *)(pOut + 12)) = C3; 1.626 +#if defined(NSS_X86_OR_X64) 1.627 +#undef pIn 1.628 +#undef pOut 1.629 +#else 1.630 + if ((ptrdiff_t)output & 0x3) { 1.631 + memcpy(output, outBuf, sizeof outBuf); 1.632 + } 1.633 +#endif 1.634 + return SECSuccess; 1.635 +} 1.636 + 1.637 +static SECStatus 1.638 +rijndael_decryptBlock128(AESContext *cx, 1.639 + unsigned char *output, 1.640 + const unsigned char *input) 1.641 +{ 1.642 + int r; 1.643 + PRUint32 *roundkeyw; 1.644 + rijndael_state state; 1.645 + PRUint32 C0, C1, C2, C3; 1.646 +#if defined(NSS_X86_OR_X64) 1.647 +#define pIn input 1.648 +#define pOut output 1.649 +#else 1.650 + unsigned char *pIn, *pOut; 1.651 + PRUint32 inBuf[4], outBuf[4]; 1.652 + 1.653 + if ((ptrdiff_t)input & 0x3) { 1.654 + memcpy(inBuf, input, sizeof inBuf); 1.655 + pIn = (unsigned char *)inBuf; 1.656 + } else { 1.657 + pIn = (unsigned char *)input; 1.658 + } 1.659 + if ((ptrdiff_t)output & 0x3) { 1.660 + pOut = (unsigned char *)outBuf; 1.661 + } else { 1.662 + pOut = (unsigned char *)output; 1.663 + } 1.664 +#endif 1.665 + roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; 1.666 + /* reverse the final key addition */ 1.667 + COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; 1.668 + COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; 1.669 + COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; 1.670 + COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw--; 1.671 + /* Loop over rounds in reverse [NR..1] */ 1.672 + for (r=cx->Nr; r>1; --r) { 1.673 + /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ 1.674 + C0 = TInv0(STATE_BYTE(0)) ^ 1.675 + TInv1(STATE_BYTE(13)) ^ 1.676 + TInv2(STATE_BYTE(10)) ^ 1.677 + TInv3(STATE_BYTE(7)); 1.678 + C1 = TInv0(STATE_BYTE(4)) ^ 1.679 + TInv1(STATE_BYTE(1)) ^ 1.680 + TInv2(STATE_BYTE(14)) ^ 1.681 + TInv3(STATE_BYTE(11)); 1.682 + C2 = TInv0(STATE_BYTE(8)) ^ 1.683 + TInv1(STATE_BYTE(5)) ^ 1.684 + TInv2(STATE_BYTE(2)) ^ 1.685 + TInv3(STATE_BYTE(15)); 1.686 + C3 = TInv0(STATE_BYTE(12)) ^ 1.687 + TInv1(STATE_BYTE(9)) ^ 1.688 + TInv2(STATE_BYTE(6)) ^ 1.689 + TInv3(STATE_BYTE(3)); 1.690 + /* Invert the key addition step */ 1.691 + COLUMN_3(state) = C3 ^ *roundkeyw--; 1.692 + COLUMN_2(state) = C2 ^ *roundkeyw--; 1.693 + COLUMN_1(state) = C1 ^ *roundkeyw--; 1.694 + COLUMN_0(state) = C0 ^ *roundkeyw--; 1.695 + } 1.696 + /* inverse sub */ 1.697 + pOut[ 0] = SINV(STATE_BYTE( 0)); 1.698 + pOut[ 1] = SINV(STATE_BYTE(13)); 1.699 + pOut[ 2] = SINV(STATE_BYTE(10)); 1.700 + pOut[ 3] = SINV(STATE_BYTE( 7)); 1.701 + pOut[ 4] = SINV(STATE_BYTE( 4)); 1.702 + pOut[ 5] = SINV(STATE_BYTE( 1)); 1.703 + pOut[ 6] = SINV(STATE_BYTE(14)); 1.704 + pOut[ 7] = SINV(STATE_BYTE(11)); 1.705 + pOut[ 8] = SINV(STATE_BYTE( 8)); 1.706 + pOut[ 9] = SINV(STATE_BYTE( 5)); 1.707 + pOut[10] = SINV(STATE_BYTE( 2)); 1.708 + pOut[11] = SINV(STATE_BYTE(15)); 1.709 + pOut[12] = SINV(STATE_BYTE(12)); 1.710 + pOut[13] = SINV(STATE_BYTE( 9)); 1.711 + pOut[14] = SINV(STATE_BYTE( 6)); 1.712 + pOut[15] = SINV(STATE_BYTE( 3)); 1.713 + /* final key addition */ 1.714 + *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; 1.715 + *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; 1.716 + *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; 1.717 + *((PRUint32 *) pOut ) ^= *roundkeyw--; 1.718 +#if defined(NSS_X86_OR_X64) 1.719 +#undef pIn 1.720 +#undef pOut 1.721 +#else 1.722 + if ((ptrdiff_t)output & 0x3) { 1.723 + memcpy(output, outBuf, sizeof outBuf); 1.724 + } 1.725 +#endif 1.726 + return SECSuccess; 1.727 +} 1.728 + 1.729 +/************************************************************************** 1.730 + * 1.731 + * Stuff related to general Rijndael encryption/decryption, for blocksizes 1.732 + * greater than 128 bits. 1.733 + * 1.734 + * XXX This code is currently untested! So far, AES specs have only been 1.735 + * released for 128 bit blocksizes. This will be tested, but for now 1.736 + * only the code above has been tested using known values. 1.737 + * 1.738 + *************************************************************************/ 1.739 + 1.740 +#define COLUMN(array, j) *((PRUint32 *)(array + j)) 1.741 + 1.742 +SECStatus 1.743 +rijndael_encryptBlock(AESContext *cx, 1.744 + unsigned char *output, 1.745 + const unsigned char *input) 1.746 +{ 1.747 + return SECFailure; 1.748 +#ifdef rijndael_large_blocks_fixed 1.749 + unsigned int j, r, Nb; 1.750 + unsigned int c2=0, c3=0; 1.751 + PRUint32 *roundkeyw; 1.752 + PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; 1.753 + Nb = cx->Nb; 1.754 + roundkeyw = cx->expandedKey; 1.755 + /* Step 1: Add Round Key 0 to initial state */ 1.756 + for (j=0; j<4*Nb; j+=4) { 1.757 + COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; 1.758 + } 1.759 + /* Step 2: Loop over rounds [1..NR-1] */ 1.760 + for (r=1; r<cx->Nr; ++r) { 1.761 + for (j=0; j<Nb; ++j) { 1.762 + COLUMN(output, j) = T0(STATE_BYTE(4* j )) ^ 1.763 + T1(STATE_BYTE(4*((j+ 1)%Nb)+1)) ^ 1.764 + T2(STATE_BYTE(4*((j+c2)%Nb)+2)) ^ 1.765 + T3(STATE_BYTE(4*((j+c3)%Nb)+3)); 1.766 + } 1.767 + for (j=0; j<4*Nb; j+=4) { 1.768 + COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++; 1.769 + } 1.770 + } 1.771 + /* Step 3: Do the last round */ 1.772 + /* Final round does not employ MixColumn */ 1.773 + for (j=0; j<Nb; ++j) { 1.774 + COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4* j )))) | 1.775 + (BYTE1WORD(T3(STATE_BYTE(4*(j+ 1)%Nb)+1))) | 1.776 + (BYTE2WORD(T0(STATE_BYTE(4*(j+c2)%Nb)+2))) | 1.777 + (BYTE3WORD(T1(STATE_BYTE(4*(j+c3)%Nb)+3)))) ^ 1.778 + *roundkeyw++; 1.779 + } 1.780 + return SECSuccess; 1.781 +#endif 1.782 +} 1.783 + 1.784 +SECStatus 1.785 +rijndael_decryptBlock(AESContext *cx, 1.786 + unsigned char *output, 1.787 + const unsigned char *input) 1.788 +{ 1.789 + return SECFailure; 1.790 +#ifdef rijndael_large_blocks_fixed 1.791 + int j, r, Nb; 1.792 + int c2=0, c3=0; 1.793 + PRUint32 *roundkeyw; 1.794 + PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; 1.795 + Nb = cx->Nb; 1.796 + roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; 1.797 + /* reverse key addition */ 1.798 + for (j=4*Nb; j>=0; j-=4) { 1.799 + COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; 1.800 + } 1.801 + /* Loop over rounds in reverse [NR..1] */ 1.802 + for (r=cx->Nr; r>1; --r) { 1.803 + /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ 1.804 + for (j=0; j<Nb; ++j) { 1.805 + COLUMN(output, 4*j) = TInv0(STATE_BYTE(4* j )) ^ 1.806 + TInv1(STATE_BYTE(4*(j+Nb- 1)%Nb)+1) ^ 1.807 + TInv2(STATE_BYTE(4*(j+Nb-c2)%Nb)+2) ^ 1.808 + TInv3(STATE_BYTE(4*(j+Nb-c3)%Nb)+3); 1.809 + } 1.810 + /* Invert the key addition step */ 1.811 + for (j=4*Nb; j>=0; j-=4) { 1.812 + COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; 1.813 + } 1.814 + } 1.815 + /* inverse sub */ 1.816 + for (j=0; j<4*Nb; ++j) { 1.817 + output[j] = SINV(clone[j]); 1.818 + } 1.819 + /* final key addition */ 1.820 + for (j=4*Nb; j>=0; j-=4) { 1.821 + COLUMN(output, j) ^= *roundkeyw--; 1.822 + } 1.823 + return SECSuccess; 1.824 +#endif 1.825 +} 1.826 + 1.827 +/************************************************************************** 1.828 + * 1.829 + * Rijndael modes of operation (ECB and CBC) 1.830 + * 1.831 + *************************************************************************/ 1.832 + 1.833 +static SECStatus 1.834 +rijndael_encryptECB(AESContext *cx, unsigned char *output, 1.835 + unsigned int *outputLen, unsigned int maxOutputLen, 1.836 + const unsigned char *input, unsigned int inputLen, 1.837 + unsigned int blocksize) 1.838 +{ 1.839 + SECStatus rv; 1.840 + AESBlockFunc *encryptor; 1.841 + 1.842 + encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 1.843 + ? &rijndael_encryptBlock128 1.844 + : &rijndael_encryptBlock; 1.845 + while (inputLen > 0) { 1.846 + rv = (*encryptor)(cx, output, input); 1.847 + if (rv != SECSuccess) 1.848 + return rv; 1.849 + output += blocksize; 1.850 + input += blocksize; 1.851 + inputLen -= blocksize; 1.852 + } 1.853 + return SECSuccess; 1.854 +} 1.855 + 1.856 +static SECStatus 1.857 +rijndael_encryptCBC(AESContext *cx, unsigned char *output, 1.858 + unsigned int *outputLen, unsigned int maxOutputLen, 1.859 + const unsigned char *input, unsigned int inputLen, 1.860 + unsigned int blocksize) 1.861 +{ 1.862 + unsigned int j; 1.863 + SECStatus rv; 1.864 + AESBlockFunc *encryptor; 1.865 + unsigned char *lastblock; 1.866 + unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; 1.867 + 1.868 + if (!inputLen) 1.869 + return SECSuccess; 1.870 + lastblock = cx->iv; 1.871 + encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 1.872 + ? &rijndael_encryptBlock128 1.873 + : &rijndael_encryptBlock; 1.874 + while (inputLen > 0) { 1.875 + /* XOR with the last block (IV if first block) */ 1.876 + for (j=0; j<blocksize; ++j) 1.877 + inblock[j] = input[j] ^ lastblock[j]; 1.878 + /* encrypt */ 1.879 + rv = (*encryptor)(cx, output, inblock); 1.880 + if (rv != SECSuccess) 1.881 + return rv; 1.882 + /* move to the next block */ 1.883 + lastblock = output; 1.884 + output += blocksize; 1.885 + input += blocksize; 1.886 + inputLen -= blocksize; 1.887 + } 1.888 + memcpy(cx->iv, lastblock, blocksize); 1.889 + return SECSuccess; 1.890 +} 1.891 + 1.892 +static SECStatus 1.893 +rijndael_decryptECB(AESContext *cx, unsigned char *output, 1.894 + unsigned int *outputLen, unsigned int maxOutputLen, 1.895 + const unsigned char *input, unsigned int inputLen, 1.896 + unsigned int blocksize) 1.897 +{ 1.898 + SECStatus rv; 1.899 + AESBlockFunc *decryptor; 1.900 + 1.901 + decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 1.902 + ? &rijndael_decryptBlock128 1.903 + : &rijndael_decryptBlock; 1.904 + while (inputLen > 0) { 1.905 + rv = (*decryptor)(cx, output, input); 1.906 + if (rv != SECSuccess) 1.907 + return rv; 1.908 + output += blocksize; 1.909 + input += blocksize; 1.910 + inputLen -= blocksize; 1.911 + } 1.912 + return SECSuccess; 1.913 +} 1.914 + 1.915 +static SECStatus 1.916 +rijndael_decryptCBC(AESContext *cx, unsigned char *output, 1.917 + unsigned int *outputLen, unsigned int maxOutputLen, 1.918 + const unsigned char *input, unsigned int inputLen, 1.919 + unsigned int blocksize) 1.920 +{ 1.921 + SECStatus rv; 1.922 + AESBlockFunc *decryptor; 1.923 + const unsigned char *in; 1.924 + unsigned char *out; 1.925 + unsigned int j; 1.926 + unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; 1.927 + 1.928 + 1.929 + if (!inputLen) 1.930 + return SECSuccess; 1.931 + PORT_Assert(output - input >= 0 || input - output >= (int)inputLen ); 1.932 + decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) 1.933 + ? &rijndael_decryptBlock128 1.934 + : &rijndael_decryptBlock; 1.935 + in = input + (inputLen - blocksize); 1.936 + memcpy(newIV, in, blocksize); 1.937 + out = output + (inputLen - blocksize); 1.938 + while (inputLen > blocksize) { 1.939 + rv = (*decryptor)(cx, out, in); 1.940 + if (rv != SECSuccess) 1.941 + return rv; 1.942 + for (j=0; j<blocksize; ++j) 1.943 + out[j] ^= in[(int)(j - blocksize)]; 1.944 + out -= blocksize; 1.945 + in -= blocksize; 1.946 + inputLen -= blocksize; 1.947 + } 1.948 + if (in == input) { 1.949 + rv = (*decryptor)(cx, out, in); 1.950 + if (rv != SECSuccess) 1.951 + return rv; 1.952 + for (j=0; j<blocksize; ++j) 1.953 + out[j] ^= cx->iv[j]; 1.954 + } 1.955 + memcpy(cx->iv, newIV, blocksize); 1.956 + return SECSuccess; 1.957 +} 1.958 + 1.959 +/************************************************************************ 1.960 + * 1.961 + * BLAPI Interface functions 1.962 + * 1.963 + * The following functions implement the encryption routines defined in 1.964 + * BLAPI for the AES cipher, Rijndael. 1.965 + * 1.966 + ***********************************************************************/ 1.967 + 1.968 +AESContext * AES_AllocateContext(void) 1.969 +{ 1.970 + return PORT_ZNew(AESContext); 1.971 +} 1.972 + 1.973 + 1.974 +#ifdef INTEL_GCM 1.975 +/* 1.976 + * Adapted from the example code in "How to detect New Instruction support in 1.977 + * the 4th generation Intel Core processor family" by Max Locktyukhin. 1.978 + * 1.979 + * XGETBV: 1.980 + * Reads an extended control register (XCR) specified by ECX into EDX:EAX. 1.981 + */ 1.982 +static PRBool 1.983 +check_xcr0_ymm() 1.984 +{ 1.985 + PRUint32 xcr0; 1.986 +#if defined(_MSC_VER) 1.987 +#if defined(_M_IX86) 1.988 + __asm { 1.989 + mov ecx, 0 1.990 + xgetbv 1.991 + mov xcr0, eax 1.992 + } 1.993 +#else 1.994 + xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */ 1.995 +#endif 1.996 +#else 1.997 + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); 1.998 +#endif 1.999 + /* Check if xmm and ymm state are enabled in XCR0. */ 1.1000 + return (xcr0 & 6) == 6; 1.1001 +} 1.1002 +#endif 1.1003 + 1.1004 +/* 1.1005 +** Initialize a new AES context suitable for AES encryption/decryption in 1.1006 +** the ECB or CBC mode. 1.1007 +** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC 1.1008 +*/ 1.1009 +static SECStatus 1.1010 +aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, 1.1011 + const unsigned char *iv, int mode, unsigned int encrypt, 1.1012 + unsigned int blocksize) 1.1013 +{ 1.1014 + unsigned int Nk; 1.1015 + /* According to Rijndael AES Proposal, section 12.1, block and key 1.1016 + * lengths between 128 and 256 bits are supported, as long as the 1.1017 + * length in bytes is divisible by 4. 1.1018 + */ 1.1019 + if (key == NULL || 1.1020 + keysize < RIJNDAEL_MIN_BLOCKSIZE || 1.1021 + keysize > RIJNDAEL_MAX_BLOCKSIZE || 1.1022 + keysize % 4 != 0 || 1.1023 + blocksize < RIJNDAEL_MIN_BLOCKSIZE || 1.1024 + blocksize > RIJNDAEL_MAX_BLOCKSIZE || 1.1025 + blocksize % 4 != 0) { 1.1026 + PORT_SetError(SEC_ERROR_INVALID_ARGS); 1.1027 + return SECFailure; 1.1028 + } 1.1029 + if (mode != NSS_AES && mode != NSS_AES_CBC) { 1.1030 + PORT_SetError(SEC_ERROR_INVALID_ARGS); 1.1031 + return SECFailure; 1.1032 + } 1.1033 + if (mode == NSS_AES_CBC && iv == NULL) { 1.1034 + PORT_SetError(SEC_ERROR_INVALID_ARGS); 1.1035 + return SECFailure; 1.1036 + } 1.1037 + if (!cx) { 1.1038 + PORT_SetError(SEC_ERROR_INVALID_ARGS); 1.1039 + return SECFailure; 1.1040 + } 1.1041 +#ifdef USE_HW_AES 1.1042 + if (has_intel_aes == 0) { 1.1043 + unsigned long eax, ebx, ecx, edx; 1.1044 + char *disable_hw_aes = getenv("NSS_DISABLE_HW_AES"); 1.1045 + 1.1046 + if (disable_hw_aes == NULL) { 1.1047 + freebl_cpuid(1, &eax, &ebx, &ecx, &edx); 1.1048 + has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; 1.1049 +#ifdef INTEL_GCM 1.1050 + has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1; 1.1051 + if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && 1.1052 + check_xcr0_ymm()) { 1.1053 + has_intel_avx = 1; 1.1054 + } else { 1.1055 + has_intel_avx = -1; 1.1056 + } 1.1057 +#endif 1.1058 + } else { 1.1059 + has_intel_aes = -1; 1.1060 +#ifdef INTEL_GCM 1.1061 + has_intel_avx = -1; 1.1062 + has_intel_clmul = -1; 1.1063 +#endif 1.1064 + } 1.1065 + } 1.1066 + use_hw_aes = (PRBool) 1.1067 + (has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); 1.1068 +#ifdef INTEL_GCM 1.1069 + use_hw_gcm = (PRBool) 1.1070 + (use_hw_aes && has_intel_avx>0 && has_intel_clmul>0); 1.1071 +#endif 1.1072 +#endif /* USE_HW_AES */ 1.1073 + /* Nb = (block size in bits) / 32 */ 1.1074 + cx->Nb = blocksize / 4; 1.1075 + /* Nk = (key size in bits) / 32 */ 1.1076 + Nk = keysize / 4; 1.1077 + /* Obtain number of rounds from "table" */ 1.1078 + cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); 1.1079 + /* copy in the iv, if neccessary */ 1.1080 + if (mode == NSS_AES_CBC) { 1.1081 + memcpy(cx->iv, iv, blocksize); 1.1082 +#ifdef USE_HW_AES 1.1083 + if (use_hw_aes) { 1.1084 + cx->worker = (freeblCipherFunc) 1.1085 + intel_aes_cbc_worker(encrypt, keysize); 1.1086 + } else 1.1087 +#endif 1.1088 + { 1.1089 + cx->worker = (freeblCipherFunc) (encrypt 1.1090 + ? &rijndael_encryptCBC : &rijndael_decryptCBC); 1.1091 + } 1.1092 + } else { 1.1093 +#ifdef USE_HW_AES 1.1094 + if (use_hw_aes) { 1.1095 + cx->worker = (freeblCipherFunc) 1.1096 + intel_aes_ecb_worker(encrypt, keysize); 1.1097 + } else 1.1098 +#endif 1.1099 + { 1.1100 + cx->worker = (freeblCipherFunc) (encrypt 1.1101 + ? &rijndael_encryptECB : &rijndael_decryptECB); 1.1102 + } 1.1103 + } 1.1104 + PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); 1.1105 + if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { 1.1106 + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); 1.1107 + goto cleanup; 1.1108 + } 1.1109 +#ifdef USE_HW_AES 1.1110 + if (use_hw_aes) { 1.1111 + intel_aes_init(encrypt, keysize); 1.1112 + } else 1.1113 +#endif 1.1114 + { 1.1115 + 1.1116 +#if defined(RIJNDAEL_GENERATE_TABLES) || \ 1.1117 + defined(RIJNDAEL_GENERATE_TABLES_MACRO) 1.1118 + if (rijndaelTables == NULL) { 1.1119 + if (PR_CallOnce(&coRTInit, init_rijndael_tables) 1.1120 + != PR_SUCCESS) { 1.1121 + return SecFailure; 1.1122 + } 1.1123 + } 1.1124 +#endif 1.1125 + /* Generate expanded key */ 1.1126 + if (encrypt) { 1.1127 + if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) 1.1128 + goto cleanup; 1.1129 + } else { 1.1130 + if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) 1.1131 + goto cleanup; 1.1132 + } 1.1133 + } 1.1134 + cx->worker_cx = cx; 1.1135 + cx->destroy = NULL; 1.1136 + cx->isBlock = PR_TRUE; 1.1137 + return SECSuccess; 1.1138 +cleanup: 1.1139 + return SECFailure; 1.1140 +} 1.1141 + 1.1142 +SECStatus 1.1143 +AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, 1.1144 + const unsigned char *iv, int mode, unsigned int encrypt, 1.1145 + unsigned int blocksize) 1.1146 +{ 1.1147 + int basemode = mode; 1.1148 + PRBool baseencrypt = encrypt; 1.1149 + SECStatus rv; 1.1150 + 1.1151 + switch (mode) { 1.1152 + case NSS_AES_CTS: 1.1153 + basemode = NSS_AES_CBC; 1.1154 + break; 1.1155 + case NSS_AES_GCM: 1.1156 + case NSS_AES_CTR: 1.1157 + basemode = NSS_AES; 1.1158 + baseencrypt = PR_TRUE; 1.1159 + break; 1.1160 + } 1.1161 + /* make sure enough is initializes so we can safely call Destroy */ 1.1162 + cx->worker_cx = NULL; 1.1163 + cx->destroy = NULL; 1.1164 + rv = aes_InitContext(cx, key, keysize, iv, basemode, 1.1165 + baseencrypt, blocksize); 1.1166 + if (rv != SECSuccess) { 1.1167 + AES_DestroyContext(cx, PR_FALSE); 1.1168 + return rv; 1.1169 + } 1.1170 + 1.1171 + /* finally, set up any mode specific contexts */ 1.1172 + switch (mode) { 1.1173 + case NSS_AES_CTS: 1.1174 + cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); 1.1175 + cx->worker = (freeblCipherFunc) 1.1176 + (encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); 1.1177 + cx->destroy = (freeblDestroyFunc) CTS_DestroyContext; 1.1178 + cx->isBlock = PR_FALSE; 1.1179 + break; 1.1180 + case NSS_AES_GCM: 1.1181 +#ifdef INTEL_GCM 1.1182 + if(use_hw_gcm) { 1.1183 + cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); 1.1184 + cx->worker = (freeblCipherFunc) 1.1185 + (encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); 1.1186 + cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext; 1.1187 + cx->isBlock = PR_FALSE; 1.1188 + } else 1.1189 +#endif 1.1190 + { 1.1191 + cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); 1.1192 + cx->worker = (freeblCipherFunc) 1.1193 + (encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); 1.1194 + cx->destroy = (freeblDestroyFunc) GCM_DestroyContext; 1.1195 + cx->isBlock = PR_FALSE; 1.1196 + } 1.1197 + break; 1.1198 + case NSS_AES_CTR: 1.1199 + cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); 1.1200 +#if defined(USE_HW_AES) && defined(_MSC_VER) 1.1201 + if (use_hw_aes) { 1.1202 + cx->worker = (freeblCipherFunc) CTR_Update_HW_AES; 1.1203 + } else 1.1204 +#endif 1.1205 + { 1.1206 + cx->worker = (freeblCipherFunc) CTR_Update; 1.1207 + } 1.1208 + cx->destroy = (freeblDestroyFunc) CTR_DestroyContext; 1.1209 + cx->isBlock = PR_FALSE; 1.1210 + break; 1.1211 + default: 1.1212 + /* everything has already been set up by aes_InitContext, just 1.1213 + * return */ 1.1214 + return SECSuccess; 1.1215 + } 1.1216 + /* check to see if we succeeded in getting the worker context */ 1.1217 + if (cx->worker_cx == NULL) { 1.1218 + /* no, just destroy the existing context */ 1.1219 + cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ 1.1220 + /* below that this isn't necessary */ 1.1221 + AES_DestroyContext(cx, PR_FALSE); 1.1222 + return SECFailure; 1.1223 + } 1.1224 + return SECSuccess; 1.1225 +} 1.1226 + 1.1227 +/* AES_CreateContext 1.1228 + * 1.1229 + * create a new context for Rijndael operations 1.1230 + */ 1.1231 +AESContext * 1.1232 +AES_CreateContext(const unsigned char *key, const unsigned char *iv, 1.1233 + int mode, int encrypt, 1.1234 + unsigned int keysize, unsigned int blocksize) 1.1235 +{ 1.1236 + AESContext *cx = AES_AllocateContext(); 1.1237 + if (cx) { 1.1238 + SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, 1.1239 + blocksize); 1.1240 + if (rv != SECSuccess) { 1.1241 + AES_DestroyContext(cx, PR_TRUE); 1.1242 + cx = NULL; 1.1243 + } 1.1244 + } 1.1245 + return cx; 1.1246 +} 1.1247 + 1.1248 +/* 1.1249 + * AES_DestroyContext 1.1250 + * 1.1251 + * Zero an AES cipher context. If freeit is true, also free the pointer 1.1252 + * to the context. 1.1253 + */ 1.1254 +void 1.1255 +AES_DestroyContext(AESContext *cx, PRBool freeit) 1.1256 +{ 1.1257 + if (cx->worker_cx && cx->destroy) { 1.1258 + (*cx->destroy)(cx->worker_cx, PR_TRUE); 1.1259 + cx->worker_cx = NULL; 1.1260 + cx->destroy = NULL; 1.1261 + } 1.1262 + if (freeit) 1.1263 + PORT_Free(cx); 1.1264 +} 1.1265 + 1.1266 +/* 1.1267 + * AES_Encrypt 1.1268 + * 1.1269 + * Encrypt an arbitrary-length buffer. The output buffer must already be 1.1270 + * allocated to at least inputLen. 1.1271 + */ 1.1272 +SECStatus 1.1273 +AES_Encrypt(AESContext *cx, unsigned char *output, 1.1274 + unsigned int *outputLen, unsigned int maxOutputLen, 1.1275 + const unsigned char *input, unsigned int inputLen) 1.1276 +{ 1.1277 + int blocksize; 1.1278 + /* Check args */ 1.1279 + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { 1.1280 + PORT_SetError(SEC_ERROR_INVALID_ARGS); 1.1281 + return SECFailure; 1.1282 + } 1.1283 + blocksize = 4 * cx->Nb; 1.1284 + if (cx->isBlock && (inputLen % blocksize != 0)) { 1.1285 + PORT_SetError(SEC_ERROR_INPUT_LEN); 1.1286 + return SECFailure; 1.1287 + } 1.1288 + if (maxOutputLen < inputLen) { 1.1289 + PORT_SetError(SEC_ERROR_OUTPUT_LEN); 1.1290 + return SECFailure; 1.1291 + } 1.1292 + *outputLen = inputLen; 1.1293 + return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, 1.1294 + input, inputLen, blocksize); 1.1295 +} 1.1296 + 1.1297 +/* 1.1298 + * AES_Decrypt 1.1299 + * 1.1300 + * Decrypt and arbitrary-length buffer. The output buffer must already be 1.1301 + * allocated to at least inputLen. 1.1302 + */ 1.1303 +SECStatus 1.1304 +AES_Decrypt(AESContext *cx, unsigned char *output, 1.1305 + unsigned int *outputLen, unsigned int maxOutputLen, 1.1306 + const unsigned char *input, unsigned int inputLen) 1.1307 +{ 1.1308 + int blocksize; 1.1309 + /* Check args */ 1.1310 + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { 1.1311 + PORT_SetError(SEC_ERROR_INVALID_ARGS); 1.1312 + return SECFailure; 1.1313 + } 1.1314 + blocksize = 4 * cx->Nb; 1.1315 + if (cx->isBlock && (inputLen % blocksize != 0)) { 1.1316 + PORT_SetError(SEC_ERROR_INPUT_LEN); 1.1317 + return SECFailure; 1.1318 + } 1.1319 + if (maxOutputLen < inputLen) { 1.1320 + PORT_SetError(SEC_ERROR_OUTPUT_LEN); 1.1321 + return SECFailure; 1.1322 + } 1.1323 + *outputLen = inputLen; 1.1324 + return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, 1.1325 + input, inputLen, blocksize); 1.1326 +}