security/nss/lib/freebl/rijndael.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 #ifdef FREEBL_NO_DEPEND
michael@0 6 #include "stubs.h"
michael@0 7 #endif
michael@0 8
michael@0 9 #include "prinit.h"
michael@0 10 #include "prerr.h"
michael@0 11 #include "secerr.h"
michael@0 12
michael@0 13 #include "prtypes.h"
michael@0 14 #include "blapi.h"
michael@0 15 #include "rijndael.h"
michael@0 16
michael@0 17 #include "cts.h"
michael@0 18 #include "ctr.h"
michael@0 19 #include "gcm.h"
michael@0 20
michael@0 21 #ifdef USE_HW_AES
michael@0 22 #include "intel-aes.h"
michael@0 23 #include "mpi.h"
michael@0 24
michael@0 25 static int has_intel_aes = 0;
michael@0 26 static PRBool use_hw_aes = PR_FALSE;
michael@0 27
michael@0 28 #ifdef INTEL_GCM
michael@0 29 #include "intel-gcm.h"
michael@0 30 static int has_intel_avx = 0;
michael@0 31 static int has_intel_clmul = 0;
michael@0 32 static PRBool use_hw_gcm = PR_FALSE;
michael@0 33 #endif
michael@0 34 #endif /* USE_HW_AES */
michael@0 35
michael@0 36 /*
michael@0 37 * There are currently five ways to build this code, varying in performance
michael@0 38 * and code size.
michael@0 39 *
michael@0 40 * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab
michael@0 41 * RIJNDAEL_GENERATE_TABLES Generate tables on first
michael@0 42 * encryption/decryption, then store them;
michael@0 43 * use the function gfm
michael@0 44 * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do
michael@0 45 * the generation
michael@0 46 * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table
michael@0 47 * values "on-the-fly", using gfm
michael@0 48 * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros
michael@0 49 *
michael@0 50 * The default is RIJNDAEL_INCLUDE_TABLES.
michael@0 51 */
michael@0 52
michael@0 53 /*
michael@0 54 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..3],
michael@0 55 * T**-1[0..3], IMXC[0..3]
michael@0 56 * When building anything else, includes S, S**-1, Rcon
michael@0 57 */
michael@0 58 #include "rijndael32.tab"
michael@0 59
michael@0 60 #if defined(RIJNDAEL_INCLUDE_TABLES)
michael@0 61 /*
michael@0 62 * RIJNDAEL_INCLUDE_TABLES
michael@0 63 */
/* Forward T-tables: plain lookups into the stored arrays. */
#define T0(i) (_T0[(i)])
#define T1(i) (_T1[(i)])
#define T2(i) (_T2[(i)])
#define T3(i) (_T3[(i)])
/* Inverse T-tables. */
#define TInv0(i) (_TInv0[(i)])
#define TInv1(i) (_TInv1[(i)])
#define TInv2(i) (_TInv2[(i)])
#define TInv3(i) (_TInv3[(i)])
/* Inverse MixColumn tables. */
#define IMXC0(b) (_IMXC0[(b)])
#define IMXC1(b) (_IMXC1[(b)])
#define IMXC2(b) (_IMXC2[(b)])
#define IMXC3(b) (_IMXC3[(b)])
/* The S-box can be recovered from the T-tables: truncating the chosen
 * T-table word to PRUint8 keeps its low-order byte. */
#ifdef IS_LITTLE_ENDIAN
#define SBOX(b) ((PRUint8)_T3[(b)])
#else
#define SBOX(b) ((PRUint8)_T1[(b)])
#endif
/* The inverse S-box is looked up directly. */
#define SINV(b) (_SInv[(b)])
michael@0 83
michael@0 84 #else /* not RIJNDAEL_INCLUDE_TABLES */
michael@0 85
michael@0 86 /*
michael@0 87 * Code for generating T-table values.
michael@0 88 */
michael@0 89
/*
 * WORD4 packs four byte values into one 32-bit word so that b0 lands in the
 * lowest-addressed byte of the word in memory and b3 in the highest, on
 * either endianness.
 *
 * Every operand is widened to PRUint32 before it is shifted.  Without the
 * cast a PRUint8 operand is promoted to (signed) int, and shifting a value
 * >= 0x80 left by 24 overflows int, which is undefined behavior.
 */
#ifdef IS_LITTLE_ENDIAN
#define WORD4(b0, b1, b2, b3)                                  \
    ((((PRUint32)(b3)) << 24) | (((PRUint32)(b2)) << 16) |     \
     (((PRUint32)(b1)) << 8) | ((PRUint32)(b0)))
#else
#define WORD4(b0, b1, b2, b3)                                  \
    ((((PRUint32)(b0)) << 24) | (((PRUint32)(b1)) << 16) |     \
     (((PRUint32)(b2)) << 8) | ((PRUint32)(b3)))
#endif
michael@0 97
/*
 * Define the S and S**-1 tables (both have been stored)
 */
#define SBOX(b) (_S[b])
#define SINV(b) (_SInv[b])

/*
 * The function xtime, used for Galois field multiplication: shift left by
 * one and, if bit 7 was set, XOR in 0x1b.
 *
 * The result is NOT reduced to 8 bits here (bit 8 may be set); users either
 * store it into a PRUint8 or mask it with 0xff.  The argument is evaluated
 * more than once, so it must be free of side effects.  It is fully
 * parenthesized so that expression arguments such as XTIME(x ^ y) parse as
 * intended.
 */
#define XTIME(a) \
    (((a) & 0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
michael@0 109
michael@0 110 /* Choose GFM method (macros or function) */
michael@0 111 #if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \
michael@0 112 defined(RIJNDAEL_GENERATE_VALUES_MACRO)
michael@0 113
/*
 * Galois field GF(2**8) multipliers, in macro form.
 *
 * Multiplication by 02, 04 and 08 is repeated XTIME (masked to 8 bits);
 * every other constant is the XOR of the power-of-two products that sum
 * to it.
 */
#define GFM01(a) (a)                 /* a * 01 = a, the identity */
#define GFM02(a) (XTIME(a) & 0xff)   /* a * 02 = xtime(a) */
#define GFM04(a) (GFM02(GFM02(a)))   /* a * 04 = xtime**2(a) */
#define GFM08(a) (GFM02(GFM04(a)))   /* a * 08 = xtime**3(a) */
#define GFM03(a) (GFM01(a) ^ GFM02(a))              /* 03 = 01 + 02 */
#define GFM09(a) (GFM01(a) ^ GFM08(a))              /* 09 = 01 + 08 */
#define GFM0B(a) (GFM01(a) ^ GFM02(a) ^ GFM08(a))   /* 0B = 01 + 02 + 08 */
#define GFM0D(a) (GFM01(a) ^ GFM04(a) ^ GFM08(a))   /* 0D = 01 + 04 + 08 */
#define GFM0E(a) (GFM02(a) ^ GFM04(a) ^ GFM08(a))   /* 0E = 02 + 04 + 08 */
michael@0 135
michael@0 136 #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */
michael@0 137
michael@0 138 /* GF_MULTIPLY
michael@0 139 *
michael@0 140 * multiply two bytes represented in GF(2**8), mod (x**4 + 1)
michael@0 141 */
michael@0 142 PRUint8 gfm(PRUint8 a, PRUint8 b)
michael@0 143 {
michael@0 144 PRUint8 res = 0;
michael@0 145 while (b > 0) {
michael@0 146 res = (b & 0x01) ? res ^ a : res;
michael@0 147 a = XTIME(a);
michael@0 148 b >>= 1;
michael@0 149 }
michael@0 150 return res;
michael@0 151 }
michael@0 152
/*
 * Galois field GF(2**8) multipliers, function form: 01 and 02 stay inline,
 * every other constant goes through gfm().
 */
#define GFM01(a) (a)                 /* a * 01 = a, the identity */
#define GFM02(a) (XTIME(a) & 0xff)   /* a * 02 = xtime(a) */
#define GFM03(a) (gfm(a, 0x03))      /* a * 03 */
#define GFM09(a) (gfm(a, 0x09))      /* a * 09 */
#define GFM0B(a) (gfm(a, 0x0B))      /* a * 0B */
#define GFM0D(a) (gfm(a, 0x0D))      /* a * 0D */
#define GFM0E(a) (gfm(a, 0x0E))      /* a * 0E */
michael@0 167
michael@0 168 #endif /* choosing GFM function */
michael@0 169
/*
 * The T-tables: each entry packs the S-box output multiplied by 02, 01, 01
 * and 03; the four tables differ only in how those bytes are rotated.
 */
#define G_T0(i) \
    (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
#define G_T1(i) \
    (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
#define G_T2(i) \
    (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
#define G_T3(i) \
    (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))

/*
 * The inverse T-tables: same construction with the inverse S-box and the
 * multipliers 0E, 09, 0D and 0B.
 */
#define G_TInv0(i) \
    (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
#define G_TInv1(i) \
    (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
#define G_TInv2(i) \
    (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
#define G_TInv3(i) \
    (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))

/*
 * The inverse mix column tables: like the inverse T-tables, but applied to
 * the raw byte (no S-box lookup).
 */
#define G_IMXC0(i) \
    (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
#define G_IMXC1(i) \
    (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
#define G_IMXC2(i) \
    (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
#define G_IMXC3(i) \
    (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
michael@0 205
michael@0 206 /* Now choose the T-table indexing method */
michael@0 207 #if defined(RIJNDAEL_GENERATE_VALUES)
michael@0 208 /* generate values for the tables with a function*/
michael@0 209 static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i)
michael@0 210 {
michael@0 211 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
michael@0 212 si01 = SINV(i);
michael@0 213 si02 = XTIME(si01);
michael@0 214 si04 = XTIME(si02);
michael@0 215 si08 = XTIME(si04);
michael@0 216 si03 = si02 ^ si01;
michael@0 217 si09 = si08 ^ si01;
michael@0 218 si0B = si08 ^ si03;
michael@0 219 si0D = si09 ^ si04;
michael@0 220 si0E = si08 ^ si04 ^ si02;
michael@0 221 switch (tx) {
michael@0 222 case 0:
michael@0 223 return WORD4(si0E, si09, si0D, si0B);
michael@0 224 case 1:
michael@0 225 return WORD4(si0B, si0E, si09, si0D);
michael@0 226 case 2:
michael@0 227 return WORD4(si0D, si0B, si0E, si09);
michael@0 228 case 3:
michael@0 229 return WORD4(si09, si0D, si0B, si0E);
michael@0 230 }
michael@0 231 return -1;
michael@0 232 }
michael@0 233 #define T0(i) G_T0(i)
michael@0 234 #define T1(i) G_T1(i)
michael@0 235 #define T2(i) G_T2(i)
michael@0 236 #define T3(i) G_T3(i)
michael@0 237 #define TInv0(i) gen_TInvXi(0, i)
michael@0 238 #define TInv1(i) gen_TInvXi(1, i)
michael@0 239 #define TInv2(i) gen_TInvXi(2, i)
michael@0 240 #define TInv3(i) gen_TInvXi(3, i)
michael@0 241 #define IMXC0(b) G_IMXC0(b)
michael@0 242 #define IMXC1(b) G_IMXC1(b)
michael@0 243 #define IMXC2(b) G_IMXC2(b)
michael@0 244 #define IMXC3(b) G_IMXC3(b)
michael@0 245 #elif defined(RIJNDAEL_GENERATE_VALUES_MACRO)
/* generate values for the tables with macros: every accessor expands to
 * the matching generator macro, nothing is stored */
#define T0(i)    G_T0(i)
#define T1(i)    G_T1(i)
#define T2(i)    G_T2(i)
#define T3(i)    G_T3(i)
#define TInv0(i) G_TInv0(i)
#define TInv1(i) G_TInv1(i)
#define TInv2(i) G_TInv2(i)
#define TInv3(i) G_TInv3(i)
#define IMXC0(b) G_IMXC0(b)
#define IMXC1(b) G_IMXC1(b)
#define IMXC2(b) G_IMXC2(b)
#define IMXC3(b) G_IMXC3(b)
michael@0 259 #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */
/* Generate T and T**-1 table values and store, then index: the accessors
 * read from the table set that rijndaelTables points to. */
#define T0(i)    (rijndaelTables->T0[(i)])
#define T1(i)    (rijndaelTables->T1[(i)])
#define T2(i)    (rijndaelTables->T2[(i)])
#define T3(i)    (rijndaelTables->T3[(i)])
#define TInv0(i) (rijndaelTables->TInv0[(i)])
#define TInv1(i) (rijndaelTables->TInv1[(i)])
#define TInv2(i) (rijndaelTables->TInv2[(i)])
#define TInv3(i) (rijndaelTables->TInv3[(i)])
/* The inverse mix column tables are still generated */
#define IMXC0(b) G_IMXC0(b)
#define IMXC1(b) G_IMXC1(b)
#define IMXC2(b) G_IMXC2(b)
#define IMXC3(b) G_IMXC3(b)
michael@0 274 #endif /* choose T-table indexing method */
michael@0 275
michael@0 276 #endif /* not RIJNDAEL_INCLUDE_TABLES */
michael@0 277
michael@0 278 #if defined(RIJNDAEL_GENERATE_TABLES) || \
michael@0 279 defined(RIJNDAEL_GENERATE_TABLES_MACRO)
michael@0 280
michael@0 281 /* Code to generate and store the tables */
michael@0 282
michael@0 283 struct rijndael_tables_str {
michael@0 284 PRUint32 T0[256];
michael@0 285 PRUint32 T1[256];
michael@0 286 PRUint32 T2[256];
michael@0 287 PRUint32 T3[256];
michael@0 288 PRUint32 TInv0[256];
michael@0 289 PRUint32 TInv1[256];
michael@0 290 PRUint32 TInv2[256];
michael@0 291 PRUint32 TInv3[256];
michael@0 292 };
michael@0 293
michael@0 294 static struct rijndael_tables_str *rijndaelTables = NULL;
michael@0 295 static PRCallOnceType coRTInit = { 0, 0, 0 };
michael@0 296 static PRStatus
michael@0 297 init_rijndael_tables(void)
michael@0 298 {
michael@0 299 PRUint32 i;
michael@0 300 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
michael@0 301 struct rijndael_tables_str *rts;
michael@0 302 rts = (struct rijndael_tables_str *)
michael@0 303 PORT_Alloc(sizeof(struct rijndael_tables_str));
michael@0 304 if (!rts) return PR_FAILURE;
michael@0 305 for (i=0; i<256; i++) {
michael@0 306 /* The forward values */
michael@0 307 si01 = SBOX(i);
michael@0 308 si02 = XTIME(si01);
michael@0 309 si03 = si02 ^ si01;
michael@0 310 rts->T0[i] = WORD4(si02, si01, si01, si03);
michael@0 311 rts->T1[i] = WORD4(si03, si02, si01, si01);
michael@0 312 rts->T2[i] = WORD4(si01, si03, si02, si01);
michael@0 313 rts->T3[i] = WORD4(si01, si01, si03, si02);
michael@0 314 /* The inverse values */
michael@0 315 si01 = SINV(i);
michael@0 316 si02 = XTIME(si01);
michael@0 317 si04 = XTIME(si02);
michael@0 318 si08 = XTIME(si04);
michael@0 319 si03 = si02 ^ si01;
michael@0 320 si09 = si08 ^ si01;
michael@0 321 si0B = si08 ^ si03;
michael@0 322 si0D = si09 ^ si04;
michael@0 323 si0E = si08 ^ si04 ^ si02;
michael@0 324 rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B);
michael@0 325 rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D);
michael@0 326 rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09);
michael@0 327 rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E);
michael@0 328 }
michael@0 329 /* wait until all the values are in to set */
michael@0 330 rijndaelTables = rts;
michael@0 331 return PR_SUCCESS;
michael@0 332 }
michael@0 333
michael@0 334 #endif /* code to generate tables */
michael@0 335
michael@0 336 /**************************************************************************
michael@0 337 *
michael@0 338 * Stuff related to the Rijndael key schedule
michael@0 339 *
michael@0 340 *************************************************************************/
michael@0 341
/*
 * SUBBYTE applies the S-box to each of the four bytes of the word w.
 *
 * Each S-box result is widened to PRUint32 before it is shifted back into
 * place: as a PRUint8 it would be promoted to (signed) int, and shifting a
 * value >= 0x80 left by 24 overflows int (undefined behavior).  The
 * argument is evaluated several times and must be free of side effects.
 */
#define SUBBYTE(w)                                      \
    ((((PRUint32)SBOX(((w) >> 24) & 0xff)) << 24) |     \
     (((PRUint32)SBOX(((w) >> 16) & 0xff)) << 16) |     \
     (((PRUint32)SBOX(((w) >> 8) & 0xff)) << 8) |       \
     (((PRUint32)SBOX((w) & 0xff))))

/*
 * ROTBYTE rotates the word b by 8 bits; the direction depends on the host
 * byte order.  b is expected to be an unsigned 32-bit value.
 */
#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
    (((b) >> 8) | ((b) << 24))
#else
#define ROTBYTE(b) \
    (((b) << 8) | ((b) >> 24))
#endif
michael@0 355
michael@0 356 /* rijndael_key_expansion7
michael@0 357 *
michael@0 358 * Generate the expanded key from the key input by the user.
michael@0 359 * XXX
michael@0 360 * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte
michael@0 361 * transformation is done periodically. The period is every 4 bytes, and
michael@0 362 * since 7%4 != 0 this happens at different times for each key word (unlike
michael@0 363 * Nk == 8 where it happens twice in every key word, in the same positions).
michael@0 364 * For now, I'm implementing this case "dumbly", w/o any unrolling.
michael@0 365 */
michael@0 366 static SECStatus
michael@0 367 rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
michael@0 368 {
michael@0 369 unsigned int i;
michael@0 370 PRUint32 *W;
michael@0 371 PRUint32 *pW;
michael@0 372 PRUint32 tmp;
michael@0 373 W = cx->expandedKey;
michael@0 374 /* 1. the first Nk words contain the cipher key */
michael@0 375 memcpy(W, key, Nk * 4);
michael@0 376 i = Nk;
michael@0 377 /* 2. loop until full expanded key is obtained */
michael@0 378 pW = W + i - 1;
michael@0 379 for (; i < cx->Nb * (cx->Nr + 1); ++i) {
michael@0 380 tmp = *pW++;
michael@0 381 if (i % Nk == 0)
michael@0 382 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
michael@0 383 else if (i % Nk == 4)
michael@0 384 tmp = SUBBYTE(tmp);
michael@0 385 *pW = W[i - Nk] ^ tmp;
michael@0 386 }
michael@0 387 return SECSuccess;
michael@0 388 }
michael@0 389
michael@0 390 /* rijndael_key_expansion
michael@0 391 *
michael@0 392 * Generate the expanded key from the key input by the user.
michael@0 393 */
michael@0 394 static SECStatus
michael@0 395 rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
michael@0 396 {
michael@0 397 unsigned int i;
michael@0 398 PRUint32 *W;
michael@0 399 PRUint32 *pW;
michael@0 400 PRUint32 tmp;
michael@0 401 unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
michael@0 402 if (Nk == 7)
michael@0 403 return rijndael_key_expansion7(cx, key, Nk);
michael@0 404 W = cx->expandedKey;
michael@0 405 /* The first Nk words contain the input cipher key */
michael@0 406 memcpy(W, key, Nk * 4);
michael@0 407 i = Nk;
michael@0 408 pW = W + i - 1;
michael@0 409 /* Loop over all sets of Nk words, except the last */
michael@0 410 while (i < round_key_words - Nk) {
michael@0 411 tmp = *pW++;
michael@0 412 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
michael@0 413 *pW = W[i++ - Nk] ^ tmp;
michael@0 414 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
michael@0 415 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
michael@0 416 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
michael@0 417 if (Nk == 4)
michael@0 418 continue;
michael@0 419 switch (Nk) {
michael@0 420 case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp;
michael@0 421 case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
michael@0 422 case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
michael@0 423 case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp;
michael@0 424 }
michael@0 425 }
michael@0 426 /* Generate the last word */
michael@0 427 tmp = *pW++;
michael@0 428 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
michael@0 429 *pW = W[i++ - Nk] ^ tmp;
michael@0 430 /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However,
michael@0 431 * since the above loop generated all but the last Nk key words, there
michael@0 432 * is no more need for the SubByte transformation.
michael@0 433 */
michael@0 434 if (Nk < 8) {
michael@0 435 for (; i < round_key_words; ++i) {
michael@0 436 tmp = *pW++;
michael@0 437 *pW = W[i - Nk] ^ tmp;
michael@0 438 }
michael@0 439 } else {
michael@0 440 /* except in the case when Nk == 8. Then one more SubByte may have
michael@0 441 * to be performed, at i % Nk == 4.
michael@0 442 */
michael@0 443 for (; i < round_key_words; ++i) {
michael@0 444 tmp = *pW++;
michael@0 445 if (i % Nk == 4)
michael@0 446 tmp = SUBBYTE(tmp);
michael@0 447 *pW = W[i - Nk] ^ tmp;
michael@0 448 }
michael@0 449 }
michael@0 450 return SECSuccess;
michael@0 451 }
michael@0 452
michael@0 453 /* rijndael_invkey_expansion
michael@0 454 *
michael@0 455 * Generate the expanded key for the inverse cipher from the key input by
michael@0 456 * the user.
michael@0 457 */
michael@0 458 static SECStatus
michael@0 459 rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
michael@0 460 {
michael@0 461 unsigned int r;
michael@0 462 PRUint32 *roundkeyw;
michael@0 463 PRUint8 *b;
michael@0 464 int Nb = cx->Nb;
michael@0 465 /* begins like usual key expansion ... */
michael@0 466 if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
michael@0 467 return SECFailure;
michael@0 468 /* ... but has the additional step of InvMixColumn,
michael@0 469 * excepting the first and last round keys.
michael@0 470 */
michael@0 471 roundkeyw = cx->expandedKey + cx->Nb;
michael@0 472 for (r=1; r<cx->Nr; ++r) {
michael@0 473 /* each key word, roundkeyw, represents a column in the key
michael@0 474 * matrix. Each column is multiplied by the InvMixColumn matrix.
michael@0 475 * [ 0E 0B 0D 09 ] [ b0 ]
michael@0 476 * [ 09 0E 0B 0D ] * [ b1 ]
michael@0 477 * [ 0D 09 0E 0B ] [ b2 ]
michael@0 478 * [ 0B 0D 09 0E ] [ b3 ]
michael@0 479 */
michael@0 480 b = (PRUint8 *)roundkeyw;
michael@0 481 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 482 b = (PRUint8 *)roundkeyw;
michael@0 483 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 484 b = (PRUint8 *)roundkeyw;
michael@0 485 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 486 b = (PRUint8 *)roundkeyw;
michael@0 487 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 488 if (Nb <= 4)
michael@0 489 continue;
michael@0 490 switch (Nb) {
michael@0 491 case 8: b = (PRUint8 *)roundkeyw;
michael@0 492 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
michael@0 493 IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 494 case 7: b = (PRUint8 *)roundkeyw;
michael@0 495 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
michael@0 496 IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 497 case 6: b = (PRUint8 *)roundkeyw;
michael@0 498 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
michael@0 499 IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 500 case 5: b = (PRUint8 *)roundkeyw;
michael@0 501 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
michael@0 502 IMXC2(b[2]) ^ IMXC3(b[3]);
michael@0 503 }
michael@0 504 }
michael@0 505 return SECSuccess;
michael@0 506 }
michael@0 507 /**************************************************************************
michael@0 508 *
michael@0 509 * Stuff related to Rijndael encryption/decryption, optimized for
michael@0 510 * a 128-bit blocksize.
michael@0 511 *
michael@0 512 *************************************************************************/
michael@0 513
/*
 * BYTEnWORD(w) keeps, in place, the byte of w that sits at memory offset n
 * and clears the other three; the mask therefore depends on the host byte
 * order.
 */
#ifdef IS_LITTLE_ENDIAN
#define BYTE0WORD(w) ((w) & 0x000000ff) /* offset 0 = least significant */
#define BYTE1WORD(w) ((w) & 0x0000ff00)
#define BYTE2WORD(w) ((w) & 0x00ff0000)
#define BYTE3WORD(w) ((w) & 0xff000000) /* offset 3 = most significant */
#else
#define BYTE0WORD(w) ((w) & 0xff000000) /* offset 0 = most significant */
#define BYTE1WORD(w) ((w) & 0x00ff0000)
#define BYTE2WORD(w) ((w) & 0x0000ff00)
#define BYTE3WORD(w) ((w) & 0x000000ff) /* offset 3 = least significant */
#endif
michael@0 525
michael@0 526 typedef union {
michael@0 527 PRUint32 w[4];
michael@0 528 PRUint8 b[16];
michael@0 529 } rijndael_state;
michael@0 530
michael@0 531 #define COLUMN_0(state) state.w[0]
michael@0 532 #define COLUMN_1(state) state.w[1]
michael@0 533 #define COLUMN_2(state) state.w[2]
michael@0 534 #define COLUMN_3(state) state.w[3]
michael@0 535
michael@0 536 #define STATE_BYTE(i) state.b[i]
michael@0 537
michael@0 538 static SECStatus
michael@0 539 rijndael_encryptBlock128(AESContext *cx,
michael@0 540 unsigned char *output,
michael@0 541 const unsigned char *input)
michael@0 542 {
michael@0 543 unsigned int r;
michael@0 544 PRUint32 *roundkeyw;
michael@0 545 rijndael_state state;
michael@0 546 PRUint32 C0, C1, C2, C3;
michael@0 547 #if defined(NSS_X86_OR_X64)
michael@0 548 #define pIn input
michael@0 549 #define pOut output
michael@0 550 #else
michael@0 551 unsigned char *pIn, *pOut;
michael@0 552 PRUint32 inBuf[4], outBuf[4];
michael@0 553
michael@0 554 if ((ptrdiff_t)input & 0x3) {
michael@0 555 memcpy(inBuf, input, sizeof inBuf);
michael@0 556 pIn = (unsigned char *)inBuf;
michael@0 557 } else {
michael@0 558 pIn = (unsigned char *)input;
michael@0 559 }
michael@0 560 if ((ptrdiff_t)output & 0x3) {
michael@0 561 pOut = (unsigned char *)outBuf;
michael@0 562 } else {
michael@0 563 pOut = (unsigned char *)output;
michael@0 564 }
michael@0 565 #endif
michael@0 566 roundkeyw = cx->expandedKey;
michael@0 567 /* Step 1: Add Round Key 0 to initial state */
michael@0 568 COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw++;
michael@0 569 COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++;
michael@0 570 COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++;
michael@0 571 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
michael@0 572 /* Step 2: Loop over rounds [1..NR-1] */
michael@0 573 for (r=1; r<cx->Nr; ++r) {
michael@0 574 /* Do ShiftRow, ByteSub, and MixColumn all at once */
michael@0 575 C0 = T0(STATE_BYTE(0)) ^
michael@0 576 T1(STATE_BYTE(5)) ^
michael@0 577 T2(STATE_BYTE(10)) ^
michael@0 578 T3(STATE_BYTE(15));
michael@0 579 C1 = T0(STATE_BYTE(4)) ^
michael@0 580 T1(STATE_BYTE(9)) ^
michael@0 581 T2(STATE_BYTE(14)) ^
michael@0 582 T3(STATE_BYTE(3));
michael@0 583 C2 = T0(STATE_BYTE(8)) ^
michael@0 584 T1(STATE_BYTE(13)) ^
michael@0 585 T2(STATE_BYTE(2)) ^
michael@0 586 T3(STATE_BYTE(7));
michael@0 587 C3 = T0(STATE_BYTE(12)) ^
michael@0 588 T1(STATE_BYTE(1)) ^
michael@0 589 T2(STATE_BYTE(6)) ^
michael@0 590 T3(STATE_BYTE(11));
michael@0 591 /* Round key addition */
michael@0 592 COLUMN_0(state) = C0 ^ *roundkeyw++;
michael@0 593 COLUMN_1(state) = C1 ^ *roundkeyw++;
michael@0 594 COLUMN_2(state) = C2 ^ *roundkeyw++;
michael@0 595 COLUMN_3(state) = C3 ^ *roundkeyw++;
michael@0 596 }
michael@0 597 /* Step 3: Do the last round */
michael@0 598 /* Final round does not employ MixColumn */
michael@0 599 C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
michael@0 600 (BYTE1WORD(T3(STATE_BYTE(5)))) |
michael@0 601 (BYTE2WORD(T0(STATE_BYTE(10)))) |
michael@0 602 (BYTE3WORD(T1(STATE_BYTE(15))))) ^
michael@0 603 *roundkeyw++;
michael@0 604 C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
michael@0 605 (BYTE1WORD(T3(STATE_BYTE(9)))) |
michael@0 606 (BYTE2WORD(T0(STATE_BYTE(14)))) |
michael@0 607 (BYTE3WORD(T1(STATE_BYTE(3))))) ^
michael@0 608 *roundkeyw++;
michael@0 609 C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
michael@0 610 (BYTE1WORD(T3(STATE_BYTE(13)))) |
michael@0 611 (BYTE2WORD(T0(STATE_BYTE(2)))) |
michael@0 612 (BYTE3WORD(T1(STATE_BYTE(7))))) ^
michael@0 613 *roundkeyw++;
michael@0 614 C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
michael@0 615 (BYTE1WORD(T3(STATE_BYTE(1)))) |
michael@0 616 (BYTE2WORD(T0(STATE_BYTE(6)))) |
michael@0 617 (BYTE3WORD(T1(STATE_BYTE(11))))) ^
michael@0 618 *roundkeyw++;
michael@0 619 *((PRUint32 *) pOut ) = C0;
michael@0 620 *((PRUint32 *)(pOut + 4)) = C1;
michael@0 621 *((PRUint32 *)(pOut + 8)) = C2;
michael@0 622 *((PRUint32 *)(pOut + 12)) = C3;
michael@0 623 #if defined(NSS_X86_OR_X64)
michael@0 624 #undef pIn
michael@0 625 #undef pOut
michael@0 626 #else
michael@0 627 if ((ptrdiff_t)output & 0x3) {
michael@0 628 memcpy(output, outBuf, sizeof outBuf);
michael@0 629 }
michael@0 630 #endif
michael@0 631 return SECSuccess;
michael@0 632 }
michael@0 633
michael@0 634 static SECStatus
michael@0 635 rijndael_decryptBlock128(AESContext *cx,
michael@0 636 unsigned char *output,
michael@0 637 const unsigned char *input)
michael@0 638 {
michael@0 639 int r;
michael@0 640 PRUint32 *roundkeyw;
michael@0 641 rijndael_state state;
michael@0 642 PRUint32 C0, C1, C2, C3;
michael@0 643 #if defined(NSS_X86_OR_X64)
michael@0 644 #define pIn input
michael@0 645 #define pOut output
michael@0 646 #else
michael@0 647 unsigned char *pIn, *pOut;
michael@0 648 PRUint32 inBuf[4], outBuf[4];
michael@0 649
michael@0 650 if ((ptrdiff_t)input & 0x3) {
michael@0 651 memcpy(inBuf, input, sizeof inBuf);
michael@0 652 pIn = (unsigned char *)inBuf;
michael@0 653 } else {
michael@0 654 pIn = (unsigned char *)input;
michael@0 655 }
michael@0 656 if ((ptrdiff_t)output & 0x3) {
michael@0 657 pOut = (unsigned char *)outBuf;
michael@0 658 } else {
michael@0 659 pOut = (unsigned char *)output;
michael@0 660 }
michael@0 661 #endif
michael@0 662 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
michael@0 663 /* reverse the final key addition */
michael@0 664 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
michael@0 665 COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
michael@0 666 COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
michael@0 667 COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw--;
michael@0 668 /* Loop over rounds in reverse [NR..1] */
michael@0 669 for (r=cx->Nr; r>1; --r) {
michael@0 670 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
michael@0 671 C0 = TInv0(STATE_BYTE(0)) ^
michael@0 672 TInv1(STATE_BYTE(13)) ^
michael@0 673 TInv2(STATE_BYTE(10)) ^
michael@0 674 TInv3(STATE_BYTE(7));
michael@0 675 C1 = TInv0(STATE_BYTE(4)) ^
michael@0 676 TInv1(STATE_BYTE(1)) ^
michael@0 677 TInv2(STATE_BYTE(14)) ^
michael@0 678 TInv3(STATE_BYTE(11));
michael@0 679 C2 = TInv0(STATE_BYTE(8)) ^
michael@0 680 TInv1(STATE_BYTE(5)) ^
michael@0 681 TInv2(STATE_BYTE(2)) ^
michael@0 682 TInv3(STATE_BYTE(15));
michael@0 683 C3 = TInv0(STATE_BYTE(12)) ^
michael@0 684 TInv1(STATE_BYTE(9)) ^
michael@0 685 TInv2(STATE_BYTE(6)) ^
michael@0 686 TInv3(STATE_BYTE(3));
michael@0 687 /* Invert the key addition step */
michael@0 688 COLUMN_3(state) = C3 ^ *roundkeyw--;
michael@0 689 COLUMN_2(state) = C2 ^ *roundkeyw--;
michael@0 690 COLUMN_1(state) = C1 ^ *roundkeyw--;
michael@0 691 COLUMN_0(state) = C0 ^ *roundkeyw--;
michael@0 692 }
michael@0 693 /* inverse sub */
michael@0 694 pOut[ 0] = SINV(STATE_BYTE( 0));
michael@0 695 pOut[ 1] = SINV(STATE_BYTE(13));
michael@0 696 pOut[ 2] = SINV(STATE_BYTE(10));
michael@0 697 pOut[ 3] = SINV(STATE_BYTE( 7));
michael@0 698 pOut[ 4] = SINV(STATE_BYTE( 4));
michael@0 699 pOut[ 5] = SINV(STATE_BYTE( 1));
michael@0 700 pOut[ 6] = SINV(STATE_BYTE(14));
michael@0 701 pOut[ 7] = SINV(STATE_BYTE(11));
michael@0 702 pOut[ 8] = SINV(STATE_BYTE( 8));
michael@0 703 pOut[ 9] = SINV(STATE_BYTE( 5));
michael@0 704 pOut[10] = SINV(STATE_BYTE( 2));
michael@0 705 pOut[11] = SINV(STATE_BYTE(15));
michael@0 706 pOut[12] = SINV(STATE_BYTE(12));
michael@0 707 pOut[13] = SINV(STATE_BYTE( 9));
michael@0 708 pOut[14] = SINV(STATE_BYTE( 6));
michael@0 709 pOut[15] = SINV(STATE_BYTE( 3));
michael@0 710 /* final key addition */
michael@0 711 *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
michael@0 712 *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
michael@0 713 *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
michael@0 714 *((PRUint32 *) pOut ) ^= *roundkeyw--;
michael@0 715 #if defined(NSS_X86_OR_X64)
michael@0 716 #undef pIn
michael@0 717 #undef pOut
michael@0 718 #else
michael@0 719 if ((ptrdiff_t)output & 0x3) {
michael@0 720 memcpy(output, outBuf, sizeof outBuf);
michael@0 721 }
michael@0 722 #endif
michael@0 723 return SECSuccess;
michael@0 724 }
michael@0 725
michael@0 726 /**************************************************************************
michael@0 727 *
michael@0 728 * Stuff related to general Rijndael encryption/decryption, for blocksizes
michael@0 729 * greater than 128 bits.
michael@0 730 *
michael@0 731 * XXX This code is currently untested! So far, AES specs have only been
michael@0 732 * released for 128 bit blocksizes. This will be tested, but for now
michael@0 733 * only the code above has been tested using known values.
michael@0 734 *
michael@0 735 *************************************************************************/
michael@0 736
/* The PRUint32 located j bytes into the byte array `array', as an lvalue.
 * Arguments and the expansion are parenthesized so that expression
 * arguments and surrounding operators bind as intended.
 * NOTE(review): the access goes through a cast byte pointer, so the caller
 * is responsible for alignment; only the disabled large-block code uses
 * this macro. */
#define COLUMN(array, j) (*((PRUint32 *)((array) + (j))))
michael@0 738
michael@0 739 SECStatus
michael@0 740 rijndael_encryptBlock(AESContext *cx,
michael@0 741 unsigned char *output,
michael@0 742 const unsigned char *input)
michael@0 743 {
michael@0 744 return SECFailure;
michael@0 745 #ifdef rijndael_large_blocks_fixed
michael@0 746 unsigned int j, r, Nb;
michael@0 747 unsigned int c2=0, c3=0;
michael@0 748 PRUint32 *roundkeyw;
michael@0 749 PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
michael@0 750 Nb = cx->Nb;
michael@0 751 roundkeyw = cx->expandedKey;
michael@0 752 /* Step 1: Add Round Key 0 to initial state */
michael@0 753 for (j=0; j<4*Nb; j+=4) {
michael@0 754 COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++;
michael@0 755 }
michael@0 756 /* Step 2: Loop over rounds [1..NR-1] */
michael@0 757 for (r=1; r<cx->Nr; ++r) {
michael@0 758 for (j=0; j<Nb; ++j) {
michael@0 759 COLUMN(output, j) = T0(STATE_BYTE(4* j )) ^
michael@0 760 T1(STATE_BYTE(4*((j+ 1)%Nb)+1)) ^
michael@0 761 T2(STATE_BYTE(4*((j+c2)%Nb)+2)) ^
michael@0 762 T3(STATE_BYTE(4*((j+c3)%Nb)+3));
michael@0 763 }
michael@0 764 for (j=0; j<4*Nb; j+=4) {
michael@0 765 COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++;
michael@0 766 }
michael@0 767 }
michael@0 768 /* Step 3: Do the last round */
michael@0 769 /* Final round does not employ MixColumn */
michael@0 770 for (j=0; j<Nb; ++j) {
michael@0 771 COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4* j )))) |
michael@0 772 (BYTE1WORD(T3(STATE_BYTE(4*(j+ 1)%Nb)+1))) |
michael@0 773 (BYTE2WORD(T0(STATE_BYTE(4*(j+c2)%Nb)+2))) |
michael@0 774 (BYTE3WORD(T1(STATE_BYTE(4*(j+c3)%Nb)+3)))) ^
michael@0 775 *roundkeyw++;
michael@0 776 }
michael@0 777 return SECSuccess;
michael@0 778 #endif
michael@0 779 }
michael@0 780
michael@0 781 SECStatus
michael@0 782 rijndael_decryptBlock(AESContext *cx,
michael@0 783 unsigned char *output,
michael@0 784 const unsigned char *input)
michael@0 785 {
michael@0 786 return SECFailure;
michael@0 787 #ifdef rijndael_large_blocks_fixed
michael@0 788 int j, r, Nb;
michael@0 789 int c2=0, c3=0;
michael@0 790 PRUint32 *roundkeyw;
michael@0 791 PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
michael@0 792 Nb = cx->Nb;
michael@0 793 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
michael@0 794 /* reverse key addition */
michael@0 795 for (j=4*Nb; j>=0; j-=4) {
michael@0 796 COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--;
michael@0 797 }
michael@0 798 /* Loop over rounds in reverse [NR..1] */
michael@0 799 for (r=cx->Nr; r>1; --r) {
michael@0 800 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
michael@0 801 for (j=0; j<Nb; ++j) {
michael@0 802 COLUMN(output, 4*j) = TInv0(STATE_BYTE(4* j )) ^
michael@0 803 TInv1(STATE_BYTE(4*(j+Nb- 1)%Nb)+1) ^
michael@0 804 TInv2(STATE_BYTE(4*(j+Nb-c2)%Nb)+2) ^
michael@0 805 TInv3(STATE_BYTE(4*(j+Nb-c3)%Nb)+3);
michael@0 806 }
michael@0 807 /* Invert the key addition step */
michael@0 808 for (j=4*Nb; j>=0; j-=4) {
michael@0 809 COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--;
michael@0 810 }
michael@0 811 }
michael@0 812 /* inverse sub */
michael@0 813 for (j=0; j<4*Nb; ++j) {
michael@0 814 output[j] = SINV(clone[j]);
michael@0 815 }
michael@0 816 /* final key addition */
michael@0 817 for (j=4*Nb; j>=0; j-=4) {
michael@0 818 COLUMN(output, j) ^= *roundkeyw--;
michael@0 819 }
michael@0 820 return SECSuccess;
michael@0 821 #endif
michael@0 822 }
michael@0 823
michael@0 824 /**************************************************************************
michael@0 825 *
michael@0 826 * Rijndael modes of operation (ECB and CBC)
michael@0 827 *
michael@0 828 *************************************************************************/
michael@0 829
michael@0 830 static SECStatus
michael@0 831 rijndael_encryptECB(AESContext *cx, unsigned char *output,
michael@0 832 unsigned int *outputLen, unsigned int maxOutputLen,
michael@0 833 const unsigned char *input, unsigned int inputLen,
michael@0 834 unsigned int blocksize)
michael@0 835 {
michael@0 836 SECStatus rv;
michael@0 837 AESBlockFunc *encryptor;
michael@0 838
michael@0 839 encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
michael@0 840 ? &rijndael_encryptBlock128
michael@0 841 : &rijndael_encryptBlock;
michael@0 842 while (inputLen > 0) {
michael@0 843 rv = (*encryptor)(cx, output, input);
michael@0 844 if (rv != SECSuccess)
michael@0 845 return rv;
michael@0 846 output += blocksize;
michael@0 847 input += blocksize;
michael@0 848 inputLen -= blocksize;
michael@0 849 }
michael@0 850 return SECSuccess;
michael@0 851 }
michael@0 852
michael@0 853 static SECStatus
michael@0 854 rijndael_encryptCBC(AESContext *cx, unsigned char *output,
michael@0 855 unsigned int *outputLen, unsigned int maxOutputLen,
michael@0 856 const unsigned char *input, unsigned int inputLen,
michael@0 857 unsigned int blocksize)
michael@0 858 {
michael@0 859 unsigned int j;
michael@0 860 SECStatus rv;
michael@0 861 AESBlockFunc *encryptor;
michael@0 862 unsigned char *lastblock;
michael@0 863 unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8];
michael@0 864
michael@0 865 if (!inputLen)
michael@0 866 return SECSuccess;
michael@0 867 lastblock = cx->iv;
michael@0 868 encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
michael@0 869 ? &rijndael_encryptBlock128
michael@0 870 : &rijndael_encryptBlock;
michael@0 871 while (inputLen > 0) {
michael@0 872 /* XOR with the last block (IV if first block) */
michael@0 873 for (j=0; j<blocksize; ++j)
michael@0 874 inblock[j] = input[j] ^ lastblock[j];
michael@0 875 /* encrypt */
michael@0 876 rv = (*encryptor)(cx, output, inblock);
michael@0 877 if (rv != SECSuccess)
michael@0 878 return rv;
michael@0 879 /* move to the next block */
michael@0 880 lastblock = output;
michael@0 881 output += blocksize;
michael@0 882 input += blocksize;
michael@0 883 inputLen -= blocksize;
michael@0 884 }
michael@0 885 memcpy(cx->iv, lastblock, blocksize);
michael@0 886 return SECSuccess;
michael@0 887 }
michael@0 888
michael@0 889 static SECStatus
michael@0 890 rijndael_decryptECB(AESContext *cx, unsigned char *output,
michael@0 891 unsigned int *outputLen, unsigned int maxOutputLen,
michael@0 892 const unsigned char *input, unsigned int inputLen,
michael@0 893 unsigned int blocksize)
michael@0 894 {
michael@0 895 SECStatus rv;
michael@0 896 AESBlockFunc *decryptor;
michael@0 897
michael@0 898 decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
michael@0 899 ? &rijndael_decryptBlock128
michael@0 900 : &rijndael_decryptBlock;
michael@0 901 while (inputLen > 0) {
michael@0 902 rv = (*decryptor)(cx, output, input);
michael@0 903 if (rv != SECSuccess)
michael@0 904 return rv;
michael@0 905 output += blocksize;
michael@0 906 input += blocksize;
michael@0 907 inputLen -= blocksize;
michael@0 908 }
michael@0 909 return SECSuccess;
michael@0 910 }
michael@0 911
michael@0 912 static SECStatus
michael@0 913 rijndael_decryptCBC(AESContext *cx, unsigned char *output,
michael@0 914 unsigned int *outputLen, unsigned int maxOutputLen,
michael@0 915 const unsigned char *input, unsigned int inputLen,
michael@0 916 unsigned int blocksize)
michael@0 917 {
michael@0 918 SECStatus rv;
michael@0 919 AESBlockFunc *decryptor;
michael@0 920 const unsigned char *in;
michael@0 921 unsigned char *out;
michael@0 922 unsigned int j;
michael@0 923 unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE];
michael@0 924
michael@0 925
michael@0 926 if (!inputLen)
michael@0 927 return SECSuccess;
michael@0 928 PORT_Assert(output - input >= 0 || input - output >= (int)inputLen );
michael@0 929 decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
michael@0 930 ? &rijndael_decryptBlock128
michael@0 931 : &rijndael_decryptBlock;
michael@0 932 in = input + (inputLen - blocksize);
michael@0 933 memcpy(newIV, in, blocksize);
michael@0 934 out = output + (inputLen - blocksize);
michael@0 935 while (inputLen > blocksize) {
michael@0 936 rv = (*decryptor)(cx, out, in);
michael@0 937 if (rv != SECSuccess)
michael@0 938 return rv;
michael@0 939 for (j=0; j<blocksize; ++j)
michael@0 940 out[j] ^= in[(int)(j - blocksize)];
michael@0 941 out -= blocksize;
michael@0 942 in -= blocksize;
michael@0 943 inputLen -= blocksize;
michael@0 944 }
michael@0 945 if (in == input) {
michael@0 946 rv = (*decryptor)(cx, out, in);
michael@0 947 if (rv != SECSuccess)
michael@0 948 return rv;
michael@0 949 for (j=0; j<blocksize; ++j)
michael@0 950 out[j] ^= cx->iv[j];
michael@0 951 }
michael@0 952 memcpy(cx->iv, newIV, blocksize);
michael@0 953 return SECSuccess;
michael@0 954 }
michael@0 955
michael@0 956 /************************************************************************
michael@0 957 *
michael@0 958 * BLAPI Interface functions
michael@0 959 *
michael@0 960 * The following functions implement the encryption routines defined in
michael@0 961 * BLAPI for the AES cipher, Rijndael.
michael@0 962 *
michael@0 963 ***********************************************************************/
michael@0 964
michael@0 965 AESContext * AES_AllocateContext(void)
michael@0 966 {
michael@0 967 return PORT_ZNew(AESContext);
michael@0 968 }
michael@0 969
michael@0 970
#ifdef INTEL_GCM
/*
 * Adapted from the example code in "How to detect New Instruction support in
 * the 4th generation Intel Core processor family" by Max Locktyukhin.
 *
 * XGETBV:
 *   Reads an extended control register (XCR) specified by ECX into EDX:EAX.
 *
 * check_xcr0_ymm reads XCR0 (ECX = 0), keeps only the low 32 bits (EAX) and
 * returns true when bits 1 and 2 are both set.
 *
 * The caller in this file only invokes it after CPUID leaf 1 reports ECX
 * bits 27 and 28.
 */
static PRBool
check_xcr0_ymm(void)
{
    PRUint32 xcr0;
#if defined(_MSC_VER)
#if defined(_M_IX86)
    __asm {
        mov ecx, 0
        xgetbv
        mov xcr0, eax
    }
#else
    xcr0 = (PRUint32)_xgetbv(0);  /* Requires VS2010 SP1 or later. */
#endif
#else
    /* EDX receives the upper half of XCR0; it is discarded, hence the clobber. */
    __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
    /* Check if xmm and ymm state are enabled in XCR0. */
    return (xcr0 & 6) == 6;
}
#endif
michael@0 1000
michael@0 1001 /*
michael@0 1002 ** Initialize a new AES context suitable for AES encryption/decryption in
michael@0 1003 ** the ECB or CBC mode.
michael@0 1004 ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
michael@0 1005 */
michael@0 1006 static SECStatus
michael@0 1007 aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
michael@0 1008 const unsigned char *iv, int mode, unsigned int encrypt,
michael@0 1009 unsigned int blocksize)
michael@0 1010 {
michael@0 1011 unsigned int Nk;
michael@0 1012 /* According to Rijndael AES Proposal, section 12.1, block and key
michael@0 1013 * lengths between 128 and 256 bits are supported, as long as the
michael@0 1014 * length in bytes is divisible by 4.
michael@0 1015 */
michael@0 1016 if (key == NULL ||
michael@0 1017 keysize < RIJNDAEL_MIN_BLOCKSIZE ||
michael@0 1018 keysize > RIJNDAEL_MAX_BLOCKSIZE ||
michael@0 1019 keysize % 4 != 0 ||
michael@0 1020 blocksize < RIJNDAEL_MIN_BLOCKSIZE ||
michael@0 1021 blocksize > RIJNDAEL_MAX_BLOCKSIZE ||
michael@0 1022 blocksize % 4 != 0) {
michael@0 1023 PORT_SetError(SEC_ERROR_INVALID_ARGS);
michael@0 1024 return SECFailure;
michael@0 1025 }
michael@0 1026 if (mode != NSS_AES && mode != NSS_AES_CBC) {
michael@0 1027 PORT_SetError(SEC_ERROR_INVALID_ARGS);
michael@0 1028 return SECFailure;
michael@0 1029 }
michael@0 1030 if (mode == NSS_AES_CBC && iv == NULL) {
michael@0 1031 PORT_SetError(SEC_ERROR_INVALID_ARGS);
michael@0 1032 return SECFailure;
michael@0 1033 }
michael@0 1034 if (!cx) {
michael@0 1035 PORT_SetError(SEC_ERROR_INVALID_ARGS);
michael@0 1036 return SECFailure;
michael@0 1037 }
michael@0 1038 #ifdef USE_HW_AES
michael@0 1039 if (has_intel_aes == 0) {
michael@0 1040 unsigned long eax, ebx, ecx, edx;
michael@0 1041 char *disable_hw_aes = getenv("NSS_DISABLE_HW_AES");
michael@0 1042
michael@0 1043 if (disable_hw_aes == NULL) {
michael@0 1044 freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
michael@0 1045 has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1;
michael@0 1046 #ifdef INTEL_GCM
michael@0 1047 has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1;
michael@0 1048 if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 &&
michael@0 1049 check_xcr0_ymm()) {
michael@0 1050 has_intel_avx = 1;
michael@0 1051 } else {
michael@0 1052 has_intel_avx = -1;
michael@0 1053 }
michael@0 1054 #endif
michael@0 1055 } else {
michael@0 1056 has_intel_aes = -1;
michael@0 1057 #ifdef INTEL_GCM
michael@0 1058 has_intel_avx = -1;
michael@0 1059 has_intel_clmul = -1;
michael@0 1060 #endif
michael@0 1061 }
michael@0 1062 }
michael@0 1063 use_hw_aes = (PRBool)
michael@0 1064 (has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16);
michael@0 1065 #ifdef INTEL_GCM
michael@0 1066 use_hw_gcm = (PRBool)
michael@0 1067 (use_hw_aes && has_intel_avx>0 && has_intel_clmul>0);
michael@0 1068 #endif
michael@0 1069 #endif /* USE_HW_AES */
michael@0 1070 /* Nb = (block size in bits) / 32 */
michael@0 1071 cx->Nb = blocksize / 4;
michael@0 1072 /* Nk = (key size in bits) / 32 */
michael@0 1073 Nk = keysize / 4;
michael@0 1074 /* Obtain number of rounds from "table" */
michael@0 1075 cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
michael@0 1076 /* copy in the iv, if neccessary */
michael@0 1077 if (mode == NSS_AES_CBC) {
michael@0 1078 memcpy(cx->iv, iv, blocksize);
michael@0 1079 #ifdef USE_HW_AES
michael@0 1080 if (use_hw_aes) {
michael@0 1081 cx->worker = (freeblCipherFunc)
michael@0 1082 intel_aes_cbc_worker(encrypt, keysize);
michael@0 1083 } else
michael@0 1084 #endif
michael@0 1085 {
michael@0 1086 cx->worker = (freeblCipherFunc) (encrypt
michael@0 1087 ? &rijndael_encryptCBC : &rijndael_decryptCBC);
michael@0 1088 }
michael@0 1089 } else {
michael@0 1090 #ifdef USE_HW_AES
michael@0 1091 if (use_hw_aes) {
michael@0 1092 cx->worker = (freeblCipherFunc)
michael@0 1093 intel_aes_ecb_worker(encrypt, keysize);
michael@0 1094 } else
michael@0 1095 #endif
michael@0 1096 {
michael@0 1097 cx->worker = (freeblCipherFunc) (encrypt
michael@0 1098 ? &rijndael_encryptECB : &rijndael_decryptECB);
michael@0 1099 }
michael@0 1100 }
michael@0 1101 PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
michael@0 1102 if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
michael@0 1103 PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
michael@0 1104 goto cleanup;
michael@0 1105 }
michael@0 1106 #ifdef USE_HW_AES
michael@0 1107 if (use_hw_aes) {
michael@0 1108 intel_aes_init(encrypt, keysize);
michael@0 1109 } else
michael@0 1110 #endif
michael@0 1111 {
michael@0 1112
michael@0 1113 #if defined(RIJNDAEL_GENERATE_TABLES) || \
michael@0 1114 defined(RIJNDAEL_GENERATE_TABLES_MACRO)
michael@0 1115 if (rijndaelTables == NULL) {
michael@0 1116 if (PR_CallOnce(&coRTInit, init_rijndael_tables)
michael@0 1117 != PR_SUCCESS) {
michael@0 1118 return SecFailure;
michael@0 1119 }
michael@0 1120 }
michael@0 1121 #endif
michael@0 1122 /* Generate expanded key */
michael@0 1123 if (encrypt) {
michael@0 1124 if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
michael@0 1125 goto cleanup;
michael@0 1126 } else {
michael@0 1127 if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess)
michael@0 1128 goto cleanup;
michael@0 1129 }
michael@0 1130 }
michael@0 1131 cx->worker_cx = cx;
michael@0 1132 cx->destroy = NULL;
michael@0 1133 cx->isBlock = PR_TRUE;
michael@0 1134 return SECSuccess;
michael@0 1135 cleanup:
michael@0 1136 return SECFailure;
michael@0 1137 }
michael@0 1138
michael@0 1139 SECStatus
michael@0 1140 AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
michael@0 1141 const unsigned char *iv, int mode, unsigned int encrypt,
michael@0 1142 unsigned int blocksize)
michael@0 1143 {
michael@0 1144 int basemode = mode;
michael@0 1145 PRBool baseencrypt = encrypt;
michael@0 1146 SECStatus rv;
michael@0 1147
michael@0 1148 switch (mode) {
michael@0 1149 case NSS_AES_CTS:
michael@0 1150 basemode = NSS_AES_CBC;
michael@0 1151 break;
michael@0 1152 case NSS_AES_GCM:
michael@0 1153 case NSS_AES_CTR:
michael@0 1154 basemode = NSS_AES;
michael@0 1155 baseencrypt = PR_TRUE;
michael@0 1156 break;
michael@0 1157 }
michael@0 1158 /* make sure enough is initializes so we can safely call Destroy */
michael@0 1159 cx->worker_cx = NULL;
michael@0 1160 cx->destroy = NULL;
michael@0 1161 rv = aes_InitContext(cx, key, keysize, iv, basemode,
michael@0 1162 baseencrypt, blocksize);
michael@0 1163 if (rv != SECSuccess) {
michael@0 1164 AES_DestroyContext(cx, PR_FALSE);
michael@0 1165 return rv;
michael@0 1166 }
michael@0 1167
michael@0 1168 /* finally, set up any mode specific contexts */
michael@0 1169 switch (mode) {
michael@0 1170 case NSS_AES_CTS:
michael@0 1171 cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize);
michael@0 1172 cx->worker = (freeblCipherFunc)
michael@0 1173 (encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate);
michael@0 1174 cx->destroy = (freeblDestroyFunc) CTS_DestroyContext;
michael@0 1175 cx->isBlock = PR_FALSE;
michael@0 1176 break;
michael@0 1177 case NSS_AES_GCM:
michael@0 1178 #ifdef INTEL_GCM
michael@0 1179 if(use_hw_gcm) {
michael@0 1180 cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize);
michael@0 1181 cx->worker = (freeblCipherFunc)
michael@0 1182 (encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate);
michael@0 1183 cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext;
michael@0 1184 cx->isBlock = PR_FALSE;
michael@0 1185 } else
michael@0 1186 #endif
michael@0 1187 {
michael@0 1188 cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize);
michael@0 1189 cx->worker = (freeblCipherFunc)
michael@0 1190 (encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate);
michael@0 1191 cx->destroy = (freeblDestroyFunc) GCM_DestroyContext;
michael@0 1192 cx->isBlock = PR_FALSE;
michael@0 1193 }
michael@0 1194 break;
michael@0 1195 case NSS_AES_CTR:
michael@0 1196 cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize);
michael@0 1197 #if defined(USE_HW_AES) && defined(_MSC_VER)
michael@0 1198 if (use_hw_aes) {
michael@0 1199 cx->worker = (freeblCipherFunc) CTR_Update_HW_AES;
michael@0 1200 } else
michael@0 1201 #endif
michael@0 1202 {
michael@0 1203 cx->worker = (freeblCipherFunc) CTR_Update;
michael@0 1204 }
michael@0 1205 cx->destroy = (freeblDestroyFunc) CTR_DestroyContext;
michael@0 1206 cx->isBlock = PR_FALSE;
michael@0 1207 break;
michael@0 1208 default:
michael@0 1209 /* everything has already been set up by aes_InitContext, just
michael@0 1210 * return */
michael@0 1211 return SECSuccess;
michael@0 1212 }
michael@0 1213 /* check to see if we succeeded in getting the worker context */
michael@0 1214 if (cx->worker_cx == NULL) {
michael@0 1215 /* no, just destroy the existing context */
michael@0 1216 cx->destroy = NULL; /* paranoia, though you can see a dozen lines */
michael@0 1217 /* below that this isn't necessary */
michael@0 1218 AES_DestroyContext(cx, PR_FALSE);
michael@0 1219 return SECFailure;
michael@0 1220 }
michael@0 1221 return SECSuccess;
michael@0 1222 }
michael@0 1223
michael@0 1224 /* AES_CreateContext
michael@0 1225 *
michael@0 1226 * create a new context for Rijndael operations
michael@0 1227 */
michael@0 1228 AESContext *
michael@0 1229 AES_CreateContext(const unsigned char *key, const unsigned char *iv,
michael@0 1230 int mode, int encrypt,
michael@0 1231 unsigned int keysize, unsigned int blocksize)
michael@0 1232 {
michael@0 1233 AESContext *cx = AES_AllocateContext();
michael@0 1234 if (cx) {
michael@0 1235 SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt,
michael@0 1236 blocksize);
michael@0 1237 if (rv != SECSuccess) {
michael@0 1238 AES_DestroyContext(cx, PR_TRUE);
michael@0 1239 cx = NULL;
michael@0 1240 }
michael@0 1241 }
michael@0 1242 return cx;
michael@0 1243 }
michael@0 1244
michael@0 1245 /*
michael@0 1246 * AES_DestroyContext
michael@0 1247 *
michael@0 1248 * Zero an AES cipher context. If freeit is true, also free the pointer
michael@0 1249 * to the context.
michael@0 1250 */
michael@0 1251 void
michael@0 1252 AES_DestroyContext(AESContext *cx, PRBool freeit)
michael@0 1253 {
michael@0 1254 if (cx->worker_cx && cx->destroy) {
michael@0 1255 (*cx->destroy)(cx->worker_cx, PR_TRUE);
michael@0 1256 cx->worker_cx = NULL;
michael@0 1257 cx->destroy = NULL;
michael@0 1258 }
michael@0 1259 if (freeit)
michael@0 1260 PORT_Free(cx);
michael@0 1261 }
michael@0 1262
michael@0 1263 /*
michael@0 1264 * AES_Encrypt
michael@0 1265 *
michael@0 1266 * Encrypt an arbitrary-length buffer. The output buffer must already be
michael@0 1267 * allocated to at least inputLen.
michael@0 1268 */
michael@0 1269 SECStatus
michael@0 1270 AES_Encrypt(AESContext *cx, unsigned char *output,
michael@0 1271 unsigned int *outputLen, unsigned int maxOutputLen,
michael@0 1272 const unsigned char *input, unsigned int inputLen)
michael@0 1273 {
michael@0 1274 int blocksize;
michael@0 1275 /* Check args */
michael@0 1276 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
michael@0 1277 PORT_SetError(SEC_ERROR_INVALID_ARGS);
michael@0 1278 return SECFailure;
michael@0 1279 }
michael@0 1280 blocksize = 4 * cx->Nb;
michael@0 1281 if (cx->isBlock && (inputLen % blocksize != 0)) {
michael@0 1282 PORT_SetError(SEC_ERROR_INPUT_LEN);
michael@0 1283 return SECFailure;
michael@0 1284 }
michael@0 1285 if (maxOutputLen < inputLen) {
michael@0 1286 PORT_SetError(SEC_ERROR_OUTPUT_LEN);
michael@0 1287 return SECFailure;
michael@0 1288 }
michael@0 1289 *outputLen = inputLen;
michael@0 1290 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
michael@0 1291 input, inputLen, blocksize);
michael@0 1292 }
michael@0 1293
michael@0 1294 /*
michael@0 1295 * AES_Decrypt
michael@0 1296 *
michael@0 1297 * Decrypt and arbitrary-length buffer. The output buffer must already be
michael@0 1298 * allocated to at least inputLen.
michael@0 1299 */
michael@0 1300 SECStatus
michael@0 1301 AES_Decrypt(AESContext *cx, unsigned char *output,
michael@0 1302 unsigned int *outputLen, unsigned int maxOutputLen,
michael@0 1303 const unsigned char *input, unsigned int inputLen)
michael@0 1304 {
michael@0 1305 int blocksize;
michael@0 1306 /* Check args */
michael@0 1307 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
michael@0 1308 PORT_SetError(SEC_ERROR_INVALID_ARGS);
michael@0 1309 return SECFailure;
michael@0 1310 }
michael@0 1311 blocksize = 4 * cx->Nb;
michael@0 1312 if (cx->isBlock && (inputLen % blocksize != 0)) {
michael@0 1313 PORT_SetError(SEC_ERROR_INPUT_LEN);
michael@0 1314 return SECFailure;
michael@0 1315 }
michael@0 1316 if (maxOutputLen < inputLen) {
michael@0 1317 PORT_SetError(SEC_ERROR_OUTPUT_LEN);
michael@0 1318 return SECFailure;
michael@0 1319 }
michael@0 1320 *outputLen = inputLen;
michael@0 1321 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
michael@0 1322 input, inputLen, blocksize);
michael@0 1323 }

mercurial