security/nss/lib/freebl/arcfour.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/arcfour.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,573 @@
     1.4 +/* arcfour.c - the arc four algorithm.
     1.5 + *
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#ifdef FREEBL_NO_DEPEND
    1.11 +#include "stubs.h"
    1.12 +#endif
    1.13 +
    1.14 +#include "prerr.h"
    1.15 +#include "secerr.h"
    1.16 +
    1.17 +#include "prtypes.h"
    1.18 +#include "blapi.h"
    1.19 +
    1.20 +/* Architecture-dependent defines */
    1.21 +
    1.22 +#if defined(SOLARIS) || defined(HPUX) || defined(NSS_X86) || \
    1.23 +    defined(_WIN64)
    1.24 +/* Convert the byte-stream to a word-stream */
    1.25 +#define CONVERT_TO_WORDS
    1.26 +#endif
    1.27 +
    1.28 +#if defined(AIX) || defined(OSF1) || defined(NSS_BEVAND_ARCFOUR)
    1.29 +/* Treat array variables as words, not bytes, on CPUs that take 
    1.30 + * much longer to write bytes than to write words, or when using 
    1.31 + * assembler code that required it.
    1.32 + */
    1.33 +#define USE_WORD
    1.34 +#endif
    1.35 +
    1.36 +#if defined(IS_64) || defined(NSS_BEVAND_ARCFOUR)
    1.37 +typedef PRUint64 WORD;
    1.38 +#else
    1.39 +typedef PRUint32 WORD;
    1.40 +#endif
    1.41 +#define WORDSIZE sizeof(WORD)
    1.42 +
    1.43 +#if defined(USE_WORD)
    1.44 +typedef WORD Stype;
    1.45 +#else
    1.46 +typedef PRUint8 Stype;
    1.47 +#endif
    1.48 +
    1.49 +#define ARCFOUR_STATE_SIZE 256
    1.50 +
    1.51 +#define MASK1BYTE (WORD)(0xff)
    1.52 +
    1.53 +#define SWAP(a, b) \
    1.54 +	tmp = a; \
    1.55 +	a = b; \
    1.56 +	b = tmp;
    1.57 +
    1.58 +/*
    1.59 + * State information for stream cipher.
    1.60 + */
    1.61 +struct RC4ContextStr
    1.62 +{
    1.63 +#if defined(NSS_ARCFOUR_IJ_B4_S) || defined(NSS_BEVAND_ARCFOUR)
    1.64 +	Stype i;
    1.65 +	Stype j;
    1.66 +	Stype S[ARCFOUR_STATE_SIZE];
    1.67 +#else
    1.68 +	Stype S[ARCFOUR_STATE_SIZE];
    1.69 +	Stype i;
    1.70 +	Stype j;
    1.71 +#endif
    1.72 +};
    1.73 +
    1.74 +/*
    1.75 + * array indices [0..255] to initialize cx->S array (faster than loop).
    1.76 + */
    1.77 +static const Stype Kinit[256] = {
    1.78 +	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    1.79 +	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    1.80 +	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    1.81 +	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    1.82 +	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    1.83 +	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
    1.84 +	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    1.85 +	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    1.86 +	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    1.87 +	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
    1.88 +	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    1.89 +	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    1.90 +	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    1.91 +	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    1.92 +	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    1.93 +	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
    1.94 +	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    1.95 +	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    1.96 +	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    1.97 +	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    1.98 +	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    1.99 +	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
   1.100 +	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
   1.101 +	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
   1.102 +	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
   1.103 +	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
   1.104 +	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
   1.105 +	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
   1.106 +	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
   1.107 +	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
   1.108 +	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
   1.109 +	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
   1.110 +};
   1.111 +
   1.112 +RC4Context *
   1.113 +RC4_AllocateContext(void)
   1.114 +{
   1.115 +    return PORT_ZNew(RC4Context);
   1.116 +}
   1.117 +
   1.118 +SECStatus   
   1.119 +RC4_InitContext(RC4Context *cx, const unsigned char *key, unsigned int len,
   1.120 +	        const unsigned char * unused1, int unused2, 
   1.121 +		unsigned int unused3, unsigned int unused4)
   1.122 +{
   1.123 +	unsigned int i;
   1.124 +	PRUint8 j, tmp;
   1.125 +	PRUint8 K[256];
   1.126 +	PRUint8 *L;
   1.127 +
   1.128 +	/* verify the key length. */
   1.129 +	PORT_Assert(len > 0 && len < ARCFOUR_STATE_SIZE);
   1.130 +	if (len == 0 || len >= ARCFOUR_STATE_SIZE) {
   1.131 +		PORT_SetError(SEC_ERROR_BAD_KEY);
   1.132 +		return SECFailure;
   1.133 +	}
   1.134 +	if (cx == NULL) {
   1.135 +	    PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1.136 +	    return SECFailure;
   1.137 +	}
   1.138 +	/* Initialize the state using array indices. */
   1.139 +	memcpy(cx->S, Kinit, sizeof cx->S);
   1.140 +	/* Fill in K repeatedly with values from key. */
   1.141 +	L = K;
   1.142 +	for (i = sizeof K; i > len; i-= len) {
   1.143 +		memcpy(L, key, len);
   1.144 +		L += len;
   1.145 +	}
   1.146 +	memcpy(L, key, i);
   1.147 +	/* Stir the state of the generator.  At this point it is assumed
   1.148 +	 * that the key is the size of the state buffer.  If this is not
   1.149 +	 * the case, the key bytes are repeated to fill the buffer.
   1.150 +	 */
   1.151 +	j = 0;
   1.152 +#define ARCFOUR_STATE_STIR(ii) \
   1.153 +	j = j + cx->S[ii] + K[ii]; \
   1.154 +	SWAP(cx->S[ii], cx->S[j]);
   1.155 +	for (i=0; i<ARCFOUR_STATE_SIZE; i++) {
   1.156 +		ARCFOUR_STATE_STIR(i);
   1.157 +	}
   1.158 +	cx->i = 0;
   1.159 +	cx->j = 0;
   1.160 +	return SECSuccess;
   1.161 +}
   1.162 +
   1.163 +
   1.164 +/*
   1.165 + * Initialize a new generator.
   1.166 + */
   1.167 +RC4Context *
   1.168 +RC4_CreateContext(const unsigned char *key, int len)
   1.169 +{
   1.170 +    RC4Context *cx = RC4_AllocateContext();
   1.171 +    if (cx) {
   1.172 +	SECStatus rv = RC4_InitContext(cx, key, len, NULL, 0, 0, 0);
   1.173 +	if (rv != SECSuccess) {
   1.174 +	    PORT_ZFree(cx, sizeof(*cx));
   1.175 +	    cx = NULL;
   1.176 +	}
   1.177 +    }
   1.178 +    return cx;
   1.179 +}
   1.180 +
   1.181 +void 
   1.182 +RC4_DestroyContext(RC4Context *cx, PRBool freeit)
   1.183 +{
   1.184 +	if (freeit)
   1.185 +		PORT_ZFree(cx, sizeof(*cx));
   1.186 +}
   1.187 +
   1.188 +#if defined(NSS_BEVAND_ARCFOUR)
   1.189 +extern void ARCFOUR(RC4Context *cx, WORD inputLen, 
   1.190 +	const unsigned char *input, unsigned char *output);
   1.191 +#else
   1.192 +/*
   1.193 + * Generate the next byte in the stream.
   1.194 + */
   1.195 +#define ARCFOUR_NEXT_BYTE() \
   1.196 +	tmpSi = cx->S[++tmpi]; \
   1.197 +	tmpj += tmpSi; \
   1.198 +	tmpSj = cx->S[tmpj]; \
   1.199 +	cx->S[tmpi] = tmpSj; \
   1.200 +	cx->S[tmpj] = tmpSi; \
   1.201 +	t = tmpSi + tmpSj;
   1.202 +
   1.203 +#ifdef CONVERT_TO_WORDS
   1.204 +/*
   1.205 + * Straight ARCFOUR op.  No optimization.
   1.206 + */
   1.207 +static SECStatus 
   1.208 +rc4_no_opt(RC4Context *cx, unsigned char *output,
   1.209 +           unsigned int *outputLen, unsigned int maxOutputLen,
   1.210 +           const unsigned char *input, unsigned int inputLen)
   1.211 +{
   1.212 +    PRUint8 t;
   1.213 +	Stype tmpSi, tmpSj;
   1.214 +	register PRUint8 tmpi = cx->i;
   1.215 +	register PRUint8 tmpj = cx->j;
   1.216 +	unsigned int index;
   1.217 +	PORT_Assert(maxOutputLen >= inputLen);
   1.218 +	if (maxOutputLen < inputLen) {
   1.219 +		PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1.220 +		return SECFailure;
   1.221 +	}
   1.222 +	for (index=0; index < inputLen; index++) {
   1.223 +		/* Generate next byte from stream. */
   1.224 +		ARCFOUR_NEXT_BYTE();
   1.225 +		/* output = next stream byte XOR next input byte */
   1.226 +		output[index] = cx->S[t] ^ input[index];
   1.227 +	}
   1.228 +	*outputLen = inputLen;
   1.229 +	cx->i = tmpi;
   1.230 +	cx->j = tmpj;
   1.231 +	return SECSuccess;
   1.232 +}
   1.233 +
   1.234 +#else
   1.235 +/* !CONVERT_TO_WORDS */
   1.236 +
   1.237 +/*
   1.238 + * Byte-at-a-time ARCFOUR, unrolling the loop into 8 pieces.
   1.239 + */
   1.240 +static SECStatus 
   1.241 +rc4_unrolled(RC4Context *cx, unsigned char *output,
   1.242 +             unsigned int *outputLen, unsigned int maxOutputLen,
   1.243 +             const unsigned char *input, unsigned int inputLen)
   1.244 +{
   1.245 +	PRUint8 t;
   1.246 +	Stype tmpSi, tmpSj;
   1.247 +	register PRUint8 tmpi = cx->i;
   1.248 +	register PRUint8 tmpj = cx->j;
   1.249 +	int index;
   1.250 +	PORT_Assert(maxOutputLen >= inputLen);
   1.251 +	if (maxOutputLen < inputLen) {
   1.252 +		PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1.253 +		return SECFailure;
   1.254 +	}
   1.255 +	for (index = inputLen / 8; index-- > 0; input += 8, output += 8) {
   1.256 +		ARCFOUR_NEXT_BYTE();
   1.257 +		output[0] = cx->S[t] ^ input[0];
   1.258 +		ARCFOUR_NEXT_BYTE();
   1.259 +		output[1] = cx->S[t] ^ input[1];
   1.260 +		ARCFOUR_NEXT_BYTE();
   1.261 +		output[2] = cx->S[t] ^ input[2];
   1.262 +		ARCFOUR_NEXT_BYTE();
   1.263 +		output[3] = cx->S[t] ^ input[3];
   1.264 +		ARCFOUR_NEXT_BYTE();
   1.265 +		output[4] = cx->S[t] ^ input[4];
   1.266 +		ARCFOUR_NEXT_BYTE();
   1.267 +		output[5] = cx->S[t] ^ input[5];
   1.268 +		ARCFOUR_NEXT_BYTE();
   1.269 +		output[6] = cx->S[t] ^ input[6];
   1.270 +		ARCFOUR_NEXT_BYTE();
   1.271 +		output[7] = cx->S[t] ^ input[7];
   1.272 +	}
   1.273 +	index = inputLen % 8;
   1.274 +	if (index) {
   1.275 +		input += index;
   1.276 +		output += index;
   1.277 +		switch (index) {
   1.278 +		case 7:
   1.279 +			ARCFOUR_NEXT_BYTE();
   1.280 +			output[-7] = cx->S[t] ^ input[-7]; /* FALLTHRU */
   1.281 +		case 6:
   1.282 +			ARCFOUR_NEXT_BYTE();
   1.283 +			output[-6] = cx->S[t] ^ input[-6]; /* FALLTHRU */
   1.284 +		case 5:
   1.285 +			ARCFOUR_NEXT_BYTE();
   1.286 +			output[-5] = cx->S[t] ^ input[-5]; /* FALLTHRU */
   1.287 +		case 4:
   1.288 +			ARCFOUR_NEXT_BYTE();
   1.289 +			output[-4] = cx->S[t] ^ input[-4]; /* FALLTHRU */
   1.290 +		case 3:
   1.291 +			ARCFOUR_NEXT_BYTE();
   1.292 +			output[-3] = cx->S[t] ^ input[-3]; /* FALLTHRU */
   1.293 +		case 2:
   1.294 +			ARCFOUR_NEXT_BYTE();
   1.295 +			output[-2] = cx->S[t] ^ input[-2]; /* FALLTHRU */
   1.296 +		case 1:
   1.297 +			ARCFOUR_NEXT_BYTE();
   1.298 +			output[-1] = cx->S[t] ^ input[-1]; /* FALLTHRU */
   1.299 +		default:
   1.300 +			/* FALLTHRU */
   1.301 +			; /* hp-ux build breaks without this */
   1.302 +		}
   1.303 +	}
   1.304 +	cx->i = tmpi;
   1.305 +	cx->j = tmpj;
   1.306 +	*outputLen = inputLen;
   1.307 +	return SECSuccess;
   1.308 +}
   1.309 +#endif
   1.310 +
   1.311 +#ifdef IS_LITTLE_ENDIAN
   1.312 +#define ARCFOUR_NEXT4BYTES_L(n) \
   1.313 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n     ); \
   1.314 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n +  8); \
   1.315 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n + 16); \
   1.316 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n + 24);
   1.317 +#else
   1.318 +#define ARCFOUR_NEXT4BYTES_B(n) \
   1.319 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n + 24); \
   1.320 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n + 16); \
   1.321 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n +  8); \
   1.322 +	ARCFOUR_NEXT_BYTE(); streamWord |= (WORD)cx->S[t] << (n     );
   1.323 +#endif
   1.324 +
   1.325 +#if (defined(IS_64) && !defined(__sparc)) || defined(NSS_USE_64)
   1.326 +/* 64-bit wordsize */
   1.327 +#ifdef IS_LITTLE_ENDIAN
   1.328 +#define ARCFOUR_NEXT_WORD() \
   1.329 +	{ streamWord = 0; ARCFOUR_NEXT4BYTES_L(0); ARCFOUR_NEXT4BYTES_L(32); }
   1.330 +#else
   1.331 +#define ARCFOUR_NEXT_WORD() \
   1.332 +	{ streamWord = 0; ARCFOUR_NEXT4BYTES_B(32); ARCFOUR_NEXT4BYTES_B(0); }
   1.333 +#endif
   1.334 +#else
   1.335 +/* 32-bit wordsize */
   1.336 +#ifdef IS_LITTLE_ENDIAN
   1.337 +#define ARCFOUR_NEXT_WORD() \
   1.338 +	{ streamWord = 0; ARCFOUR_NEXT4BYTES_L(0); }
   1.339 +#else
   1.340 +#define ARCFOUR_NEXT_WORD() \
   1.341 +	{ streamWord = 0; ARCFOUR_NEXT4BYTES_B(0); }
   1.342 +#endif
   1.343 +#endif
   1.344 +
   1.345 +#ifdef IS_LITTLE_ENDIAN
   1.346 +#define RSH <<
   1.347 +#define LSH >>
   1.348 +#else
   1.349 +#define RSH >>
   1.350 +#define LSH <<
   1.351 +#endif
   1.352 +
   1.353 +#ifdef IS_LITTLE_ENDIAN
   1.354 +#define LEFTMOST_BYTE_SHIFT 0
   1.355 +#define NEXT_BYTE_SHIFT(shift) shift + 8
   1.356 +#else
   1.357 +#define LEFTMOST_BYTE_SHIFT 8*(WORDSIZE - 1)
   1.358 +#define NEXT_BYTE_SHIFT(shift) shift - 8
   1.359 +#endif
   1.360 +
   1.361 +#ifdef CONVERT_TO_WORDS
   1.362 +static SECStatus 
   1.363 +rc4_wordconv(RC4Context *cx, unsigned char *output,
   1.364 +             unsigned int *outputLen, unsigned int maxOutputLen,
   1.365 +             const unsigned char *input, unsigned int inputLen)
   1.366 +{
   1.367 +	PR_STATIC_ASSERT(sizeof(PRUword) == sizeof(ptrdiff_t));
   1.368 +	unsigned int inOffset = (PRUword)input % WORDSIZE;
   1.369 +	unsigned int outOffset = (PRUword)output % WORDSIZE;
   1.370 +	register WORD streamWord;
   1.371 +	register const WORD *pInWord;
   1.372 +	register WORD *pOutWord;
   1.373 +	register WORD inWord, nextInWord;
   1.374 +	PRUint8 t;
   1.375 +	register Stype tmpSi, tmpSj;
   1.376 +	register PRUint8 tmpi = cx->i;
   1.377 +	register PRUint8 tmpj = cx->j;
   1.378 +	unsigned int bufShift, invBufShift;
   1.379 +	unsigned int i;
   1.380 +	const unsigned char *finalIn;
   1.381 +	unsigned char *finalOut;
   1.382 +
   1.383 +	PORT_Assert(maxOutputLen >= inputLen);
   1.384 +	if (maxOutputLen < inputLen) {
   1.385 +		PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1.386 +		return SECFailure;
   1.387 +	}
   1.388 +	if (inputLen < 2*WORDSIZE) {
   1.389 +		/* Ignore word conversion, do byte-at-a-time */
   1.390 +		return rc4_no_opt(cx, output, outputLen, maxOutputLen, input, inputLen);
   1.391 +	}
   1.392 +	*outputLen = inputLen;
   1.393 +	pInWord = (const WORD *)(input - inOffset);
   1.394 +	pOutWord = (WORD *)(output - outOffset);
   1.395 +	if (inOffset <= outOffset) {
   1.396 +		bufShift = 8*(outOffset - inOffset);
   1.397 +		invBufShift = 8*WORDSIZE - bufShift;
   1.398 +	} else {
   1.399 +		invBufShift = 8*(inOffset - outOffset);
   1.400 +		bufShift = 8*WORDSIZE - invBufShift;
   1.401 +	}
   1.402 +	/*****************************************************************/
   1.403 +	/* Step 1:                                                       */
   1.404 +	/* If the first output word is partial, consume the bytes in the */
   1.405 +	/* first partial output word by loading one or two words of      */
   1.406 +	/* input and shifting them accordingly.  Otherwise, just load    */
   1.407 +	/* in the first word of input.  At the end of this block, at     */
   1.408 +	/* least one partial word of input should ALWAYS be loaded.      */
   1.409 +	/*****************************************************************/
   1.410 +	if (outOffset) {
   1.411 +		unsigned int byteCount = WORDSIZE - outOffset; 
   1.412 +		for (i = 0; i < byteCount; i++) {
   1.413 +			ARCFOUR_NEXT_BYTE();
   1.414 +			output[i] = cx->S[t] ^ input[i];
   1.415 +		}
   1.416 +		/* Consumed byteCount bytes of input */
   1.417 +		inputLen -= byteCount;
   1.418 +		pInWord++;
   1.419 +
   1.420 +		/* move to next word of output */
   1.421 +		pOutWord++;
   1.422 +
   1.423 +		/* If buffers are relatively misaligned, shift the bytes in inWord
   1.424 +		 * to be aligned to the output buffer.
   1.425 +		 */
   1.426 +		if (inOffset < outOffset) {
   1.427 +			/* The first input word (which may be partial) has more bytes
   1.428 +			 * than needed.  Copy the remainder to inWord.
   1.429 +			 */
   1.430 +			unsigned int shift = LEFTMOST_BYTE_SHIFT;
   1.431 +			inWord = 0;
   1.432 +			for (i = 0; i < outOffset - inOffset; i++) {
   1.433 +				inWord |= (WORD)input[byteCount + i] << shift;
   1.434 +				shift = NEXT_BYTE_SHIFT(shift);
   1.435 +			}
   1.436 +		} else if (inOffset > outOffset) {
   1.437 +			/* Consumed some bytes in the second input word.  Copy the
   1.438 +			 * remainder to inWord.
   1.439 +			 */
   1.440 +			inWord = *pInWord++;
   1.441 +			inWord = inWord LSH invBufShift;
   1.442 +		} else {
   1.443 +			inWord = 0;
   1.444 +		}
   1.445 +	} else {
   1.446 +		/* output is word-aligned */
   1.447 +		if (inOffset) {
   1.448 +			/* Input is not word-aligned.  The first word load of input 
   1.449 +			 * will not produce a full word of input bytes, so one word
   1.450 +			 * must be pre-loaded.  The main loop below will load in the
   1.451 +			 * next input word and shift some of its bytes into inWord
   1.452 +			 * in order to create a full input word.  Note that the main
   1.453 +			 * loop must execute at least once because the input must
   1.454 +			 * be at least two words.
   1.455 +			 */
   1.456 +			unsigned int shift = LEFTMOST_BYTE_SHIFT;
   1.457 +			inWord = 0;
   1.458 +			for (i = 0; i < WORDSIZE - inOffset; i++) {
   1.459 +				inWord |= (WORD)input[i] << shift;
   1.460 +				shift = NEXT_BYTE_SHIFT(shift);
   1.461 +			}
   1.462 +			pInWord++;
   1.463 +		} else {
   1.464 +			/* Input is word-aligned.  The first word load of input 
   1.465 +			 * will produce a full word of input bytes, so nothing
   1.466 +			 * needs to be loaded here.
   1.467 +			 */
   1.468 +			inWord = 0;
   1.469 +		}
   1.470 +	}
   1.471 +	/*****************************************************************/
   1.472 +	/* Step 2: main loop                                             */
   1.473 +	/* At this point the output buffer is word-aligned.  Any unused  */
   1.474 +	/* bytes from above will be in inWord (shifted correctly).  If   */
   1.475 +	/* the input buffer is unaligned relative to the output buffer,  */
   1.476 +	/* shifting has to be done.                                      */
   1.477 +	/*****************************************************************/
   1.478 +	if (bufShift) {
   1.479 +		/* preloadedByteCount is the number of input bytes pre-loaded
   1.480 +		 * in inWord.
   1.481 +		 */
   1.482 +		unsigned int preloadedByteCount = bufShift/8;
   1.483 +		for (; inputLen >= preloadedByteCount + WORDSIZE;
   1.484 +		     inputLen -= WORDSIZE) {
   1.485 +			nextInWord = *pInWord++;
   1.486 +			inWord |= nextInWord RSH bufShift;
   1.487 +			nextInWord = nextInWord LSH invBufShift;
   1.488 +			ARCFOUR_NEXT_WORD();
   1.489 +			*pOutWord++ = inWord ^ streamWord;
   1.490 +			inWord = nextInWord;
   1.491 +		}
   1.492 +		if (inputLen == 0) {
   1.493 +			/* Nothing left to do. */
   1.494 +			cx->i = tmpi;
   1.495 +			cx->j = tmpj;
   1.496 +			return SECSuccess;
   1.497 +		}
   1.498 +		finalIn = (const unsigned char *)pInWord - preloadedByteCount;
   1.499 +	} else {
   1.500 +		for (; inputLen >= WORDSIZE; inputLen -= WORDSIZE) {
   1.501 +			inWord = *pInWord++;
   1.502 +			ARCFOUR_NEXT_WORD();
   1.503 +			*pOutWord++ = inWord ^ streamWord;
   1.504 +		}
   1.505 +		if (inputLen == 0) {
   1.506 +			/* Nothing left to do. */
   1.507 +			cx->i = tmpi;
   1.508 +			cx->j = tmpj;
   1.509 +			return SECSuccess;
   1.510 +		}
   1.511 +		finalIn = (const unsigned char *)pInWord;
   1.512 +	}
   1.513 +	/*****************************************************************/
   1.514 +	/* Step 3:                                                       */
   1.515 +	/* Do the remaining partial word of input one byte at a time.    */
   1.516 +	/*****************************************************************/
   1.517 +	finalOut = (unsigned char *)pOutWord;
   1.518 +	for (i = 0; i < inputLen; i++) {
   1.519 +		ARCFOUR_NEXT_BYTE();
   1.520 +		finalOut[i] = cx->S[t] ^ finalIn[i];
   1.521 +	}
   1.522 +	cx->i = tmpi;
   1.523 +	cx->j = tmpj;
   1.524 +	return SECSuccess;
   1.525 +}
   1.526 +#endif
   1.527 +#endif /* NSS_BEVAND_ARCFOUR */
   1.528 +
   1.529 +SECStatus 
   1.530 +RC4_Encrypt(RC4Context *cx, unsigned char *output,
   1.531 +            unsigned int *outputLen, unsigned int maxOutputLen,
   1.532 +            const unsigned char *input, unsigned int inputLen)
   1.533 +{
   1.534 +	PORT_Assert(maxOutputLen >= inputLen);
   1.535 +	if (maxOutputLen < inputLen) {
   1.536 +		PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1.537 +		return SECFailure;
   1.538 +	}
   1.539 +#if defined(NSS_BEVAND_ARCFOUR)
   1.540 +	ARCFOUR(cx, inputLen, input, output);
   1.541 +        *outputLen = inputLen;
   1.542 +	return SECSuccess;
   1.543 +#elif defined( CONVERT_TO_WORDS )
   1.544 +	/* Convert the byte-stream to a word-stream */
   1.545 +	return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
   1.546 +#else
   1.547 +	/* Operate on bytes, but unroll the main loop */
   1.548 +	return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
   1.549 +#endif
   1.550 +}
   1.551 +
   1.552 +SECStatus RC4_Decrypt(RC4Context *cx, unsigned char *output,
   1.553 +                      unsigned int *outputLen, unsigned int maxOutputLen,
   1.554 +                      const unsigned char *input, unsigned int inputLen)
   1.555 +{
   1.556 +	PORT_Assert(maxOutputLen >= inputLen);
   1.557 +	if (maxOutputLen < inputLen) {
   1.558 +		PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1.559 +		return SECFailure;
   1.560 +	}
   1.561 +	/* decrypt and encrypt are same operation. */
   1.562 +#if defined(NSS_BEVAND_ARCFOUR)
   1.563 +	ARCFOUR(cx, inputLen, input, output);
   1.564 +        *outputLen = inputLen;
   1.565 +	return SECSuccess;
   1.566 +#elif defined( CONVERT_TO_WORDS )
   1.567 +	/* Convert the byte-stream to a word-stream */
   1.568 +	return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
   1.569 +#else
   1.570 +	/* Operate on bytes, but unroll the main loop */
   1.571 +	return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
   1.572 +#endif
   1.573 +}
   1.574 +
   1.575 +#undef CONVERT_TO_WORDS
   1.576 +#undef USE_WORD

mercurial