security/nss/lib/freebl/ecl/ecp_256.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/ecl/ecp_256.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,377 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +#include "ecp.h"
     1.9 +#include "mpi.h"
    1.10 +#include "mplogic.h"
    1.11 +#include "mpi-priv.h"
    1.12 +
/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1.  a can be r. 
 * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to 
 * Elliptic Curve Cryptography.
 *
 * The input a (at most 512 bits, i.e. a full product of two field
 * elements) is folded into a 256-bit accumulator by adding/subtracting
 * fixed shifted slices of its upper half (the s1..s8 terms of algorithm
 * 2.29, some of them merged below).  The net signed overflow is kept in
 * an extra signed register and folded back using
 * 2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p256), followed by a final
 * conditional subtraction of p256. */
static mp_err
ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_size a_used = MP_USED(a);
	int a_bits = mpl_significant_bits(a);
	mp_digit carry;

#ifdef ECL_THIRTY_TWO_BIT
	/* a8..a15 are the upper 32-bit words of a; words not present in a
	 * stay zero (see the fall-through switch below). */
	mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
	mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
	int r8; /* must be a signed value ! */
#else
	/* a4..a7 are the upper 64-bit digits of a; the h/l variants below
	 * hold their high/low 32-bit halves shifted into digit position. */
	mp_digit a4=0, a5=0, a6=0, a7=0;
	mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
	mp_digit r0, r1, r2, r3;
	int r4; /* must be a signed value ! */
#endif
	/* Inputs already below 2^256 need no reduction (just copy);
	 * inputs wider than twice the field size (512 bits) fall back
	 * to generic division-based reduction. */
	if (a_bits < 256) {
		if (a == r) return MP_OKAY;
		return mp_copy(a,r);
	}
	if (a_bits > 512)  {
		MP_CHECKOK(mp_mod(a, &meth->irr, r));
	} else {

#ifdef ECL_THIRTY_TWO_BIT
		/* Load whichever upper words exist; deliberate fall-through
		 * so that shorter inputs leave the higher words zero. */
		switch (a_used) {
		case 16:
			a15 = MP_DIGIT(a,15);
		case 15:
			a14 = MP_DIGIT(a,14);
		case 14:
			a13 = MP_DIGIT(a,13);
		case 13:
			a12 = MP_DIGIT(a,12);
		case 12:
			a11 = MP_DIGIT(a,11);
		case 11:
			a10 = MP_DIGIT(a,10);
		case 10:
			a9 = MP_DIGIT(a,9);
		case 9:
			a8 = MP_DIGIT(a,8);
		}

		/* Accumulator starts as the low 256 bits of a. */
		r0 = MP_DIGIT(a,0);
		r1 = MP_DIGIT(a,1);
		r2 = MP_DIGIT(a,2);
		r3 = MP_DIGIT(a,3);
		r4 = MP_DIGIT(a,4);
		r5 = MP_DIGIT(a,5);
		r6 = MP_DIGIT(a,6);
		r7 = MP_DIGIT(a,7);

		/* sum 1 (added twice, per algorithm 2.29) */
		MP_ADD_CARRY(r3, a11, r3, 0,     carry);
		MP_ADD_CARRY(r4, a12, r4, carry, carry);
		MP_ADD_CARRY(r5, a13, r5, carry, carry);
		MP_ADD_CARRY(r6, a14, r6, carry, carry);
		MP_ADD_CARRY(r7, a15, r7, carry, carry);
		r8 = carry;
		MP_ADD_CARRY(r3, a11, r3, 0,     carry);
		MP_ADD_CARRY(r4, a12, r4, carry, carry);
		MP_ADD_CARRY(r5, a13, r5, carry, carry);
		MP_ADD_CARRY(r6, a14, r6, carry, carry);
		MP_ADD_CARRY(r7, a15, r7, carry, carry);
		r8 += carry;
		/* sum 2 */
		MP_ADD_CARRY(r3, a12, r3, 0,     carry);
		MP_ADD_CARRY(r4, a13, r4, carry, carry);
		MP_ADD_CARRY(r5, a14, r5, carry, carry);
		MP_ADD_CARRY(r6, a15, r6, carry, carry);
		MP_ADD_CARRY(r7,   0, r7, carry, carry);
		r8 += carry;
		/* combine last bottom of sum 3 with second sum 2 */
		MP_ADD_CARRY(r0, a8,  r0, 0,     carry);
		MP_ADD_CARRY(r1, a9,  r1, carry, carry);
		MP_ADD_CARRY(r2, a10, r2, carry, carry);
		MP_ADD_CARRY(r3, a12, r3, carry, carry);
		MP_ADD_CARRY(r4, a13, r4, carry, carry);
		MP_ADD_CARRY(r5, a14, r5, carry, carry);
		MP_ADD_CARRY(r6, a15, r6, carry, carry);
		MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
		r8 += carry;
		/* sum 3 (rest of it)*/
		MP_ADD_CARRY(r6, a14, r6, 0,     carry);
		MP_ADD_CARRY(r7,   0, r7, carry, carry);
		r8 += carry;
		/* sum 4 (rest of it)*/
		MP_ADD_CARRY(r0, a9,  r0, 0,     carry);
		MP_ADD_CARRY(r1, a10, r1, carry, carry);
		MP_ADD_CARRY(r2, a11, r2, carry, carry);
		MP_ADD_CARRY(r3, a13, r3, carry, carry);
		MP_ADD_CARRY(r4, a14, r4, carry, carry);
		MP_ADD_CARRY(r5, a15, r5, carry, carry);
		MP_ADD_CARRY(r6, a13, r6, carry, carry);
		MP_ADD_CARRY(r7, a8,  r7, carry, carry);
		r8 += carry;
		/* diff 5 (the d1..d4 terms of 2.29 are subtracted; any
		 * net borrow is tracked in the signed r8) */
		MP_SUB_BORROW(r0, a11, r0, 0,     carry);
		MP_SUB_BORROW(r1, a12, r1, carry, carry);
		MP_SUB_BORROW(r2, a13, r2, carry, carry);
		MP_SUB_BORROW(r3,   0, r3, carry, carry);
		MP_SUB_BORROW(r4,   0, r4, carry, carry);
		MP_SUB_BORROW(r5,   0, r5, carry, carry);
		MP_SUB_BORROW(r6, a8,  r6, carry, carry);
		MP_SUB_BORROW(r7, a10, r7, carry, carry);
		r8 -= carry;
		/* diff 6 */
		MP_SUB_BORROW(r0, a12, r0, 0,     carry);
		MP_SUB_BORROW(r1, a13, r1, carry, carry);
		MP_SUB_BORROW(r2, a14, r2, carry, carry);
		MP_SUB_BORROW(r3, a15, r3, carry, carry);
		MP_SUB_BORROW(r4,   0, r4, carry, carry);
		MP_SUB_BORROW(r5,   0, r5, carry, carry);
		MP_SUB_BORROW(r6, a9,  r6, carry, carry);
		MP_SUB_BORROW(r7, a11, r7, carry, carry);
		r8 -= carry;
		/* diff 7 */
		MP_SUB_BORROW(r0, a13, r0, 0,     carry);
		MP_SUB_BORROW(r1, a14, r1, carry, carry);
		MP_SUB_BORROW(r2, a15, r2, carry, carry);
		MP_SUB_BORROW(r3, a8,  r3, carry, carry);
		MP_SUB_BORROW(r4, a9,  r4, carry, carry);
		MP_SUB_BORROW(r5, a10, r5, carry, carry);
		MP_SUB_BORROW(r6, 0,   r6, carry, carry);
		MP_SUB_BORROW(r7, a12, r7, carry, carry);
		r8 -= carry;
		/* diff 8 */
		MP_SUB_BORROW(r0, a14, r0, 0,     carry);
		MP_SUB_BORROW(r1, a15, r1, carry, carry);
		MP_SUB_BORROW(r2, 0,   r2, carry, carry);
		MP_SUB_BORROW(r3, a9,  r3, carry, carry);
		MP_SUB_BORROW(r4, a10, r4, carry, carry);
		MP_SUB_BORROW(r5, a11, r5, carry, carry);
		MP_SUB_BORROW(r6, 0,   r6, carry, carry);
		MP_SUB_BORROW(r7, a13, r7, carry, carry);
		r8 -= carry;

		/* reduce the overflows: each excess 2^256 in r8 is folded
		 * back as 2^224 - 2^192 - 2^96 + 1 (mod p256), encoded in
		 * two's complement across the eight words below */
		while (r8 > 0) {
			mp_digit r8_d = r8;
			MP_ADD_CARRY(r0, r8_d,         r0, 0,     carry);
			MP_ADD_CARRY(r1, 0,            r1, carry, carry);
			MP_ADD_CARRY(r2, 0,            r2, carry, carry);
			MP_ADD_CARRY(r3, 0-r8_d,       r3, carry, carry);
			MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
			MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
			MP_ADD_CARRY(r6, 0-(r8_d+1),   r6, carry, carry);
			MP_ADD_CARRY(r7, (r8_d-1),     r7, carry, carry);
			r8 = carry;
		}

		/* reduce the underflows: same constant, subtracted, for a
		 * negative overflow count */
		while (r8 < 0) {
			mp_digit r8_d = -r8;
			MP_SUB_BORROW(r0, r8_d,         r0, 0,     carry);
			MP_SUB_BORROW(r1, 0,            r1, carry, carry);
			MP_SUB_BORROW(r2, 0,            r2, carry, carry);
			MP_SUB_BORROW(r3, 0-r8_d,       r3, carry, carry);
			MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
			MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
			MP_SUB_BORROW(r6, 0-(r8_d+1),   r6, carry, carry);
			MP_SUB_BORROW(r7, (r8_d-1),     r7, carry, carry);
			r8 = 0-carry;
		}
		/* Make sure r can hold 8 digits before storing into it.
		 * (When a == r it already has at least 8 used digits,
		 * since a_bits >= 256 here.) */
		if (a != r) {
			MP_CHECKOK(s_mp_pad(r,8));
		}
		MP_SIGN(r) = MP_ZPOS;
		MP_USED(r) = 8;

		MP_DIGIT(r,7) = r7;
		MP_DIGIT(r,6) = r6;
		MP_DIGIT(r,5) = r5;
		MP_DIGIT(r,4) = r4;
		MP_DIGIT(r,3) = r3;
		MP_DIGIT(r,2) = r2;
		MP_DIGIT(r,1) = r1;
		MP_DIGIT(r,0) = r0;

		/* final reduction if necessary: compare against the word
		 * pattern of p256 (MAX,1,0,0,0,MAX,MAX,MAX from most to
		 * least significant) and subtract p256 once if r >= p256 */
		if ((r7 == MP_DIGIT_MAX) &&
			((r6 > 1) || ((r6 == 1) &&
			(r5 || r4 || r3 || 
				((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
				  && (r0 == MP_DIGIT_MAX)))))) {
			MP_CHECKOK(mp_sub(r, &meth->irr, r));
		}

		s_mp_clamp(r);
#else
		/* 64-bit digit path: same algorithm with the eight upper
		 * 32-bit words packed pairwise into a4..a7.  Deliberate
		 * fall-through loads only the digits that exist. */
		switch (a_used) {
		case 8:
			a7 = MP_DIGIT(a,7);
		case 7:
			a6 = MP_DIGIT(a,6);
		case 6:
			a5 = MP_DIGIT(a,5);
		case 5:
			a4 = MP_DIGIT(a,4);
		}
		/* Split each upper digit into 32-bit halves, pre-shifted so
		 * they can be OR-combined into digit-aligned slices below. */
		a7l = a7 << 32;
		a7h = a7 >> 32;
		a6l = a6 << 32;
		a6h = a6 >> 32;
		a5l = a5 << 32;
		a5h = a5 >> 32;
		a4l = a4 << 32;
		a4h = a4 >> 32;
		r3 = MP_DIGIT(a,3);
		r2 = MP_DIGIT(a,2);
		r1 = MP_DIGIT(a,1);
		r0 = MP_DIGIT(a,0);

		/* sum 1 (added twice) */
		MP_ADD_CARRY(r1, a5h << 32, r1, 0,     carry);
		MP_ADD_CARRY(r2, a6,        r2, carry, carry);
		MP_ADD_CARRY(r3, a7,        r3, carry, carry);
		r4 = carry;
		MP_ADD_CARRY(r1, a5h << 32, r1, 0,     carry);
		MP_ADD_CARRY(r2, a6,        r2, carry, carry);
		MP_ADD_CARRY(r3, a7,        r3, carry, carry);
		r4 += carry;
		/* sum 2 (added twice) */
		MP_ADD_CARRY(r1, a6l,       r1, 0,     carry);
		MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
		MP_ADD_CARRY(r3, a7h,       r3, carry, carry);
		r4 += carry;
		MP_ADD_CARRY(r1, a6l,       r1, 0,     carry);
		MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
		MP_ADD_CARRY(r3, a7h,       r3, carry, carry);
		r4 += carry;

		/* sum 3 */
		MP_ADD_CARRY(r0, a4,        r0, 0,     carry);
		MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
		MP_ADD_CARRY(r2, 0,         r2, carry, carry);
		MP_ADD_CARRY(r3, a7,        r3, carry, carry);
		r4 += carry;
		/* sum 4 */
		MP_ADD_CARRY(r0, a4h | a5l,     r0, 0,     carry);
		MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
		MP_ADD_CARRY(r2, a7,            r2, carry, carry);
		MP_ADD_CARRY(r3, a6h | a4l,     r3, carry, carry);
		r4 += carry;
		/* diff 5 (subtracted terms; net borrow tracked in signed r4) */
		MP_SUB_BORROW(r0, a5h | a6l,    r0, 0,     carry);
		MP_SUB_BORROW(r1, a6h,          r1, carry, carry);
		MP_SUB_BORROW(r2, 0,            r2, carry, carry);
		MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
		r4 -= carry;
		/* diff 6 */
		MP_SUB_BORROW(r0, a6,  		r0, 0,     carry);
		MP_SUB_BORROW(r1, a7,           r1, carry, carry);
		MP_SUB_BORROW(r2, 0,            r2, carry, carry);
		MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
		r4 -= carry;
		/* diff 7 */
		MP_SUB_BORROW(r0, a6h|a7l,	r0, 0,     carry);
		MP_SUB_BORROW(r1, a7h|a4l,      r1, carry, carry);
		MP_SUB_BORROW(r2, a4h|a5l,      r2, carry, carry);
		MP_SUB_BORROW(r3, a6l,          r3, carry, carry);
		r4 -= carry;
		/* diff 8 */
		MP_SUB_BORROW(r0, a7,	        r0, 0,     carry);
		MP_SUB_BORROW(r1, a4h<<32,      r1, carry, carry);
		MP_SUB_BORROW(r2, a5,           r2, carry, carry);
		MP_SUB_BORROW(r3, a6h<<32,      r3, carry, carry);
		r4 -= carry;

		/* reduce the overflows: fold r4 * 2^256 back in as
		 * r4 * (2^224 - 2^192 - 2^96 + 1) mod p256, encoded in
		 * two's complement across the four digits below */
		while (r4 > 0) {
			mp_digit r4_long = r4;
			mp_digit r4l = (r4_long << 32);
			MP_ADD_CARRY(r0, r4_long,      r0, 0,     carry);
			MP_ADD_CARRY(r1, 0-r4l,        r1, carry, carry);
			MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
			MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
			r4 = carry;
		}

		/* reduce the underflows: same constant, subtracted */
		while (r4 < 0) {
			mp_digit r4_long = -r4;
			mp_digit r4l = (r4_long << 32);
			MP_SUB_BORROW(r0, r4_long,      r0, 0,     carry);
			MP_SUB_BORROW(r1, 0-r4l,        r1, carry, carry);
			MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
			MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
			r4 = 0-carry;
		}

		/* Make sure r can hold 4 digits before storing into it. */
		if (a != r) {
			MP_CHECKOK(s_mp_pad(r,4));
		}
		MP_SIGN(r) = MP_ZPOS;
		MP_USED(r) = 4;

		MP_DIGIT(r,3) = r3;
		MP_DIGIT(r,2) = r2;
		MP_DIGIT(r,1) = r1;
		MP_DIGIT(r,0) = r0;

		/* final reduction if necessary: the digits of p256 are
		 * (0xFFFFFFFF00000001, 0, 0xFFFFFFFF, MAX) from most to
		 * least significant; subtract p256 once if r >= p256 */
		if ((r3 > 0xFFFFFFFF00000001ULL) ||
			((r3 == 0xFFFFFFFF00000001ULL) && 
			(r2 || (r1 >> 32)|| 
			       (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
			/* very rare, just use mp_sub */
			MP_CHECKOK(mp_sub(r, &meth->irr, r));
		}
			
		s_mp_clamp(r);
#endif
	}

  CLEANUP:
	return res;
}
   1.339 +
   1.340 +/* Compute the square of polynomial a, reduce modulo p256. Store the
   1.341 + * result in r.  r could be a.  Uses optimized modular reduction for p256. 
   1.342 + */
   1.343 +static mp_err
   1.344 +ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
   1.345 +{
   1.346 +	mp_err res = MP_OKAY;
   1.347 +
   1.348 +	MP_CHECKOK(mp_sqr(a, r));
   1.349 +	MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
   1.350 +  CLEANUP:
   1.351 +	return res;
   1.352 +}
   1.353 +
   1.354 +/* Compute the product of two polynomials a and b, reduce modulo p256.
   1.355 + * Store the result in r.  r could be a or b; a could be b.  Uses
   1.356 + * optimized modular reduction for p256. */
   1.357 +static mp_err
   1.358 +ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
   1.359 +					const GFMethod *meth)
   1.360 +{
   1.361 +	mp_err res = MP_OKAY;
   1.362 +
   1.363 +	MP_CHECKOK(mp_mul(a, b, r));
   1.364 +	MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
   1.365 +  CLEANUP:
   1.366 +	return res;
   1.367 +}
   1.368 +
   1.369 +/* Wire in fast field arithmetic and precomputation of base point for
   1.370 + * named curves. */
   1.371 +mp_err
   1.372 +ec_group_set_gfp256(ECGroup *group, ECCurveName name)
   1.373 +{
   1.374 +	if (name == ECCurve_NIST_P256) {
   1.375 +		group->meth->field_mod = &ec_GFp_nistp256_mod;
   1.376 +		group->meth->field_mul = &ec_GFp_nistp256_mul;
   1.377 +		group->meth->field_sqr = &ec_GFp_nistp256_sqr;
   1.378 +	}
   1.379 +	return MP_OKAY;
   1.380 +}

mercurial