/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"

/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1. a can be r.
 * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
 * Elliptic Curve Cryptography.
 *
 * The (up to) 512-bit input is viewed as sixteen 32-bit words a15..a0.
 * The reduced value is  t + 2*s1 + 2*s2 + s3 + s4 - d1 - d2 - d3 - d4,
 * where t = (a7..a0) and each s/d term is a fixed permutation of the high
 * words a15..a8 (the "sum"/"diff" steps below).  The signed overflow that
 * accumulates in the word above the top digit is folded back in using the
 * congruence 2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p256); a final
 * conditional subtraction of p256 brings the result into [0, p256).
 */
static mp_err
ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_size a_used = MP_USED(a);
    int a_bits = mpl_significant_bits(a);
    mp_digit carry;

#ifdef ECL_THIRTY_TWO_BIT
    /* With 32-bit digits each a_i is one input word; a8..a15 (the high
     * half) default to zero when the input is shorter than 16 digits. */
    mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
    mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
    int r8; /* must be a signed value ! */
#else
    /* With 64-bit digits each a_i holds two input words; the h/l variants
     * hold the upper/lower 32-bit word shifted into position so the fixed
     * word permutations can be formed with shifts and ORs. */
    mp_digit a4=0, a5=0, a6=0, a7=0;
    mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
    mp_digit r0, r1, r2, r3;
    int r4; /* must be a signed value ! */
#endif
    /* for polynomials larger than twice the field size
     * use regular reduction */
    if (a_bits < 256) {
        /* input already reduced below 2^256: at most a copy is needed */
        if (a == r) return MP_OKAY;
        return mp_copy(a,r);
    }
    if (a_bits > 512) {
        MP_CHECKOK(mp_mod(a, &meth->irr, r));
    } else {

#ifdef ECL_THIRTY_TWO_BIT
        /* Load the high words that are present; cases deliberately fall
         * through so words beyond a_used stay zero. */
        switch (a_used) {
        case 16:
            a15 = MP_DIGIT(a,15);
            /* FALLTHROUGH */
        case 15:
            a14 = MP_DIGIT(a,14);
            /* FALLTHROUGH */
        case 14:
            a13 = MP_DIGIT(a,13);
            /* FALLTHROUGH */
        case 13:
            a12 = MP_DIGIT(a,12);
            /* FALLTHROUGH */
        case 12:
            a11 = MP_DIGIT(a,11);
            /* FALLTHROUGH */
        case 11:
            a10 = MP_DIGIT(a,10);
            /* FALLTHROUGH */
        case 10:
            a9 = MP_DIGIT(a,9);
            /* FALLTHROUGH */
        case 9:
            a8 = MP_DIGIT(a,8);
        }

        /* t: the low 256 bits of the input */
        r0 = MP_DIGIT(a,0);
        r1 = MP_DIGIT(a,1);
        r2 = MP_DIGIT(a,2);
        r3 = MP_DIGIT(a,3);
        r4 = MP_DIGIT(a,4);
        r5 = MP_DIGIT(a,5);
        r6 = MP_DIGIT(a,6);
        r7 = MP_DIGIT(a,7);

        /* sum 1: s1 = (a15,a14,a13,a12,a11,0,0,0), added twice */
        MP_ADD_CARRY(r3, a11, r3, 0,     carry);
        MP_ADD_CARRY(r4, a12, r4, carry, carry);
        MP_ADD_CARRY(r5, a13, r5, carry, carry);
        MP_ADD_CARRY(r6, a14, r6, carry, carry);
        MP_ADD_CARRY(r7, a15, r7, carry, carry);
        r8 = carry;
        MP_ADD_CARRY(r3, a11, r3, 0,     carry);
        MP_ADD_CARRY(r4, a12, r4, carry, carry);
        MP_ADD_CARRY(r5, a13, r5, carry, carry);
        MP_ADD_CARRY(r6, a14, r6, carry, carry);
        MP_ADD_CARRY(r7, a15, r7, carry, carry);
        r8 += carry;
        /* sum 2: s2 = (0,a15,a14,a13,a12,0,0,0), first of two additions */
        MP_ADD_CARRY(r3, a12, r3, 0,     carry);
        MP_ADD_CARRY(r4, a13, r4, carry, carry);
        MP_ADD_CARRY(r5, a14, r5, carry, carry);
        MP_ADD_CARRY(r6, a15, r6, carry, carry);
        MP_ADD_CARRY(r7, 0,   r7, carry, carry);
        r8 += carry;
        /* combine last bottom of sum 3 with second sum 2:
         * low words of s3 = (a15,a14,0,0,0,a10,a9,a8) plus s2 again */
        MP_ADD_CARRY(r0, a8,  r0, 0,     carry);
        MP_ADD_CARRY(r1, a9,  r1, carry, carry);
        MP_ADD_CARRY(r2, a10, r2, carry, carry);
        MP_ADD_CARRY(r3, a12, r3, carry, carry);
        MP_ADD_CARRY(r4, a13, r4, carry, carry);
        MP_ADD_CARRY(r5, a14, r5, carry, carry);
        MP_ADD_CARRY(r6, a15, r6, carry, carry);
        MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
        r8 += carry;
        /* sum 3 (rest of it): remaining word a14 of s3 */
        MP_ADD_CARRY(r6, a14, r6, 0,     carry);
        MP_ADD_CARRY(r7, 0,   r7, carry, carry);
        r8 += carry;
        /* sum 4 (rest of it): s4 = (a8,a13,a15,a14,a13,a11,a10,a9) */
        MP_ADD_CARRY(r0, a9,  r0, 0,     carry);
        MP_ADD_CARRY(r1, a10, r1, carry, carry);
        MP_ADD_CARRY(r2, a11, r2, carry, carry);
        MP_ADD_CARRY(r3, a13, r3, carry, carry);
        MP_ADD_CARRY(r4, a14, r4, carry, carry);
        MP_ADD_CARRY(r5, a15, r5, carry, carry);
        MP_ADD_CARRY(r6, a13, r6, carry, carry);
        MP_ADD_CARRY(r7, a8,  r7, carry, carry);
        r8 += carry;
        /* diff 5: d1 = (a10,a8,0,0,0,a13,a12,a11) */
        MP_SUB_BORROW(r0, a11, r0, 0,     carry);
        MP_SUB_BORROW(r1, a12, r1, carry, carry);
        MP_SUB_BORROW(r2, a13, r2, carry, carry);
        MP_SUB_BORROW(r3, 0,   r3, carry, carry);
        MP_SUB_BORROW(r4, 0,   r4, carry, carry);
        MP_SUB_BORROW(r5, 0,   r5, carry, carry);
        MP_SUB_BORROW(r6, a8,  r6, carry, carry);
        MP_SUB_BORROW(r7, a10, r7, carry, carry);
        r8 -= carry;
        /* diff 6: d2 = (a11,a9,0,0,a15,a14,a13,a12) */
        MP_SUB_BORROW(r0, a12, r0, 0,     carry);
        MP_SUB_BORROW(r1, a13, r1, carry, carry);
        MP_SUB_BORROW(r2, a14, r2, carry, carry);
        MP_SUB_BORROW(r3, a15, r3, carry, carry);
        MP_SUB_BORROW(r4, 0,   r4, carry, carry);
        MP_SUB_BORROW(r5, 0,   r5, carry, carry);
        MP_SUB_BORROW(r6, a9,  r6, carry, carry);
        MP_SUB_BORROW(r7, a11, r7, carry, carry);
        r8 -= carry;
        /* diff 7: d3 = (a12,0,a10,a9,a8,a15,a14,a13) */
        MP_SUB_BORROW(r0, a13, r0, 0,     carry);
        MP_SUB_BORROW(r1, a14, r1, carry, carry);
        MP_SUB_BORROW(r2, a15, r2, carry, carry);
        MP_SUB_BORROW(r3, a8,  r3, carry, carry);
        MP_SUB_BORROW(r4, a9,  r4, carry, carry);
        MP_SUB_BORROW(r5, a10, r5, carry, carry);
        MP_SUB_BORROW(r6, 0,   r6, carry, carry);
        MP_SUB_BORROW(r7, a12, r7, carry, carry);
        r8 -= carry;
        /* diff 8: d4 = (a13,0,a11,a10,a9,0,a15,a14) */
        MP_SUB_BORROW(r0, a14, r0, 0,     carry);
        MP_SUB_BORROW(r1, a15, r1, carry, carry);
        MP_SUB_BORROW(r2, 0,   r2, carry, carry);
        MP_SUB_BORROW(r3, a9,  r3, carry, carry);
        MP_SUB_BORROW(r4, a10, r4, carry, carry);
        MP_SUB_BORROW(r5, a11, r5, carry, carry);
        MP_SUB_BORROW(r6, 0,   r6, carry, carry);
        MP_SUB_BORROW(r7, a13, r7, carry, carry);
        r8 -= carry;

        /* reduce the overflows: fold r8*2^256 back in by adding
         * r8*(2^224 - 2^192 - 2^96 + 1); the MP_DIGIT_MAX words carry the
         * two's-complement borrow of the negative terms across r4/r5 */
        while (r8 > 0) {
            mp_digit r8_d = r8;
            MP_ADD_CARRY(r0, r8_d,         r0, 0,     carry);
            MP_ADD_CARRY(r1, 0,            r1, carry, carry);
            MP_ADD_CARRY(r2, 0,            r2, carry, carry);
            MP_ADD_CARRY(r3, 0-r8_d,       r3, carry, carry);
            MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
            MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
            MP_ADD_CARRY(r6, 0-(r8_d+1),   r6, carry, carry);
            MP_ADD_CARRY(r7, (r8_d-1),     r7, carry, carry);
            r8 = carry;
        }

        /* reduce the underflows: same congruence, subtracted */
        while (r8 < 0) {
            mp_digit r8_d = -r8;
            MP_SUB_BORROW(r0, r8_d,         r0, 0,     carry);
            MP_SUB_BORROW(r1, 0,            r1, carry, carry);
            MP_SUB_BORROW(r2, 0,            r2, carry, carry);
            MP_SUB_BORROW(r3, 0-r8_d,       r3, carry, carry);
            MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
            MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
            MP_SUB_BORROW(r6, 0-(r8_d+1),   r6, carry, carry);
            MP_SUB_BORROW(r7, (r8_d-1),     r7, carry, carry);
            r8 = 0-carry;
        }
        /* if a == r, a already has >= 8 digits allocated (a_bits >= 256),
         * so padding is only needed for a distinct output */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r,8));
        }
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 8;

        MP_DIGIT(r,7) = r7;
        MP_DIGIT(r,6) = r6;
        MP_DIGIT(r,5) = r5;
        MP_DIGIT(r,4) = r4;
        MP_DIGIT(r,3) = r3;
        MP_DIGIT(r,2) = r2;
        MP_DIGIT(r,1) = r1;
        MP_DIGIT(r,0) = r0;

        /* final reduction if necessary: r >= p256, compared digit-wise
         * against p256 = (FFFFFFFF,00000001,0,0,0,FFFFFFFF,FFFFFFFF,FFFFFFFF) */
        if ((r7 == MP_DIGIT_MAX) &&
            ((r6 > 1) || ((r6 == 1) &&
            (r5 || r4 || r3 ||
            ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
             && (r0 == MP_DIGIT_MAX)))))) {
            MP_CHECKOK(mp_sub(r, &meth->irr, r));
        }

        s_mp_clamp(r);
#else
        /* Load the high digits that are present; cases deliberately fall
         * through so digits beyond a_used stay zero. */
        switch (a_used) {
        case 8:
            a7 = MP_DIGIT(a,7);
            /* FALLTHROUGH */
        case 7:
            a6 = MP_DIGIT(a,6);
            /* FALLTHROUGH */
        case 6:
            a5 = MP_DIGIT(a,5);
            /* FALLTHROUGH */
        case 5:
            a4 = MP_DIGIT(a,4);
        }
        /* split each high digit into its 32-bit words, pre-shifted:
         * e.g. a7 = (a15,a14), so a7l has a14 in the high word and
         * a7h has a15 in the low word */
        a7l = a7 << 32;
        a7h = a7 >> 32;
        a6l = a6 << 32;
        a6h = a6 >> 32;
        a5l = a5 << 32;
        a5h = a5 >> 32;
        a4l = a4 << 32;
        a4h = a4 >> 32;
        /* t: the low 256 bits of the input */
        r3 = MP_DIGIT(a,3);
        r2 = MP_DIGIT(a,2);
        r1 = MP_DIGIT(a,1);
        r0 = MP_DIGIT(a,0);

        /* sum 1: s1 = (a15,a14,a13,a12,a11,0,0,0), added twice */
        MP_ADD_CARRY(r1, a5h << 32, r1, 0,     carry);
        MP_ADD_CARRY(r2, a6,        r2, carry, carry);
        MP_ADD_CARRY(r3, a7,        r3, carry, carry);
        r4 = carry;
        MP_ADD_CARRY(r1, a5h << 32, r1, 0,     carry);
        MP_ADD_CARRY(r2, a6,        r2, carry, carry);
        MP_ADD_CARRY(r3, a7,        r3, carry, carry);
        r4 += carry;
        /* sum 2: s2 = (0,a15,a14,a13,a12,0,0,0), added twice */
        MP_ADD_CARRY(r1, a6l,       r1, 0,     carry);
        MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
        MP_ADD_CARRY(r3, a7h,       r3, carry, carry);
        r4 += carry;
        MP_ADD_CARRY(r1, a6l,       r1, 0,     carry);
        MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
        MP_ADD_CARRY(r3, a7h,       r3, carry, carry);
        r4 += carry;

        /* sum 3: s3 = (a15,a14,0,0,0,a10,a9,a8) */
        MP_ADD_CARRY(r0, a4,        r0, 0,     carry);
        MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
        MP_ADD_CARRY(r2, 0,         r2, carry, carry);
        MP_ADD_CARRY(r3, a7,        r3, carry, carry);
        r4 += carry;
        /* sum 4: s4 = (a8,a13,a15,a14,a13,a11,a10,a9) */
        MP_ADD_CARRY(r0, a4h | a5l,     r0, 0,     carry);
        MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
        MP_ADD_CARRY(r2, a7,            r2, carry, carry);
        MP_ADD_CARRY(r3, a6h | a4l,     r3, carry, carry);
        r4 += carry;
        /* diff 5: d1 = (a10,a8,0,0,0,a13,a12,a11) */
        MP_SUB_BORROW(r0, a5h | a6l,    r0, 0,     carry);
        MP_SUB_BORROW(r1, a6h,          r1, carry, carry);
        MP_SUB_BORROW(r2, 0,            r2, carry, carry);
        MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
        r4 -= carry;
        /* diff 6: d2 = (a11,a9,0,0,a15,a14,a13,a12) */
        MP_SUB_BORROW(r0, a6,           r0, 0,     carry);
        MP_SUB_BORROW(r1, a7,           r1, carry, carry);
        MP_SUB_BORROW(r2, 0,            r2, carry, carry);
        MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
        r4 -= carry;
        /* diff 7: d3 = (a12,0,a10,a9,a8,a15,a14,a13) */
        MP_SUB_BORROW(r0, a6h|a7l,      r0, 0,     carry);
        MP_SUB_BORROW(r1, a7h|a4l,      r1, carry, carry);
        MP_SUB_BORROW(r2, a4h|a5l,      r2, carry, carry);
        MP_SUB_BORROW(r3, a6l,          r3, carry, carry);
        r4 -= carry;
        /* diff 8: d4 = (a13,0,a11,a10,a9,0,a15,a14) */
        MP_SUB_BORROW(r0, a7,           r0, 0,     carry);
        MP_SUB_BORROW(r1, a4h<<32,      r1, carry, carry);
        MP_SUB_BORROW(r2, a5,           r2, carry, carry);
        MP_SUB_BORROW(r3, a6h<<32,      r3, carry, carry);
        r4 -= carry;

        /* reduce the overflows: fold r4*2^256 back in by adding
         * r4*(2^224 - 2^192 - 2^96 + 1) in 64-bit digit form */
        while (r4 > 0) {
            mp_digit r4_long = r4;
            mp_digit r4l = (r4_long << 32);
            MP_ADD_CARRY(r0, r4_long,      r0, 0,     carry);
            MP_ADD_CARRY(r1, 0-r4l,        r1, carry, carry);
            MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
            MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
            r4 = carry;
        }

        /* reduce the underflows: same congruence, subtracted */
        while (r4 < 0) {
            mp_digit r4_long = -r4;
            mp_digit r4l = (r4_long << 32);
            MP_SUB_BORROW(r0, r4_long,      r0, 0,     carry);
            MP_SUB_BORROW(r1, 0-r4l,        r1, carry, carry);
            MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
            MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
            r4 = 0-carry;
        }

        /* if a == r, a already has >= 4 digits allocated (a_bits >= 256),
         * so padding is only needed for a distinct output */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r,4));
        }
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 4;

        MP_DIGIT(r,3) = r3;
        MP_DIGIT(r,2) = r2;
        MP_DIGIT(r,1) = r1;
        MP_DIGIT(r,0) = r0;

        /* final reduction if necessary: r >= p256, compared digit-wise
         * against p256 = (FFFFFFFF00000001, 0, 00000000FFFFFFFF,
         * FFFFFFFFFFFFFFFF) */
        if ((r3 > 0xFFFFFFFF00000001ULL) ||
            ((r3 == 0xFFFFFFFF00000001ULL) &&
            (r2 || (r1 >> 32)||
            (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
            /* very rare, just use mp_sub */
            MP_CHECKOK(mp_sub(r, &meth->irr, r));
        }

        s_mp_clamp(r);
#endif
    }

  CLEANUP:
    return res;
}

/* Compute the square of polynomial a, reduce modulo p256. Store the
 * result in r. r could be a. Uses optimized modular reduction for p256.
 */
static mp_err
ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = MP_OKAY;

    MP_CHECKOK(mp_sqr(a, r));
    MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
  CLEANUP:
    return res;
}

/* Compute the product of two polynomials a and b, reduce modulo p256.
 * Store the result in r. r could be a or b; a could be b. Uses
 * optimized modular reduction for p256. */
static mp_err
ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
                    const GFMethod *meth)
{
    mp_err res = MP_OKAY;

    MP_CHECKOK(mp_mul(a, b, r));
    MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
  CLEANUP:
    return res;
}

/* Wire in fast field arithmetic and precomputation of base point for
 * named curves.  Only ECCurve_NIST_P256 is handled here; any other name
 * leaves the group's generic field methods untouched. */
mp_err
ec_group_set_gfp256(ECGroup *group, ECCurveName name)
{
    if (name == ECCurve_NIST_P256) {
        group->meth->field_mod = &ec_GFp_nistp256_mod;
        group->meth->field_mul = &ec_GFp_nistp256_mul;
        group->meth->field_sqr = &ec_GFp_nistp256_sqr;
    }
    return MP_OKAY;
}