1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/freebl/ecl/ecp_192.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,483 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include "ecp.h" 1.9 +#include "mpi.h" 1.10 +#include "mplogic.h" 1.11 +#include "mpi-priv.h" 1.12 + 1.13 +#define ECP192_DIGITS ECL_CURVE_DIGITS(192) 1.14 + 1.15 +/* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses 1.16 + * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software 1.17 + * Implementation of the NIST Elliptic Curves over Prime Fields. */ 1.18 +static mp_err 1.19 +ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth) 1.20 +{ 1.21 + mp_err res = MP_OKAY; 1.22 + mp_size a_used = MP_USED(a); 1.23 + mp_digit r3; 1.24 +#ifndef MPI_AMD64_ADD 1.25 + mp_digit carry; 1.26 +#endif 1.27 +#ifdef ECL_THIRTY_TWO_BIT 1.28 + mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0; 1.29 + mp_digit r0a, r0b, r1a, r1b, r2a, r2b; 1.30 +#else 1.31 + mp_digit a5 = 0, a4 = 0, a3 = 0; 1.32 + mp_digit r0, r1, r2; 1.33 +#endif 1.34 + 1.35 + /* reduction not needed if a is not larger than field size */ 1.36 + if (a_used < ECP192_DIGITS) { 1.37 + if (a == r) { 1.38 + return MP_OKAY; 1.39 + } 1.40 + return mp_copy(a, r); 1.41 + } 1.42 + 1.43 + /* for polynomials larger than twice the field size, use regular 1.44 + * reduction */ 1.45 + if (a_used > ECP192_DIGITS*2) { 1.46 + MP_CHECKOK(mp_mod(a, &meth->irr, r)); 1.47 + } else { 1.48 + /* copy out upper words of a */ 1.49 + 1.50 +#ifdef ECL_THIRTY_TWO_BIT 1.51 + 1.52 + /* in all the math below, 1.53 + * nXb is most signifiant, nXa is least significant */ 1.54 + switch (a_used) { 1.55 + case 12: 1.56 + a5b = MP_DIGIT(a, 11); 1.57 + case 11: 1.58 + a5a = MP_DIGIT(a, 10); 1.59 + case 10: 1.60 + a4b = MP_DIGIT(a, 9); 1.61 + case 9: 1.62 + a4a = MP_DIGIT(a, 8); 1.63 + case 8: 1.64 + a3b = MP_DIGIT(a, 7); 1.65 + case 7: 1.66 + a3a = MP_DIGIT(a, 6); 1.67 + } 1.68 + 1.69 + 1.70 + r2b= MP_DIGIT(a, 5); 1.71 + r2a= MP_DIGIT(a, 4); 1.72 + r1b = MP_DIGIT(a, 3); 1.73 + r1a = MP_DIGIT(a, 2); 1.74 + r0b = MP_DIGIT(a, 1); 1.75 + r0a = MP_DIGIT(a, 0); 1.76 + 1.77 + /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ 1.78 + MP_ADD_CARRY(r0a, a3a, r0a, 0, carry); 1.79 + MP_ADD_CARRY(r0b, a3b, r0b, carry, carry); 1.80 + MP_ADD_CARRY(r1a, a3a, r1a, carry, carry); 1.81 + MP_ADD_CARRY(r1b, a3b, r1b, carry, carry); 1.82 + MP_ADD_CARRY(r2a, a4a, r2a, carry, carry); 1.83 + MP_ADD_CARRY(r2b, a4b, r2b, carry, carry); 1.84 + r3 = carry; carry = 0; 1.85 + MP_ADD_CARRY(r0a, a5a, r0a, 0, carry); 1.86 + MP_ADD_CARRY(r0b, a5b, r0b, carry, carry); 1.87 + MP_ADD_CARRY(r1a, a5a, r1a, carry, carry); 1.88 + MP_ADD_CARRY(r1b, a5b, r1b, carry, carry); 1.89 + MP_ADD_CARRY(r2a, a5a, r2a, carry, carry); 1.90 + MP_ADD_CARRY(r2b, a5b, r2b, carry, carry); 1.91 + r3 += carry; 1.92 + MP_ADD_CARRY(r1a, a4a, r1a, 0, carry); 1.93 + MP_ADD_CARRY(r1b, a4b, r1b, carry, carry); 1.94 + MP_ADD_CARRY(r2a, 0, r2a, carry, carry); 1.95 + MP_ADD_CARRY(r2b, 0, r2b, carry, carry); 1.96 + r3 += carry; 1.97 + 1.98 + /* reduce out the carry */ 1.99 + while (r3) { 1.100 + MP_ADD_CARRY(r0a, r3, r0a, 0, carry); 1.101 + MP_ADD_CARRY(r0b, 0, r0b, carry, carry); 1.102 + MP_ADD_CARRY(r1a, r3, r1a, carry, carry); 1.103 + MP_ADD_CARRY(r1b, 0, r1b, carry, carry); 1.104 + MP_ADD_CARRY(r2a, 0, r2a, carry, carry); 1.105 + MP_ADD_CARRY(r2b, 0, r2b, carry, carry); 1.106 + r3 = carry; 1.107 + } 1.108 + 1.109 + /* check for final reduction */ 1.110 + /* 1.111 + * our field is 0xffffffffffffffff, 0xfffffffffffffffe, 1.112 + * 0xffffffffffffffff. That means we can only be over and need 1.113 + * one more reduction 1.114 + * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 1.115 + * and 1.116 + * r1 == 0xffffffffffffffffff or 1.117 + * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff 1.118 + * In all cases, we subtract the field (or add the 2's 1.119 + * complement value (1,1,0)). (r0, r1, r2) 1.120 + */ 1.121 + if (((r2b == 0xffffffff) && (r2a == 0xffffffff) 1.122 + && (r1b == 0xffffffff) ) && 1.123 + ((r1a == 0xffffffff) || 1.124 + (r1a == 0xfffffffe) && (r0a == 0xffffffff) && 1.125 + (r0b == 0xffffffff)) ) { 1.126 + /* do a quick subtract */ 1.127 + MP_ADD_CARRY(r0a, 1, r0a, 0, carry); 1.128 + MP_ADD_CARRY(r0b, carry, r0a, 0, carry); 1.129 + r1a += 1+carry; 1.130 + r1b = r2a = r2b = 0; 1.131 + } 1.132 + 1.133 + /* set the lower words of r */ 1.134 + if (a != r) { 1.135 + MP_CHECKOK(s_mp_pad(r, 6)); 1.136 + } 1.137 + MP_DIGIT(r, 5) = r2b; 1.138 + MP_DIGIT(r, 4) = r2a; 1.139 + MP_DIGIT(r, 3) = r1b; 1.140 + MP_DIGIT(r, 2) = r1a; 1.141 + MP_DIGIT(r, 1) = r0b; 1.142 + MP_DIGIT(r, 0) = r0a; 1.143 + MP_USED(r) = 6; 1.144 +#else 1.145 + switch (a_used) { 1.146 + case 6: 1.147 + a5 = MP_DIGIT(a, 5); 1.148 + case 5: 1.149 + a4 = MP_DIGIT(a, 4); 1.150 + case 4: 1.151 + a3 = MP_DIGIT(a, 3); 1.152 + } 1.153 + 1.154 + r2 = MP_DIGIT(a, 2); 1.155 + r1 = MP_DIGIT(a, 1); 1.156 + r0 = MP_DIGIT(a, 0); 1.157 + 1.158 + /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ 1.159 +#ifndef MPI_AMD64_ADD 1.160 + MP_ADD_CARRY(r0, a3, r0, 0, carry); 1.161 + MP_ADD_CARRY(r1, a3, r1, carry, carry); 1.162 + MP_ADD_CARRY(r2, a4, r2, carry, carry); 1.163 + r3 = carry; 1.164 + MP_ADD_CARRY(r0, a5, r0, 0, carry); 1.165 + MP_ADD_CARRY(r1, a5, r1, carry, carry); 1.166 + MP_ADD_CARRY(r2, a5, r2, carry, carry); 1.167 + r3 += carry; 1.168 + MP_ADD_CARRY(r1, a4, r1, 0, carry); 1.169 + MP_ADD_CARRY(r2, 0, r2, carry, carry); 1.170 + r3 += carry; 1.171 + 1.172 +#else 1.173 + r2 = MP_DIGIT(a, 2); 1.174 + r1 = MP_DIGIT(a, 1); 1.175 + r0 = MP_DIGIT(a, 0); 1.176 + 1.177 + /* set the lower words of r */ 1.178 + __asm__ ( 1.179 + "xorq %3,%3 \n\t" 1.180 + "addq %4,%0 \n\t" 1.181 + "adcq %4,%1 \n\t" 1.182 + "adcq %5,%2 \n\t" 1.183 + "adcq $0,%3 \n\t" 1.184 + "addq %6,%0 \n\t" 1.185 + "adcq %6,%1 \n\t" 1.186 + "adcq %6,%2 \n\t" 1.187 + "adcq $0,%3 \n\t" 1.188 + "addq %5,%1 \n\t" 1.189 + "adcq $0,%2 \n\t" 1.190 + "adcq $0,%3 \n\t" 1.191 + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3), 1.192 + "=r"(a4), "=r"(a5) 1.193 + : "0" (r0), "1" (r1), "2" (r2), "3" (r3), 1.194 + "4" (a3), "5" (a4), "6"(a5) 1.195 + : "%cc" ); 1.196 +#endif 1.197 + 1.198 + /* reduce out the carry */ 1.199 + while (r3) { 1.200 +#ifndef MPI_AMD64_ADD 1.201 + MP_ADD_CARRY(r0, r3, r0, 0, carry); 1.202 + MP_ADD_CARRY(r1, r3, r1, carry, carry); 1.203 + MP_ADD_CARRY(r2, 0, r2, carry, carry); 1.204 + r3 = carry; 1.205 +#else 1.206 + a3=r3; 1.207 + __asm__ ( 1.208 + "xorq %3,%3 \n\t" 1.209 + "addq %4,%0 \n\t" 1.210 + "adcq %4,%1 \n\t" 1.211 + "adcq $0,%2 \n\t" 1.212 + "adcq $0,%3 \n\t" 1.213 + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3) 1.214 + : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3) 1.215 + : "%cc" ); 1.216 +#endif 1.217 + } 1.218 + 1.219 + /* check for final reduction */ 1.220 + /* 1.221 + * our field is 0xffffffffffffffff, 0xfffffffffffffffe, 1.222 + * 0xffffffffffffffff. That means we can only be over and need 1.223 + * one more reduction 1.224 + * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 1.225 + * and 1.226 + * r1 == 0xffffffffffffffffff or 1.227 + * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff 1.228 + * In all cases, we subtract the field (or add the 2's 1.229 + * complement value (1,1,0)). (r0, r1, r2) 1.230 + */ 1.231 + if (r3 || ((r2 == MP_DIGIT_MAX) && 1.232 + ((r1 == MP_DIGIT_MAX) || 1.233 + ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { 1.234 + /* do a quick subtract */ 1.235 + MP_ADD_CARRY(r0, 1, r0, 0, carry); 1.236 + r1 += 1+carry; 1.237 + r2 = 0; 1.238 + } 1.239 + /* set the lower words of r */ 1.240 + if (a != r) { 1.241 + MP_CHECKOK(s_mp_pad(r, 3)); 1.242 + } 1.243 + MP_DIGIT(r, 2) = r2; 1.244 + MP_DIGIT(r, 1) = r1; 1.245 + MP_DIGIT(r, 0) = r0; 1.246 + MP_USED(r) = 3; 1.247 +#endif 1.248 + } 1.249 + s_mp_clamp(r); 1.250 + CLEANUP: 1.251 + return res; 1.252 +} 1.253 + 1.254 +#ifndef ECL_THIRTY_TWO_BIT 1.255 +/* Compute the sum of 192 bit curves. Do the work in-line since the 1.256 + * number of words are so small, we don't want to overhead of mp function 1.257 + * calls. Uses optimized modular reduction for p192. 1.258 + */ 1.259 +static mp_err 1.260 +ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r, 1.261 + const GFMethod *meth) 1.262 +{ 1.263 + mp_err res = MP_OKAY; 1.264 + mp_digit a0 = 0, a1 = 0, a2 = 0; 1.265 + mp_digit r0 = 0, r1 = 0, r2 = 0; 1.266 + mp_digit carry; 1.267 + 1.268 + switch(MP_USED(a)) { 1.269 + case 3: 1.270 + a2 = MP_DIGIT(a,2); 1.271 + case 2: 1.272 + a1 = MP_DIGIT(a,1); 1.273 + case 1: 1.274 + a0 = MP_DIGIT(a,0); 1.275 + } 1.276 + switch(MP_USED(b)) { 1.277 + case 3: 1.278 + r2 = MP_DIGIT(b,2); 1.279 + case 2: 1.280 + r1 = MP_DIGIT(b,1); 1.281 + case 1: 1.282 + r0 = MP_DIGIT(b,0); 1.283 + } 1.284 + 1.285 +#ifndef MPI_AMD64_ADD 1.286 + MP_ADD_CARRY(a0, r0, r0, 0, carry); 1.287 + MP_ADD_CARRY(a1, r1, r1, carry, carry); 1.288 + MP_ADD_CARRY(a2, r2, r2, carry, carry); 1.289 +#else 1.290 + __asm__ ( 1.291 + "xorq %3,%3 \n\t" 1.292 + "addq %4,%0 \n\t" 1.293 + "adcq %5,%1 \n\t" 1.294 + "adcq %6,%2 \n\t" 1.295 + "adcq $0,%3 \n\t" 1.296 + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry) 1.297 + : "r" (a0), "r" (a1), "r" (a2), "0" (r0), 1.298 + "1" (r1), "2" (r2) 1.299 + : "%cc" ); 1.300 +#endif 1.301 + 1.302 + /* Do quick 'subract' if we've gone over 1.303 + * (add the 2's complement of the curve field) */ 1.304 + if (carry || ((r2 == MP_DIGIT_MAX) && 1.305 + ((r1 == MP_DIGIT_MAX) || 1.306 + ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { 1.307 +#ifndef MPI_AMD64_ADD 1.308 + MP_ADD_CARRY(r0, 1, r0, 0, carry); 1.309 + MP_ADD_CARRY(r1, 1, r1, carry, carry); 1.310 + MP_ADD_CARRY(r2, 0, r2, carry, carry); 1.311 +#else 1.312 + __asm__ ( 1.313 + "addq $1,%0 \n\t" 1.314 + "adcq $1,%1 \n\t" 1.315 + "adcq $0,%2 \n\t" 1.316 + : "=r"(r0), "=r"(r1), "=r"(r2) 1.317 + : "0" (r0), "1" (r1), "2" (r2) 1.318 + : "%cc" ); 1.319 +#endif 1.320 + } 1.321 + 1.322 + 1.323 + MP_CHECKOK(s_mp_pad(r, 3)); 1.324 + MP_DIGIT(r, 2) = r2; 1.325 + MP_DIGIT(r, 1) = r1; 1.326 + MP_DIGIT(r, 0) = r0; 1.327 + MP_SIGN(r) = MP_ZPOS; 1.328 + MP_USED(r) = 3; 1.329 + s_mp_clamp(r); 1.330 + 1.331 + 1.332 + CLEANUP: 1.333 + return res; 1.334 +} 1.335 + 1.336 +/* Compute the diff of 192 bit curves. Do the work in-line since the 1.337 + * number of words are so small, we don't want to overhead of mp function 1.338 + * calls. Uses optimized modular reduction for p192. 1.339 + */ 1.340 +static mp_err 1.341 +ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r, 1.342 + const GFMethod *meth) 1.343 +{ 1.344 + mp_err res = MP_OKAY; 1.345 + mp_digit b0 = 0, b1 = 0, b2 = 0; 1.346 + mp_digit r0 = 0, r1 = 0, r2 = 0; 1.347 + mp_digit borrow; 1.348 + 1.349 + switch(MP_USED(a)) { 1.350 + case 3: 1.351 + r2 = MP_DIGIT(a,2); 1.352 + case 2: 1.353 + r1 = MP_DIGIT(a,1); 1.354 + case 1: 1.355 + r0 = MP_DIGIT(a,0); 1.356 + } 1.357 + 1.358 + switch(MP_USED(b)) { 1.359 + case 3: 1.360 + b2 = MP_DIGIT(b,2); 1.361 + case 2: 1.362 + b1 = MP_DIGIT(b,1); 1.363 + case 1: 1.364 + b0 = MP_DIGIT(b,0); 1.365 + } 1.366 + 1.367 +#ifndef MPI_AMD64_ADD 1.368 + MP_SUB_BORROW(r0, b0, r0, 0, borrow); 1.369 + MP_SUB_BORROW(r1, b1, r1, borrow, borrow); 1.370 + MP_SUB_BORROW(r2, b2, r2, borrow, borrow); 1.371 +#else 1.372 + __asm__ ( 1.373 + "xorq %3,%3 \n\t" 1.374 + "subq %4,%0 \n\t" 1.375 + "sbbq %5,%1 \n\t" 1.376 + "sbbq %6,%2 \n\t" 1.377 + "adcq $0,%3 \n\t" 1.378 + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow) 1.379 + : "r" (b0), "r" (b1), "r" (b2), "0" (r0), 1.380 + "1" (r1), "2" (r2) 1.381 + : "%cc" ); 1.382 +#endif 1.383 + 1.384 + /* Do quick 'add' if we've gone under 0 1.385 + * (subtract the 2's complement of the curve field) */ 1.386 + if (borrow) { 1.387 +#ifndef MPI_AMD64_ADD 1.388 + MP_SUB_BORROW(r0, 1, r0, 0, borrow); 1.389 + MP_SUB_BORROW(r1, 1, r1, borrow, borrow); 1.390 + MP_SUB_BORROW(r2, 0, r2, borrow, borrow); 1.391 +#else 1.392 + __asm__ ( 1.393 + "subq $1,%0 \n\t" 1.394 + "sbbq $1,%1 \n\t" 1.395 + "sbbq $0,%2 \n\t" 1.396 + : "=r"(r0), "=r"(r1), "=r"(r2) 1.397 + : "0" (r0), "1" (r1), "2" (r2) 1.398 + : "%cc" ); 1.399 +#endif 1.400 + } 1.401 + 1.402 + MP_CHECKOK(s_mp_pad(r, 3)); 1.403 + MP_DIGIT(r, 2) = r2; 1.404 + MP_DIGIT(r, 1) = r1; 1.405 + MP_DIGIT(r, 0) = r0; 1.406 + MP_SIGN(r) = MP_ZPOS; 1.407 + MP_USED(r) = 3; 1.408 + s_mp_clamp(r); 1.409 + 1.410 + CLEANUP: 1.411 + return res; 1.412 +} 1.413 + 1.414 +#endif 1.415 + 1.416 +/* Compute the square of polynomial a, reduce modulo p192. Store the 1.417 + * result in r. r could be a. Uses optimized modular reduction for p192. 1.418 + */ 1.419 +static mp_err 1.420 +ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) 1.421 +{ 1.422 + mp_err res = MP_OKAY; 1.423 + 1.424 + MP_CHECKOK(mp_sqr(a, r)); 1.425 + MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 1.426 + CLEANUP: 1.427 + return res; 1.428 +} 1.429 + 1.430 +/* Compute the product of two polynomials a and b, reduce modulo p192. 1.431 + * Store the result in r. r could be a or b; a could be b. Uses 1.432 + * optimized modular reduction for p192. */ 1.433 +static mp_err 1.434 +ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r, 1.435 + const GFMethod *meth) 1.436 +{ 1.437 + mp_err res = MP_OKAY; 1.438 + 1.439 + MP_CHECKOK(mp_mul(a, b, r)); 1.440 + MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 1.441 + CLEANUP: 1.442 + return res; 1.443 +} 1.444 + 1.445 +/* Divides two field elements. If a is NULL, then returns the inverse of 1.446 + * b. */ 1.447 +static mp_err 1.448 +ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r, 1.449 + const GFMethod *meth) 1.450 +{ 1.451 + mp_err res = MP_OKAY; 1.452 + mp_int t; 1.453 + 1.454 + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ 1.455 + if (a == NULL) { 1.456 + return mp_invmod(b, &meth->irr, r); 1.457 + } else { 1.458 + /* MPI doesn't support divmod, so we implement it using invmod and 1.459 + * mulmod. */ 1.460 + MP_CHECKOK(mp_init(&t)); 1.461 + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); 1.462 + MP_CHECKOK(mp_mul(a, &t, r)); 1.463 + MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 1.464 + CLEANUP: 1.465 + mp_clear(&t); 1.466 + return res; 1.467 + } 1.468 +} 1.469 + 1.470 +/* Wire in fast field arithmetic and precomputation of base point for 1.471 + * named curves. */ 1.472 +mp_err 1.473 +ec_group_set_gfp192(ECGroup *group, ECCurveName name) 1.474 +{ 1.475 + if (name == ECCurve_NIST_P192) { 1.476 + group->meth->field_mod = &ec_GFp_nistp192_mod; 1.477 + group->meth->field_mul = &ec_GFp_nistp192_mul; 1.478 + group->meth->field_sqr = &ec_GFp_nistp192_sqr; 1.479 + group->meth->field_div = &ec_GFp_nistp192_div; 1.480 +#ifndef ECL_THIRTY_TWO_BIT 1.481 + group->meth->field_add = &ec_GFp_nistp192_add; 1.482 + group->meth->field_sub = &ec_GFp_nistp192_sub; 1.483 +#endif 1.484 + } 1.485 + return MP_OKAY; 1.486 +}