1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/freebl/ecl/ecp_fpinc.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,821 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +/* This source file is meant to be included by other source files 1.9 + * (ecp_fp###.c, where ### is one of 160, 192, 224) and should not 1.10 + * constitute an independent compilation unit. It requires the following 1.11 + * preprocessor definitions be made: ECFP_BSIZE - the number of bits in 1.12 + * the field's prime 1.13 + * ECFP_NUMDOUBLES - the number of doubles to store one 1.14 + * multi-precision integer in floating point 1.15 + 1.16 +/* Adds a prefix to a given token to give a unique token name. Prefixes 1.17 + * with "ecfp" + ECFP_BSIZE + "_". e.g. if ECFP_BSIZE = 160, then 1.18 + * PREFIX(hello) = ecfp160_hello This optimization allows static function 1.19 + * linking and compiler loop unrolling without code duplication. */ 1.20 +#ifndef PREFIX 1.21 +#define PREFIX(b) PREFIX1(ECFP_BSIZE, b) 1.22 +#define PREFIX1(bsize, b) PREFIX2(bsize, b) 1.23 +#define PREFIX2(bsize, b) ecfp ## bsize ## _ ## b 1.24 +#endif 1.25 + 1.26 +/* Returns true iff every double in d is 0. (If d == 0 and it is tidied, 1.27 + * this will be true.) */ 1.28 +mp_err PREFIX(isZero) (const double *d) { 1.29 + int i; 1.30 + 1.31 + for (i = 0; i < ECFP_NUMDOUBLES; i++) { 1.32 + if (d[i] != 0) 1.33 + return MP_NO; 1.34 + } 1.35 + return MP_YES; 1.36 +} 1.37 + 1.38 +/* Sets the multi-precision floating point number at t = 0 */ 1.39 +void PREFIX(zero) (double *t) { 1.40 + int i; 1.41 + 1.42 + for (i = 0; i < ECFP_NUMDOUBLES; i++) { 1.43 + t[i] = 0; 1.44 + } 1.45 +} 1.46 + 1.47 +/* Sets the multi-precision floating point number at t = 1 */ 1.48 +void PREFIX(one) (double *t) { 1.49 + int i; 1.50 + 1.51 + t[0] = 1; 1.52 + for (i = 1; i < ECFP_NUMDOUBLES; i++) { 1.53 + t[i] = 0; 1.54 + } 1.55 +} 1.56 + 1.57 +/* Checks if point P(x, y, z) is at infinity. Uses Jacobian coordinates. */ 1.58 +mp_err PREFIX(pt_is_inf_jac) (const ecfp_jac_pt * p) { 1.59 + return PREFIX(isZero) (p->z); 1.60 +} 1.61 + 1.62 +/* Sets the Jacobian point P to be at infinity. */ 1.63 +void PREFIX(set_pt_inf_jac) (ecfp_jac_pt * p) { 1.64 + PREFIX(zero) (p->z); 1.65 +} 1.66 + 1.67 +/* Checks if point P(x, y) is at infinity. Uses Affine coordinates. */ 1.68 +mp_err PREFIX(pt_is_inf_aff) (const ecfp_aff_pt * p) { 1.69 + if (PREFIX(isZero) (p->x) == MP_YES && PREFIX(isZero) (p->y) == MP_YES) 1.70 + return MP_YES; 1.71 + return MP_NO; 1.72 +} 1.73 + 1.74 +/* Sets the affine point P to be at infinity. */ 1.75 +void PREFIX(set_pt_inf_aff) (ecfp_aff_pt * p) { 1.76 + PREFIX(zero) (p->x); 1.77 + PREFIX(zero) (p->y); 1.78 +} 1.79 + 1.80 +/* Checks if point P(x, y, z, a*z^4) is at infinity. Uses Modified 1.81 + * Jacobian coordinates. */ 1.82 +mp_err PREFIX(pt_is_inf_jm) (const ecfp_jm_pt * p) { 1.83 + return PREFIX(isZero) (p->z); 1.84 +} 1.85 + 1.86 +/* Sets the Modified Jacobian point P to be at infinity. */ 1.87 +void PREFIX(set_pt_inf_jm) (ecfp_jm_pt * p) { 1.88 + PREFIX(zero) (p->z); 1.89 +} 1.90 + 1.91 +/* Checks if point P(x, y, z, z^2, z^3) is at infinity. Uses Chudnovsky 1.92 + * Jacobian coordinates */ 1.93 +mp_err PREFIX(pt_is_inf_chud) (const ecfp_chud_pt * p) { 1.94 + return PREFIX(isZero) (p->z); 1.95 +} 1.96 + 1.97 +/* Sets the Chudnovsky Jacobian point P to be at infinity. */ 1.98 +void PREFIX(set_pt_inf_chud) (ecfp_chud_pt * p) { 1.99 + PREFIX(zero) (p->z); 1.100 +} 1.101 + 1.102 +/* Copies a multi-precision floating point number, Setting dest = src */ 1.103 +void PREFIX(copy) (double *dest, const double *src) { 1.104 + int i; 1.105 + 1.106 + for (i = 0; i < ECFP_NUMDOUBLES; i++) { 1.107 + dest[i] = src[i]; 1.108 + } 1.109 +} 1.110 + 1.111 +/* Sets dest = -src */ 1.112 +void PREFIX(negLong) (double *dest, const double *src) { 1.113 + int i; 1.114 + 1.115 + for (i = 0; i < 2 * ECFP_NUMDOUBLES; i++) { 1.116 + dest[i] = -src[i]; 1.117 + } 1.118 +} 1.119 + 1.120 +/* Sets r = -p p = (x, y, z, z2, z3) r = (x, -y, z, z2, z3) Uses 1.121 + * Chudnovsky Jacobian coordinates. */ 1.122 +/* TODO reverse order */ 1.123 +void PREFIX(pt_neg_chud) (const ecfp_chud_pt * p, ecfp_chud_pt * r) { 1.124 + int i; 1.125 + 1.126 + PREFIX(copy) (r->x, p->x); 1.127 + PREFIX(copy) (r->z, p->z); 1.128 + PREFIX(copy) (r->z2, p->z2); 1.129 + PREFIX(copy) (r->z3, p->z3); 1.130 + for (i = 0; i < ECFP_NUMDOUBLES; i++) { 1.131 + r->y[i] = -p->y[i]; 1.132 + } 1.133 +} 1.134 + 1.135 +/* Computes r = x + y. Does not tidy or reduce. Any combinations of r, x, 1.136 + * y can point to the same data. Componentwise adds first ECFP_NUMDOUBLES 1.137 + * doubles of x and y and stores the result in r. */ 1.138 +void PREFIX(addShort) (double *r, const double *x, const double *y) { 1.139 + int i; 1.140 + 1.141 + for (i = 0; i < ECFP_NUMDOUBLES; i++) { 1.142 + *r++ = *x++ + *y++; 1.143 + } 1.144 +} 1.145 + 1.146 +/* Computes r = x + y. Does not tidy or reduce. Any combinations of r, x, 1.147 + * y can point to the same data. Componentwise adds first 1.148 + * 2*ECFP_NUMDOUBLES doubles of x and y and stores the result in r. */ 1.149 +void PREFIX(addLong) (double *r, const double *x, const double *y) { 1.150 + int i; 1.151 + 1.152 + for (i = 0; i < 2 * ECFP_NUMDOUBLES; i++) { 1.153 + *r++ = *x++ + *y++; 1.154 + } 1.155 +} 1.156 + 1.157 +/* Computes r = x - y. Does not tidy or reduce. Any combinations of r, x, 1.158 + * y can point to the same data. Componentwise subtracts first 1.159 + * ECFP_NUMDOUBLES doubles of x and y and stores the result in r. */ 1.160 +void PREFIX(subtractShort) (double *r, const double *x, const double *y) { 1.161 + int i; 1.162 + 1.163 + for (i = 0; i < ECFP_NUMDOUBLES; i++) { 1.164 + *r++ = *x++ - *y++; 1.165 + } 1.166 +} 1.167 + 1.168 +/* Computes r = x - y. Does not tidy or reduce. Any combinations of r, x, 1.169 + * y can point to the same data. Componentwise subtracts first 1.170 + * 2*ECFP_NUMDOUBLES doubles of x and y and stores the result in r. */ 1.171 +void PREFIX(subtractLong) (double *r, const double *x, const double *y) { 1.172 + int i; 1.173 + 1.174 + for (i = 0; i < 2 * ECFP_NUMDOUBLES; i++) { 1.175 + *r++ = *x++ - *y++; 1.176 + } 1.177 +} 1.178 + 1.179 +/* Computes r = x*y. Both x and y should be tidied and reduced, 1.180 + * r must be different (point to different memory) than x and y. 1.181 + * Does not tidy or reduce. */ 1.182 +void PREFIX(multiply)(double *r, const double *x, const double *y) { 1.183 + int i, j; 1.184 + 1.185 + for(j=0;j<ECFP_NUMDOUBLES-1;j++) { 1.186 + r[j] = x[0] * y[j]; 1.187 + r[j+(ECFP_NUMDOUBLES-1)] = x[ECFP_NUMDOUBLES-1] * y[j]; 1.188 + } 1.189 + r[ECFP_NUMDOUBLES-1] = x[0] * y[ECFP_NUMDOUBLES-1]; 1.190 + r[ECFP_NUMDOUBLES-1] += x[ECFP_NUMDOUBLES-1] * y[0]; 1.191 + r[2*ECFP_NUMDOUBLES-2] = x[ECFP_NUMDOUBLES-1] * y[ECFP_NUMDOUBLES-1]; 1.192 + r[2*ECFP_NUMDOUBLES-1] = 0; 1.193 + 1.194 + for(i=1;i<ECFP_NUMDOUBLES-1;i++) { 1.195 + for(j=0;j<ECFP_NUMDOUBLES;j++) { 1.196 + r[i+j] += (x[i] * y[j]); 1.197 + } 1.198 + } 1.199 +} 1.200 + 1.201 +/* Computes the square of x and stores the result in r. x should be 1.202 + * tidied & reduced, r will be neither tidied nor reduced. 1.203 + * r should point to different memory than x */ 1.204 +void PREFIX(square) (double *r, const double *x) { 1.205 + PREFIX(multiply) (r, x, x); 1.206 +} 1.207 + 1.208 +/* Perform a point doubling in Jacobian coordinates. Input and output 1.209 + * should be multi-precision floating point integers. */ 1.210 +void PREFIX(pt_dbl_jac) (const ecfp_jac_pt * dp, ecfp_jac_pt * dr, 1.211 + const EC_group_fp * group) { 1.212 + double t0[2 * ECFP_NUMDOUBLES], t1[2 * ECFP_NUMDOUBLES], 1.213 + M[2 * ECFP_NUMDOUBLES], S[2 * ECFP_NUMDOUBLES]; 1.214 + 1.215 + /* Check for point at infinity */ 1.216 + if (PREFIX(pt_is_inf_jac) (dp) == MP_YES) { 1.217 + /* Set r = pt at infinity */ 1.218 + PREFIX(set_pt_inf_jac) (dr); 1.219 + goto CLEANUP; 1.220 + } 1.221 + 1.222 + /* Perform typical point doubling operations */ 1.223 + 1.224 + /* TODO? is it worthwhile to do optimizations for when pz = 1? */ 1.225 + 1.226 + if (group->aIsM3) { 1.227 + /* When a = -3, M = 3(px - pz^2)(px + pz^2) */ 1.228 + PREFIX(square) (t1, dp->z); 1.229 + group->ecfp_reduce(t1, t1, group); /* 2^23 since the negative 1.230 + * rounding buys another bit */ 1.231 + PREFIX(addShort) (t0, dp->x, t1); /* 2*2^23 */ 1.232 + PREFIX(subtractShort) (t1, dp->x, t1); /* 2 * 2^23 */ 1.233 + PREFIX(multiply) (M, t0, t1); /* 40 * 2^46 */ 1.234 + PREFIX(addLong) (t0, M, M); /* 80 * 2^46 */ 1.235 + PREFIX(addLong) (M, t0, M); /* 120 * 2^46 < 2^53 */ 1.236 + group->ecfp_reduce(M, M, group); 1.237 + } else { 1.238 + /* Generic case */ 1.239 + /* M = 3 (px^2) + a*(pz^4) */ 1.240 + PREFIX(square) (t0, dp->x); 1.241 + PREFIX(addLong) (M, t0, t0); 1.242 + PREFIX(addLong) (t0, t0, M); /* t0 = 3(px^2) */ 1.243 + PREFIX(square) (M, dp->z); 1.244 + group->ecfp_reduce(M, M, group); 1.245 + PREFIX(square) (t1, M); 1.246 + group->ecfp_reduce(t1, t1, group); 1.247 + PREFIX(multiply) (M, t1, group->curvea); /* M = a(pz^4) */ 1.248 + PREFIX(addLong) (M, M, t0); 1.249 + group->ecfp_reduce(M, M, group); 1.250 + } 1.251 + 1.252 + /* rz = 2 * py * pz */ 1.253 + PREFIX(multiply) (t1, dp->y, dp->z); 1.254 + PREFIX(addLong) (t1, t1, t1); 1.255 + group->ecfp_reduce(dr->z, t1, group); 1.256 + 1.257 + /* t0 = 2y^2 */ 1.258 + PREFIX(square) (t0, dp->y); 1.259 + group->ecfp_reduce(t0, t0, group); 1.260 + PREFIX(addShort) (t0, t0, t0); 1.261 + 1.262 + /* S = 4 * px * py^2 = 2 * px * t0 */ 1.263 + PREFIX(multiply) (S, dp->x, t0); 1.264 + PREFIX(addLong) (S, S, S); 1.265 + group->ecfp_reduce(S, S, group); 1.266 + 1.267 + /* rx = M^2 - 2 * S */ 1.268 + PREFIX(square) (t1, M); 1.269 + PREFIX(subtractShort) (t1, t1, S); 1.270 + PREFIX(subtractShort) (t1, t1, S); 1.271 + group->ecfp_reduce(dr->x, t1, group); 1.272 + 1.273 + /* ry = M * (S - rx) - 8 * py^4 */ 1.274 + PREFIX(square) (t1, t0); /* t1 = 4y^4 */ 1.275 + PREFIX(subtractShort) (S, S, dr->x); 1.276 + PREFIX(multiply) (t0, M, S); 1.277 + PREFIX(subtractLong) (t0, t0, t1); 1.278 + PREFIX(subtractLong) (t0, t0, t1); 1.279 + group->ecfp_reduce(dr->y, t0, group); 1.280 + 1.281 + CLEANUP: 1.282 + return; 1.283 +} 1.284 + 1.285 +/* Perform a point addition using coordinate system Jacobian + Affine -> 1.286 + * Jacobian. Input and output should be multi-precision floating point 1.287 + * integers. */ 1.288 +void PREFIX(pt_add_jac_aff) (const ecfp_jac_pt * p, const ecfp_aff_pt * q, 1.289 + ecfp_jac_pt * r, const EC_group_fp * group) { 1.290 + /* Temporary storage */ 1.291 + double A[2 * ECFP_NUMDOUBLES], B[2 * ECFP_NUMDOUBLES], 1.292 + C[2 * ECFP_NUMDOUBLES], C2[2 * ECFP_NUMDOUBLES], 1.293 + D[2 * ECFP_NUMDOUBLES], C3[2 * ECFP_NUMDOUBLES]; 1.294 + 1.295 + /* Check for point at infinity for p or q */ 1.296 + if (PREFIX(pt_is_inf_aff) (q) == MP_YES) { 1.297 + PREFIX(copy) (r->x, p->x); 1.298 + PREFIX(copy) (r->y, p->y); 1.299 + PREFIX(copy) (r->z, p->z); 1.300 + goto CLEANUP; 1.301 + } else if (PREFIX(pt_is_inf_jac) (p) == MP_YES) { 1.302 + PREFIX(copy) (r->x, q->x); 1.303 + PREFIX(copy) (r->y, q->y); 1.304 + /* Since the affine point is not infinity, we can set r->z = 1 */ 1.305 + PREFIX(one) (r->z); 1.306 + goto CLEANUP; 1.307 + } 1.308 + 1.309 + /* Calculates c = qx * pz^2 - px d = (qy * b - py) rx = d^2 - c^3 + 2 1.310 + * (px * c^2) ry = d * (c-rx) - py*c^3 rz = c * pz */ 1.311 + 1.312 + /* A = pz^2, B = pz^3 */ 1.313 + PREFIX(square) (A, p->z); 1.314 + group->ecfp_reduce(A, A, group); 1.315 + PREFIX(multiply) (B, A, p->z); 1.316 + group->ecfp_reduce(B, B, group); 1.317 + 1.318 + /* C = qx * A - px */ 1.319 + PREFIX(multiply) (C, q->x, A); 1.320 + PREFIX(subtractShort) (C, C, p->x); 1.321 + group->ecfp_reduce(C, C, group); 1.322 + 1.323 + /* D = qy * B - py */ 1.324 + PREFIX(multiply) (D, q->y, B); 1.325 + PREFIX(subtractShort) (D, D, p->y); 1.326 + group->ecfp_reduce(D, D, group); 1.327 + 1.328 + /* C2 = C^2, C3 = C^3 */ 1.329 + PREFIX(square) (C2, C); 1.330 + group->ecfp_reduce(C2, C2, group); 1.331 + PREFIX(multiply) (C3, C2, C); 1.332 + group->ecfp_reduce(C3, C3, group); 1.333 + 1.334 + /* rz = A = pz * C */ 1.335 + PREFIX(multiply) (A, p->z, C); 1.336 + group->ecfp_reduce(r->z, A, group); 1.337 + 1.338 + /* C = px * C^2, untidied, unreduced */ 1.339 + PREFIX(multiply) (C, p->x, C2); 1.340 + 1.341 + /* A = D^2, untidied, unreduced */ 1.342 + PREFIX(square) (A, D); 1.343 + 1.344 + /* rx = B = A - C3 - C - C = D^2 - (C^3 + 2 * (px * C^2) */ 1.345 + PREFIX(subtractShort) (A, A, C3); 1.346 + PREFIX(subtractLong) (A, A, C); 1.347 + PREFIX(subtractLong) (A, A, C); 1.348 + group->ecfp_reduce(r->x, A, group); 1.349 + 1.350 + /* B = py * C3, untidied, unreduced */ 1.351 + PREFIX(multiply) (B, p->y, C3); 1.352 + 1.353 + /* C = px * C^2 - rx */ 1.354 + PREFIX(subtractShort) (C, C, r->x); 1.355 + group->ecfp_reduce(C, C, group); 1.356 + 1.357 + /* ry = A = D * C - py * C^3 */ 1.358 + PREFIX(multiply) (A, D, C); 1.359 + PREFIX(subtractLong) (A, A, B); 1.360 + group->ecfp_reduce(r->y, A, group); 1.361 + 1.362 + CLEANUP: 1.363 + return; 1.364 +} 1.365 + 1.366 +/* Perform a point addition using Jacobian coordinate system. Input and 1.367 + * output should be multi-precision floating point integers. */ 1.368 +void PREFIX(pt_add_jac) (const ecfp_jac_pt * p, const ecfp_jac_pt * q, 1.369 + ecfp_jac_pt * r, const EC_group_fp * group) { 1.370 + 1.371 + /* Temporary Storage */ 1.372 + double t0[2 * ECFP_NUMDOUBLES], t1[2 * ECFP_NUMDOUBLES], 1.373 + U[2 * ECFP_NUMDOUBLES], R[2 * ECFP_NUMDOUBLES], 1.374 + S[2 * ECFP_NUMDOUBLES], H[2 * ECFP_NUMDOUBLES], 1.375 + H3[2 * ECFP_NUMDOUBLES]; 1.376 + 1.377 + /* Check for point at infinity for p, if so set r = q */ 1.378 + if (PREFIX(pt_is_inf_jac) (p) == MP_YES) { 1.379 + PREFIX(copy) (r->x, q->x); 1.380 + PREFIX(copy) (r->y, q->y); 1.381 + PREFIX(copy) (r->z, q->z); 1.382 + goto CLEANUP; 1.383 + } 1.384 + 1.385 + /* Check for point at infinity for p, if so set r = q */ 1.386 + if (PREFIX(pt_is_inf_jac) (q) == MP_YES) { 1.387 + PREFIX(copy) (r->x, p->x); 1.388 + PREFIX(copy) (r->y, p->y); 1.389 + PREFIX(copy) (r->z, p->z); 1.390 + goto CLEANUP; 1.391 + } 1.392 + 1.393 + /* U = px * qz^2 , S = py * qz^3 */ 1.394 + PREFIX(square) (t0, q->z); 1.395 + group->ecfp_reduce(t0, t0, group); 1.396 + PREFIX(multiply) (U, p->x, t0); 1.397 + group->ecfp_reduce(U, U, group); 1.398 + PREFIX(multiply) (t1, t0, q->z); 1.399 + group->ecfp_reduce(t1, t1, group); 1.400 + PREFIX(multiply) (t0, p->y, t1); 1.401 + group->ecfp_reduce(S, t0, group); 1.402 + 1.403 + /* H = qx*(pz)^2 - U , R = (qy * pz^3 - S) */ 1.404 + PREFIX(square) (t0, p->z); 1.405 + group->ecfp_reduce(t0, t0, group); 1.406 + PREFIX(multiply) (H, q->x, t0); 1.407 + PREFIX(subtractShort) (H, H, U); 1.408 + group->ecfp_reduce(H, H, group); 1.409 + PREFIX(multiply) (t1, t0, p->z); /* t1 = pz^3 */ 1.410 + group->ecfp_reduce(t1, t1, group); 1.411 + PREFIX(multiply) (t0, t1, q->y); /* t0 = qy * pz^3 */ 1.412 + PREFIX(subtractShort) (t0, t0, S); 1.413 + group->ecfp_reduce(R, t0, group); 1.414 + 1.415 + /* U = U*H^2, H3 = H^3 */ 1.416 + PREFIX(square) (t0, H); 1.417 + group->ecfp_reduce(t0, t0, group); 1.418 + PREFIX(multiply) (t1, U, t0); 1.419 + group->ecfp_reduce(U, t1, group); 1.420 + PREFIX(multiply) (H3, t0, H); 1.421 + group->ecfp_reduce(H3, H3, group); 1.422 + 1.423 + /* rz = pz * qz * H */ 1.424 + PREFIX(multiply) (t0, q->z, H); 1.425 + group->ecfp_reduce(t0, t0, group); 1.426 + PREFIX(multiply) (t1, t0, p->z); 1.427 + group->ecfp_reduce(r->z, t1, group); 1.428 + 1.429 + /* rx = R^2 - H^3 - 2 * U */ 1.430 + PREFIX(square) (t0, R); 1.431 + PREFIX(subtractShort) (t0, t0, H3); 1.432 + PREFIX(subtractShort) (t0, t0, U); 1.433 + PREFIX(subtractShort) (t0, t0, U); 1.434 + group->ecfp_reduce(r->x, t0, group); 1.435 + 1.436 + /* ry = R(U - rx) - S*H3 */ 1.437 + PREFIX(subtractShort) (t1, U, r->x); 1.438 + PREFIX(multiply) (t0, t1, R); 1.439 + PREFIX(multiply) (t1, S, H3); 1.440 + PREFIX(subtractLong) (t1, t0, t1); 1.441 + group->ecfp_reduce(r->y, t1, group); 1.442 + 1.443 + CLEANUP: 1.444 + return; 1.445 +} 1.446 + 1.447 +/* Perform a point doubling in Modified Jacobian coordinates. Input and 1.448 + * output should be multi-precision floating point integers. */ 1.449 +void PREFIX(pt_dbl_jm) (const ecfp_jm_pt * p, ecfp_jm_pt * r, 1.450 + const EC_group_fp * group) { 1.451 + 1.452 + /* Temporary storage */ 1.453 + double t0[2 * ECFP_NUMDOUBLES], t1[2 * ECFP_NUMDOUBLES], 1.454 + M[2 * ECFP_NUMDOUBLES], S[2 * ECFP_NUMDOUBLES], 1.455 + U[2 * ECFP_NUMDOUBLES], T[2 * ECFP_NUMDOUBLES]; 1.456 + 1.457 + /* Check for point at infinity */ 1.458 + if (PREFIX(pt_is_inf_jm) (p) == MP_YES) { 1.459 + /* Set r = pt at infinity by setting rz = 0 */ 1.460 + PREFIX(set_pt_inf_jm) (r); 1.461 + goto CLEANUP; 1.462 + } 1.463 + 1.464 + /* M = 3 (px^2) + a*(pz^4) */ 1.465 + PREFIX(square) (t0, p->x); 1.466 + PREFIX(addLong) (M, t0, t0); 1.467 + PREFIX(addLong) (t0, t0, M); /* t0 = 3(px^2) */ 1.468 + PREFIX(addShort) (t0, t0, p->az4); 1.469 + group->ecfp_reduce(M, t0, group); 1.470 + 1.471 + /* rz = 2 * py * pz */ 1.472 + PREFIX(multiply) (t1, p->y, p->z); 1.473 + PREFIX(addLong) (t1, t1, t1); 1.474 + group->ecfp_reduce(r->z, t1, group); 1.475 + 1.476 + /* t0 = 2y^2, U = 8y^4 */ 1.477 + PREFIX(square) (t0, p->y); 1.478 + group->ecfp_reduce(t0, t0, group); 1.479 + PREFIX(addShort) (t0, t0, t0); 1.480 + PREFIX(square) (U, t0); 1.481 + group->ecfp_reduce(U, U, group); 1.482 + PREFIX(addShort) (U, U, U); 1.483 + 1.484 + /* S = 4 * px * py^2 = 2 * px * t0 */ 1.485 + PREFIX(multiply) (S, p->x, t0); 1.486 + group->ecfp_reduce(S, S, group); 1.487 + PREFIX(addShort) (S, S, S); 1.488 + 1.489 + /* rx = M^2 - 2S */ 1.490 + PREFIX(square) (T, M); 1.491 + PREFIX(subtractShort) (T, T, S); 1.492 + PREFIX(subtractShort) (T, T, S); 1.493 + group->ecfp_reduce(r->x, T, group); 1.494 + 1.495 + /* ry = M * (S - rx) - U */ 1.496 + PREFIX(subtractShort) (S, S, r->x); 1.497 + PREFIX(multiply) (t0, M, S); 1.498 + PREFIX(subtractShort) (t0, t0, U); 1.499 + group->ecfp_reduce(r->y, t0, group); 1.500 + 1.501 + /* ra*z^4 = 2*U*(apz4) */ 1.502 + PREFIX(multiply) (t1, U, p->az4); 1.503 + PREFIX(addLong) (t1, t1, t1); 1.504 + group->ecfp_reduce(r->az4, t1, group); 1.505 + 1.506 + CLEANUP: 1.507 + return; 1.508 +} 1.509 + 1.510 +/* Perform a point doubling using coordinates Affine -> Chudnovsky 1.511 + * Jacobian. Input and output should be multi-precision floating point 1.512 + * integers. */ 1.513 +void PREFIX(pt_dbl_aff2chud) (const ecfp_aff_pt * p, ecfp_chud_pt * r, 1.514 + const EC_group_fp * group) { 1.515 + double t0[2 * ECFP_NUMDOUBLES], t1[2 * ECFP_NUMDOUBLES], 1.516 + M[2 * ECFP_NUMDOUBLES], twoY2[2 * ECFP_NUMDOUBLES], 1.517 + S[2 * ECFP_NUMDOUBLES]; 1.518 + 1.519 + /* Check for point at infinity for p, if so set r = O */ 1.520 + if (PREFIX(pt_is_inf_aff) (p) == MP_YES) { 1.521 + PREFIX(set_pt_inf_chud) (r); 1.522 + goto CLEANUP; 1.523 + } 1.524 + 1.525 + /* M = 3(px)^2 + a */ 1.526 + PREFIX(square) (t0, p->x); 1.527 + PREFIX(addLong) (t1, t0, t0); 1.528 + PREFIX(addLong) (t1, t1, t0); 1.529 + PREFIX(addShort) (t1, t1, group->curvea); 1.530 + group->ecfp_reduce(M, t1, group); 1.531 + 1.532 + /* twoY2 = 2*(py)^2, S = 4(px)(py)^2 */ 1.533 + PREFIX(square) (twoY2, p->y); 1.534 + PREFIX(addLong) (twoY2, twoY2, twoY2); 1.535 + group->ecfp_reduce(twoY2, twoY2, group); 1.536 + PREFIX(multiply) (S, p->x, twoY2); 1.537 + PREFIX(addLong) (S, S, S); 1.538 + group->ecfp_reduce(S, S, group); 1.539 + 1.540 + /* rx = M^2 - 2S */ 1.541 + PREFIX(square) (t0, M); 1.542 + PREFIX(subtractShort) (t0, t0, S); 1.543 + PREFIX(subtractShort) (t0, t0, S); 1.544 + group->ecfp_reduce(r->x, t0, group); 1.545 + 1.546 + /* ry = M(S-rx) - 8y^4 */ 1.547 + PREFIX(subtractShort) (t0, S, r->x); 1.548 + PREFIX(multiply) (t1, t0, M); 1.549 + PREFIX(square) (t0, twoY2); 1.550 + PREFIX(subtractLong) (t1, t1, t0); 1.551 + PREFIX(subtractLong) (t1, t1, t0); 1.552 + group->ecfp_reduce(r->y, t1, group); 1.553 + 1.554 + /* rz = 2py */ 1.555 + PREFIX(addShort) (r->z, p->y, p->y); 1.556 + 1.557 + /* rz2 = rz^2 */ 1.558 + PREFIX(square) (t0, r->z); 1.559 + group->ecfp_reduce(r->z2, t0, group); 1.560 + 1.561 + /* rz3 = rz^3 */ 1.562 + PREFIX(multiply) (t0, r->z, r->z2); 1.563 + group->ecfp_reduce(r->z3, t0, group); 1.564 + 1.565 + CLEANUP: 1.566 + return; 1.567 +} 1.568 + 1.569 +/* Perform a point addition using coordinates: Modified Jacobian + 1.570 + * Chudnovsky Jacobian -> Modified Jacobian. Input and output should be 1.571 + * multi-precision floating point integers. */ 1.572 +void PREFIX(pt_add_jm_chud) (ecfp_jm_pt * p, ecfp_chud_pt * q, 1.573 + ecfp_jm_pt * r, const EC_group_fp * group) { 1.574 + 1.575 + double t0[2 * ECFP_NUMDOUBLES], t1[2 * ECFP_NUMDOUBLES], 1.576 + U[2 * ECFP_NUMDOUBLES], R[2 * ECFP_NUMDOUBLES], 1.577 + S[2 * ECFP_NUMDOUBLES], H[2 * ECFP_NUMDOUBLES], 1.578 + H3[2 * ECFP_NUMDOUBLES], pz2[2 * ECFP_NUMDOUBLES]; 1.579 + 1.580 + /* Check for point at infinity for p, if so set r = q need to convert 1.581 + * from Chudnovsky form to Modified Jacobian form */ 1.582 + if (PREFIX(pt_is_inf_jm) (p) == MP_YES) { 1.583 + PREFIX(copy) (r->x, q->x); 1.584 + PREFIX(copy) (r->y, q->y); 1.585 + PREFIX(copy) (r->z, q->z); 1.586 + PREFIX(square) (t0, q->z2); 1.587 + group->ecfp_reduce(t0, t0, group); 1.588 + PREFIX(multiply) (t1, t0, group->curvea); 1.589 + group->ecfp_reduce(r->az4, t1, group); 1.590 + goto CLEANUP; 1.591 + } 1.592 + /* Check for point at infinity for q, if so set r = p */ 1.593 + if (PREFIX(pt_is_inf_chud) (q) == MP_YES) { 1.594 + PREFIX(copy) (r->x, p->x); 1.595 + PREFIX(copy) (r->y, p->y); 1.596 + PREFIX(copy) (r->z, p->z); 1.597 + PREFIX(copy) (r->az4, p->az4); 1.598 + goto CLEANUP; 1.599 + } 1.600 + 1.601 + /* U = px * qz^2 */ 1.602 + PREFIX(multiply) (U, p->x, q->z2); 1.603 + group->ecfp_reduce(U, U, group); 1.604 + 1.605 + /* H = qx*(pz)^2 - U */ 1.606 + PREFIX(square) (t0, p->z); 1.607 + group->ecfp_reduce(pz2, t0, group); 1.608 + PREFIX(multiply) (H, pz2, q->x); 1.609 + group->ecfp_reduce(H, H, group); 1.610 + PREFIX(subtractShort) (H, H, U); 1.611 + 1.612 + /* U = U*H^2, H3 = H^3 */ 1.613 + PREFIX(square) (t0, H); 1.614 + group->ecfp_reduce(t0, t0, group); 1.615 + PREFIX(multiply) (t1, U, t0); 1.616 + group->ecfp_reduce(U, t1, group); 1.617 + PREFIX(multiply) (H3, t0, H); 1.618 + group->ecfp_reduce(H3, H3, group); 1.619 + 1.620 + /* S = py * qz^3 */ 1.621 + PREFIX(multiply) (S, p->y, q->z3); 1.622 + group->ecfp_reduce(S, S, group); 1.623 + 1.624 + /* R = (qy * z1^3 - s) */ 1.625 + PREFIX(multiply) (t0, pz2, p->z); 1.626 + group->ecfp_reduce(t0, t0, group); 1.627 + PREFIX(multiply) (R, t0, q->y); 1.628 + PREFIX(subtractShort) (R, R, S); 1.629 + group->ecfp_reduce(R, R, group); 1.630 + 1.631 + /* rz = pz * qz * H */ 1.632 + PREFIX(multiply) (t1, q->z, H); 1.633 + group->ecfp_reduce(t1, t1, group); 1.634 + PREFIX(multiply) (t0, p->z, t1); 1.635 + group->ecfp_reduce(r->z, t0, group); 1.636 + 1.637 + /* rx = R^2 - H^3 - 2 * U */ 1.638 + PREFIX(square) (t0, R); 1.639 + PREFIX(subtractShort) (t0, t0, H3); 1.640 + PREFIX(subtractShort) (t0, t0, U); 1.641 + PREFIX(subtractShort) (t0, t0, U); 1.642 + group->ecfp_reduce(r->x, t0, group); 1.643 + 1.644 + /* ry = R(U - rx) - S*H3 */ 1.645 + PREFIX(subtractShort) (t1, U, r->x); 1.646 + PREFIX(multiply) (t0, t1, R); 1.647 + PREFIX(multiply) (t1, S, H3); 1.648 + PREFIX(subtractLong) (t1, t0, t1); 1.649 + group->ecfp_reduce(r->y, t1, group); 1.650 + 1.651 + if (group->aIsM3) { /* a == -3 */ 1.652 + /* a(rz^4) = -3 * ((rz^2)^2) */ 1.653 + PREFIX(square) (t0, r->z); 1.654 + group->ecfp_reduce(t0, t0, group); 1.655 + PREFIX(square) (t1, t0); 1.656 + PREFIX(addLong) (t0, t1, t1); 1.657 + PREFIX(addLong) (t0, t0, t1); 1.658 + PREFIX(negLong) (t0, t0); 1.659 + group->ecfp_reduce(r->az4, t0, group); 1.660 + } else { /* Generic case */ 1.661 + /* a(rz^4) = a * ((rz^2)^2) */ 1.662 + PREFIX(square) (t0, r->z); 1.663 + group->ecfp_reduce(t0, t0, group); 1.664 + PREFIX(square) (t1, t0); 1.665 + group->ecfp_reduce(t1, t1, group); 1.666 + PREFIX(multiply) (t0, group->curvea, t1); 1.667 + group->ecfp_reduce(r->az4, t0, group); 1.668 + } 1.669 + CLEANUP: 1.670 + return; 1.671 +} 1.672 + 1.673 +/* Perform a point addition using Chudnovsky Jacobian coordinates. Input 1.674 + * and output should be multi-precision floating point integers. */ 1.675 +void PREFIX(pt_add_chud) (const ecfp_chud_pt * p, const ecfp_chud_pt * q, 1.676 + ecfp_chud_pt * r, const EC_group_fp * group) { 1.677 + 1.678 + /* Temporary Storage */ 1.679 + double t0[2 * ECFP_NUMDOUBLES], t1[2 * ECFP_NUMDOUBLES], 1.680 + U[2 * ECFP_NUMDOUBLES], R[2 * ECFP_NUMDOUBLES], 1.681 + S[2 * ECFP_NUMDOUBLES], H[2 * ECFP_NUMDOUBLES], 1.682 + H3[2 * ECFP_NUMDOUBLES]; 1.683 + 1.684 + /* Check for point at infinity for p, if so set r = q */ 1.685 + if (PREFIX(pt_is_inf_chud) (p) == MP_YES) { 1.686 + PREFIX(copy) (r->x, q->x); 1.687 + PREFIX(copy) (r->y, q->y); 1.688 + PREFIX(copy) (r->z, q->z); 1.689 + PREFIX(copy) (r->z2, q->z2); 1.690 + PREFIX(copy) (r->z3, q->z3); 1.691 + goto CLEANUP; 1.692 + } 1.693 + 1.694 + /* Check for point at infinity for p, if so set r = q */ 1.695 + if (PREFIX(pt_is_inf_chud) (q) == MP_YES) { 1.696 + PREFIX(copy) (r->x, p->x); 1.697 + PREFIX(copy) (r->y, p->y); 1.698 + PREFIX(copy) (r->z, p->z); 1.699 + PREFIX(copy) (r->z2, p->z2); 1.700 + PREFIX(copy) (r->z3, p->z3); 1.701 + goto CLEANUP; 1.702 + } 1.703 + 1.704 + /* U = px * qz^2 */ 1.705 + PREFIX(multiply) (U, p->x, q->z2); 1.706 + group->ecfp_reduce(U, U, group); 1.707 + 1.708 + /* H = qx*(pz)^2 - U */ 1.709 + PREFIX(multiply) (H, q->x, p->z2); 1.710 + PREFIX(subtractShort) (H, H, U); 1.711 + group->ecfp_reduce(H, H, group); 1.712 + 1.713 + /* U = U*H^2, H3 = H^3 */ 1.714 + PREFIX(square) (t0, H); 1.715 + group->ecfp_reduce(t0, t0, group); 1.716 + PREFIX(multiply) (t1, U, t0); 1.717 + group->ecfp_reduce(U, t1, group); 1.718 + PREFIX(multiply) (H3, t0, H); 1.719 + group->ecfp_reduce(H3, H3, group); 1.720 + 1.721 + /* S = py * qz^3 */ 1.722 + PREFIX(multiply) (S, p->y, q->z3); 1.723 + group->ecfp_reduce(S, S, group); 1.724 + 1.725 + /* rz = pz * qz * H */ 1.726 + PREFIX(multiply) (t0, q->z, H); 1.727 + group->ecfp_reduce(t0, t0, group); 1.728 + PREFIX(multiply) (t1, t0, p->z); 1.729 + group->ecfp_reduce(r->z, t1, group); 1.730 + 1.731 + /* R = (qy * z1^3 - s) */ 1.732 + PREFIX(multiply) (t0, q->y, p->z3); 1.733 + PREFIX(subtractShort) (t0, t0, S); 1.734 + group->ecfp_reduce(R, t0, group); 1.735 + 1.736 + /* rx = R^2 - H^3 - 2 * U */ 1.737 + PREFIX(square) (t0, R); 1.738 + PREFIX(subtractShort) (t0, t0, H3); 1.739 + PREFIX(subtractShort) (t0, t0, U); 1.740 + PREFIX(subtractShort) (t0, t0, U); 1.741 + group->ecfp_reduce(r->x, t0, group); 1.742 + 1.743 + /* ry = R(U - rx) - S*H3 */ 1.744 + PREFIX(subtractShort) (t1, U, r->x); 1.745 + PREFIX(multiply) (t0, t1, R); 1.746 + PREFIX(multiply) (t1, S, H3); 1.747 + PREFIX(subtractLong) (t1, t0, t1); 1.748 + group->ecfp_reduce(r->y, t1, group); 1.749 + 1.750 + /* rz2 = rz^2 */ 1.751 + PREFIX(square) (t0, r->z); 1.752 + group->ecfp_reduce(r->z2, t0, group); 1.753 + 1.754 + /* rz3 = rz^3 */ 1.755 + PREFIX(multiply) (t0, r->z, r->z2); 1.756 + group->ecfp_reduce(r->z3, t0, group); 1.757 + 1.758 + CLEANUP: 1.759 + return; 1.760 +} 1.761 + 1.762 +/* Expects out to be an array of size 16 of Chudnovsky Jacobian points. 1.763 + * Fills in Chudnovsky Jacobian form (x, y, z, z^2, z^3), for -15P, -13P, 1.764 + * -11P, -9P, -7P, -5P, -3P, -P, P, 3P, 5P, 7P, 9P, 11P, 13P, 15P */ 1.765 +void PREFIX(precompute_chud) (ecfp_chud_pt * out, const ecfp_aff_pt * p, 1.766 + const EC_group_fp * group) { 1.767 + 1.768 + ecfp_chud_pt p2; 1.769 + 1.770 + /* Set out[8] = P */ 1.771 + PREFIX(copy) (out[8].x, p->x); 1.772 + PREFIX(copy) (out[8].y, p->y); 1.773 + PREFIX(one) (out[8].z); 1.774 + PREFIX(one) (out[8].z2); 1.775 + PREFIX(one) (out[8].z3); 1.776 + 1.777 + /* Set p2 = 2P */ 1.778 + PREFIX(pt_dbl_aff2chud) (p, &p2, group); 1.779 + 1.780 + /* Set 3P, 5P, ..., 15P */ 1.781 + PREFIX(pt_add_chud) (&out[8], &p2, &out[9], group); 1.782 + PREFIX(pt_add_chud) (&out[9], &p2, &out[10], group); 1.783 + PREFIX(pt_add_chud) (&out[10], &p2, &out[11], group); 1.784 + PREFIX(pt_add_chud) (&out[11], &p2, &out[12], group); 1.785 + PREFIX(pt_add_chud) (&out[12], &p2, &out[13], group); 1.786 + PREFIX(pt_add_chud) (&out[13], &p2, &out[14], group); 1.787 + PREFIX(pt_add_chud) (&out[14], &p2, &out[15], group); 1.788 + 1.789 + /* Set -15P, -13P, ..., -P */ 1.790 + PREFIX(pt_neg_chud) (&out[8], &out[7]); 1.791 + PREFIX(pt_neg_chud) (&out[9], &out[6]); 1.792 + PREFIX(pt_neg_chud) (&out[10], &out[5]); 1.793 + PREFIX(pt_neg_chud) (&out[11], &out[4]); 1.794 + PREFIX(pt_neg_chud) (&out[12], &out[3]); 1.795 + PREFIX(pt_neg_chud) (&out[13], &out[2]); 1.796 + PREFIX(pt_neg_chud) (&out[14], &out[1]); 1.797 + PREFIX(pt_neg_chud) (&out[15], &out[0]); 1.798 +} 1.799 + 1.800 +/* Expects out to be an array of size 16 of Jacobian points. Fills in 1.801 + * Jacobian form (x, y, z), for O, P, 2P, ... 15P */ 1.802 +void PREFIX(precompute_jac) (ecfp_jac_pt * precomp, const ecfp_aff_pt * p, 1.803 + const EC_group_fp * group) { 1.804 + int i; 1.805 + 1.806 + /* fill precomputation table */ 1.807 + /* set precomp[0] */ 1.808 + PREFIX(set_pt_inf_jac) (&precomp[0]); 1.809 + /* set precomp[1] */ 1.810 + PREFIX(copy) (precomp[1].x, p->x); 1.811 + PREFIX(copy) (precomp[1].y, p->y); 1.812 + if (PREFIX(pt_is_inf_aff) (p) == MP_YES) { 1.813 + PREFIX(zero) (precomp[1].z); 1.814 + } else { 1.815 + PREFIX(one) (precomp[1].z); 1.816 + } 1.817 + /* set precomp[2] */ 1.818 + group->pt_dbl_jac(&precomp[1], &precomp[2], group); 1.819 + 1.820 + /* set rest of precomp */ 1.821 + for (i = 3; i < 16; i++) { 1.822 + group->pt_add_jac_aff(&precomp[i - 1], p, &precomp[i], group); 1.823 + } 1.824 +}