michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include "ecp.h"
michael@0: #include "mpi.h"
michael@0: #include "mplogic.h"
michael@0: #include "mpi-priv.h"
michael@0: 
michael@0: /* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1.
michael@0:  * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to
michael@0:  * Elliptic Curve Cryptography.
michael@0:  *
michael@0:  * a    - value to reduce.  a can be r: on the fast path every digit of a
michael@0:  *        that is needed is copied into s[][] before r is first written.
michael@0:  * r    - receives the result.
michael@0:  * meth - field method; only meth->irr is read here, as the modulus handed
michael@0:  *        to mp_mod and mp_submod.
michael@0:  *
michael@0:  * Returns res, which starts as MP_OKAY and is otherwise set by the
michael@0:  * MP_CHECKOK'd calls (MP_CHECKOK is defined elsewhere; presumably it
michael@0:  * stores the call's status in res and jumps to CLEANUP on failure).
michael@0:  *
michael@0:  * On the fast path the result is assembled as
michael@0:  *   m[0] + 2*m[1] + m[2] + m[3] + m[4] + m[5] + m[6]
michael@0:  *        - m[7] - m[8] - m[9]
michael@0:  * with the final subtraction done through mp_submod against meth->irr.
michael@0:  * m[0] holds the low digits of a (0..11 in the 32-bit layout, 0..5
michael@0:  * otherwise); m[1]..m[9] are built from the upper digits of a (12..23,
michael@0:  * respectively 6..11), zero-padded and moved around in 32-bit steps. */
michael@0: static mp_err
michael@0: ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
michael@0: {
michael@0: 	mp_err res = MP_OKAY;
michael@0: 	int a_bits = mpl_significant_bits(a);
michael@0: 	int i;
michael@0: 
michael@0: 	/* m[0]..m[9] are stack-allocated mp_int of exactly the size we need:
michael@0: 	 * the loop below marks each one non-negative, fixes both its
michael@0: 	 * allocated and used digit counts at the row length (12 or 6) and
michael@0: 	 * points its digit storage at row s[i].  Because the used count is
michael@0: 	 * fixed, rows may carry leading zero digits. */
michael@0: 	mp_int m[10];
michael@0: 
michael@0: #ifdef ECL_THIRTY_TWO_BIT
michael@0: 	mp_digit s[10][12];
michael@0: 	for (i = 0; i < 10; i++) {
michael@0: 		MP_SIGN(&m[i]) = MP_ZPOS;
michael@0: 		MP_ALLOC(&m[i]) = 12;
michael@0: 		MP_USED(&m[i]) = 12;
michael@0: 		MP_DIGITS(&m[i]) = s[i];
michael@0: 	}
michael@0: #else
michael@0: 	mp_digit s[10][6];
michael@0: 	for (i = 0; i < 10; i++) {
michael@0: 		MP_SIGN(&m[i]) = MP_ZPOS;
michael@0: 		MP_ALLOC(&m[i]) = 6;
michael@0: 		MP_USED(&m[i]) = 6;
michael@0: 		MP_DIGITS(&m[i]) = s[i];
michael@0: 	}
michael@0: #endif
michael@0: 
michael@0: #ifdef ECL_THIRTY_TWO_BIT
michael@0: 	/* for polynomials larger than twice the field size or polynomials
michael@0: 	 * not using all words, use regular reduction.
michael@0: 	 * 768 = 2 * 384 and 736 = 768 - 32, so the fast path below runs only
michael@0: 	 * when the highest significant bit of a lies in its 24th 32-bit
michael@0: 	 * word, i.e. MP_DIGIT(a, 23) is non-zero and nothing lies above it.
michael@0: 	 * The fast path reads digits 0..23 of a unconditionally.
michael@0: 	 * Rows s[1]..s[9] are filled one digit at a time; s[0] and s[2] are
michael@0: 	 * straight copies of digits 0..11 and 12..23. */
michael@0: 	if ((a_bits > 768) || (a_bits <= 736)) {
michael@0: 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
michael@0: 	} else {
michael@0: 		for (i = 0; i < 12; i++) {
michael@0: 			s[0][i] = MP_DIGIT(a, i);
michael@0: 		}
michael@0: 		s[1][0] = 0;
michael@0: 		s[1][1] = 0;
michael@0: 		s[1][2] = 0;
michael@0: 		s[1][3] = 0;
michael@0: 		s[1][4] = MP_DIGIT(a, 21);
michael@0: 		s[1][5] = MP_DIGIT(a, 22);
michael@0: 		s[1][6] = MP_DIGIT(a, 23);
michael@0: 		s[1][7] = 0;
michael@0: 		s[1][8] = 0;
michael@0: 		s[1][9] = 0;
michael@0: 		s[1][10] = 0;
michael@0: 		s[1][11] = 0;
michael@0: 		for (i = 0; i < 12; i++) {
michael@0: 			s[2][i] = MP_DIGIT(a, i+12);
michael@0: 		}
michael@0: 		s[3][0] = MP_DIGIT(a, 21);
michael@0: 		s[3][1] = MP_DIGIT(a, 22);
michael@0: 		s[3][2] = MP_DIGIT(a, 23);
michael@0: 		for (i = 3; i < 12; i++) {
michael@0: 			s[3][i] = MP_DIGIT(a, i+9);
michael@0: 		}
michael@0: 		s[4][0] = 0;
michael@0: 		s[4][1] = MP_DIGIT(a, 23);
michael@0: 		s[4][2] = 0;
michael@0: 		s[4][3] = MP_DIGIT(a, 20);
michael@0: 		for (i = 4; i < 12; i++) {
michael@0: 			s[4][i] = MP_DIGIT(a, i+8);
michael@0: 		}
michael@0: 		s[5][0] = 0;
michael@0: 		s[5][1] = 0;
michael@0: 		s[5][2] = 0;
michael@0: 		s[5][3] = 0;
michael@0: 		s[5][4] = MP_DIGIT(a, 20);
michael@0: 		s[5][5] = MP_DIGIT(a, 21);
michael@0: 		s[5][6] = MP_DIGIT(a, 22);
michael@0: 		s[5][7] = MP_DIGIT(a, 23);
michael@0: 		s[5][8] = 0;
michael@0: 		s[5][9] = 0;
michael@0: 		s[5][10] = 0;
michael@0: 		s[5][11] = 0;
michael@0: 		s[6][0] = MP_DIGIT(a, 20);
michael@0: 		s[6][1] = 0;
michael@0: 		s[6][2] = 0;
michael@0: 		s[6][3] = MP_DIGIT(a, 21);
michael@0: 		s[6][4] = MP_DIGIT(a, 22);
michael@0: 		s[6][5] = MP_DIGIT(a, 23);
michael@0: 		s[6][6] = 0;
michael@0: 		s[6][7] = 0;
michael@0: 		s[6][8] = 0;
michael@0: 		s[6][9] = 0;
michael@0: 		s[6][10] = 0;
michael@0: 		s[6][11] = 0;
michael@0: 		s[7][0] = MP_DIGIT(a, 23);
michael@0: 		for (i = 1; i < 12; i++) {
michael@0: 			s[7][i] = MP_DIGIT(a, i+11);
michael@0: 		}
michael@0: 		s[8][0] = 0;
michael@0: 		s[8][1] = MP_DIGIT(a, 20);
michael@0: 		s[8][2] = MP_DIGIT(a, 21);
michael@0: 		s[8][3] = MP_DIGIT(a, 22);
michael@0: 		s[8][4] = MP_DIGIT(a, 23);
michael@0: 		s[8][5] = 0;
michael@0: 		s[8][6] = 0;
michael@0: 		s[8][7] = 0;
michael@0: 		s[8][8] = 0;
michael@0: 		s[8][9] = 0;
michael@0: 		s[8][10] = 0;
michael@0: 		s[8][11] = 0;
michael@0: 		s[9][0] = 0;
michael@0: 		s[9][1] = 0;
michael@0: 		s[9][2] = 0;
michael@0: 		s[9][3] = MP_DIGIT(a, 23);
michael@0: 		s[9][4] = MP_DIGIT(a, 23);
michael@0: 		s[9][5] = 0;
michael@0: 		s[9][6] = 0;
michael@0: 		s[9][7] = 0;
michael@0: 		s[9][8] = 0;
michael@0: 		s[9][9] = 0;
michael@0: 		s[9][10] = 0;
michael@0: 		s[9][11] = 0;
michael@0: 
michael@0: 		MP_CHECKOK(mp_add(&m[0], &m[1], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] is added a second time */
michael@0: 		MP_CHECKOK(mp_add(r, &m[2], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[3], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[4], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[5], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[6], r));
michael@0: 		MP_CHECKOK(mp_sub(r, &m[7], r));
michael@0: 		MP_CHECKOK(mp_sub(r, &m[8], r));
michael@0: 		MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
michael@0: 		s_mp_clamp(r);
michael@0: 	}
michael@0: #else
michael@0: 	/* for polynomials larger than twice the field size or polynomials
michael@0: 	 * not using all words, use regular reduction.
michael@0: 	 * Same 736/768 bit window as the 32-bit build.  The rows below are
michael@0: 	 * the same ten terms, but each digit here packs two 32-bit words,
michael@0: 	 * so words are split and spliced with shifts by 32: x >> 32 takes
michael@0: 	 * the upper word of a digit, x << 32 moves the lower word up, and
michael@0: 	 * (x >> 32) << 32 or (x << 32) >> 32 keeps one word and clears the
michael@0: 	 * other.  NOTE(review): this relies on mp_digit being exactly 64
michael@0: 	 * bits wide whenever ECL_THIRTY_TWO_BIT is not defined - confirm
michael@0: 	 * against the mpi build configuration. */
michael@0: 	if ((a_bits > 768) || (a_bits <= 736)) {
michael@0: 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
michael@0: 	} else {
michael@0: 		for (i = 0; i < 6; i++) {
michael@0: 			s[0][i] = MP_DIGIT(a, i);
michael@0: 		}
michael@0: 		s[1][0] = 0;
michael@0: 		s[1][1] = 0;
michael@0: 		s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
michael@0: 		s[1][3] = MP_DIGIT(a, 11) >> 32;
michael@0: 		s[1][4] = 0;
michael@0: 		s[1][5] = 0;
michael@0: 		for (i = 0; i < 6; i++) {
michael@0: 			s[2][i] = MP_DIGIT(a, i+6);
michael@0: 		}
michael@0: 		s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
michael@0: 		s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
michael@0: 		for (i = 2; i < 6; i++) {
michael@0: 			s[3][i] = (MP_DIGIT(a, i+4) >> 32) | (MP_DIGIT(a, i+5) << 32);
michael@0: 		}
michael@0: 		s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32;
michael@0: 		s[4][1] = MP_DIGIT(a, 10) << 32;
michael@0: 		for (i = 2; i < 6; i++) {
michael@0: 			s[4][i] = MP_DIGIT(a, i+4);
michael@0: 		}
michael@0: 		s[5][0] = 0;
michael@0: 		s[5][1] = 0;
michael@0: 		s[5][2] = MP_DIGIT(a, 10);
michael@0: 		s[5][3] = MP_DIGIT(a, 11);
michael@0: 		s[5][4] = 0;
michael@0: 		s[5][5] = 0;
michael@0: 		s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32;
michael@0: 		s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
michael@0: 		s[6][2] = MP_DIGIT(a, 11);
michael@0: 		s[6][3] = 0;
michael@0: 		s[6][4] = 0;
michael@0: 		s[6][5] = 0;
michael@0: 		s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
michael@0: 		for (i = 1; i < 6; i++) {
michael@0: 			s[7][i] = (MP_DIGIT(a, i+5) >> 32) | (MP_DIGIT(a, i+6) << 32);
michael@0: 		}
michael@0: 		s[8][0] = MP_DIGIT(a, 10) << 32;
michael@0: 		s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
michael@0: 		s[8][2] = MP_DIGIT(a, 11) >> 32;
michael@0: 		s[8][3] = 0;
michael@0: 		s[8][4] = 0;
michael@0: 		s[8][5] = 0;
michael@0: 		s[9][0] = 0;
michael@0: 		s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
michael@0: 		s[9][2] = MP_DIGIT(a, 11) >> 32;
michael@0: 		s[9][3] = 0;
michael@0: 		s[9][4] = 0;
michael@0: 		s[9][5] = 0;
michael@0: 
michael@0: 		MP_CHECKOK(mp_add(&m[0], &m[1], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] is added a second time */
michael@0: 		MP_CHECKOK(mp_add(r, &m[2], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[3], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[4], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[5], r));
michael@0: 		MP_CHECKOK(mp_add(r, &m[6], r));
michael@0: 		MP_CHECKOK(mp_sub(r, &m[7], r));
michael@0: 		MP_CHECKOK(mp_sub(r, &m[8], r));
michael@0: 		MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
michael@0: 		s_mp_clamp(r);
michael@0: 	}
michael@0: #endif
michael@0: 
michael@0:   CLEANUP:
michael@0: 	return res;
michael@0: }
michael@0: 
michael@0: /* Field squaring for p384: r = (a * a) mod p384.  The full square is
michael@0:  * formed with mp_sqr and then reduced in place by the optimized p384
michael@0:  * reduction.  r could be a.  Returns the status of the first step that
michael@0:  * fails, otherwise the status of the reduction.
michael@0:  */
michael@0: static mp_err
michael@0: ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
michael@0: {
michael@0: 	mp_err res;
michael@0: 
michael@0: 	/* unreduced square goes straight into r */
michael@0: 	res = mp_sqr(a, r);
michael@0: 	if (res < MP_OKAY) {
michael@0: 		return res;
michael@0: 	}
michael@0: 
michael@0: 	/* reduce r in place; its status is the function's status */
michael@0: 	return ec_GFp_nistp384_mod(r, r, meth);
michael@0: }
michael@0: 
michael@0: /* Field multiplication for p384: r = (a * b) mod p384.  The full product
michael@0:  * is formed with mp_mul and then reduced in place by the optimized p384
michael@0:  * reduction.  r could be a or b; a could be b.  Returns the status of
michael@0:  * the first step that fails, otherwise the status of the reduction. */
michael@0: static mp_err
michael@0: ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
michael@0: 					const GFMethod *meth)
michael@0: {
michael@0: 	mp_err res;
michael@0: 
michael@0: 	/* unreduced product goes straight into r */
michael@0: 	res = mp_mul(a, b, r);
michael@0: 	if (res < MP_OKAY) {
michael@0: 		return res;
michael@0: 	}
michael@0: 
michael@0: 	/* reduce r in place; its status is the function's status */
michael@0: 	return ec_GFp_nistp384_mod(r, r, meth);
michael@0: }
michael@0: 
michael@0: /* Install the p384-specific field_mod, field_mul and field_sqr routines
michael@0:  * on group->meth when name is ECCurve_NIST_P384.  For any other curve
michael@0:  * name the group is left untouched.  Always returns MP_OKAY. */
michael@0: mp_err
michael@0: ec_group_set_gfp384(ECGroup *group, ECCurveName name)
michael@0: {
michael@0: 	/* nothing to wire in unless this is NIST P-384 */
michael@0: 	if (name != ECCurve_NIST_P384) {
michael@0: 		return MP_OKAY;
michael@0: 	}
michael@0: 
michael@0: 	group->meth->field_sqr = ec_GFp_nistp384_sqr;
michael@0: 	group->meth->field_mul = ec_GFp_nistp384_mul;
michael@0: 	group->meth->field_mod = ec_GFp_nistp384_mod;
michael@0: 	return MP_OKAY;
michael@0: }