security/nss/lib/freebl/ecl/ecp_192.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "ecp.h"
     6 #include "mpi.h"
     7 #include "mplogic.h"
     8 #include "mpi-priv.h"
    10 #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
    12 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
    13  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
    14  * Implementation of the NIST Elliptic Curves over Prime Fields. */
    15 static mp_err
    16 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
    17 {
    18 	mp_err res = MP_OKAY;
    19 	mp_size a_used = MP_USED(a);
    20 	mp_digit r3;
    21 #ifndef MPI_AMD64_ADD 
    22 	mp_digit carry;
    23 #endif
    24 #ifdef ECL_THIRTY_TWO_BIT
    25 	mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
    26         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
    27 #else
    28 	mp_digit a5 = 0, a4 = 0, a3 = 0;
    29         mp_digit r0, r1, r2;
    30 #endif
    32 	/* reduction not needed if a is not larger than field size */
    33 	if (a_used < ECP192_DIGITS) {
    34 		if (a == r) {
    35 			return MP_OKAY;
    36 		}
    37 		return mp_copy(a, r);
    38 	}
    40 	/* for polynomials larger than twice the field size, use regular
    41 	 * reduction */
    42 	if (a_used > ECP192_DIGITS*2) {
    43 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
    44 	} else {
    45 		/* copy out upper words of a */
    47 #ifdef ECL_THIRTY_TWO_BIT
    49 		/* in all the math below,
    50 		 * nXb is most signifiant, nXa is least significant */
    51 		switch (a_used) {
    52 		case 12:
    53 			a5b = MP_DIGIT(a, 11);
    54 		case 11:
    55 			a5a = MP_DIGIT(a, 10);
    56 		case 10:
    57 			a4b = MP_DIGIT(a, 9);
    58 		case 9:
    59 			a4a = MP_DIGIT(a, 8);
    60 		case 8:
    61 			a3b = MP_DIGIT(a, 7);
    62 		case 7:
    63 			a3a = MP_DIGIT(a, 6);
    64 		}
    67                 r2b= MP_DIGIT(a, 5);
    68                 r2a= MP_DIGIT(a, 4);
    69                 r1b = MP_DIGIT(a, 3);
    70                 r1a = MP_DIGIT(a, 2);
    71                 r0b = MP_DIGIT(a, 1);
    72                 r0a = MP_DIGIT(a, 0);
    74 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
    75 		MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
    76 		MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
    77 		MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
    78 		MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
    79 		MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
    80 		MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
    81 		r3 = carry; carry = 0;
    82 		MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
    83 		MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
    84 		MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
    85 		MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
    86 		MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
    87 		MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
    88 		r3 += carry; 
    89 		MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
    90 		MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
    91 		MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
    92 		MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
    93 		r3 += carry;
    95 		/* reduce out the carry */
    96 		while (r3) {
    97 			MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
    98 			MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
    99 			MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
   100 			MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
   101 			MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
   102 			MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
   103 			r3 = carry;
   104 		}
   106 		/* check for final reduction */
   107 		/*
   108 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
   109 		 * 0xffffffffffffffff. That means we can only be over and need
   110 		 * one more reduction 
   111 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 
   112 		 *     and
   113 		 *     r1 == 0xffffffffffffffffff   or
   114 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
   115 		 * In all cases, we subtract the field (or add the 2's 
   116 		 * complement value (1,1,0)).  (r0, r1, r2)
   117 		 */
   118 		if (((r2b == 0xffffffff) && (r2a == 0xffffffff) 
   119 			&& (r1b == 0xffffffff) ) &&
   120 			   ((r1a == 0xffffffff) || 
   121 			    (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
   122 					(r0b == 0xffffffff)) ) {
   123 			/* do a quick subtract */
   124 			MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
   125 			MP_ADD_CARRY(r0b, carry, r0a, 0, carry);
   126 			r1a += 1+carry;
   127 			r1b = r2a = r2b = 0;
   128 		}
   130 		/* set the lower words of r */
   131 		if (a != r) {
   132 			MP_CHECKOK(s_mp_pad(r, 6));
   133 		}
   134 		MP_DIGIT(r, 5) = r2b;
   135 		MP_DIGIT(r, 4) = r2a;
   136 		MP_DIGIT(r, 3) = r1b;
   137 		MP_DIGIT(r, 2) = r1a;
   138 		MP_DIGIT(r, 1) = r0b;
   139 		MP_DIGIT(r, 0) = r0a;
   140 		MP_USED(r) = 6;
   141 #else
   142 		switch (a_used) {
   143 		case 6:
   144 			a5 = MP_DIGIT(a, 5);
   145 		case 5:
   146 			a4 = MP_DIGIT(a, 4);
   147 		case 4:
   148 			a3 = MP_DIGIT(a, 3);
   149 		}
   151                 r2 = MP_DIGIT(a, 2);
   152                 r1 = MP_DIGIT(a, 1);
   153                 r0 = MP_DIGIT(a, 0);
   155 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
   156 #ifndef MPI_AMD64_ADD 
   157 		MP_ADD_CARRY(r0, a3, r0, 0,     carry);
   158 		MP_ADD_CARRY(r1, a3, r1, carry, carry);
   159 		MP_ADD_CARRY(r2, a4, r2, carry, carry);
   160 		r3 = carry; 
   161 		MP_ADD_CARRY(r0, a5, r0, 0,     carry);
   162 		MP_ADD_CARRY(r1, a5, r1, carry, carry);
   163 		MP_ADD_CARRY(r2, a5, r2, carry, carry);
   164 		r3 += carry; 
   165 		MP_ADD_CARRY(r1, a4, r1, 0,     carry);
   166 		MP_ADD_CARRY(r2,  0, r2, carry, carry);
   167 		r3 += carry;
   169 #else 
   170                 r2 = MP_DIGIT(a, 2);
   171                 r1 = MP_DIGIT(a, 1);
   172                 r0 = MP_DIGIT(a, 0);
   174                 /* set the lower words of r */
   175                 __asm__ (
   176                 "xorq   %3,%3           \n\t"
   177                 "addq   %4,%0           \n\t"
   178                 "adcq   %4,%1           \n\t"
   179                 "adcq   %5,%2           \n\t"
   180                 "adcq   $0,%3           \n\t"
   181                 "addq   %6,%0           \n\t"
   182                 "adcq   %6,%1           \n\t"
   183                 "adcq   %6,%2           \n\t"
   184                 "adcq   $0,%3           \n\t"
   185                 "addq   %5,%1           \n\t"
   186                 "adcq   $0,%2           \n\t"
   187                 "adcq   $0,%3           \n\t"
   188                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3), 
   189 		  "=r"(a4), "=r"(a5)
   190                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3), 
   191 		  "4" (a3), "5" (a4), "6"(a5)
   192                 : "%cc" );
   193 #endif 
   195 		/* reduce out the carry */
   196 		while (r3) {
   197 #ifndef MPI_AMD64_ADD
   198 			MP_ADD_CARRY(r0, r3, r0, 0,     carry);
   199 			MP_ADD_CARRY(r1, r3, r1, carry, carry);
   200 			MP_ADD_CARRY(r2,  0, r2, carry, carry);
   201 			r3 = carry;
   202 #else
   203 			a3=r3;
   204               		__asm__ (
   205                 	"xorq   %3,%3           \n\t"
   206                 	"addq   %4,%0           \n\t"
   207                 	"adcq   %4,%1           \n\t"
   208                 	"adcq   $0,%2           \n\t"
   209                 	"adcq   $0,%3           \n\t"
   210                 	: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
   211                 	: "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
   212                 	: "%cc" );
   213 #endif
   214 		}
   216 		/* check for final reduction */
   217 		/*
   218 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
   219 		 * 0xffffffffffffffff. That means we can only be over and need
   220 		 * one more reduction 
   221 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 
   222 		 *     and
   223 		 *     r1 == 0xffffffffffffffffff   or
   224 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
   225 		 * In all cases, we subtract the field (or add the 2's 
   226 		 * complement value (1,1,0)).  (r0, r1, r2)
   227 		 */
   228 		if (r3 || ((r2 == MP_DIGIT_MAX) &&
   229 		      ((r1 == MP_DIGIT_MAX) || 
   230 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
   231 			/* do a quick subtract */
   232 			MP_ADD_CARRY(r0, 1, r0, 0, carry);
   233 			r1 += 1+carry;
   234 			r2 = 0;
   235 		}
   236 		/* set the lower words of r */
   237 		if (a != r) {
   238 			MP_CHECKOK(s_mp_pad(r, 3));
   239 		}
   240 		MP_DIGIT(r, 2) = r2;
   241 		MP_DIGIT(r, 1) = r1;
   242 		MP_DIGIT(r, 0) = r0;
   243 		MP_USED(r) = 3;
   244 #endif
   245 	}
   246 	s_mp_clamp(r);
   247   CLEANUP:
   248 	return res;
   249 }
   251 #ifndef ECL_THIRTY_TWO_BIT
   252 /* Compute the sum of 192 bit curves. Do the work in-line since the
   253  * number of words are so small, we don't want to overhead of mp function
   254  * calls.  Uses optimized modular reduction for p192. 
   255  */
   256 static mp_err
   257 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r, 
   258 			const GFMethod *meth)
   259 {
   260 	mp_err res = MP_OKAY;
   261 	mp_digit a0 = 0, a1 = 0, a2 = 0;
   262 	mp_digit r0 = 0, r1 = 0, r2 = 0;
   263 	mp_digit carry;
   265 	switch(MP_USED(a)) {
   266 	case 3:
   267 		a2 = MP_DIGIT(a,2);
   268 	case 2:
   269 		a1 = MP_DIGIT(a,1);
   270 	case 1:
   271 		a0 = MP_DIGIT(a,0);
   272 	}
   273 	switch(MP_USED(b)) {
   274 	case 3:
   275 		r2 = MP_DIGIT(b,2);
   276 	case 2:
   277 		r1 = MP_DIGIT(b,1);
   278 	case 1:
   279 		r0 = MP_DIGIT(b,0);
   280 	}
   282 #ifndef MPI_AMD64_ADD
   283 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
   284 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
   285 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
   286 #else
   287 	__asm__ (
   288                 "xorq   %3,%3           \n\t"
   289                 "addq   %4,%0           \n\t"
   290                 "adcq   %5,%1           \n\t"
   291                 "adcq   %6,%2           \n\t"
   292                 "adcq   $0,%3           \n\t"
   293                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
   294                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0), 
   295 		  "1" (r1), "2" (r2)
   296                 : "%cc" );
   297 #endif
   299 	/* Do quick 'subract' if we've gone over 
   300 	 * (add the 2's complement of the curve field) */
   301 	if (carry || ((r2 == MP_DIGIT_MAX) &&
   302 		      ((r1 == MP_DIGIT_MAX) || 
   303 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
   304 #ifndef MPI_AMD64_ADD
   305 		MP_ADD_CARRY(r0, 1, r0, 0,     carry);
   306 		MP_ADD_CARRY(r1, 1, r1, carry, carry);
   307 		MP_ADD_CARRY(r2, 0, r2, carry, carry);
   308 #else
   309 		__asm__ (
   310 			"addq   $1,%0           \n\t"
   311 			"adcq   $1,%1           \n\t"
   312 			"adcq   $0,%2           \n\t"
   313 			: "=r"(r0), "=r"(r1), "=r"(r2)
   314 			: "0" (r0), "1" (r1), "2" (r2)
   315 			: "%cc" );
   316 #endif
   317 	}
   320 	MP_CHECKOK(s_mp_pad(r, 3));
   321 	MP_DIGIT(r, 2) = r2;
   322 	MP_DIGIT(r, 1) = r1;
   323 	MP_DIGIT(r, 0) = r0;
   324 	MP_SIGN(r) = MP_ZPOS;
   325 	MP_USED(r) = 3;
   326 	s_mp_clamp(r);
   329   CLEANUP:
   330 	return res;
   331 }
   333 /* Compute the diff of 192 bit curves. Do the work in-line since the
   334  * number of words are so small, we don't want to overhead of mp function
   335  * calls.  Uses optimized modular reduction for p192. 
   336  */
   337 static mp_err
   338 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r, 
   339 			const GFMethod *meth)
   340 {
   341 	mp_err res = MP_OKAY;
   342 	mp_digit b0 = 0, b1 = 0, b2 = 0;
   343 	mp_digit r0 = 0, r1 = 0, r2 = 0;
   344 	mp_digit borrow;
   346 	switch(MP_USED(a)) {
   347 	case 3:
   348 		r2 = MP_DIGIT(a,2);
   349 	case 2:
   350 		r1 = MP_DIGIT(a,1);
   351 	case 1:
   352 		r0 = MP_DIGIT(a,0);
   353 	}
   355 	switch(MP_USED(b)) {
   356 	case 3:
   357 		b2 = MP_DIGIT(b,2);
   358 	case 2:
   359 		b1 = MP_DIGIT(b,1);
   360 	case 1:
   361 		b0 = MP_DIGIT(b,0);
   362 	}
   364 #ifndef MPI_AMD64_ADD
   365 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
   366 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
   367 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
   368 #else
   369 	__asm__ (
   370                 "xorq   %3,%3           \n\t"
   371                 "subq   %4,%0           \n\t"
   372                 "sbbq   %5,%1           \n\t"
   373                 "sbbq   %6,%2           \n\t"
   374                 "adcq   $0,%3           \n\t"
   375                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
   376                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0), 
   377 		  "1" (r1), "2" (r2)
   378                 : "%cc" );
   379 #endif
   381 	/* Do quick 'add' if we've gone under 0
   382 	 * (subtract the 2's complement of the curve field) */
   383 	if (borrow) {
   384 #ifndef MPI_AMD64_ADD
   385 		MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
   386 		MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
   387 		MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
   388 #else
   389 		__asm__ (
   390 			"subq   $1,%0           \n\t"
   391 			"sbbq   $1,%1           \n\t"
   392 			"sbbq   $0,%2           \n\t"
   393 			: "=r"(r0), "=r"(r1), "=r"(r2)
   394 			: "0" (r0), "1" (r1), "2" (r2)
   395 			: "%cc" );
   396 #endif
   397 	}
   399 	MP_CHECKOK(s_mp_pad(r, 3));
   400 	MP_DIGIT(r, 2) = r2;
   401 	MP_DIGIT(r, 1) = r1;
   402 	MP_DIGIT(r, 0) = r0;
   403 	MP_SIGN(r) = MP_ZPOS;
   404 	MP_USED(r) = 3;
   405 	s_mp_clamp(r);
   407   CLEANUP:
   408 	return res;
   409 }
   411 #endif
   413 /* Compute the square of polynomial a, reduce modulo p192. Store the
   414  * result in r.  r could be a.  Uses optimized modular reduction for p192. 
   415  */
   416 static mp_err
   417 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
   418 {
   419 	mp_err res = MP_OKAY;
   421 	MP_CHECKOK(mp_sqr(a, r));
   422 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
   423   CLEANUP:
   424 	return res;
   425 }
   427 /* Compute the product of two polynomials a and b, reduce modulo p192.
   428  * Store the result in r.  r could be a or b; a could be b.  Uses
   429  * optimized modular reduction for p192. */
   430 static mp_err
   431 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
   432 					const GFMethod *meth)
   433 {
   434 	mp_err res = MP_OKAY;
   436 	MP_CHECKOK(mp_mul(a, b, r));
   437 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
   438   CLEANUP:
   439 	return res;
   440 }
   442 /* Divides two field elements. If a is NULL, then returns the inverse of
   443  * b. */
   444 static mp_err
   445 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
   446 		   const GFMethod *meth)
   447 {
   448 	mp_err res = MP_OKAY;
   449 	mp_int t;
   451 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
   452 	if (a == NULL) {
   453 		return  mp_invmod(b, &meth->irr, r);
   454 	} else {
   455 		/* MPI doesn't support divmod, so we implement it using invmod and 
   456 		 * mulmod. */
   457 		MP_CHECKOK(mp_init(&t));
   458 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
   459 		MP_CHECKOK(mp_mul(a, &t, r));
   460 		MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
   461 	  CLEANUP:
   462 		mp_clear(&t);
   463 		return res;
   464 	}
   465 }
   467 /* Wire in fast field arithmetic and precomputation of base point for
   468  * named curves. */
   469 mp_err
   470 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
   471 {
   472 	if (name == ECCurve_NIST_P192) {
   473 		group->meth->field_mod = &ec_GFp_nistp192_mod;
   474 		group->meth->field_mul = &ec_GFp_nistp192_mul;
   475 		group->meth->field_sqr = &ec_GFp_nistp192_sqr;
   476 		group->meth->field_div = &ec_GFp_nistp192_div;
   477 #ifndef ECL_THIRTY_TWO_BIT
   478 		group->meth->field_add = &ec_GFp_nistp192_add;
   479 		group->meth->field_sub = &ec_GFp_nistp192_sub;
   480 #endif
   481 	}
   482 	return MP_OKAY;
   483 }

mercurial