|
1 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
4 |
|
5 #include "ecp.h" |
|
6 #include "mpi.h" |
|
7 #include "mplogic.h" |
|
8 #include "mpi-priv.h" |
|
9 |
|
10 #define ECP192_DIGITS ECL_CURVE_DIGITS(192) |
|
11 |
|
12 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses |
|
13 * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software |
|
14 * Implementation of the NIST Elliptic Curves over Prime Fields. */ |
|
15 static mp_err |
|
16 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth) |
|
17 { |
|
18 mp_err res = MP_OKAY; |
|
19 mp_size a_used = MP_USED(a); |
|
20 mp_digit r3; |
|
21 #ifndef MPI_AMD64_ADD |
|
22 mp_digit carry; |
|
23 #endif |
|
24 #ifdef ECL_THIRTY_TWO_BIT |
|
25 mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0; |
|
26 mp_digit r0a, r0b, r1a, r1b, r2a, r2b; |
|
27 #else |
|
28 mp_digit a5 = 0, a4 = 0, a3 = 0; |
|
29 mp_digit r0, r1, r2; |
|
30 #endif |
|
31 |
|
32 /* reduction not needed if a is not larger than field size */ |
|
33 if (a_used < ECP192_DIGITS) { |
|
34 if (a == r) { |
|
35 return MP_OKAY; |
|
36 } |
|
37 return mp_copy(a, r); |
|
38 } |
|
39 |
|
40 /* for polynomials larger than twice the field size, use regular |
|
41 * reduction */ |
|
42 if (a_used > ECP192_DIGITS*2) { |
|
43 MP_CHECKOK(mp_mod(a, &meth->irr, r)); |
|
44 } else { |
|
45 /* copy out upper words of a */ |
|
46 |
|
47 #ifdef ECL_THIRTY_TWO_BIT |
|
48 |
|
49 /* in all the math below, |
|
50 * nXb is most signifiant, nXa is least significant */ |
|
51 switch (a_used) { |
|
52 case 12: |
|
53 a5b = MP_DIGIT(a, 11); |
|
54 case 11: |
|
55 a5a = MP_DIGIT(a, 10); |
|
56 case 10: |
|
57 a4b = MP_DIGIT(a, 9); |
|
58 case 9: |
|
59 a4a = MP_DIGIT(a, 8); |
|
60 case 8: |
|
61 a3b = MP_DIGIT(a, 7); |
|
62 case 7: |
|
63 a3a = MP_DIGIT(a, 6); |
|
64 } |
|
65 |
|
66 |
|
67 r2b= MP_DIGIT(a, 5); |
|
68 r2a= MP_DIGIT(a, 4); |
|
69 r1b = MP_DIGIT(a, 3); |
|
70 r1a = MP_DIGIT(a, 2); |
|
71 r0b = MP_DIGIT(a, 1); |
|
72 r0a = MP_DIGIT(a, 0); |
|
73 |
|
74 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ |
|
75 MP_ADD_CARRY(r0a, a3a, r0a, 0, carry); |
|
76 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry); |
|
77 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry); |
|
78 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry); |
|
79 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry); |
|
80 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry); |
|
81 r3 = carry; carry = 0; |
|
82 MP_ADD_CARRY(r0a, a5a, r0a, 0, carry); |
|
83 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry); |
|
84 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry); |
|
85 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry); |
|
86 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry); |
|
87 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry); |
|
88 r3 += carry; |
|
89 MP_ADD_CARRY(r1a, a4a, r1a, 0, carry); |
|
90 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry); |
|
91 MP_ADD_CARRY(r2a, 0, r2a, carry, carry); |
|
92 MP_ADD_CARRY(r2b, 0, r2b, carry, carry); |
|
93 r3 += carry; |
|
94 |
|
95 /* reduce out the carry */ |
|
96 while (r3) { |
|
97 MP_ADD_CARRY(r0a, r3, r0a, 0, carry); |
|
98 MP_ADD_CARRY(r0b, 0, r0b, carry, carry); |
|
99 MP_ADD_CARRY(r1a, r3, r1a, carry, carry); |
|
100 MP_ADD_CARRY(r1b, 0, r1b, carry, carry); |
|
101 MP_ADD_CARRY(r2a, 0, r2a, carry, carry); |
|
102 MP_ADD_CARRY(r2b, 0, r2b, carry, carry); |
|
103 r3 = carry; |
|
104 } |
|
105 |
|
106 /* check for final reduction */ |
|
107 /* |
|
108 * our field is 0xffffffffffffffff, 0xfffffffffffffffe, |
|
109 * 0xffffffffffffffff. That means we can only be over and need |
|
110 * one more reduction |
|
111 * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) |
|
112 * and |
|
113 * r1 == 0xffffffffffffffffff or |
|
114 * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff |
|
115 * In all cases, we subtract the field (or add the 2's |
|
116 * complement value (1,1,0)). (r0, r1, r2) |
|
117 */ |
|
118 if (((r2b == 0xffffffff) && (r2a == 0xffffffff) |
|
119 && (r1b == 0xffffffff) ) && |
|
120 ((r1a == 0xffffffff) || |
|
121 (r1a == 0xfffffffe) && (r0a == 0xffffffff) && |
|
122 (r0b == 0xffffffff)) ) { |
|
123 /* do a quick subtract */ |
|
124 MP_ADD_CARRY(r0a, 1, r0a, 0, carry); |
|
125 MP_ADD_CARRY(r0b, carry, r0a, 0, carry); |
|
126 r1a += 1+carry; |
|
127 r1b = r2a = r2b = 0; |
|
128 } |
|
129 |
|
130 /* set the lower words of r */ |
|
131 if (a != r) { |
|
132 MP_CHECKOK(s_mp_pad(r, 6)); |
|
133 } |
|
134 MP_DIGIT(r, 5) = r2b; |
|
135 MP_DIGIT(r, 4) = r2a; |
|
136 MP_DIGIT(r, 3) = r1b; |
|
137 MP_DIGIT(r, 2) = r1a; |
|
138 MP_DIGIT(r, 1) = r0b; |
|
139 MP_DIGIT(r, 0) = r0a; |
|
140 MP_USED(r) = 6; |
|
141 #else |
|
142 switch (a_used) { |
|
143 case 6: |
|
144 a5 = MP_DIGIT(a, 5); |
|
145 case 5: |
|
146 a4 = MP_DIGIT(a, 4); |
|
147 case 4: |
|
148 a3 = MP_DIGIT(a, 3); |
|
149 } |
|
150 |
|
151 r2 = MP_DIGIT(a, 2); |
|
152 r1 = MP_DIGIT(a, 1); |
|
153 r0 = MP_DIGIT(a, 0); |
|
154 |
|
155 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ |
|
156 #ifndef MPI_AMD64_ADD |
|
157 MP_ADD_CARRY(r0, a3, r0, 0, carry); |
|
158 MP_ADD_CARRY(r1, a3, r1, carry, carry); |
|
159 MP_ADD_CARRY(r2, a4, r2, carry, carry); |
|
160 r3 = carry; |
|
161 MP_ADD_CARRY(r0, a5, r0, 0, carry); |
|
162 MP_ADD_CARRY(r1, a5, r1, carry, carry); |
|
163 MP_ADD_CARRY(r2, a5, r2, carry, carry); |
|
164 r3 += carry; |
|
165 MP_ADD_CARRY(r1, a4, r1, 0, carry); |
|
166 MP_ADD_CARRY(r2, 0, r2, carry, carry); |
|
167 r3 += carry; |
|
168 |
|
169 #else |
|
170 r2 = MP_DIGIT(a, 2); |
|
171 r1 = MP_DIGIT(a, 1); |
|
172 r0 = MP_DIGIT(a, 0); |
|
173 |
|
174 /* set the lower words of r */ |
|
175 __asm__ ( |
|
176 "xorq %3,%3 \n\t" |
|
177 "addq %4,%0 \n\t" |
|
178 "adcq %4,%1 \n\t" |
|
179 "adcq %5,%2 \n\t" |
|
180 "adcq $0,%3 \n\t" |
|
181 "addq %6,%0 \n\t" |
|
182 "adcq %6,%1 \n\t" |
|
183 "adcq %6,%2 \n\t" |
|
184 "adcq $0,%3 \n\t" |
|
185 "addq %5,%1 \n\t" |
|
186 "adcq $0,%2 \n\t" |
|
187 "adcq $0,%3 \n\t" |
|
188 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3), |
|
189 "=r"(a4), "=r"(a5) |
|
190 : "0" (r0), "1" (r1), "2" (r2), "3" (r3), |
|
191 "4" (a3), "5" (a4), "6"(a5) |
|
192 : "%cc" ); |
|
193 #endif |
|
194 |
|
195 /* reduce out the carry */ |
|
196 while (r3) { |
|
197 #ifndef MPI_AMD64_ADD |
|
198 MP_ADD_CARRY(r0, r3, r0, 0, carry); |
|
199 MP_ADD_CARRY(r1, r3, r1, carry, carry); |
|
200 MP_ADD_CARRY(r2, 0, r2, carry, carry); |
|
201 r3 = carry; |
|
202 #else |
|
203 a3=r3; |
|
204 __asm__ ( |
|
205 "xorq %3,%3 \n\t" |
|
206 "addq %4,%0 \n\t" |
|
207 "adcq %4,%1 \n\t" |
|
208 "adcq $0,%2 \n\t" |
|
209 "adcq $0,%3 \n\t" |
|
210 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3) |
|
211 : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3) |
|
212 : "%cc" ); |
|
213 #endif |
|
214 } |
|
215 |
|
216 /* check for final reduction */ |
|
217 /* |
|
218 * our field is 0xffffffffffffffff, 0xfffffffffffffffe, |
|
219 * 0xffffffffffffffff. That means we can only be over and need |
|
220 * one more reduction |
|
221 * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) |
|
222 * and |
|
223 * r1 == 0xffffffffffffffffff or |
|
224 * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff |
|
225 * In all cases, we subtract the field (or add the 2's |
|
226 * complement value (1,1,0)). (r0, r1, r2) |
|
227 */ |
|
228 if (r3 || ((r2 == MP_DIGIT_MAX) && |
|
229 ((r1 == MP_DIGIT_MAX) || |
|
230 ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { |
|
231 /* do a quick subtract */ |
|
232 MP_ADD_CARRY(r0, 1, r0, 0, carry); |
|
233 r1 += 1+carry; |
|
234 r2 = 0; |
|
235 } |
|
236 /* set the lower words of r */ |
|
237 if (a != r) { |
|
238 MP_CHECKOK(s_mp_pad(r, 3)); |
|
239 } |
|
240 MP_DIGIT(r, 2) = r2; |
|
241 MP_DIGIT(r, 1) = r1; |
|
242 MP_DIGIT(r, 0) = r0; |
|
243 MP_USED(r) = 3; |
|
244 #endif |
|
245 } |
|
246 s_mp_clamp(r); |
|
247 CLEANUP: |
|
248 return res; |
|
249 } |
|
250 |
|
251 #ifndef ECL_THIRTY_TWO_BIT |
|
252 /* Compute the sum of 192 bit curves. Do the work in-line since the |
|
253 * number of words are so small, we don't want to overhead of mp function |
|
254 * calls. Uses optimized modular reduction for p192. |
|
255 */ |
|
256 static mp_err |
|
257 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r, |
|
258 const GFMethod *meth) |
|
259 { |
|
260 mp_err res = MP_OKAY; |
|
261 mp_digit a0 = 0, a1 = 0, a2 = 0; |
|
262 mp_digit r0 = 0, r1 = 0, r2 = 0; |
|
263 mp_digit carry; |
|
264 |
|
265 switch(MP_USED(a)) { |
|
266 case 3: |
|
267 a2 = MP_DIGIT(a,2); |
|
268 case 2: |
|
269 a1 = MP_DIGIT(a,1); |
|
270 case 1: |
|
271 a0 = MP_DIGIT(a,0); |
|
272 } |
|
273 switch(MP_USED(b)) { |
|
274 case 3: |
|
275 r2 = MP_DIGIT(b,2); |
|
276 case 2: |
|
277 r1 = MP_DIGIT(b,1); |
|
278 case 1: |
|
279 r0 = MP_DIGIT(b,0); |
|
280 } |
|
281 |
|
282 #ifndef MPI_AMD64_ADD |
|
283 MP_ADD_CARRY(a0, r0, r0, 0, carry); |
|
284 MP_ADD_CARRY(a1, r1, r1, carry, carry); |
|
285 MP_ADD_CARRY(a2, r2, r2, carry, carry); |
|
286 #else |
|
287 __asm__ ( |
|
288 "xorq %3,%3 \n\t" |
|
289 "addq %4,%0 \n\t" |
|
290 "adcq %5,%1 \n\t" |
|
291 "adcq %6,%2 \n\t" |
|
292 "adcq $0,%3 \n\t" |
|
293 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry) |
|
294 : "r" (a0), "r" (a1), "r" (a2), "0" (r0), |
|
295 "1" (r1), "2" (r2) |
|
296 : "%cc" ); |
|
297 #endif |
|
298 |
|
299 /* Do quick 'subract' if we've gone over |
|
300 * (add the 2's complement of the curve field) */ |
|
301 if (carry || ((r2 == MP_DIGIT_MAX) && |
|
302 ((r1 == MP_DIGIT_MAX) || |
|
303 ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { |
|
304 #ifndef MPI_AMD64_ADD |
|
305 MP_ADD_CARRY(r0, 1, r0, 0, carry); |
|
306 MP_ADD_CARRY(r1, 1, r1, carry, carry); |
|
307 MP_ADD_CARRY(r2, 0, r2, carry, carry); |
|
308 #else |
|
309 __asm__ ( |
|
310 "addq $1,%0 \n\t" |
|
311 "adcq $1,%1 \n\t" |
|
312 "adcq $0,%2 \n\t" |
|
313 : "=r"(r0), "=r"(r1), "=r"(r2) |
|
314 : "0" (r0), "1" (r1), "2" (r2) |
|
315 : "%cc" ); |
|
316 #endif |
|
317 } |
|
318 |
|
319 |
|
320 MP_CHECKOK(s_mp_pad(r, 3)); |
|
321 MP_DIGIT(r, 2) = r2; |
|
322 MP_DIGIT(r, 1) = r1; |
|
323 MP_DIGIT(r, 0) = r0; |
|
324 MP_SIGN(r) = MP_ZPOS; |
|
325 MP_USED(r) = 3; |
|
326 s_mp_clamp(r); |
|
327 |
|
328 |
|
329 CLEANUP: |
|
330 return res; |
|
331 } |
|
332 |
|
333 /* Compute the diff of 192 bit curves. Do the work in-line since the |
|
334 * number of words are so small, we don't want to overhead of mp function |
|
335 * calls. Uses optimized modular reduction for p192. |
|
336 */ |
|
337 static mp_err |
|
338 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r, |
|
339 const GFMethod *meth) |
|
340 { |
|
341 mp_err res = MP_OKAY; |
|
342 mp_digit b0 = 0, b1 = 0, b2 = 0; |
|
343 mp_digit r0 = 0, r1 = 0, r2 = 0; |
|
344 mp_digit borrow; |
|
345 |
|
346 switch(MP_USED(a)) { |
|
347 case 3: |
|
348 r2 = MP_DIGIT(a,2); |
|
349 case 2: |
|
350 r1 = MP_DIGIT(a,1); |
|
351 case 1: |
|
352 r0 = MP_DIGIT(a,0); |
|
353 } |
|
354 |
|
355 switch(MP_USED(b)) { |
|
356 case 3: |
|
357 b2 = MP_DIGIT(b,2); |
|
358 case 2: |
|
359 b1 = MP_DIGIT(b,1); |
|
360 case 1: |
|
361 b0 = MP_DIGIT(b,0); |
|
362 } |
|
363 |
|
364 #ifndef MPI_AMD64_ADD |
|
365 MP_SUB_BORROW(r0, b0, r0, 0, borrow); |
|
366 MP_SUB_BORROW(r1, b1, r1, borrow, borrow); |
|
367 MP_SUB_BORROW(r2, b2, r2, borrow, borrow); |
|
368 #else |
|
369 __asm__ ( |
|
370 "xorq %3,%3 \n\t" |
|
371 "subq %4,%0 \n\t" |
|
372 "sbbq %5,%1 \n\t" |
|
373 "sbbq %6,%2 \n\t" |
|
374 "adcq $0,%3 \n\t" |
|
375 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow) |
|
376 : "r" (b0), "r" (b1), "r" (b2), "0" (r0), |
|
377 "1" (r1), "2" (r2) |
|
378 : "%cc" ); |
|
379 #endif |
|
380 |
|
381 /* Do quick 'add' if we've gone under 0 |
|
382 * (subtract the 2's complement of the curve field) */ |
|
383 if (borrow) { |
|
384 #ifndef MPI_AMD64_ADD |
|
385 MP_SUB_BORROW(r0, 1, r0, 0, borrow); |
|
386 MP_SUB_BORROW(r1, 1, r1, borrow, borrow); |
|
387 MP_SUB_BORROW(r2, 0, r2, borrow, borrow); |
|
388 #else |
|
389 __asm__ ( |
|
390 "subq $1,%0 \n\t" |
|
391 "sbbq $1,%1 \n\t" |
|
392 "sbbq $0,%2 \n\t" |
|
393 : "=r"(r0), "=r"(r1), "=r"(r2) |
|
394 : "0" (r0), "1" (r1), "2" (r2) |
|
395 : "%cc" ); |
|
396 #endif |
|
397 } |
|
398 |
|
399 MP_CHECKOK(s_mp_pad(r, 3)); |
|
400 MP_DIGIT(r, 2) = r2; |
|
401 MP_DIGIT(r, 1) = r1; |
|
402 MP_DIGIT(r, 0) = r0; |
|
403 MP_SIGN(r) = MP_ZPOS; |
|
404 MP_USED(r) = 3; |
|
405 s_mp_clamp(r); |
|
406 |
|
407 CLEANUP: |
|
408 return res; |
|
409 } |
|
410 |
|
411 #endif |
|
412 |
|
413 /* Compute the square of polynomial a, reduce modulo p192. Store the |
|
414 * result in r. r could be a. Uses optimized modular reduction for p192. |
|
415 */ |
|
416 static mp_err |
|
417 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) |
|
418 { |
|
419 mp_err res = MP_OKAY; |
|
420 |
|
421 MP_CHECKOK(mp_sqr(a, r)); |
|
422 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); |
|
423 CLEANUP: |
|
424 return res; |
|
425 } |
|
426 |
|
427 /* Compute the product of two polynomials a and b, reduce modulo p192. |
|
428 * Store the result in r. r could be a or b; a could be b. Uses |
|
429 * optimized modular reduction for p192. */ |
|
430 static mp_err |
|
431 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r, |
|
432 const GFMethod *meth) |
|
433 { |
|
434 mp_err res = MP_OKAY; |
|
435 |
|
436 MP_CHECKOK(mp_mul(a, b, r)); |
|
437 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); |
|
438 CLEANUP: |
|
439 return res; |
|
440 } |
|
441 |
|
442 /* Divides two field elements. If a is NULL, then returns the inverse of |
|
443 * b. */ |
|
444 static mp_err |
|
445 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r, |
|
446 const GFMethod *meth) |
|
447 { |
|
448 mp_err res = MP_OKAY; |
|
449 mp_int t; |
|
450 |
|
451 /* If a is NULL, then return the inverse of b, otherwise return a/b. */ |
|
452 if (a == NULL) { |
|
453 return mp_invmod(b, &meth->irr, r); |
|
454 } else { |
|
455 /* MPI doesn't support divmod, so we implement it using invmod and |
|
456 * mulmod. */ |
|
457 MP_CHECKOK(mp_init(&t)); |
|
458 MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); |
|
459 MP_CHECKOK(mp_mul(a, &t, r)); |
|
460 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); |
|
461 CLEANUP: |
|
462 mp_clear(&t); |
|
463 return res; |
|
464 } |
|
465 } |
|
466 |
|
467 /* Wire in fast field arithmetic and precomputation of base point for |
|
468 * named curves. */ |
|
469 mp_err |
|
470 ec_group_set_gfp192(ECGroup *group, ECCurveName name) |
|
471 { |
|
472 if (name == ECCurve_NIST_P192) { |
|
473 group->meth->field_mod = &ec_GFp_nistp192_mod; |
|
474 group->meth->field_mul = &ec_GFp_nistp192_mul; |
|
475 group->meth->field_sqr = &ec_GFp_nistp192_sqr; |
|
476 group->meth->field_div = &ec_GFp_nistp192_div; |
|
477 #ifndef ECL_THIRTY_TWO_BIT |
|
478 group->meth->field_add = &ec_GFp_nistp192_add; |
|
479 group->meth->field_sub = &ec_GFp_nistp192_sub; |
|
480 #endif |
|
481 } |
|
482 return MP_OKAY; |
|
483 } |