/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
|
/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1.
 * a can be r. Uses algorithm 2.29 from Hankerson, Menezes, Vanstone.
 * Guide to Elliptic Curve Cryptography.
 *
 * Computes r = a mod p256.
 *   a    - input value, at most 512 bits (i.e. a product of two field
 *          elements); larger inputs fall back to general mp_mod.
 *   r    - output; may alias a.
 *   meth - field methods; meth->irr is the prime p256 itself.
 * Returns MP_OKAY on success or an mpi error code.
 *
 * The digit-level "sum"/"diff" sequences below are the s-terms of the
 * NIST fast-reduction identity for p256 (HMV Alg. 2.29); the grouping
 * is rearranged for fewer passes. NOTE(review): term grouping differs
 * from the textbook presentation — verify against Alg. 2.29 before
 * modifying any line; statement order is significant. */
static mp_err
ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_size a_used = MP_USED(a);
    int a_bits = mpl_significant_bits(a);
    mp_digit carry;

#ifdef ECL_THIRTY_TWO_BIT
    /* 32-bit digits: a is up to 16 digits; high half a8..a15 defaults
     * to zero for shorter inputs. */
    mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
    mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
    int r8; /* must be a signed value ! (accumulates over/underflow) */
#else
    /* 64-bit digits: a is up to 8 digits; a4..a7 default to zero.
     * aXh/aXl are the high/low 32-bit halves shifted into position. */
    mp_digit a4=0, a5=0, a6=0, a7=0;
    mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
    mp_digit r0, r1, r2, r3;
    int r4; /* must be a signed value ! (accumulates over/underflow) */
#endif
    /* for polynomials larger than twice the field size
     * use regular reduction */
    /* Input already below 2^256: nothing to reduce. (Values in
     * [p256, 2^256) still pass through the fast path below via the
     * final-reduction check.) */
    if (a_bits < 256) {
        if (a == r) return MP_OKAY;
        return mp_copy(a,r);
    }
    if (a_bits > 512) {
        /* Wider than a field-element product: general reduction. */
        MP_CHECKOK(mp_mod(a, &meth->irr, r));
    } else {

#ifdef ECL_THIRTY_TWO_BIT
        /* Load the high digits that are actually present; the switch
         * intentionally falls through so all digits >= a_used stay 0. */
        switch (a_used) {
        case 16:
            a15 = MP_DIGIT(a,15);
            /* fall through */
        case 15:
            a14 = MP_DIGIT(a,14);
            /* fall through */
        case 14:
            a13 = MP_DIGIT(a,13);
            /* fall through */
        case 13:
            a12 = MP_DIGIT(a,12);
            /* fall through */
        case 12:
            a11 = MP_DIGIT(a,11);
            /* fall through */
        case 11:
            a10 = MP_DIGIT(a,10);
            /* fall through */
        case 10:
            a9 = MP_DIGIT(a,9);
            /* fall through */
        case 9:
            a8 = MP_DIGIT(a,8);
        }

        /* Low 256 bits of a form the initial accumulator r0..r7. */
        r0 = MP_DIGIT(a,0);
        r1 = MP_DIGIT(a,1);
        r2 = MP_DIGIT(a,2);
        r3 = MP_DIGIT(a,3);
        r4 = MP_DIGIT(a,4);
        r5 = MP_DIGIT(a,5);
        r6 = MP_DIGIT(a,6);
        r7 = MP_DIGIT(a,7);

        /* sum 1 (added twice: the 2*s2 term) */
        MP_ADD_CARRY(r3, a11, r3, 0, carry);
        MP_ADD_CARRY(r4, a12, r4, carry, carry);
        MP_ADD_CARRY(r5, a13, r5, carry, carry);
        MP_ADD_CARRY(r6, a14, r6, carry, carry);
        MP_ADD_CARRY(r7, a15, r7, carry, carry);
        r8 = carry;
        MP_ADD_CARRY(r3, a11, r3, 0, carry);
        MP_ADD_CARRY(r4, a12, r4, carry, carry);
        MP_ADD_CARRY(r5, a13, r5, carry, carry);
        MP_ADD_CARRY(r6, a14, r6, carry, carry);
        MP_ADD_CARRY(r7, a15, r7, carry, carry);
        r8 += carry;
        /* sum 2 (first of two additions; the second is merged below) */
        MP_ADD_CARRY(r3, a12, r3, 0, carry);
        MP_ADD_CARRY(r4, a13, r4, carry, carry);
        MP_ADD_CARRY(r5, a14, r5, carry, carry);
        MP_ADD_CARRY(r6, a15, r6, carry, carry);
        MP_ADD_CARRY(r7, 0, r7, carry, carry);
        r8 += carry;
        /* combine last bottom of sum 3 with second sum 2 */
        MP_ADD_CARRY(r0, a8, r0, 0, carry);
        MP_ADD_CARRY(r1, a9, r1, carry, carry);
        MP_ADD_CARRY(r2, a10, r2, carry, carry);
        MP_ADD_CARRY(r3, a12, r3, carry, carry);
        MP_ADD_CARRY(r4, a13, r4, carry, carry);
        MP_ADD_CARRY(r5, a14, r5, carry, carry);
        MP_ADD_CARRY(r6, a15, r6, carry, carry);
        MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
        r8 += carry;
        /* sum 3 (rest of it)*/
        MP_ADD_CARRY(r6, a14, r6, 0, carry);
        MP_ADD_CARRY(r7, 0, r7, carry, carry);
        r8 += carry;
        /* sum 4 (rest of it)*/
        MP_ADD_CARRY(r0, a9, r0, 0, carry);
        MP_ADD_CARRY(r1, a10, r1, carry, carry);
        MP_ADD_CARRY(r2, a11, r2, carry, carry);
        MP_ADD_CARRY(r3, a13, r3, carry, carry);
        MP_ADD_CARRY(r4, a14, r4, carry, carry);
        MP_ADD_CARRY(r5, a15, r5, carry, carry);
        MP_ADD_CARRY(r6, a13, r6, carry, carry);
        MP_ADD_CARRY(r7, a8, r7, carry, carry);
        r8 += carry;
        /* diff 5 */
        MP_SUB_BORROW(r0, a11, r0, 0, carry);
        MP_SUB_BORROW(r1, a12, r1, carry, carry);
        MP_SUB_BORROW(r2, a13, r2, carry, carry);
        MP_SUB_BORROW(r3, 0, r3, carry, carry);
        MP_SUB_BORROW(r4, 0, r4, carry, carry);
        MP_SUB_BORROW(r5, 0, r5, carry, carry);
        MP_SUB_BORROW(r6, a8, r6, carry, carry);
        MP_SUB_BORROW(r7, a10, r7, carry, carry);
        r8 -= carry;
        /* diff 6 */
        MP_SUB_BORROW(r0, a12, r0, 0, carry);
        MP_SUB_BORROW(r1, a13, r1, carry, carry);
        MP_SUB_BORROW(r2, a14, r2, carry, carry);
        MP_SUB_BORROW(r3, a15, r3, carry, carry);
        MP_SUB_BORROW(r4, 0, r4, carry, carry);
        MP_SUB_BORROW(r5, 0, r5, carry, carry);
        MP_SUB_BORROW(r6, a9, r6, carry, carry);
        MP_SUB_BORROW(r7, a11, r7, carry, carry);
        r8 -= carry;
        /* diff 7 */
        MP_SUB_BORROW(r0, a13, r0, 0, carry);
        MP_SUB_BORROW(r1, a14, r1, carry, carry);
        MP_SUB_BORROW(r2, a15, r2, carry, carry);
        MP_SUB_BORROW(r3, a8, r3, carry, carry);
        MP_SUB_BORROW(r4, a9, r4, carry, carry);
        MP_SUB_BORROW(r5, a10, r5, carry, carry);
        MP_SUB_BORROW(r6, 0, r6, carry, carry);
        MP_SUB_BORROW(r7, a12, r7, carry, carry);
        r8 -= carry;
        /* diff 8 */
        MP_SUB_BORROW(r0, a14, r0, 0, carry);
        MP_SUB_BORROW(r1, a15, r1, carry, carry);
        MP_SUB_BORROW(r2, 0, r2, carry, carry);
        MP_SUB_BORROW(r3, a9, r3, carry, carry);
        MP_SUB_BORROW(r4, a10, r4, carry, carry);
        MP_SUB_BORROW(r5, a11, r5, carry, carry);
        MP_SUB_BORROW(r6, 0, r6, carry, carry);
        MP_SUB_BORROW(r7, a13, r7, carry, carry);
        r8 -= carry;

        /* reduce the overflows */
        /* Fold the positive overflow digit back in by adding
         * r8 * (2^224 - 2^192 - 2^96 + 1), i.e. r8 * (2^256 mod p256),
         * written out in two's-complement digit form — presumably why
         * r4/r5 get MP_DIGIT_MAX (borrow-propagation digits of the
         * negative part); NOTE(review): confirm constant layout before
         * changing. */
        while (r8 > 0) {
            mp_digit r8_d = r8;
            MP_ADD_CARRY(r0, r8_d, r0, 0, carry);
            MP_ADD_CARRY(r1, 0, r1, carry, carry);
            MP_ADD_CARRY(r2, 0, r2, carry, carry);
            MP_ADD_CARRY(r3, 0-r8_d, r3, carry, carry);
            MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
            MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
            MP_ADD_CARRY(r6, 0-(r8_d+1), r6, carry, carry);
            MP_ADD_CARRY(r7, (r8_d-1), r7, carry, carry);
            r8 = carry;
        }

        /* reduce the underflows (same constant, subtracted) */
        while (r8 < 0) {
            mp_digit r8_d = -r8;
            MP_SUB_BORROW(r0, r8_d, r0, 0, carry);
            MP_SUB_BORROW(r1, 0, r1, carry, carry);
            MP_SUB_BORROW(r2, 0, r2, carry, carry);
            MP_SUB_BORROW(r3, 0-r8_d, r3, carry, carry);
            MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
            MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
            MP_SUB_BORROW(r6, 0-(r8_d+1), r6, carry, carry);
            MP_SUB_BORROW(r7, (r8_d-1), r7, carry, carry);
            r8 = 0-carry;
        }
        /* Ensure r has room for 8 digits before storing (when aliased,
         * a already has at least that many). */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r,8));
        }
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 8;

        MP_DIGIT(r,7) = r7;
        MP_DIGIT(r,6) = r6;
        MP_DIGIT(r,5) = r5;
        MP_DIGIT(r,4) = r4;
        MP_DIGIT(r,3) = r3;
        MP_DIGIT(r,2) = r2;
        MP_DIGIT(r,1) = r1;
        MP_DIGIT(r,0) = r0;

        /* final reduction if necessary */
        /* Digit-wise test for r >= p256, where p256's 32-bit digits are
         * FFFFFFFF 00000001 00000000 00000000 00000000 FFFFFFFF
         * FFFFFFFF FFFFFFFF (most significant first). */
        if ((r7 == MP_DIGIT_MAX) &&
            ((r6 > 1) || ((r6 == 1) &&
            (r5 || r4 || r3 ||
            ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
            && (r0 == MP_DIGIT_MAX)))))) {
            MP_CHECKOK(mp_sub(r, &meth->irr, r));
        }

        s_mp_clamp(r);
#else
        /* Load the high digits that are actually present; the switch
         * intentionally falls through so all digits >= a_used stay 0. */
        switch (a_used) {
        case 8:
            a7 = MP_DIGIT(a,7);
            /* fall through */
        case 7:
            a6 = MP_DIGIT(a,6);
            /* fall through */
        case 6:
            a5 = MP_DIGIT(a,5);
            /* fall through */
        case 5:
            a4 = MP_DIGIT(a,4);
        }
        /* Split each high digit into shifted 32-bit halves: aXl has the
         * low half in the top 32 bits, aXh has the high half in the
         * bottom 32 bits. */
        a7l = a7 << 32;
        a7h = a7 >> 32;
        a6l = a6 << 32;
        a6h = a6 >> 32;
        a5l = a5 << 32;
        a5h = a5 >> 32;
        a4l = a4 << 32;
        a4h = a4 >> 32;
        r3 = MP_DIGIT(a,3);
        r2 = MP_DIGIT(a,2);
        r1 = MP_DIGIT(a,1);
        r0 = MP_DIGIT(a,0);

        /* sum 1 (added twice: the 2*s2 term) */
        MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
        MP_ADD_CARRY(r2, a6, r2, carry, carry);
        MP_ADD_CARRY(r3, a7, r3, carry, carry);
        r4 = carry;
        MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
        MP_ADD_CARRY(r2, a6, r2, carry, carry);
        MP_ADD_CARRY(r3, a7, r3, carry, carry);
        r4 += carry;
        /* sum 2 (added twice: the 2*s3 term) */
        MP_ADD_CARRY(r1, a6l, r1, 0, carry);
        MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
        MP_ADD_CARRY(r3, a7h, r3, carry, carry);
        r4 += carry;
        MP_ADD_CARRY(r1, a6l, r1, 0, carry);
        MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
        MP_ADD_CARRY(r3, a7h, r3, carry, carry);
        r4 += carry;

        /* sum 3 */
        MP_ADD_CARRY(r0, a4, r0, 0, carry);
        MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
        MP_ADD_CARRY(r2, 0, r2, carry, carry);
        MP_ADD_CARRY(r3, a7, r3, carry, carry);
        r4 += carry;
        /* sum 4 */
        MP_ADD_CARRY(r0, a4h | a5l, r0, 0, carry);
        MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
        MP_ADD_CARRY(r2, a7, r2, carry, carry);
        MP_ADD_CARRY(r3, a6h | a4l, r3, carry, carry);
        r4 += carry;
        /* diff 5 */
        MP_SUB_BORROW(r0, a5h | a6l, r0, 0, carry);
        MP_SUB_BORROW(r1, a6h, r1, carry, carry);
        MP_SUB_BORROW(r2, 0, r2, carry, carry);
        MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
        r4 -= carry;
        /* diff 6 */
        MP_SUB_BORROW(r0, a6, r0, 0, carry);
        MP_SUB_BORROW(r1, a7, r1, carry, carry);
        MP_SUB_BORROW(r2, 0, r2, carry, carry);
        MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
        r4 -= carry;
        /* diff 7 */
        MP_SUB_BORROW(r0, a6h|a7l, r0, 0, carry);
        MP_SUB_BORROW(r1, a7h|a4l, r1, carry, carry);
        MP_SUB_BORROW(r2, a4h|a5l, r2, carry, carry);
        MP_SUB_BORROW(r3, a6l, r3, carry, carry);
        r4 -= carry;
        /* diff 8 */
        MP_SUB_BORROW(r0, a7, r0, 0, carry);
        MP_SUB_BORROW(r1, a4h<<32, r1, carry, carry);
        MP_SUB_BORROW(r2, a5, r2, carry, carry);
        MP_SUB_BORROW(r3, a6h<<32, r3, carry, carry);
        r4 -= carry;

        /* reduce the overflows */
        /* Fold the positive overflow digit back in by adding
         * r4 * (2^256 mod p256) expressed in 64-bit two's-complement
         * digits; NOTE(review): confirm constant layout before
         * changing. */
        while (r4 > 0) {
            mp_digit r4_long = r4;
            mp_digit r4l = (r4_long << 32);
            MP_ADD_CARRY(r0, r4_long, r0, 0, carry);
            MP_ADD_CARRY(r1, 0-r4l, r1, carry, carry);
            MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
            MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
            r4 = carry;
        }

        /* reduce the underflows (same constant, subtracted) */
        while (r4 < 0) {
            mp_digit r4_long = -r4;
            mp_digit r4l = (r4_long << 32);
            MP_SUB_BORROW(r0, r4_long, r0, 0, carry);
            MP_SUB_BORROW(r1, 0-r4l, r1, carry, carry);
            MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
            MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
            r4 = 0-carry;
        }

        /* Ensure r has room for 4 digits before storing (when aliased,
         * a already has at least that many). */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r,4));
        }
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 4;

        MP_DIGIT(r,3) = r3;
        MP_DIGIT(r,2) = r2;
        MP_DIGIT(r,1) = r1;
        MP_DIGIT(r,0) = r0;

        /* final reduction if necessary */
        /* Digit-wise test for r >= p256, whose 64-bit digits are
         * FFFFFFFF00000001 0000000000000000 00000000FFFFFFFF
         * FFFFFFFFFFFFFFFF (most significant first). */
        if ((r3 > 0xFFFFFFFF00000001ULL) ||
            ((r3 == 0xFFFFFFFF00000001ULL) &&
            (r2 || (r1 >> 32)||
            (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
            /* very rare, just use mp_sub */
            MP_CHECKOK(mp_sub(r, &meth->irr, r));
        }

        s_mp_clamp(r);
#endif
    }

  CLEANUP:
    return res;
}
|
336 |
|
337 /* Compute the square of polynomial a, reduce modulo p256. Store the |
|
338 * result in r. r could be a. Uses optimized modular reduction for p256. |
|
339 */ |
|
340 static mp_err |
|
341 ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) |
|
342 { |
|
343 mp_err res = MP_OKAY; |
|
344 |
|
345 MP_CHECKOK(mp_sqr(a, r)); |
|
346 MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); |
|
347 CLEANUP: |
|
348 return res; |
|
349 } |
|
350 |
|
351 /* Compute the product of two polynomials a and b, reduce modulo p256. |
|
352 * Store the result in r. r could be a or b; a could be b. Uses |
|
353 * optimized modular reduction for p256. */ |
|
354 static mp_err |
|
355 ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r, |
|
356 const GFMethod *meth) |
|
357 { |
|
358 mp_err res = MP_OKAY; |
|
359 |
|
360 MP_CHECKOK(mp_mul(a, b, r)); |
|
361 MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); |
|
362 CLEANUP: |
|
363 return res; |
|
364 } |
|
365 |
|
366 /* Wire in fast field arithmetic and precomputation of base point for |
|
367 * named curves. */ |
|
368 mp_err |
|
369 ec_group_set_gfp256(ECGroup *group, ECCurveName name) |
|
370 { |
|
371 if (name == ECCurve_NIST_P256) { |
|
372 group->meth->field_mod = &ec_GFp_nistp256_mod; |
|
373 group->meth->field_mul = &ec_GFp_nistp256_mul; |
|
374 group->meth->field_sqr = &ec_GFp_nistp256_sqr; |
|
375 } |
|
376 return MP_OKAY; |
|
377 } |