Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #include <regdef.h>
5 .set noreorder
6 .set noat
8 .section .text, 1, 0x00000006, 4, 4
9 .text:
10 .section .text
/*
 * void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
 *                      mp_digit *c)
 *
 * c += a * b.  Multiplies the a_len 32-bit digits at a by the single digit
 * b, adds each product (plus the running carry) into the matching digit of
 * c, and finally stores the carry into c[a_len] (a plain store, not an
 * add).  Returns immediately when a_len == 0.
 *
 * In:    a0 = a, a1 = a_len, a2 = b, a3 = c
 * Temps: a4,a5 = digits of a      a6,a7 = digits of c
 *        t0,t1 = 64-bit sums w0,w1    t2 = carry cy    t3 = loop test
 *
 * The file is assembled with .set noreorder: the instruction following
 * every branch or jump is its delay slot and executes either way.
 * Digits of a are loaded only once they are known to exist, so the routine
 * never reads past a[a_len - 1].
 * Every mflo stays at least two instructions ahead of the next dmultu
 * (presumably for the HI/LO hazard on older MIPS cores - keep it that way
 * when editing).
 * NOTE(review): pointers are advanced with 32-bit addiu, which presumes
 * 32-bit pointers (e.g. the n32 ABI) - confirm before building for n64.
 */
	.ent	s_mpv_mul_d_add
	.globl	s_mpv_mul_d_add

s_mpv_mul_d_add:
	beq	a1,zero,.L.1		/* if (!a_len) return; */
	move	t2,zero			/* (delay slot) cy = 0 */
	dsll32	a2,a2,0			/* b may arrive with junk or sign bits */
	dsrl32	a2,a2,0			/* above bit 31: keep the low 32 bits only */
	lwu	a4,0(a0)		/* a0 = a[0] */
	dmultu	a2,a4			/* w0 = (mp_word)b * a0 */
	addiu	a1,a1,-1		/* --a_len */
	beq	a1,zero,.L.2		/* that was the only digit */
	sltiu	t3,a1,2			/* (delay slot) t3 = (a_len < 2) */
	bne	t3,zero,.L.3		/* one digit left: straight to the tail */
	nop				/* (delay slot) */

	/* while (a_len >= 2): two digits per pass; LO holds b * a[0] on entry */
.L.4:
	lwu	a5,4(a0)		/* a1 = a[1] (exists: a_len >= 2 here) */
	addiu	a1,a1,-2		/* a_len -= 2 */
	lwu	a6,0(a3)		/* c0 = c[0] */
	mflo	t0			/* w0 = b * a0 */
	daddu	t0,t0,t2		/* w0 += cy */
	daddu	t0,t0,a6		/* w0 += c0 */
	dmultu	a2,a5			/* w1 = (mp_word)b * a1 */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	lwu	a4,8(a0)		/* a0 = a[2] (exists: a_len was >= 2) */
	addiu	a0,a0,8			/* a += 2 */
	lwu	a7,4(a3)		/* c1 = c[1] */
	mflo	t1			/* w1 = b * a1 */
	daddu	t1,t1,t2		/* w1 += cy */
	daddu	t1,t1,a7		/* w1 += c1 */
	dmultu	a2,a4			/* w0 = (mp_word)b * a0, for the next step */
	dsrl32	t2,t1,0			/* cy = CARRYOUT(w1) */
	sw	t1,4(a3)		/* c[1] = ACCUM(w1) */
	sltiu	t3,a1,2			/* t3 = (a_len < 2) */
	beq	t3,zero,.L.4		/* loop while a_len >= 2 */
	addiu	a3,a3,8			/* (delay slot) c += 2 */

	/* tail: LO holds b * a[0]; a_len is 0 or 1 digits beyond a[0] */
.L.3:
	lwu	a6,0(a3)		/* c0 = c[0] */
	mflo	t0			/* w0 = b * a0 */
	beq	a1,zero,.L.5		/* no further digit */
	daddu	t0,t0,t2		/* (delay slot) w0 += cy */

	/* one more digit after a[0] */
	lwu	a5,4(a0)		/* a1 = a[1], loaded only now that it exists */
	dmultu	a2,a5			/* w1 = (mp_word)b * a1 */
	daddu	t0,t0,a6		/* w0 += c0 */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	lwu	a7,4(a3)		/* c1 = c[1] */
	mflo	t1			/* w1 = b * a1 */
	daddu	t1,t1,t2		/* w1 += cy */
	daddu	t1,t1,a7		/* w1 += c1 */
	sw	t1,4(a3)		/* c[1] = ACCUM(w1) */
	dsrl32	t2,t1,0			/* cy = CARRYOUT(w1) */
	b	.L.6
	addiu	a3,a3,4			/* (delay slot) c += 1, so 4(a3) is c[2] */

	/* a[0] was the last digit */
.L.5:
	daddu	t0,t0,a6		/* w0 += c0 */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	b	.L.6
	dsrl32	t2,t0,0			/* (delay slot) cy = CARRYOUT(w0) */

	/* a_len was exactly 1: cy is still 0 */
.L.2:
	lwu	a6,0(a3)		/* c0 = c[0] */
	mflo	t0			/* w0 = b * a0 */
	daddu	t0,t0,a6		/* w0 += c0 */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */

.L.6:
	jr	ra
	sw	t2,4(a3)		/* (delay slot) c[1] = cy */

.L.1:
	jr	ra
	nop
	.end	s_mpv_mul_d_add
/*
 * void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
 *                           mp_digit *c)
 *
 * c += a * b, with carry propagation.  Multiplies the a_len 32-bit digits
 * at a by the single digit b and adds each product (plus the running
 * carry) into the matching digit of c.  The carry left after the last
 * product is then added into c[a_len], c[a_len + 1], ... until it becomes
 * zero.  Returns immediately when a_len == 0.
 * The propagation walk is not bounded here; the caller presumably
 * guarantees c is long enough to absorb the carry - verify at call sites.
 *
 * In:    a0 = a, a1 = a_len, a2 = b, a3 = c
 * Temps: a4,a5 = digits of a      a6,a7 = digits of c
 *        t0,t1 = 64-bit sums w0,w1    t2 = carry cy    t3 = loop test
 *
 * The file is assembled with .set noreorder: the instruction following
 * every branch or jump is its delay slot and executes either way.
 * Digits of a are loaded only once they are known to exist, so the routine
 * never reads past a[a_len - 1].
 * Every mflo stays at least two instructions ahead of the next dmultu
 * (presumably for the HI/LO hazard on older MIPS cores - keep it that way
 * when editing).
 * NOTE(review): pointers are advanced with 32-bit addiu, which presumes
 * 32-bit pointers (e.g. the n32 ABI) - confirm before building for n64.
 */
	.ent	s_mpv_mul_d_add_prop
	.globl	s_mpv_mul_d_add_prop

s_mpv_mul_d_add_prop:
	beq	a1,zero,.M.1		/* if (!a_len) return; */
	move	t2,zero			/* (delay slot) cy = 0 */
	dsll32	a2,a2,0			/* b may arrive with junk or sign bits */
	dsrl32	a2,a2,0			/* above bit 31: keep the low 32 bits only */
	lwu	a4,0(a0)		/* a0 = a[0] */
	dmultu	a2,a4			/* w0 = (mp_word)b * a0 */
	addiu	a1,a1,-1		/* --a_len */
	beq	a1,zero,.M.2		/* that was the only digit */
	sltiu	t3,a1,2			/* (delay slot) t3 = (a_len < 2) */
	bne	t3,zero,.M.3		/* one digit left: straight to the tail */
	nop				/* (delay slot) */

	/* while (a_len >= 2): two digits per pass; LO holds b * a[0] on entry */
.M.4:
	lwu	a5,4(a0)		/* a1 = a[1] (exists: a_len >= 2 here) */
	addiu	a1,a1,-2		/* a_len -= 2 */
	lwu	a6,0(a3)		/* c0 = c[0] */
	mflo	t0			/* w0 = b * a0 */
	daddu	t0,t0,t2		/* w0 += cy */
	daddu	t0,t0,a6		/* w0 += c0 */
	dmultu	a2,a5			/* w1 = (mp_word)b * a1 */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	lwu	a4,8(a0)		/* a0 = a[2] (exists: a_len was >= 2) */
	addiu	a0,a0,8			/* a += 2 */
	lwu	a7,4(a3)		/* c1 = c[1] */
	mflo	t1			/* w1 = b * a1 */
	daddu	t1,t1,t2		/* w1 += cy */
	daddu	t1,t1,a7		/* w1 += c1 */
	dmultu	a2,a4			/* w0 = (mp_word)b * a0, for the next step */
	dsrl32	t2,t1,0			/* cy = CARRYOUT(w1) */
	sw	t1,4(a3)		/* c[1] = ACCUM(w1) */
	sltiu	t3,a1,2			/* t3 = (a_len < 2) */
	beq	t3,zero,.M.4		/* loop while a_len >= 2 */
	addiu	a3,a3,8			/* (delay slot) c += 2 */

	/* tail: LO holds b * a[0]; a_len is 0 or 1 digits beyond a[0] */
.M.3:
	lwu	a6,0(a3)		/* c0 = c[0] */
	mflo	t0			/* w0 = b * a0 */
	beq	a1,zero,.M.5		/* no further digit */
	daddu	t0,t0,t2		/* (delay slot) w0 += cy */

	/* one more digit after a[0] */
	lwu	a5,4(a0)		/* a1 = a[1], loaded only now that it exists */
	dmultu	a2,a5			/* w1 = (mp_word)b * a1 */
	daddu	t0,t0,a6		/* w0 += c0 */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	lwu	a7,4(a3)		/* c1 = c[1] */
	mflo	t1			/* w1 = b * a1 */
	daddu	t1,t1,t2		/* w1 += cy */
	daddu	t1,t1,a7		/* w1 += c1 */
	sw	t1,4(a3)		/* c[1] = ACCUM(w1) */
	dsrl32	t2,t1,0			/* cy = CARRYOUT(w1) */
	b	.M.6
	addiu	a3,a3,8			/* (delay slot) c += 2: step past both digits */

	/* a[0] was the last digit */
.M.5:
	daddu	t0,t0,a6		/* w0 += c0 */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	b	.M.6
	addiu	a3,a3,4			/* (delay slot) c += 1 */

	/* a_len was exactly 1: cy is still 0 */
.M.2:
	lwu	a6,0(a3)		/* c0 = c[0] */
	mflo	t0			/* w0 = b * a0 */
	daddu	t0,t0,a6		/* w0 += c0 */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	addiu	a3,a3,4			/* c += 1 */

	/* a3 now points at the first digit of c not yet touched */
.M.6:
	beq	t2,zero,.M.1		/* while (cy) { */
	nop				/* (delay slot) */
.M.7:
	lwu	a6,0(a3)		/*   load *c */
	daddu	t2,t2,a6		/*   w = (mp_word)*c + cy */
	sw	t2,0(a3)		/*   *c = ACCUM(w) */
	dsrl32	t2,t2,0			/*   cy = CARRYOUT(w) */
	bne	t2,zero,.M.7		/* } */
	addiu	a3,a3,4			/* (delay slot) ++c */

.M.1:
	jr	ra
	nop
	.end	s_mpv_mul_d_add_prop
/*
 * void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
 *                  mp_digit *c)
 *
 * c = a * b.  Multiplies the a_len 32-bit digits at a by the single digit
 * b, writes the a_len low result digits to c[0 .. a_len-1] and the final
 * carry to c[a_len].  Returns immediately when a_len == 0.
 *
 * In:    a0 = a, a1 = a_len, a2 = b, a3 = c
 * Temps: a4,a5 = digits of a    t0,t1 = 64-bit sums w0,w1
 *        t2 = carry cy          t3 = loop test
 *
 * The file is assembled with .set noreorder: the instruction following
 * every branch or jump is its delay slot and executes either way.
 * Digits of a are loaded only once they are known to exist, so the routine
 * never reads past a[a_len - 1].  Each a[i] is still loaded before c[i] is
 * stored.
 * Every mflo stays at least two instructions ahead of the next dmultu
 * (presumably for the HI/LO hazard on older MIPS cores - keep it that way
 * when editing).
 * NOTE(review): pointers are advanced with 32-bit addiu, which presumes
 * 32-bit pointers (e.g. the n32 ABI) - confirm before building for n64.
 */
	.ent	s_mpv_mul_d
	.globl	s_mpv_mul_d

s_mpv_mul_d:
	beq	a1,zero,.N.1		/* if (!a_len) return; */
	move	t2,zero			/* (delay slot) cy = 0 */
	dsll32	a2,a2,0			/* b may arrive with junk or sign bits */
	dsrl32	a2,a2,0			/* above bit 31: keep the low 32 bits only */
	lwu	a4,0(a0)		/* a0 = a[0] */
	dmultu	a2,a4			/* w0 = (mp_word)b * a0 */
	addiu	a1,a1,-1		/* --a_len */
	beq	a1,zero,.N.2		/* that was the only digit */
	sltiu	t3,a1,2			/* (delay slot) t3 = (a_len < 2) */
	bne	t3,zero,.N.3		/* one digit left: straight to the tail */
	nop				/* (delay slot) */

	/* while (a_len >= 2): two digits per pass; LO holds b * a[0] on entry */
.N.4:
	lwu	a5,4(a0)		/* a1 = a[1] (exists: a_len >= 2 here) */
	addiu	a1,a1,-2		/* a_len -= 2 */
	mflo	t0			/* w0 = b * a0 */
	daddu	t0,t0,t2		/* w0 += cy */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	dmultu	a2,a5			/* w1 = (mp_word)b * a1 */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	lwu	a4,8(a0)		/* a0 = a[2] (exists: a_len was >= 2) */
	addiu	a0,a0,8			/* a += 2 */
	mflo	t1			/* w1 = b * a1 */
	daddu	t1,t1,t2		/* w1 += cy */
	dsrl32	t2,t1,0			/* cy = CARRYOUT(w1) */
	dmultu	a2,a4			/* w0 = (mp_word)b * a0, for the next step */
	sw	t1,4(a3)		/* c[1] = ACCUM(w1) */
	sltiu	t3,a1,2			/* t3 = (a_len < 2) */
	beq	t3,zero,.N.4		/* loop while a_len >= 2 */
	addiu	a3,a3,8			/* (delay slot) c += 2 */

	/* tail: LO holds b * a[0]; a_len is 0 or 1 digits beyond a[0] */
.N.3:
	mflo	t0			/* w0 = b * a0 */
	beq	a1,zero,.N.5		/* no further digit */
	daddu	t0,t0,t2		/* (delay slot) w0 += cy */

	/* one more digit after a[0] */
	lwu	a5,4(a0)		/* a1 = a[1], loaded only now that it exists */
	dmultu	a2,a5			/* w1 = (mp_word)b * a1 */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	mflo	t1			/* w1 = b * a1 */
	daddu	t1,t1,t2		/* w1 += cy */
	sw	t1,4(a3)		/* c[1] = ACCUM(w1) */
	dsrl32	t2,t1,0			/* cy = CARRYOUT(w1) */
	b	.N.6
	addiu	a3,a3,4			/* (delay slot) c += 1, so 4(a3) is c[2] */

	/* a[0] was the last digit */
.N.5:
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	b	.N.6
	dsrl32	t2,t0,0			/* (delay slot) cy = CARRYOUT(w0) */

	/* a_len was exactly 1: cy is still 0 */
.N.2:
	mflo	t0			/* w0 = b * a0 */
	sw	t0,0(a3)		/* c[0] = ACCUM(w0) */
	dsrl32	t2,t0,0			/* cy = CARRYOUT(w0) */

.N.6:
	jr	ra
	sw	t2,4(a3)		/* (delay slot) c[1] = cy */

.N.1:
	jr	ra
	nop
	.end	s_mpv_mul_d
/*
 * void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
 *
 * For every 32-bit digit a[i], adds the 64-bit square a[i] * a[i] into the
 * digit pair sqrs[2i], sqrs[2i+1], carrying from each pair into the next.
 * A carry left over after the last pair is added into the following digits
 * of sqrs until it dies out.  Returns immediately when a_len == 0.
 * The final propagation walk is not bounded here; the caller presumably
 * guarantees sqrs has room for the carry - verify at call sites.
 *
 * In:    a0 = a, a1 = a_len, a2 = sqrs
 * Temps: a3 = current digit a_i
 *        a4 = a_i * a_i, then the 64-bit sum being stored
 *        a5 = next two digits of sqrs packed as one 64-bit value
 *        a6 = high digit of the pair, then the partial sum
 *        a7 = carry out of (pair + square)
 *        t0 = carry passed to the next pair / next digit
 *
 * The file is assembled with .set noreorder: the instruction following
 * every branch or jump is its delay slot and executes either way.
 * NOTE(review): pointers are advanced with 32-bit addiu, which presumes
 * 32-bit pointers (e.g. the n32 ABI) - confirm before building for n64.
 */
	.ent	s_mpv_sqr_add_prop
	.globl	s_mpv_sqr_add_prop

s_mpv_sqr_add_prop:
	beq	a1,zero,.P.9		/* a_len == 0: nothing to square */
	move	t0,zero			/* (delay slot) carry = 0 */
	lwu	a3,0(a0)		/* a_i = a[0] */
	addiu	a1,a1,-1		/* --a_len */
	dmultu	a3,a3			/* LO = a_i * a_i */
	beq	a1,zero,.P.3		/* single digit: only the final step remains */
	addiu	a0,a0,4			/* (delay slot) ++a */

	/* one pass per digit except the last; LO holds the pending square */
.P.2:
	lwu	a5,0(a2)		/* low digit of the pair */
	lwu	a6,4(a2)		/* high digit of the pair */
	addiu	a2,a2,8			/* sqrs += 2 */
	dsll32	a6,a6,0
	daddu	a5,a5,a6		/* a5 = pair as one 64-bit value */
	lwu	a3,0(a0)		/* fetch the next a_i */
	addiu	a0,a0,4			/* ++a */
	mflo	a4			/* a4 = pending square */
	daddu	a6,a5,a4		/* a6 = pair + square */
	sltu	a7,a6,a5		/* a7 = 1 if that addition wrapped */
	dmultu	a3,a3			/* start the next square */
	daddu	a4,a6,t0		/* add the incoming carry */
	sltu	t0,a4,a6		/* 1 if that addition wrapped */
	add	t0,t0,a7		/* carry into the next pair */
	sw	a4,-8(a2)		/* store low digit */
	addiu	a1,a1,-1		/* --a_len */
	dsrl32	a4,a4,0
	bne	a1,zero,.P.2		/* loop while digits remain */
	sw	a4,-4(a2)		/* (delay slot) store high digit */

	/* last digit: same arithmetic, nothing further to fetch or multiply */
.P.3:
	lwu	a5,0(a2)		/* low digit of the pair */
	lwu	a6,4(a2)		/* high digit of the pair */
	addiu	a2,a2,8			/* sqrs += 2 */
	dsll32	a6,a6,0
	daddu	a5,a5,a6		/* a5 = pair as one 64-bit value */
	mflo	a4			/* a4 = last square */
	daddu	a6,a5,a4		/* a6 = pair + square */
	sltu	a7,a6,a5		/* a7 = 1 if that addition wrapped */
	daddu	a4,a6,t0		/* add the incoming carry */
	sltu	t0,a4,a6		/* 1 if that addition wrapped */
	add	t0,t0,a7		/* final carry */
	sw	a4,-8(a2)		/* store low digit */
	dsrl32	a4,a4,0
	beq	t0,zero,.P.9		/* no carry left: done */
	sw	a4,-4(a2)		/* (delay slot) store high digit either way */

	/*
	 * Propagate the final carry one 32-bit digit at a time.  The sum of a
	 * zero-extended digit and the carry cannot wrap 64 bits, so the
	 * carry-out is taken from bit 32 of the sum, and the updated digit is
	 * written back before moving on.
	 */
.P.8:
	lwu	a5,0(a2)		/* s = *sqrs */
	daddu	a5,a5,t0		/* s += carry */
	sw	a5,0(a2)		/* *sqrs = low 32 bits of s */
	dsrl32	t0,a5,0			/* carry = s >> 32 */
	bne	t0,zero,.P.8		/* loop while the carry persists */
	addiu	a2,a2,4			/* (delay slot) ++sqrs */

.P.9:
	jr	ra
	nop
	.end	s_mpv_sqr_add_prop