# Thu, 22 Jan 2015 13:21:57 +0100
# Incorporate requested changes from Mozilla in review:
# https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
.data
.balign 4                               # 4-byte boundary on every target (.align N is format-dependent)
#
# is_sse caches which implementation the entry points in this file use:
# -1 means to call _s_mpi_is_sse2 to determine if we support sse2
#    instructions (the result is then stored back into is_sse).
#  0 means to use x86 instructions
#  1 means to use sse2 instructions
.type is_sse,@object
.size is_sse,4
is_sse: .long -1
#
# GET var,reg  - load the 32-bit global \var into \reg
# PUT reg,var  - store \reg into the 32-bit global \var
#
# sigh, handle the difference between -fPIC and not PIC.
# Only the absolute-addressing (non-PIC) forms are active in this file.
# The GOT-relative forms are kept below, commented out, for reference:
# they address the variable relative to %ebx, and the entry points in
# this file use GET/PUT without loading a GOT pointer into %ebx first.
# (The older note here said the PIC forms were the default because the
# file was Linux-only, with solaris using mpi_i86pc.s and windows using
# mpi_x86_asm.c; that default is disabled here.)
#
#.ifndef NO_PIC
#.macro GET var,reg
#    movl   \var@GOTOFF(%ebx),\reg
#.endm
#.macro PUT reg,var
#    movl   \reg,\var@GOTOFF(%ebx)
#.endm
#.else
.macro GET var,reg
        movl    \var,\reg
.endm
.macro PUT reg,var
        movl    \reg,\var
.endm
#.endif
.text

#
# _s_mpv_mul_d(a, a_len, b, c)
#
# Multiplies the a_len 32-bit words at a by the single 32-bit word b and
# writes a_len + 1 words to c: c[i] receives the low word of
# a[i] * b + carry, and c[a_len] receives the final carry.  With
# a_len == 0 only c[0] = 0 is written.
#
# Dispatch: is_sse == 0 selects the plain x86 loop, is_sse > 0 the sse2
# loop.  While is_sse is negative, _s_mpi_is_sse2 is called and its
# return value is stored in is_sse before choosing.
#
# Frame after the prologue:
#  ebp - 12: caller's ebx (x86 path only)
#  ebp -  8: caller's esi
#  ebp -  4: caller's edi
#  ebp +  0: caller's ebp
#  ebp +  4: return address
#  ebp +  8: a argument
#  ebp + 12: a_len argument
#  ebp + 16: b argument
#  ebp + 20: c argument
# registers:
#  eax: current word of a / low half of product
#  ebx: carry (x86 path; the sse2 path keeps the carry in mm2)
#  ecx: a_len (counts down to zero)
#  edx: high half of product
#  esi: a ptr
#  edi: c ptr
        .globl  _s_mpv_mul_d
        .type   _s_mpv_mul_d,@function
_s_mpv_mul_d:
        GET     is_sse,%eax
        test    %eax,%eax
        jz      _s_mpv_mul_d_x86        # 0: known to lack sse2
        jg      _s_mpv_mul_d_sse2       # >0: known to have sse2
        call    _s_mpi_is_sse2          # <0: not probed yet
        PUT     %eax,is_sse             # cache the answer
        test    %eax,%eax
        jg      _s_mpv_mul_d_sse2
                                        # otherwise fall through to x86
_s_mpv_mul_d_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      2f                      # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
        cld                             # lodsl/stosl must step upwards
1:
        lodsl                           # eax = *a++
        mull    16(%ebp)                # edx:eax = Phi:Plo = a_i * b
        add     %ebx,%eax               # add carry (%ebx) to edx:eax
        adc     $0,%edx
        mov     %edx,%ebx               # high half of product becomes next carry
        stosl                           # *c++ = eax
        dec     %ecx                    # --a_len
        jnz     1b                      # jmp if a_len != 0
2:
        mov     %ebx,0(%edi)            # *c = carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret

_s_mpv_mul_d_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      6f                      # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
5:
        movd    0(%esi),%mm0            # mm0 = *a++
        add     $4,%esi
        pmuludq %mm1,%mm0               # mm0 = b * a_i (64-bit product)
        paddq   %mm0,%mm2               # add the product to the carry
        movd    %mm2,0(%edi)            # store the low 32 bits of the sum
        add     $4,%edi
        psrlq   $32,%mm2                # high 32 bits become the next carry
        dec     %ecx                    # --a_len
        jnz     5b                      # jmp if a_len != 0
6:
        movd    %mm2,0(%edi)            # *c = carry
        emms                            # leave the MMX state clean for x87 code
        pop     %esi
        pop     %edi
        leave
        ret

#
# _s_mpv_mul_d_add(a, a_len, b, c)
#
# Multiply-accumulate: for each of the a_len 32-bit words of a,
# c[i] = low word of (a[i] * b + c[i] + carry).  The final carry is
# stored (not added) into c[a_len].  With a_len == 0 only c[0] = 0 is
# written.
#
# Dispatch: is_sse == 0 selects the plain x86 loop, is_sse > 0 the sse2
# loop.  While is_sse is negative, _s_mpi_is_sse2 is called and its
# return value is stored in is_sse before choosing.
#
# Frame after the prologue:
#  ebp - 12: caller's ebx (x86 path only)
#  ebp -  8: caller's esi
#  ebp -  4: caller's edi
#  ebp +  0: caller's ebp
#  ebp +  4: return address
#  ebp +  8: a argument
#  ebp + 12: a_len argument
#  ebp + 16: b argument
#  ebp + 20: c argument
# registers:
#  eax: current word of a / low half of product
#  ebx: carry (x86 path; the sse2 path keeps the carry in mm2)
#  ecx: a_len (counts down to zero)
#  edx: high half of product
#  esi: a ptr
#  edi: c ptr
        .globl  _s_mpv_mul_d_add
        .type   _s_mpv_mul_d_add,@function
_s_mpv_mul_d_add:
        GET     is_sse,%eax
        test    %eax,%eax
        jz      _s_mpv_mul_d_add_x86    # 0: known to lack sse2
        jg      _s_mpv_mul_d_add_sse2   # >0: known to have sse2
        call    _s_mpi_is_sse2          # <0: not probed yet
        PUT     %eax,is_sse             # cache the answer
        test    %eax,%eax
        jg      _s_mpv_mul_d_add_sse2
                                        # otherwise fall through to x86
_s_mpv_mul_d_add_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      11f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
        cld                             # lodsl/stosl must step upwards
10:
        lodsl                           # eax = *a++
        mull    16(%ebp)                # edx:eax = Phi:Plo = a_i * b
        add     %ebx,%eax               # add carry (%ebx) to edx:eax
        adc     $0,%edx
        add     0(%edi),%eax            # add in current word from *c
        adc     $0,%edx
        mov     %edx,%ebx               # high half of sum becomes next carry
        stosl                           # *c++ = eax
        dec     %ecx                    # --a_len
        jnz     10b                     # jmp if a_len != 0
11:
        mov     %ebx,0(%edi)            # *c = carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret

_s_mpv_mul_d_add_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      16f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
15:
        movd    0(%esi),%mm0            # mm0 = *a++
        add     $4,%esi
        pmuludq %mm1,%mm0               # mm0 = b * a_i (64-bit product)
        paddq   %mm0,%mm2               # add the product to the carry
        movd    0(%edi),%mm0            # mm0 = current word from *c
        paddq   %mm0,%mm2               # add it in as well
        movd    %mm2,0(%edi)            # store the low 32 bits of the sum
        add     $4,%edi
        psrlq   $32,%mm2                # high 32 bits become the next carry
        dec     %ecx                    # --a_len
        jnz     15b                     # jmp if a_len != 0
16:
        movd    %mm2,0(%edi)            # *c = carry
        emms                            # leave the MMX state clean for x87 code
        pop     %esi
        pop     %edi
        leave
        ret

#
# _s_mpv_mul_d_add_prop(a, a_len, b, c)
#
# Multiply-accumulate with carry propagation: for each of the a_len
# 32-bit words of a, c[i] = low word of (a[i] * b + c[i] + carry).
# A non-zero final carry is then added into c[a_len], and any carry out
# of that addition keeps rippling into the following words of c until an
# addition produces no carry.  Nothing beyond c[a_len - 1] is touched
# when the final carry is zero.
#
# Dispatch: is_sse == 0 selects the plain x86 loop, is_sse > 0 the sse2
# loop.  While is_sse is negative, _s_mpi_is_sse2 is called and its
# return value is stored in is_sse before choosing.
#
# Frame after the prologue (both paths):
#  ebp - 12: caller's ebx
#  ebp -  8: caller's esi
#  ebp -  4: caller's edi
#  ebp +  0: caller's ebp
#  ebp +  4: return address
#  ebp +  8: a argument
#  ebp + 12: a_len argument
#  ebp + 16: b argument
#  ebp + 20: c argument
# registers:
#  eax: current word / low half of product
#  ebx: carry
#  ecx: a_len (counts down to zero)
#  edx: high half of product
#  esi: a ptr
#  edi: c ptr
        .globl  _s_mpv_mul_d_add_prop
        .type   _s_mpv_mul_d_add_prop,@function
_s_mpv_mul_d_add_prop:
        GET     is_sse,%eax
        test    %eax,%eax
        jz      _s_mpv_mul_d_add_prop_x86   # 0: known to lack sse2
        jg      _s_mpv_mul_d_add_prop_sse2  # >0: known to have sse2
        call    _s_mpi_is_sse2              # <0: not probed yet
        PUT     %eax,is_sse                 # cache the answer
        test    %eax,%eax
        jg      _s_mpv_mul_d_add_prop_sse2
                                            # otherwise fall through to x86
_s_mpv_mul_d_add_prop_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      21f                     # jmp if a_len == 0
        cld                             # lodsl/stosl must step upwards
        mov     8(%ebp),%esi            # esi = a
20:
        lodsl                           # eax = *a++
        mull    16(%ebp)                # edx:eax = Phi:Plo = a_i * b
        add     %ebx,%eax               # add carry (%ebx) to edx:eax
        adc     $0,%edx
        add     0(%edi),%eax            # add in current word from *c
        adc     $0,%edx
        mov     %edx,%ebx               # high half of sum becomes next carry
        stosl                           # *c++ = eax
        dec     %ecx                    # --a_len
        jnz     20b                     # jmp if a_len != 0
21:
        test    %ebx,%ebx               # is carry zero?
        jz      23f
        mov     0(%edi),%eax            # add carry into current word of *c
        add     %ebx,%eax
        stosl                           # *c++ = eax; stosl leaves CF intact
        jnc     23f
22:
        mov     0(%edi),%eax            # mov leaves CF intact, so CF == 1 here
        adc     $0,%eax                 # ripple the carry one word further
        stosl                           # *c++ = eax; CF still from the adc
        jc      22b
23:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret

_s_mpv_mul_d_add_prop_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      26f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
        cld                             # stosl below must step upwards
25:
        movd    0(%esi),%mm0            # mm0 = *a++
        movd    0(%edi),%mm3            # mm3 = current word from *c
        add     $4,%esi
        pmuludq %mm1,%mm0               # mm0 = b * a_i (64-bit product)
        paddq   %mm0,%mm2               # add the product to the carry
        paddq   %mm3,%mm2               # add *c
        movd    %mm2,0(%edi)            # store the low 32 bits of the sum
        add     $4,%edi
        psrlq   $32,%mm2                # high 32 bits become the next carry
        dec     %ecx                    # --a_len
        jnz     25b                     # jmp if a_len != 0
26:
        movd    %mm2,%ebx               # ebx = final carry
        test    %ebx,%ebx               # is carry zero?
        jz      28f
        mov     0(%edi),%eax            # add carry into current word of *c
        add     %ebx,%eax
        stosl                           # *c++ = eax; stosl leaves CF intact
        jnc     28f
27:
        mov     0(%edi),%eax            # mov leaves CF intact, so CF == 1 here
        adc     $0,%eax                 # ripple the carry one word further
        stosl                           # *c++ = eax; CF still from the adc
        jc      27b
28:
        emms                            # leave the MMX state clean for x87 code
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret

#
# _s_mpv_sqr_add_prop(pa, a_len, ps)
#
# For each of the a_len 32-bit words of pa, adds the 64-bit square
# pa[i] * pa[i] (plus the running carry) into the two words ps[2i] and
# ps[2i + 1].  A non-zero final carry is then added into the next word
# of ps, and any carry out keeps rippling into the following words until
# an addition produces no carry.
#
# Dispatch: is_sse == 0 selects the plain x86 loop, is_sse > 0 the sse2
# loop.  While is_sse is negative, _s_mpi_is_sse2 is called and its
# return value is stored in is_sse before choosing.
#
# Frame after the prologue (both paths):
#  ebp - 12: caller's ebx
#  ebp -  8: caller's esi
#  ebp -  4: caller's edi
#  ebp +  0: caller's ebp
#  ebp +  4: return address
#  ebp +  8: pa argument
#  ebp + 12: a_len argument
#  ebp + 16: ps argument
# registers:
#  eax: current word / low half of square
#  ebx: carry (also scratch for the high result word in the x86 loop)
#  ecx: a_len (counts down to zero)
#  edx: high half of square
#  esi: pa ptr
#  edi: ps ptr
        .globl  _s_mpv_sqr_add_prop
        .type   _s_mpv_sqr_add_prop,@function
_s_mpv_sqr_add_prop:
        GET     is_sse,%eax
        test    %eax,%eax
        jz      _s_mpv_sqr_add_prop_x86     # 0: known to lack sse2
        jg      _s_mpv_sqr_add_prop_sse2    # >0: known to have sse2
        call    _s_mpi_is_sse2              # <0: not probed yet
        PUT     %eax,is_sse                 # cache the answer
        test    %eax,%eax
        jg      _s_mpv_sqr_add_prop_sse2
                                            # otherwise fall through to x86
_s_mpv_sqr_add_prop_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        jz      31f                     # jump if a_len == 0
        cld                             # lodsl/stosl must step upwards
        mov     8(%ebp),%esi            # esi = pa
30:
        lodsl                           # eax = *pa++
        mull    %eax                    # edx:eax = a_i * a_i
        add     %ebx,%eax               # add "carry"
        adc     $0,%edx
        add     0(%edi),%eax            # add low word from result
        mov     4(%edi),%ebx            # fetch high word; mov leaves CF intact
        stosl                           # store low word; stosl leaves CF intact
        adc     %ebx,%edx               # add high word from result + CF
        movl    $0,%ebx                 # must stay a mov: xor would destroy CF
        mov     %edx,%eax
        adc     $0,%ebx                 # ebx = carry out of the high word
        stosl                           # store high word
        dec     %ecx                    # --a_len
        jnz     30b                     # jmp if a_len != 0
31:
        test    %ebx,%ebx               # is carry zero?
        jz      34f
        mov     0(%edi),%eax            # add carry into current word of *ps
        add     %ebx,%eax
        stosl                           # *ps++ = eax; stosl leaves CF intact
        jnc     34f
32:
        mov     0(%edi),%eax            # mov leaves CF intact, so CF == 1 here
        adc     $0,%eax                 # ripple the carry one word further
        stosl                           # *ps++ = eax; CF still from the adc
        jc      32b
34:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret

_s_mpv_sqr_add_prop_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        jz      36f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = pa
        cld                             # stosl below must step upwards
35:
        movd    0(%esi),%mm0            # mm0 = *pa
        movd    0(%edi),%mm3            # mm3 = low result word
        add     $4,%esi
        pmuludq %mm0,%mm0               # mm0 = sqr(a_i)
        paddq   %mm0,%mm2               # add the square to the carry
        paddq   %mm3,%mm2               # add the low word
        movd    4(%edi),%mm3            # mm3 = high result word
        movd    %mm2,0(%edi)            # store the low 32 bits
        psrlq   $32,%mm2
        paddq   %mm3,%mm2               # add the high word
        movd    %mm2,4(%edi)            # store the next 32 bits
        psrlq   $32,%mm2                # save the carry.
        add     $8,%edi
        dec     %ecx                    # --a_len
        jnz     35b                     # jmp if a_len != 0
36:
        movd    %mm2,%ebx               # ebx = final carry
        test    %ebx,%ebx               # is carry zero?
        jz      38f
        mov     0(%edi),%eax            # add carry into current word of *ps
        add     %ebx,%eax
        stosl                           # *ps++ = eax; stosl leaves CF intact
        jnc     38f
37:
        mov     0(%edi),%eax            # mov leaves CF intact, so CF == 1 here
        adc     $0,%eax                 # ripple the carry one word further
        stosl                           # *ps++ = eax; CF still from the adc
        jc      37b
38:
        emms                            # leave the MMX state clean for x87 code
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret

#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1. This code is from NSPR.
#
# mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                         mp_digit *qp, mp_digit *rp)
#
# Stores the quotient in *qp and the remainder in *rp, and always
# returns 0.  The hardware divide raises a divide error when the
# quotient does not fit in 32 bits, so the caller must pass
# Nhi < divisor (and a non-zero divisor); nothing is checked here.
#
# No frame is set up and no callee-saved register is used:
#  esp +  0: return address
#  esp +  4: Nhi argument
#  esp +  8: Nlo argument
#  esp + 12: divisor argument
#  esp + 16: qp argument
#  esp + 20: rp argument
# registers:
#  eax: Nlo on input to div, quotient afterwards; return value
#  ecx: scratch pointer (qp, then rp)
#  edx: Nhi on input to div, remainder afterwards
#
        .globl  _s_mpv_div_2dx1d
        .type   _s_mpv_div_2dx1d,@function
_s_mpv_div_2dx1d:
        mov     4(%esp),%edx            # edx = Nhi
        mov     8(%esp),%eax            # eax = Nlo
        divl    12(%esp)                # eax = edx:eax / divisor, edx = remainder
        mov     16(%esp),%ecx
        mov     %eax,0(%ecx)            # *qp = quotient
        mov     20(%esp),%ecx
        mov     %edx,0(%ecx)            # *rp = remainder
        xor     %eax,%eax               # return zero
        ret