|
1 # This Source Code Form is subject to the terms of the Mozilla Public |
|
2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4 |
|
# Platform glue.  When DARWIN is defined each routine name is remapped to
# carry a leading underscore and TYPE_FUNCTION expands to nothing; on all
# other targets TYPE_FUNCTION emits a .type directive marking the symbol
# as a function.
#if defined(DARWIN)
#  define s_mpv_mul_d           _s_mpv_mul_d
#  define s_mpv_mul_d_add       _s_mpv_mul_d_add
#  define s_mpv_mul_d_add_prop  _s_mpv_mul_d_add_prop
#  define s_mpv_sqr_add_prop    _s_mpv_sqr_add_prop
#  define s_mpv_div_2dx1d       _s_mpv_div_2dx1d
#  define TYPE_FUNCTION(x)
#else
#  define TYPE_FUNCTION(x)      .type x, @function
#endif

.text
|
17 |
|
# Multiply the a_len-word vector a by the single 32-bit word b and store
# the result in c.  Words are handled from the lowest address upward.
# c receives a_len + 1 words: the low halves of the running products
# followed by the final carry.  With a_len == 0 only c[0] = 0 is written.
#
# Frame layout after the prologue:
#    ebp - 8:  saved esi
#    ebp - 4:  saved edi
#    ebp + 0:  saved ebp
#    ebp + 4:  return address
#    ebp + 8:  a argument
#    ebp + 12: a_len argument
#    ebp + 16: b argument
#    ebp + 20: c argument
# Register use:
#    ecx: words remaining (a_len)
#    esi: a ptr
#    edi: c ptr
#    mm0: current 64-bit product
#    mm1: b
#    mm2: running carry
.globl  s_mpv_mul_d
# .private_extern is only understood for Mach-O output; ELF assemblers
# spell the same restricted visibility as .hidden.
#ifdef DARWIN
.private_extern s_mpv_mul_d
#else
.hidden s_mpv_mul_d
#endif
TYPE_FUNCTION(s_mpv_mul_d)
s_mpv_mul_d:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pxor    %mm2, %mm2              # carry = 0
        movl    12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        movl    20(%ebp), %edi          # edi = c
        testl   %ecx, %ecx
        jz      2f                      # nothing to multiply when a_len == 0
        movl    8(%ebp), %esi           # esi = a
1:
        movd    0(%esi), %mm0           # mm0 = *a
        addl    $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a (64-bit product)
        paddq   %mm0, %mm2              # carry += product
        movd    %mm2, 0(%edi)           # *c = low 32 bits
        addl    $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high 32 bits
        decl    %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, 0(%edi)           # *c = final carry
        emms                            # empty the MMX state before returning
        popl    %esi
        popl    %edi
        leave
        ret
        nop
|
65 |
|
# Multiply the a_len-word vector a by the single 32-bit word b and add the
# product into c:  c[i] = low32(a[i] * b + c[i] + carry) for each i below
# a_len.  The final carry is then STORED into c[a_len], replacing whatever
# was there; it is not added and not propagated further.
# With a_len == 0 only c[0] = 0 is written.
#
# Frame layout after the prologue:
#    ebp - 8:  saved esi
#    ebp - 4:  saved edi
#    ebp + 0:  saved ebp
#    ebp + 4:  return address
#    ebp + 8:  a argument
#    ebp + 12: a_len argument
#    ebp + 16: b argument
#    ebp + 20: c argument
# Register use:
#    ecx: words remaining (a_len)
#    esi: a ptr
#    edi: c ptr
#    mm0: scratch (product, then the current word of c)
#    mm1: b
#    mm2: running carry
.globl  s_mpv_mul_d_add
# .private_extern is only understood for Mach-O output; ELF assemblers
# spell the same restricted visibility as .hidden.
#ifdef DARWIN
.private_extern s_mpv_mul_d_add
#else
.hidden s_mpv_mul_d_add
#endif
TYPE_FUNCTION(s_mpv_mul_d_add)
s_mpv_mul_d_add:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pxor    %mm2, %mm2              # carry = 0
        movl    12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        movl    20(%ebp), %edi          # edi = c
        testl   %ecx, %ecx
        jz      2f                      # nothing to multiply when a_len == 0
        movl    8(%ebp), %esi           # esi = a
1:
        movd    0(%esi), %mm0           # mm0 = *a
        addl    $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a (64-bit product)
        paddq   %mm0, %mm2              # carry += product
        movd    0(%edi), %mm0           # mm0 = *c
        paddq   %mm0, %mm2              # carry += *c
        movd    %mm2, 0(%edi)           # *c = low 32 bits
        addl    $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high 32 bits
        decl    %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, 0(%edi)           # *c = final carry
        emms                            # empty the MMX state before returning
        popl    %esi
        popl    %edi
        leave
        ret
        nop
|
115 |
|
# Multiply the a_len-word vector a by the single 32-bit word b, add the
# product into c, then add the final carry into c[a_len] and keep rippling
# the carry bit into the following words of c until an addition no longer
# overflows.
# NOTE(review): the ripple loop has no length check; presumably the caller
# guarantees that c is long enough to absorb the carry.
#
# Frame layout after the prologue:
#    ebp - 12: saved ebx
#    ebp - 8:  saved esi
#    ebp - 4:  saved edi
#    ebp + 0:  saved ebp
#    ebp + 4:  return address
#    ebp + 8:  a argument
#    ebp + 12: a_len argument
#    ebp + 16: b argument
#    ebp + 20: c argument
# Register use:
#    eax: scratch word during carry propagation
#    ebx: carry out of the multiply loop
#    ecx: words remaining (a_len)
#    esi: a ptr
#    edi: c ptr
#    mm0: current 64-bit product
#    mm1: b
#    mm2: running carry
#    mm3: current word of c
.globl  s_mpv_mul_d_add_prop
# .private_extern is only understood for Mach-O output; ELF assemblers
# spell the same restricted visibility as .hidden.
#ifdef DARWIN
.private_extern s_mpv_mul_d_add_prop
#else
.hidden s_mpv_mul_d_add_prop
#endif
TYPE_FUNCTION(s_mpv_mul_d_add_prop)
s_mpv_mul_d_add_prop:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pxor    %mm2, %mm2              # carry = 0
        movl    12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        movl    20(%ebp), %edi          # edi = c
        testl   %ecx, %ecx
        jz      2f                      # skip the multiply loop when a_len == 0
        movl    8(%ebp), %esi           # esi = a
        cld                             # stosl below must step edi upward
1:
        movd    0(%esi), %mm0           # mm0 = *a
        movd    0(%edi), %mm3           # mm3 = *c
        addl    $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a (64-bit product)
        paddq   %mm0, %mm2              # carry += product
        paddq   %mm3, %mm2              # carry += *c
        movd    %mm2, 0(%edi)           # *c = low 32 bits
        addl    $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high 32 bits
        decl    %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, %ebx              # ebx = carry out of the multiply loop
        testl   %ebx, %ebx
        jz      4f                      # carry is zero: done
        movl    0(%edi), %eax
        addl    %ebx, %eax              # eax = *c + carry, CF set on overflow
        stosl                           # *c++ = eax (flags untouched)
        jnc     4f
3:
        movl    0(%edi), %eax           # mov keeps CF from the previous add
        adcl    $0, %eax                # add the carry bit
        stosl                           # *c++ = eax (flags untouched)
        jc      3b                      # keep rippling while words overflow
4:
        emms                            # empty the MMX state before returning
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop
|
181 |
|
# For each of the a_len words of pa, add the 64-bit square of that word
# into the matching pair of words in ps (ps advances two words per input
# word), carrying between pairs.  After the loop the remaining carry is
# added into the next word of ps and rippled upward until an addition no
# longer overflows.
# NOTE(review): the ripple loop has no length check; presumably the caller
# guarantees that ps is long enough to absorb the carry.
#
# Frame layout after the prologue:
#    ebp - 12: saved ebx
#    ebp - 8:  saved esi
#    ebp - 4:  saved edi
#    ebp + 0:  saved ebp
#    ebp + 4:  return address
#    ebp + 8:  pa argument
#    ebp + 12: a_len argument
#    ebp + 16: ps argument
# Register use:
#    eax: scratch word during carry propagation
#    ebx: carry out of the squaring loop
#    ecx: words remaining (a_len)
#    esi: pa ptr
#    edi: ps ptr
#    mm0: current word of pa, then its 64-bit square
#    mm2: running carry
#    mm3: current word of ps
.globl  s_mpv_sqr_add_prop
# .private_extern is only understood for Mach-O output; ELF assemblers
# spell the same restricted visibility as .hidden.
#ifdef DARWIN
.private_extern s_mpv_sqr_add_prop
#else
.hidden s_mpv_sqr_add_prop
#endif
TYPE_FUNCTION(s_mpv_sqr_add_prop)
s_mpv_sqr_add_prop:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pxor    %mm2, %mm2              # carry = 0
        movl    12(%ebp), %ecx          # ecx = a_len
        movl    16(%ebp), %edi          # edi = ps
        testl   %ecx, %ecx
        jz      2f                      # skip the squaring loop when a_len == 0
        movl    8(%ebp), %esi           # esi = pa
        cld                             # stosl below must step edi upward
1:
        movd    0(%esi), %mm0           # mm0 = *pa
        movd    0(%edi), %mm3           # mm3 = ps[0], low word of the pair
        addl    $4, %esi                # pa++
        pmuludq %mm0, %mm0              # mm0 = (*pa)^2, 64-bit
        paddq   %mm0, %mm2              # carry += square
        paddq   %mm3, %mm2              # carry += ps[0]
        movd    4(%edi), %mm3           # mm3 = ps[1], high word of the pair
        movd    %mm2, 0(%edi)           # ps[0] = low 32 bits
        psrlq   $32, %mm2               # shift down to the high half
        paddq   %mm3, %mm2              # carry += ps[1]
        movd    %mm2, 4(%edi)           # ps[1] = low 32 bits
        psrlq   $32, %mm2               # carry = what is left above ps[1]
        addl    $8, %edi                # ps += 2
        decl    %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, %ebx              # ebx = carry out of the squaring loop
        testl   %ebx, %ebx
        jz      4f                      # carry is zero: done
        movl    0(%edi), %eax
        addl    %ebx, %eax              # eax = *ps + carry, CF set on overflow
        stosl                           # *ps++ = eax (flags untouched)
        jnc     4f
3:
        movl    0(%edi), %eax           # mov keeps CF from the previous add
        adcl    $0, %eax                # add the carry bit
        stosl                           # *ps++ = eax (flags untouched)
        jc      3b                      # keep rippling while words overflow
4:
        emms                            # empty the MMX state before returning
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop
|
249 |
|
#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1.  This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                        mp_digit *qp, mp_digit *rp)
#
# Stores the quotient through qp and the remainder through rp, and always
# returns zero in eax.  The div instruction raises a divide error when the
# quotient does not fit in 32 bits (Nhi >= divisor) or divisor is zero;
# nothing here checks for that, so the caller must rule it out.
#
# Stack on entry (no frame is set up):
#    esp + 0:  return address
#    esp + 4:  Nhi argument
#    esp + 8:  Nlo argument
#    esp + 12: divisor argument
#    esp + 16: qp argument
#    esp + 20: rp argument
# Register use (all caller-saved, nothing is pushed):
#    edx: Nhi going in, remainder coming out
#    eax: Nlo going in, quotient coming out, then the zero return value
#    ecx: qp, then rp
#
.globl  s_mpv_div_2dx1d
# .private_extern is only understood for Mach-O output; ELF assemblers
# spell the same restricted visibility as .hidden.
#ifdef DARWIN
.private_extern s_mpv_div_2dx1d
#else
.hidden s_mpv_div_2dx1d
#endif
TYPE_FUNCTION(s_mpv_div_2dx1d)
s_mpv_div_2dx1d:
        movl    4(%esp), %edx           # edx = Nhi
        movl    8(%esp), %eax           # eax = Nlo
        divl    12(%esp)                # edx:eax / divisor -> eax quot, edx rem
        movl    16(%esp), %ecx          # ecx = qp
        movl    %eax, 0(%ecx)           # *qp = quotient
        movl    20(%esp), %ecx          # ecx = rp
        movl    %edx, 0(%ecx)           # *rp = remainder
        xorl    %eax, %eax              # return zero
        ret
        nop
|
289 |
|
#if !defined(DARWIN)
# Empty .note.GNU-stack section: marks this object as not needing an
# executable stack.
.section .note.GNU-stack, "", @progbits
.previous
#endif