security/nss/lib/freebl/mpi/mpi_sse2.s

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:25ccf17d01ce
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5 #ifdef DARWIN /* DARWIN build: map each entry point to its underscore-prefixed name */
6 #define s_mpv_mul_d _s_mpv_mul_d
7 #define s_mpv_mul_d_add _s_mpv_mul_d_add
8 #define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
9 #define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
10 #define s_mpv_div_2dx1d _s_mpv_div_2dx1d
11 #define TYPE_FUNCTION(x)
12 #else /* !DARWIN: TYPE_FUNCTION emits a .type directive for the symbol */
13 #define TYPE_FUNCTION(x) .type x, @function
14 #endif /* DARWIN */
15
16 .text # every routine below is assembled into the text section
17
18 # s_mpv_mul_d(a, a_len, b, c) -- 32-bit x86, all arguments on the stack
19 # c[i] = low word of a[i] * b + carry for i < a_len; c[a_len] = final carry
20 # frame: ebp - 8 saved esi, ebp - 4 saved edi, ebp + 0 saved ebp
21 # ebp + 4: return address
22 # ebp + 8: a (pointer to the source digits)
23 # ebp + 12: a_len (number of 32-bit digits in a)
24 # ebp + 16: b (single 32-bit multiplier)
25 # ebp + 20: c (pointer to the destination digits)
26 # register roles:
27 # mm0: product scratch, mm1: b, mm2: 64-bit accumulator / carry
28 # ecx: digits still to process
29 # esi: walks a
30 # edi: walks c
31 .globl s_mpv_mul_d
32 .private_extern s_mpv_mul_d
33 TYPE_FUNCTION(s_mpv_mul_d)
34 s_mpv_mul_d:
35 pushl %ebp
36 movl %esp, %ebp # fixed frame so the arguments sit at constant ebp offsets
37 pushl %edi
38 pushl %esi # edi and esi are restored before returning
39 pxor %mm2, %mm2 # accumulator = 0
40 movl 20(%ebp), %edi # edi = c
41 movl 12(%ebp), %ecx # ecx = a_len
42 movd 16(%ebp), %mm1 # mm1 = b, upper 32 bits zero
43 testl %ecx, %ecx
44 jz 2f # no digits: only the zero carry is stored
45 movl 8(%ebp), %esi # esi = a
46 cld
47 1:
48 movd (%esi), %mm0 # mm0 = *a
49 addl $4, %esi # a++
50 pmuludq %mm1, %mm0 # mm0 = *a * b as a full 64-bit product
51 paddq %mm0, %mm2 # accumulator = product + incoming carry
52 movd %mm2, (%edi) # *c = low 32 bits
53 addl $4, %edi # c++
54 psrlq $32, %mm2 # high 32 bits become the next carry
55 decl %ecx
56 jnz 1b # repeat until every digit is consumed
57 2:
58 movd %mm2, (%edi) # c[a_len] = final carry
59 emms # empty the MMX state before returning
60 popl %esi
61 popl %edi
62 leave
63 ret
64 nop
65
66 # s_mpv_mul_d_add(a, a_len, b, c) -- 32-bit x86, all arguments on the stack
67 # c[i] = low word of a[i] * b + c[i] + carry for i < a_len; c[a_len] is overwritten with the final carry
68 # frame: ebp - 8 saved esi, ebp - 4 saved edi, ebp + 0 saved ebp
69 # ebp + 4: return address
70 # ebp + 8: a (pointer to the source digits)
71 # ebp + 12: a_len (number of 32-bit digits in a)
72 # ebp + 16: b (single 32-bit multiplier)
73 # ebp + 20: c (digits that are read and updated in place)
74 # register roles:
75 # mm0: scratch (product, then the old *c), mm1: b, mm2: 64-bit accumulator / carry
76 # ecx: digits still to process
77 # esi: walks a
78 # edi: walks c
79 .globl s_mpv_mul_d_add
80 .private_extern s_mpv_mul_d_add
81 TYPE_FUNCTION(s_mpv_mul_d_add)
82 s_mpv_mul_d_add:
83 pushl %ebp
84 movl %esp, %ebp # fixed frame so the arguments sit at constant ebp offsets
85 pushl %edi
86 pushl %esi # edi and esi are restored before returning
87 pxor %mm2, %mm2 # accumulator = 0
88 movl 20(%ebp), %edi # edi = c
89 movl 12(%ebp), %ecx # ecx = a_len
90 movd 16(%ebp), %mm1 # mm1 = b, upper 32 bits zero
91 testl %ecx, %ecx
92 jz 2f # no digits: only the zero carry is stored
93 movl 8(%ebp), %esi # esi = a
94 cld
95 1:
96 movd (%esi), %mm0 # mm0 = *a
97 addl $4, %esi # a++
98 pmuludq %mm1, %mm0 # mm0 = *a * b as a full 64-bit product
99 paddq %mm0, %mm2 # accumulator = product + incoming carry
100 movd (%edi), %mm0 # mm0 = current *c
101 paddq %mm0, %mm2 # accumulator += *c
102 movd %mm2, (%edi) # *c = low 32 bits
103 addl $4, %edi # c++
104 psrlq $32, %mm2 # high 32 bits become the next carry
105 decl %ecx
106 jnz 1b # repeat until every digit is consumed
107 2:
108 movd %mm2, (%edi) # c[a_len] = final carry (stored, not added)
109 emms # empty the MMX state before returning
110 popl %esi
111 popl %edi
112 leave
113 ret
114 nop
115
116 # ebp - 12: caller's ebx
117 # ebp - 8: caller's esi
118 # ebp - 4: caller's edi
119 # ebp + 0: caller's ebp
120 # ebp + 4: return address
121 # ebp + 8: a argument (pointer to the digits being multiplied)
122 # ebp + 12: a_len argument (number of 32-bit digits in a)
123 # ebp + 16: b argument (single 32-bit multiplier)
124 # ebp + 20: c argument (digits that are read and updated in place)
125 # registers:
126 # eax: scratch word while the carry is rippled through c
127 # ebx: carry left over after the multiply loop
128 # ecx: a_len
129 # esi: a ptr
130 # edi: c ptr
131 .globl s_mpv_mul_d_add_prop
132 .private_extern s_mpv_mul_d_add_prop
133 TYPE_FUNCTION(s_mpv_mul_d_add_prop)
134 s_mpv_mul_d_add_prop: # c[i] += a[i] * b for i < a_len, then add the final carry into c[a_len] and ripple it upward
135 push %ebp
136 mov %esp, %ebp
137 push %edi
138 push %esi
139 push %ebx # ebx is used for the leftover carry, so save it
140 psubq %mm2, %mm2 # carry = 0
141 mov 12(%ebp), %ecx # ecx = a_len
142 movd 16(%ebp), %mm1 # mm1 = b
143 mov 20(%ebp), %edi # edi = c
144 cmp $0, %ecx
145 je 2f # jmp if a_len == 0
146 mov 8(%ebp), %esi # esi = a
147 cld # direction flag clear so stosl below moves edi upward
148 1:
149 movd 0(%esi), %mm0 # mm0 = *a
150 movd 0(%edi), %mm3 # mm3 = *c, the word being accumulated into
151 add $4, %esi # a++
152 pmuludq %mm1, %mm0 # mm0 = b * *a (64-bit product)
153 paddq %mm0, %mm2 # product + incoming carry
154 paddq %mm3, %mm2 # add the old *c
155 movd %mm2, 0(%edi) # store the low 32 bits back to *c
156 add $4, %edi # c++
157 psrlq $32, %mm2 # high 32 bits become the next carry
158 dec %ecx # --a_len
159 jnz 1b # jmp if a_len != 0
160 2:
161 movd %mm2, %ebx # ebx = carry out of the multiply loop
162 cmp $0, %ebx # is carry zero?
163 jz 4f # nothing to propagate
164 mov 0(%edi), %eax # eax = c[a_len]
165 add %ebx, %eax # add the carry; CF is set if this word overflows
166 stosl # [es:edi] = eax; edi += 4; flags are not modified
167 jnc 4f # no overflow: propagation is finished
168 3: # NOTE(review): this ripple has no length bound; presumably callers guarantee c is long enough - confirm
169 mov 0(%edi), %eax # add in current word from *c
170 adc $0, %eax # CF is still set on every path here, so this adds 1
171 stosl # [es:edi] = eax; edi += 4;
172 jc 3b # keep rippling while the increment overflows
173 4:
174 emms # empty the MMX state before returning
175 pop %ebx
176 pop %esi
177 pop %edi
178 leave
179 ret
180 nop
181
182 # ebp - 12: caller's ebx
183 # ebp - 8: caller's esi
184 # ebp - 4: caller's edi
185 # ebp + 0: caller's ebp
186 # ebp + 4: return address
187 # ebp + 8: pa argument (pointer to the digits being squared)
188 # ebp + 12: a_len argument (number of 32-bit digits at pa)
189 # ebp + 16: ps argument (accumulator, two 32-bit words consumed per input digit)
190 # registers:
191 # eax: scratch word while the carry is rippled through ps
192 # ebx: carry left over after the squaring loop
193 # ecx: a_len
194 # esi: pa ptr
195 # edi: ps ptr
196 .globl s_mpv_sqr_add_prop
197 .private_extern s_mpv_sqr_add_prop
198 TYPE_FUNCTION(s_mpv_sqr_add_prop)
199 s_mpv_sqr_add_prop: # adds pa[i]^2 into the word pair ps[2i], ps[2i+1] for i < a_len, then ripples the final carry upward
200 push %ebp
201 mov %esp, %ebp
202 push %edi
203 push %esi
204 push %ebx # ebx is used for the leftover carry, so save it
205 psubq %mm2, %mm2 # carry = 0
206 mov 12(%ebp), %ecx # ecx = a_len
207 mov 16(%ebp), %edi # edi = ps
208 cmp $0, %ecx
209 je 2f # jmp if a_len == 0
210 mov 8(%ebp), %esi # esi = pa
211 cld # direction flag clear so stosl below moves edi upward
212 1:
213 movd 0(%esi), %mm0 # mm0 = *a
214 movd 0(%edi), %mm3 # mm3 = low word of the current ps pair
215 add $4, %esi # a++
216 pmuludq %mm0, %mm0 # mm0 = sqr(a), 64-bit result
217 paddq %mm0, %mm2 # square + incoming carry
218 paddq %mm3, %mm2 # add the low word
219 movd 4(%edi), %mm3 # mm3 = high word of the current ps pair
220 movd %mm2, 0(%edi) # store the low 32 bits of the sum
221 psrlq $32, %mm2 # move the upper half of the sum down
222 paddq %mm3, %mm2 # add the high word
223 movd %mm2, 4(%edi) # store the 32bit result into the high word
224 psrlq $32, %mm2 # save the carry.
225 add $8, %edi # advance ps by two words
226 dec %ecx # --a_len
227 jnz 1b # jmp if a_len != 0
228 2:
229 movd %mm2, %ebx # ebx = carry out of the squaring loop
230 cmp $0, %ebx # is carry zero?
231 jz 4f # nothing to propagate
232 mov 0(%edi), %eax # eax = next word of ps
233 add %ebx, %eax # add the carry; CF is set if this word overflows
234 stosl # [es:edi] = eax; edi += 4; flags are not modified
235 jnc 4f # no overflow: propagation is finished
236 3: # NOTE(review): this ripple has no length bound; presumably callers guarantee ps is long enough - confirm
237 mov 0(%edi), %eax # add in current word from *ps
238 adc $0, %eax # CF is still set on every path here, so this adds 1
239 stosl # [es:edi] = eax; edi += 4;
240 jc 3b # keep rippling while the increment overflows
241 4:
242 emms # empty the MMX state before returning
243 pop %ebx
244 pop %esi
245 pop %edi
246 leave
247 ret
248 nop
249
250 #
251 # Divide the 64-bit value (Nhi,Nlo) by a 32-bit divisor, which must be
252 # normalized so its high bit is 1. This code is from NSPR.
253 #
254 # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
255 # mp_digit *qp, mp_digit *rp)
256
257 # esp + 0: saved ebx (offsets are as seen after the push below)
258 # esp + 4: return address
259 # esp + 8: Nhi (upper 32 bits of the dividend)
260 # esp + 12: Nlo (lower 32 bits of the dividend)
261 # esp + 16: divisor
262 # esp + 20: qp (where the quotient is stored)
263 # esp + 24: rp (where the remainder is stored)
264 # register roles:
265 # eax: Nlo going in, quotient out of div, then zeroed as the return value
266 # ebx: divisor, then reused for the qp and rp pointers
267 # ecx: not used
268 # edx: Nhi going in, remainder out of div
269 # esi: not used
270 # edi: not used
271 # note: div raises a divide error when the quotient does not fit in 32 bits
272 .globl s_mpv_div_2dx1d
273 .private_extern s_mpv_div_2dx1d
274 TYPE_FUNCTION(s_mpv_div_2dx1d)
275 s_mpv_div_2dx1d:
276 pushl %ebx # ebx is restored before returning
277 movl 12(%esp), %eax # eax = Nlo
278 movl 8(%esp), %edx # edx = Nhi, so edx:eax is the 64-bit dividend
279 movl 16(%esp), %ebx # ebx = divisor
280 divl %ebx # eax = quotient, edx = remainder
281 movl 20(%esp), %ebx # ebx = qp
282 movl %eax, (%ebx) # *qp = quotient
283 movl 24(%esp), %ebx # ebx = rp
284 movl %edx, (%ebx) # *rp = remainder
285 xorl %eax, %eax # return value is always zero
286 popl %ebx
287 ret
288 nop
289
290 #ifndef DARWIN /* the note section below is emitted only for non-DARWIN builds */
291 # Empty .note.GNU-stack section: marks this object as not needing an executable stack
292 .section .note.GNU-stack, "", @progbits
293 .previous # switch back to the section that was active before
294 #endif /* !DARWIN */

mercurial