Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | # |
michael@0 | 2 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 5 | |
michael@0 | 6 | .data |
michael@0 | 7 | .align 4 |
michael@0 | 8 | # is_sse caches which implementation the entry points below may use: |
michael@0 | 9 | # -1 (any negative value) means not yet known: call _s_mpi_is_sse2 |
michael@0 | 10 | # and store its return value here. |
michael@0 | 11 | # 0 means to use x86 instructions |
michael@0 | 12 | # 1 (any positive value) means to use sse2 instructions |
michael@0 | 13 | .type is_sse,@object |
michael@0 | 14 | .size is_sse,4 |
michael@0 | 15 | is_sse: .long -1 |
michael@0 | 16 | |
michael@0 | 17 | # |
michael@0 | 18 | # GET var,reg loads a 32-bit variable into reg; PUT reg,var stores it. |
michael@0 | 19 | # The -fPIC variants (GOT-relative through %ebx) are commented out |
michael@0 | 20 | # below, so only the absolute-address variants are assembled. (Original |
michael@0 | 21 | # note: solaris uses mpi_i86pc.s and windows uses mpi_x86_asm.c.) |
michael@0 | 22 | # |
michael@0 | 23 | #.ifndef NO_PIC |
michael@0 | 24 | #.macro GET var,reg |
michael@0 | 25 | # movl \var@GOTOFF(%ebx),\reg |
michael@0 | 26 | #.endm |
michael@0 | 27 | #.macro PUT reg,var |
michael@0 | 28 | # movl \reg,\var@GOTOFF(%ebx) |
michael@0 | 29 | #.endm |
michael@0 | 30 | #.else |
michael@0 | 31 | .macro GET var,reg |
michael@0 | 32 | movl \var,\reg |
michael@0 | 33 | .endm |
michael@0 | 34 | .macro PUT reg,var |
michael@0 | 35 | movl \reg,\var |
michael@0 | 36 | .endm |
michael@0 | 37 | #.endif |
michael@0 | 38 | |
michael@0 | 39 | .text |
michael@0 | 40 | |
michael@0 | 41 | |
michael@0 | 42 | # _s_mpv_mul_d: c[i] = low 32 bits of (a[i] * b + carry) for i < a_len, |
michael@0 | 43 | # then c[a_len] = final carry (carry = high 32 bits of the previous sum). |
michael@0 | 44 | # Entry dispatches on is_sse: 0 -> x86 path, > 0 -> sse2 path, else |
michael@0 | 45 | # call _s_mpi_is_sse2, store its result in is_sse, then dispatch. |
michael@0 | 46 | # x86 path frame (sse2 path: edi at ebp - 4, esi at ebp - 8): |
michael@0 | 47 | # ebp - 40: caller's ebx |
michael@0 | 48 | # ebp - 36: caller's esi |
michael@0 | 49 | # ebp - 32: caller's edi |
michael@0 | 50 | # ebp - 28 .. ebp - 4: reserved by sub $28,%esp, never referenced |
michael@0 | 51 | # ebp + 0: caller's ebp |
michael@0 | 52 | # ebp + 4: return address |
michael@0 | 53 | # ebp + 8: a argument |
michael@0 | 54 | # ebp + 12: a_len argument |
michael@0 | 55 | # ebp + 16: b argument |
michael@0 | 56 | # ebp + 20: c argument |
michael@0 | 57 | # registers: |
michael@0 | 58 | # eax: a[i], then low word of the product (x86 path) |
michael@0 | 59 | # ebx: carry (x86 path); mm2: carry, mm1: b, mm0: product (sse2 path) |
michael@0 | 60 | # ecx: a_len (words remaining) |
michael@0 | 61 | # edx: b, then high word of the product (x86 path) |
michael@0 | 62 | # esi: a ptr |
michael@0 | 63 | # edi: c ptr |
michael@0 | 64 | .globl _s_mpv_mul_d |
michael@0 | 65 | .type _s_mpv_mul_d,@function |
michael@0 | 66 | _s_mpv_mul_d: |
michael@0 | 67 | GET is_sse,%eax |
michael@0 | 68 | cmp $0,%eax |
michael@0 | 69 | je _s_mpv_mul_d_x86 |
michael@0 | 70 | jg _s_mpv_mul_d_sse2 |
michael@0 | 71 | call _s_mpi_is_sse2 |
michael@0 | 72 | PUT %eax,is_sse |
michael@0 | 73 | cmp $0,%eax |
michael@0 | 74 | jg _s_mpv_mul_d_sse2 |
michael@0 | 75 | _s_mpv_mul_d_x86: |
michael@0 | 76 | push %ebp |
michael@0 | 77 | mov %esp,%ebp |
michael@0 | 78 | sub $28,%esp |
michael@0 | 79 | push %edi |
michael@0 | 80 | push %esi |
michael@0 | 81 | push %ebx |
michael@0 | 82 | movl $0,%ebx # carry = 0 |
michael@0 | 83 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 84 | mov 20(%ebp),%edi |
michael@0 | 85 | cmp $0,%ecx |
michael@0 | 86 | je 2f # a_len == 0: skip the loop, only the carry word (0) is stored |
michael@0 | 87 | mov 8(%ebp),%esi # esi = a |
michael@0 | 88 | cld |
michael@0 | 89 | 1: |
michael@0 | 90 | lodsl # eax = [ds:esi]; esi += 4 |
michael@0 | 91 | mov 16(%ebp),%edx # edx = b |
michael@0 | 92 | mull %edx # edx:eax = Phi:Plo = a_i * b |
michael@0 | 93 | |
michael@0 | 94 | add %ebx,%eax # add carry (%ebx) to edx:eax (adc below finishes the 64-bit add) |
michael@0 | 95 | adc $0,%edx |
michael@0 | 96 | mov %edx,%ebx # high half of the sum becomes next carry |
michael@0 | 97 | |
michael@0 | 98 | stosl # [es:edi] = eax; edi += 4; |
michael@0 | 99 | dec %ecx # --a_len |
michael@0 | 100 | jnz 1b # jmp if a_len != 0 |
michael@0 | 101 | 2: |
michael@0 | 102 | mov %ebx,0(%edi) # c[a_len] = final carry |
michael@0 | 103 | pop %ebx |
michael@0 | 104 | pop %esi |
michael@0 | 105 | pop %edi |
michael@0 | 106 | leave |
michael@0 | 107 | ret |
michael@0 | 108 | nop |
michael@0 | 109 | _s_mpv_mul_d_sse2: |
michael@0 | 110 | push %ebp |
michael@0 | 111 | mov %esp,%ebp |
michael@0 | 112 | push %edi |
michael@0 | 113 | push %esi |
michael@0 | 114 | psubq %mm2,%mm2 # mm2 = carry = 0 |
michael@0 | 115 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 116 | movd 16(%ebp),%mm1 # mm1 = b |
michael@0 | 117 | mov 20(%ebp),%edi |
michael@0 | 118 | cmp $0,%ecx |
michael@0 | 119 | je 6f # a_len == 0: skip the loop, only the carry word (0) is stored |
michael@0 | 120 | mov 8(%ebp),%esi # esi = a |
michael@0 | 121 | cld |
michael@0 | 122 | 5: |
michael@0 | 123 | movd 0(%esi),%mm0 # mm0 = *a++ (pointer is bumped by the add below) |
michael@0 | 124 | add $4,%esi |
michael@0 | 125 | pmuludq %mm1,%mm0 # mm0 = 64-bit product b * a_i |
michael@0 | 126 | paddq %mm0,%mm2 # mm2 = carry + product |
michael@0 | 127 | movd %mm2,0(%edi) # *c = low 32 bits of the sum |
michael@0 | 128 | add $4,%edi |
michael@0 | 129 | psrlq $32, %mm2 # carry = high 32 bits of the sum |
michael@0 | 130 | dec %ecx # --a_len |
michael@0 | 131 | jnz 5b # jmp if a_len != 0 |
michael@0 | 132 | 6: |
michael@0 | 133 | movd %mm2,0(%edi) # c[a_len] = final carry |
michael@0 | 134 | emms |
michael@0 | 135 | pop %esi |
michael@0 | 136 | pop %edi |
michael@0 | 137 | leave |
michael@0 | 138 | ret |
michael@0 | 139 | nop |
michael@0 | 140 | |
michael@0 | 141 | # _s_mpv_mul_d_add: c[i] = low 32 bits of (a[i] * b + carry + c[i]) for |
michael@0 | 142 | # i < a_len, then c[a_len] = final carry (stored, not added in). |
michael@0 | 143 | # Entry dispatches on is_sse: 0 -> x86 path, > 0 -> sse2 path, else |
michael@0 | 144 | # call _s_mpi_is_sse2, store its result in is_sse, then dispatch. |
michael@0 | 145 | # x86 path frame (sse2 path: edi at ebp - 4, esi at ebp - 8): |
michael@0 | 146 | # ebp - 40: caller's ebx |
michael@0 | 147 | # ebp - 36: caller's esi |
michael@0 | 148 | # ebp - 32: caller's edi |
michael@0 | 149 | # ebp - 28 .. ebp - 4: reserved by sub $28,%esp, never referenced |
michael@0 | 150 | # ebp + 0: caller's ebp |
michael@0 | 151 | # ebp + 4: return address |
michael@0 | 152 | # ebp + 8: a argument |
michael@0 | 153 | # ebp + 12: a_len argument |
michael@0 | 154 | # ebp + 16: b argument |
michael@0 | 155 | # ebp + 20: c argument |
michael@0 | 156 | # registers: |
michael@0 | 157 | # eax: a[i], then low word of the running sum (x86 path) |
michael@0 | 158 | # ebx: carry, briefly the word read from *c (x86 path); mm2: carry (sse2) |
michael@0 | 159 | # ecx: a_len (words remaining) |
michael@0 | 160 | # edx: b, then high word of the running sum (x86 path) |
michael@0 | 161 | # esi: a ptr |
michael@0 | 162 | # edi: c ptr |
michael@0 | 163 | .globl _s_mpv_mul_d_add |
michael@0 | 164 | .type _s_mpv_mul_d_add,@function |
michael@0 | 165 | _s_mpv_mul_d_add: |
michael@0 | 166 | GET is_sse,%eax |
michael@0 | 167 | cmp $0,%eax |
michael@0 | 168 | je _s_mpv_mul_d_add_x86 |
michael@0 | 169 | jg _s_mpv_mul_d_add_sse2 |
michael@0 | 170 | call _s_mpi_is_sse2 |
michael@0 | 171 | PUT %eax,is_sse |
michael@0 | 172 | cmp $0,%eax |
michael@0 | 173 | jg _s_mpv_mul_d_add_sse2 |
michael@0 | 174 | _s_mpv_mul_d_add_x86: |
michael@0 | 175 | push %ebp |
michael@0 | 176 | mov %esp,%ebp |
michael@0 | 177 | sub $28,%esp |
michael@0 | 178 | push %edi |
michael@0 | 179 | push %esi |
michael@0 | 180 | push %ebx |
michael@0 | 181 | movl $0,%ebx # carry = 0 |
michael@0 | 182 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 183 | mov 20(%ebp),%edi |
michael@0 | 184 | cmp $0,%ecx |
michael@0 | 185 | je 11f # a_len == 0: skip the loop, only the carry word (0) is stored |
michael@0 | 186 | mov 8(%ebp),%esi # esi = a |
michael@0 | 187 | cld |
michael@0 | 188 | 10: |
michael@0 | 189 | lodsl # eax = [ds:esi]; esi += 4 |
michael@0 | 190 | mov 16(%ebp),%edx # edx = b |
michael@0 | 191 | mull %edx # edx:eax = Phi:Plo = a_i * b |
michael@0 | 192 | |
michael@0 | 193 | add %ebx,%eax # add carry (%ebx) to edx:eax (adc below finishes the 64-bit add) |
michael@0 | 194 | adc $0,%edx |
michael@0 | 195 | mov 0(%edi),%ebx # ebx = current word from *c, added to edx:eax below |
michael@0 | 196 | add %ebx,%eax |
michael@0 | 197 | adc $0,%edx |
michael@0 | 198 | mov %edx,%ebx # high half of the sum becomes next carry |
michael@0 | 199 | |
michael@0 | 200 | stosl # [es:edi] = eax; edi += 4; |
michael@0 | 201 | dec %ecx # --a_len |
michael@0 | 202 | jnz 10b # jmp if a_len != 0 |
michael@0 | 203 | 11: |
michael@0 | 204 | mov %ebx,0(%edi) # c[a_len] = final carry |
michael@0 | 205 | pop %ebx |
michael@0 | 206 | pop %esi |
michael@0 | 207 | pop %edi |
michael@0 | 208 | leave |
michael@0 | 209 | ret |
michael@0 | 210 | nop |
michael@0 | 211 | _s_mpv_mul_d_add_sse2: |
michael@0 | 212 | push %ebp |
michael@0 | 213 | mov %esp,%ebp |
michael@0 | 214 | push %edi |
michael@0 | 215 | push %esi |
michael@0 | 216 | psubq %mm2,%mm2 # mm2 = carry = 0 |
michael@0 | 217 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 218 | movd 16(%ebp),%mm1 # mm1 = b |
michael@0 | 219 | mov 20(%ebp),%edi |
michael@0 | 220 | cmp $0,%ecx |
michael@0 | 221 | je 16f # a_len == 0: skip the loop, only the carry word (0) is stored |
michael@0 | 222 | mov 8(%ebp),%esi # esi = a |
michael@0 | 223 | cld |
michael@0 | 224 | 15: |
michael@0 | 225 | movd 0(%esi),%mm0 # mm0 = *a++ (pointer is bumped by the add below) |
michael@0 | 226 | add $4,%esi |
michael@0 | 227 | pmuludq %mm1,%mm0 # mm0 = 64-bit product b * a_i |
michael@0 | 228 | paddq %mm0,%mm2 # mm2 = carry + product |
michael@0 | 229 | movd 0(%edi),%mm0 |
michael@0 | 230 | paddq %mm0,%mm2 # add the current word from *c (loaded into mm0 above) |
michael@0 | 231 | movd %mm2,0(%edi) # *c = low 32 bits of the sum |
michael@0 | 232 | add $4,%edi |
michael@0 | 233 | psrlq $32, %mm2 # carry = high 32 bits of the sum |
michael@0 | 234 | dec %ecx # --a_len |
michael@0 | 235 | jnz 15b # jmp if a_len != 0 |
michael@0 | 236 | 16: |
michael@0 | 237 | movd %mm2,0(%edi) # c[a_len] = final carry |
michael@0 | 238 | emms |
michael@0 | 239 | pop %esi |
michael@0 | 240 | pop %edi |
michael@0 | 241 | leave |
michael@0 | 242 | ret |
michael@0 | 243 | nop |
michael@0 | 244 | |
michael@0 | 245 | # _s_mpv_mul_d_add_prop: c[i] = low 32 bits of (a[i] * b + carry + c[i]) |
michael@0 | 246 | # for i < a_len; a nonzero final carry is added into c[a_len] and rippled |
michael@0 | 247 | # upward until an add yields no carry-out (no bound on c is checked here). |
michael@0 | 248 | # dispatch: is_sse 0 -> x86, > 0 -> sse2, else _s_mpi_is_sse2 decides |
michael@0 | 249 | # saved edi/esi/ebx: ebp - 4/8/12 (sse2), ebp - 32/36/40 (x86) |
michael@0 | 250 | # ebp + 0: caller's ebp; ebp + 4: return address |
michael@0 | 251 | # ebp + 8: a argument |
michael@0 | 252 | # ebp + 12: a_len argument |
michael@0 | 253 | # ebp + 16: b argument |
michael@0 | 254 | # ebp + 20: c argument |
michael@0 | 255 | # registers: |
michael@0 | 256 | # eax: word being computed; edx: b, then high word of the sum (x86 path) |
michael@0 | 257 | # ebx: carry (sse2 path: mm2 inside the loop, ebx while rippling) |
michael@0 | 258 | # ecx: a_len (words remaining) |
michael@0 | 259 | # esi: a ptr; edi: c ptr |
michael@0 | 260 | .globl _s_mpv_mul_d_add_prop |
michael@0 | 261 | .type _s_mpv_mul_d_add_prop,@function |
michael@0 | 262 | _s_mpv_mul_d_add_prop: |
michael@0 | 263 | GET is_sse,%eax |
michael@0 | 264 | cmp $0,%eax |
michael@0 | 265 | je _s_mpv_mul_d_add_prop_x86 |
michael@0 | 266 | jg _s_mpv_mul_d_add_prop_sse2 |
michael@0 | 267 | call _s_mpi_is_sse2 |
michael@0 | 268 | PUT %eax,is_sse |
michael@0 | 269 | cmp $0,%eax |
michael@0 | 270 | jg _s_mpv_mul_d_add_prop_sse2 |
michael@0 | 271 | _s_mpv_mul_d_add_prop_x86: |
michael@0 | 272 | push %ebp |
michael@0 | 273 | mov %esp,%ebp |
michael@0 | 274 | sub $28,%esp |
michael@0 | 275 | push %edi |
michael@0 | 276 | push %esi |
michael@0 | 277 | push %ebx |
michael@0 | 278 | movl $0,%ebx # carry = 0 |
michael@0 | 279 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 280 | mov 20(%ebp),%edi |
michael@0 | 281 | cmp $0,%ecx |
michael@0 | 282 | je 21f # a_len == 0: skip the loop; carry is 0 so nothing is written |
michael@0 | 283 | cld |
michael@0 | 284 | mov 8(%ebp),%esi # esi = a |
michael@0 | 285 | 20: |
michael@0 | 286 | lodsl # eax = [ds:esi]; esi += 4 |
michael@0 | 287 | mov 16(%ebp),%edx # edx = b |
michael@0 | 288 | mull %edx # edx:eax = Phi:Plo = a_i * b |
michael@0 | 289 | |
michael@0 | 290 | add %ebx,%eax # add carry (%ebx) to edx:eax (adc below finishes the 64-bit add) |
michael@0 | 291 | adc $0,%edx |
michael@0 | 292 | mov 0(%edi),%ebx # ebx = current word from *c, added to edx:eax below |
michael@0 | 293 | add %ebx,%eax |
michael@0 | 294 | adc $0,%edx |
michael@0 | 295 | mov %edx,%ebx # high half of the sum becomes next carry |
michael@0 | 296 | |
michael@0 | 297 | stosl # [es:edi] = eax; edi += 4; |
michael@0 | 298 | dec %ecx # --a_len |
michael@0 | 299 | jnz 20b # jmp if a_len != 0 |
michael@0 | 300 | 21: |
michael@0 | 301 | cmp $0,%ebx # is carry zero? then there is nothing to propagate |
michael@0 | 302 | jz 23f |
michael@0 | 303 | mov 0(%edi),%eax # eax = c[a_len], the word that receives the carry |
michael@0 | 304 | add %ebx,%eax |
michael@0 | 305 | stosl # [es:edi] = eax; edi += 4; CF from the add is what jnc tests |
michael@0 | 306 | jnc 23f |
michael@0 | 307 | 22: |
michael@0 | 308 | mov 0(%edi),%eax # next higher word of c; the adc below adds the carry-out |
michael@0 | 309 | adc $0,%eax |
michael@0 | 310 | stosl # [es:edi] = eax; edi += 4; CF from the adc is what jc tests |
michael@0 | 311 | jc 22b |
michael@0 | 312 | 23: |
michael@0 | 313 | pop %ebx |
michael@0 | 314 | pop %esi |
michael@0 | 315 | pop %edi |
michael@0 | 316 | leave |
michael@0 | 317 | ret |
michael@0 | 318 | nop |
michael@0 | 319 | _s_mpv_mul_d_add_prop_sse2: |
michael@0 | 320 | push %ebp |
michael@0 | 321 | mov %esp,%ebp |
michael@0 | 322 | push %edi |
michael@0 | 323 | push %esi |
michael@0 | 324 | push %ebx |
michael@0 | 325 | psubq %mm2,%mm2 # mm2 = carry = 0 |
michael@0 | 326 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 327 | movd 16(%ebp),%mm1 # mm1 = b |
michael@0 | 328 | mov 20(%ebp),%edi |
michael@0 | 329 | cmp $0,%ecx |
michael@0 | 330 | je 26f # a_len == 0: skip the loop; carry is 0 so nothing is written |
michael@0 | 331 | mov 8(%ebp),%esi # esi = a |
michael@0 | 332 | cld |
michael@0 | 333 | 25: |
michael@0 | 334 | movd 0(%esi),%mm0 # mm0 = *a++ (pointer is bumped by the add below) |
michael@0 | 335 | movd 0(%edi),%mm3 # mm3 = current word from *c |
michael@0 | 336 | add $4,%esi |
michael@0 | 337 | pmuludq %mm1,%mm0 # mm0 = 64-bit product b * a_i |
michael@0 | 338 | paddq %mm0,%mm2 # mm2 = carry + product |
michael@0 | 339 | paddq %mm3,%mm2 # add *c++ (pointer is bumped by the add below) |
michael@0 | 340 | movd %mm2,0(%edi) # *c = low 32 bits of the sum |
michael@0 | 341 | add $4,%edi |
michael@0 | 342 | psrlq $32, %mm2 # carry = high 32 bits of the sum |
michael@0 | 343 | dec %ecx # --a_len |
michael@0 | 344 | jnz 25b # jmp if a_len != 0 |
michael@0 | 345 | 26: |
michael@0 | 346 | movd %mm2,%ebx |
michael@0 | 347 | cmp $0,%ebx # is carry zero? then there is nothing to propagate |
michael@0 | 348 | jz 28f |
michael@0 | 349 | mov 0(%edi),%eax |
michael@0 | 350 | add %ebx, %eax |
michael@0 | 351 | stosl |
michael@0 | 352 | jnc 28f |
michael@0 | 353 | 27: |
michael@0 | 354 | mov 0(%edi),%eax # next higher word of c; the adc below adds the carry-out |
michael@0 | 355 | adc $0,%eax |
michael@0 | 356 | stosl # [es:edi] = eax; edi += 4; CF from the adc is what jc tests |
michael@0 | 357 | jc 27b |
michael@0 | 358 | 28: |
michael@0 | 359 | emms |
michael@0 | 360 | pop %ebx |
michael@0 | 361 | pop %esi |
michael@0 | 362 | pop %edi |
michael@0 | 363 | leave |
michael@0 | 364 | ret |
michael@0 | 365 | nop |
michael@0 | 366 | |
michael@0 | 367 | |
michael@0 | 368 | # _s_mpv_sqr_add_prop: for i < a_len, add pa[i] * pa[i] plus the running |
michael@0 | 369 | # carry into the 64-bit value held in ps[2i] (low), ps[2i+1] (high); a |
michael@0 | 370 | # nonzero final carry is added into the next word of ps and rippled up |
michael@0 | 371 | # until an add yields no carry-out (no bound on ps is checked here). |
michael@0 | 372 | # dispatch: is_sse 0 -> x86, > 0 -> sse2, else _s_mpi_is_sse2 decides |
michael@0 | 373 | # saved edi/esi/ebx: ebp - 16/20/24 (x86), ebp - 4/8/12 (sse2) |
michael@0 | 374 | # ebp - 12 .. ebp - 4: reserved by sub $12,%esp (x86), never referenced |
michael@0 | 375 | # ebp + 0: caller's ebp |
michael@0 | 376 | # ebp + 4: return address |
michael@0 | 377 | # ebp + 8: pa argument |
michael@0 | 378 | # ebp + 12: a_len argument |
michael@0 | 379 | # ebp + 16: ps argument |
michael@0 | 380 | # registers: |
michael@0 | 381 | # eax: pa[i], then the word being stored |
michael@0 | 382 | # ebx: carry (the x86 path also uses it to fetch words of ps) |
michael@0 | 383 | # ecx: a_len (words remaining) |
michael@0 | 384 | # edx: high word of the square (x86 path) |
michael@0 | 385 | # esi: pa ptr; edi: ps ptr |
michael@0 | 386 | |
michael@0 | 387 | .globl _s_mpv_sqr_add_prop |
michael@0 | 388 | .type _s_mpv_sqr_add_prop,@function |
michael@0 | 389 | _s_mpv_sqr_add_prop: |
michael@0 | 390 | GET is_sse,%eax |
michael@0 | 391 | cmp $0,%eax |
michael@0 | 392 | je _s_mpv_sqr_add_prop_x86 |
michael@0 | 393 | jg _s_mpv_sqr_add_prop_sse2 |
michael@0 | 394 | call _s_mpi_is_sse2 |
michael@0 | 395 | PUT %eax,is_sse |
michael@0 | 396 | cmp $0,%eax |
michael@0 | 397 | jg _s_mpv_sqr_add_prop_sse2 |
michael@0 | 398 | _s_mpv_sqr_add_prop_x86: |
michael@0 | 399 | push %ebp |
michael@0 | 400 | mov %esp,%ebp |
michael@0 | 401 | sub $12,%esp |
michael@0 | 402 | push %edi |
michael@0 | 403 | push %esi |
michael@0 | 404 | push %ebx |
michael@0 | 405 | movl $0,%ebx # carry = 0 |
michael@0 | 406 | mov 12(%ebp),%ecx # a_len |
michael@0 | 407 | mov 16(%ebp),%edi # edi = ps |
michael@0 | 408 | cmp $0,%ecx |
michael@0 | 409 | je 31f # jump if a_len == 0; carry is 0 so nothing is written |
michael@0 | 410 | cld |
michael@0 | 411 | mov 8(%ebp),%esi # esi = pa |
michael@0 | 412 | 30: |
michael@0 | 413 | lodsl # %eax = [ds:esi]; esi += 4; |
michael@0 | 414 | mull %eax |
michael@0 | 415 | |
michael@0 | 416 | add %ebx,%eax # add "carry" to the square in edx:eax (adc below finishes it) |
michael@0 | 417 | adc $0,%edx |
michael@0 | 418 | mov 0(%edi),%ebx |
michael@0 | 419 | add %ebx,%eax # add low word from result; CF feeds the adc on %edx below |
michael@0 | 420 | mov 4(%edi),%ebx |
michael@0 | 421 | stosl # [es:edi] = %eax; edi += 4; CF from the add is still needed |
michael@0 | 422 | adc %ebx,%edx # add high word from result; the mov $0 below (not xor) keeps CF |
michael@0 | 423 | movl $0,%ebx |
michael@0 | 424 | mov %edx,%eax |
michael@0 | 425 | adc $0,%ebx |
michael@0 | 426 | stosl # [es:edi] = %eax; edi += 4; |
michael@0 | 427 | dec %ecx # --a_len |
michael@0 | 428 | jnz 30b # jmp if a_len != 0 |
michael@0 | 429 | 31: |
michael@0 | 430 | cmp $0,%ebx # is carry zero? then there is nothing to propagate |
michael@0 | 431 | jz 34f |
michael@0 | 432 | mov 0(%edi),%eax # eax = next word of ps, the word that receives the carry |
michael@0 | 433 | add %ebx,%eax |
michael@0 | 434 | stosl # [es:edi] = eax; edi += 4; CF from the add is what jnc tests |
michael@0 | 435 | jnc 34f |
michael@0 | 436 | 32: |
michael@0 | 437 | mov 0(%edi),%eax # next higher word of ps; the adc below adds the carry-out |
michael@0 | 438 | adc $0,%eax |
michael@0 | 439 | stosl # [es:edi] = eax; edi += 4; CF from the adc is what jc tests |
michael@0 | 440 | jc 32b |
michael@0 | 441 | 34: |
michael@0 | 442 | pop %ebx |
michael@0 | 443 | pop %esi |
michael@0 | 444 | pop %edi |
michael@0 | 445 | leave |
michael@0 | 446 | ret |
michael@0 | 447 | nop |
michael@0 | 448 | _s_mpv_sqr_add_prop_sse2: |
michael@0 | 449 | push %ebp |
michael@0 | 450 | mov %esp,%ebp |
michael@0 | 451 | push %edi |
michael@0 | 452 | push %esi |
michael@0 | 453 | push %ebx |
michael@0 | 454 | psubq %mm2,%mm2 # mm2 = carry = 0 |
michael@0 | 455 | mov 12(%ebp),%ecx # ecx = a_len |
michael@0 | 456 | mov 16(%ebp),%edi |
michael@0 | 457 | cmp $0,%ecx |
michael@0 | 458 | je 36f # a_len == 0: skip the loop; carry is 0 so nothing is written |
michael@0 | 459 | mov 8(%ebp),%esi # esi = a |
michael@0 | 460 | cld |
michael@0 | 461 | 35: |
michael@0 | 462 | movd 0(%esi),%mm0 # mm0 = *a (pointer is bumped by the add below) |
michael@0 | 463 | movd 0(%edi),%mm3 # mm3 = low word of the sum at ps |
michael@0 | 464 | add $4,%esi |
michael@0 | 465 | pmuludq %mm0,%mm0 # mm0 = sqr(a), a 64-bit product |
michael@0 | 466 | paddq %mm0,%mm2 # mm2 = carry + square |
michael@0 | 467 | paddq %mm3,%mm2 # add the low word |
michael@0 | 468 | movd 4(%edi),%mm3 |
michael@0 | 469 | movd %mm2,0(%edi) # store the low 32-bit word of the result |
michael@0 | 470 | psrlq $32, %mm2 |
michael@0 | 471 | paddq %mm3,%mm2 # add the high word (loaded into mm3 above) |
michael@0 | 472 | movd %mm2,4(%edi) # store the high 32-bit word of the result |
michael@0 | 473 | psrlq $32, %mm2 # save the carry. |
michael@0 | 474 | add $8,%edi |
michael@0 | 475 | dec %ecx # --a_len |
michael@0 | 476 | jnz 35b # jmp if a_len != 0 |
michael@0 | 477 | 36: |
michael@0 | 478 | movd %mm2,%ebx |
michael@0 | 479 | cmp $0,%ebx # is carry zero? then there is nothing to propagate |
michael@0 | 480 | jz 38f |
michael@0 | 481 | mov 0(%edi),%eax |
michael@0 | 482 | add %ebx, %eax |
michael@0 | 483 | stosl |
michael@0 | 484 | jnc 38f |
michael@0 | 485 | 37: |
michael@0 | 486 | mov 0(%edi),%eax # next higher word of ps; the adc below adds the carry-out |
michael@0 | 487 | adc $0,%eax |
michael@0 | 488 | stosl # [es:edi] = eax; edi += 4; CF from the adc is what jc tests |
michael@0 | 489 | jc 37b |
michael@0 | 490 | 38: |
michael@0 | 491 | emms |
michael@0 | 492 | pop %ebx |
michael@0 | 493 | pop %esi |
michael@0 | 494 | pop %edi |
michael@0 | 495 | leave |
michael@0 | 496 | ret |
michael@0 | 497 | nop |
michael@0 | 498 | |
michael@0 | 499 | # |
michael@0 | 500 | # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized |
michael@0 | 501 | # so its high bit is 1. This code is from NSPR. |
michael@0 | 502 | # Stores the quotient at *qp and the remainder at *rp; always returns 0. |
michael@0 | 503 | # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, |
michael@0 | 504 | # mp_digit *qp, mp_digit *rp) |
michael@0 | 505 | |
michael@0 | 506 | # esp + 0: Caller's ebx (offsets are those in effect after the push %ebx) |
michael@0 | 507 | # esp + 4: return address |
michael@0 | 508 | # esp + 8: Nhi argument |
michael@0 | 509 | # esp + 12: Nlo argument |
michael@0 | 510 | # esp + 16: divisor argument |
michael@0 | 511 | # esp + 20: qp argument |
michael@0 | 512 | # esp + 24: rp argument |
michael@0 | 513 | # registers: |
michael@0 | 514 | # eax: Nlo, then the quotient; zeroed as the return value |
michael@0 | 515 | # ebx: divisor, then qp, then rp (caller's value is saved and restored) |
michael@0 | 516 | # ecx: not used |
michael@0 | 517 | # edx: Nhi, then the remainder |
michael@0 | 518 | # esi: not used |
michael@0 | 519 | # edi: not used |
michael@0 | 520 | # NOTE(review): nothing here checks divisor != 0 or Nhi < divisor before div |
michael@0 | 521 | |
michael@0 | 522 | .globl _s_mpv_div_2dx1d |
michael@0 | 523 | .type _s_mpv_div_2dx1d,@function |
michael@0 | 524 | _s_mpv_div_2dx1d: |
michael@0 | 525 | push %ebx |
michael@0 | 526 | mov 8(%esp),%edx |
michael@0 | 527 | mov 12(%esp),%eax |
michael@0 | 528 | mov 16(%esp),%ebx |
michael@0 | 529 | div %ebx |
michael@0 | 530 | mov 20(%esp),%ebx |
michael@0 | 531 | mov %eax,0(%ebx) |
michael@0 | 532 | mov 24(%esp),%ebx |
michael@0 | 533 | mov %edx,0(%ebx) |
michael@0 | 534 | xor %eax,%eax # return zero |
michael@0 | 535 | pop %ebx |
michael@0 | 536 | ret |
michael@0 | 537 | nop |
michael@0 | 538 |