security/nss/lib/freebl/mpi/mpi_x86_os2.s

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 #
michael@0 2 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 5
michael@0 6 .data
michael@0 7 .align 4
michael@0 8 #
michael@0 9 # -1 means to call _s_mpi_is_sse2 to determine if we support sse2
michael@0 10 # instructions; the result is then stored back into is_sse.
michael@0 11 # 0 means to use x86 instructions
michael@0 12 # 1 means to use sse2 instructions
michael@0 13 .type is_sse,@object
michael@0 14 .size is_sse,4
michael@0 15 is_sse: .long -1 # starts out as "not determined yet"
michael@0 16
michael@0 17 #
michael@0 18 # Access helpers for a 32-bit variable such as is_sse:
michael@0 19 # GET var,reg loads var into reg; PUT reg,var stores reg into var.
michael@0 20 # The PIC (GOT-relative, via %ebx) variants are kept below but are
michael@0 21 # commented out; only the non-PIC plain movl variants are active.
michael@0 22 #
michael@0 23 #.ifndef NO_PIC
michael@0 24 #.macro GET var,reg
michael@0 25 # movl \var@GOTOFF(%ebx),\reg
michael@0 26 #.endm
michael@0 27 #.macro PUT reg,var
michael@0 28 # movl \reg,\var@GOTOFF(%ebx)
michael@0 29 #.endm
michael@0 30 #.else
michael@0 31 .macro GET var,reg
michael@0 32 movl \var,\reg
michael@0 33 .endm
michael@0 34 .macro PUT reg,var
michael@0 35 movl \reg,\var
michael@0 36 .endm
michael@0 37 #.endif
michael@0 38
michael@0 39 .text
michael@0 40 # _s_mpv_mul_d(a, a_len, b, c): for each of the a_len words of a, store
michael@0 41 # the low word of a[i] * b + carry to c[i]; the last carry goes to c[a_len].
michael@0 42 # ebp - 36: caller's esi (x86 path; caller's ebx is pushed below it, at ebp - 40)
michael@0 43 # ebp - 32: caller's edi (x86 path)
michael@0 44 # ebp - 28: unused (ebp - 28 .. ebp - 4 are reserved by sub $28 in the x86 path)
michael@0 45 # ebp - 24: unused
michael@0 46 # ebp - 20: unused
michael@0 47 # ebp - 16: unused
michael@0 48 # ebp - 12: unused
michael@0 49 # ebp - 8: unused
michael@0 50 # ebp - 4: unused
michael@0 51 # ebp + 0: caller's ebp
michael@0 52 # ebp + 4: return address
michael@0 53 # ebp + 8: a argument
michael@0 54 # ebp + 12: a_len argument
michael@0 55 # ebp + 16: b argument
michael@0 56 # ebp + 20: c argument
michael@0 57 # registers:
michael@0 58 # eax: current word of a, then low half of the product
michael@0 59 # ebx: carry
michael@0 60 # ecx: a_len (words still to process)
michael@0 61 # edx: b, then high half of the product
michael@0 62 # esi: a ptr
michael@0 63 # edi: c ptr
michael@0 64 .globl _s_mpv_mul_d
michael@0 65 .type _s_mpv_mul_d,@function
michael@0 66 _s_mpv_mul_d:
michael@0 67 GET is_sse,%eax # eax = stored sse2 selection state
michael@0 68 cmp $0,%eax
michael@0 69 je _s_mpv_mul_d_x86 # 0: use the plain x86 path
michael@0 70 jg _s_mpv_mul_d_sse2 # positive: use the sse2 path
michael@0 71 call _s_mpi_is_sse2 # negative: not determined yet, ask now
michael@0 72 PUT %eax,is_sse # remember the answer for later calls
michael@0 73 cmp $0,%eax
michael@0 74 jg _s_mpv_mul_d_sse2 # otherwise fall through to the x86 path
michael@0 75 _s_mpv_mul_d_x86:
michael@0 76 push %ebp
michael@0 77 mov %esp,%ebp
michael@0 78 sub $28,%esp # reserve 28 bytes; this path never references them
michael@0 79 push %edi # save the registers used below
michael@0 80 push %esi
michael@0 81 push %ebx
michael@0 82 movl $0,%ebx # carry = 0
michael@0 83 mov 12(%ebp),%ecx # ecx = a_len
michael@0 84 mov 20(%ebp),%edi # edi = c
michael@0 85 cmp $0,%ecx
michael@0 86 je 2f # jmp if a_len == 0
michael@0 87 mov 8(%ebp),%esi # esi = a
michael@0 88 cld # lodsl/stosl below must step forward
michael@0 89 1:
michael@0 90 lodsl # eax = [ds:esi]; esi += 4
michael@0 91 mov 16(%ebp),%edx # edx = b
michael@0 92 mull %edx # edx:eax = Phi:Plo = a_i * b
michael@0 93
michael@0 94 add %ebx,%eax # add carry (%ebx) to edx:eax
michael@0 95 adc $0,%edx
michael@0 96 mov %edx,%ebx # high half of product becomes next carry
michael@0 97
michael@0 98 stosl # [es:edi] = eax; edi += 4;
michael@0 99 dec %ecx # --a_len
michael@0 100 jnz 1b # jmp if a_len != 0
michael@0 101 2:
michael@0 102 mov %ebx,0(%edi) # *c = carry
michael@0 103 pop %ebx # restore in reverse order of the pushes
michael@0 104 pop %esi
michael@0 105 pop %edi
michael@0 106 leave
michael@0 107 ret
michael@0 108 nop
michael@0 109 _s_mpv_mul_d_sse2: # same result as the x86 path; the running 64-bit sum lives in mm2
michael@0 110 push %ebp
michael@0 111 mov %esp,%ebp
michael@0 112 push %edi # caller's edi, at ebp - 4 in this path
michael@0 113 push %esi # caller's esi, at ebp - 8 in this path
michael@0 114 psubq %mm2,%mm2 # carry = 0
michael@0 115 mov 12(%ebp),%ecx # ecx = a_len
michael@0 116 movd 16(%ebp),%mm1 # mm1 = b
michael@0 117 mov 20(%ebp),%edi # edi = c
michael@0 118 cmp $0,%ecx
michael@0 119 je 6f # jmp if a_len == 0
michael@0 120 mov 8(%ebp),%esi # esi = a
michael@0 121 cld # note: no string instruction follows in this path
michael@0 122 5:
michael@0 123 movd 0(%esi),%mm0 # mm0 = *a
michael@0 124 add $4,%esi # a++
michael@0 125 pmuludq %mm1,%mm0 # mm0 = b * *a (64-bit product)
michael@0 126 paddq %mm0,%mm2 # add the carry
michael@0 127 movd %mm2,0(%edi) # store the 32bit result
michael@0 128 add $4,%edi # c++
michael@0 129 psrlq $32, %mm2 # save the carry (upper 32 bits of the sum)
michael@0 130 dec %ecx # --a_len
michael@0 131 jnz 5b # jmp if a_len != 0
michael@0 132 6:
michael@0 133 movd %mm2,0(%edi) # *c = carry
michael@0 134 emms # reset MMX state before returning
michael@0 135 pop %esi
michael@0 136 pop %edi
michael@0 137 leave
michael@0 138 ret
michael@0 139 nop
michael@0 140 # _s_mpv_mul_d_add(a, a_len, b, c): c[i] = low word of a[i] * b + c[i] + carry for each word; the last carry is stored to c[a_len].
michael@0 141 # ebp - 36: caller's esi (x86 path; caller's ebx is pushed below it, at ebp - 40)
michael@0 142 # ebp - 32: caller's edi (x86 path)
michael@0 143 # ebp - 28: unused (ebp - 28 .. ebp - 4 are reserved by sub $28 in the x86 path)
michael@0 144 # ebp - 24: unused
michael@0 145 # ebp - 20: unused
michael@0 146 # ebp - 16: unused
michael@0 147 # ebp - 12: unused
michael@0 148 # ebp - 8: unused
michael@0 149 # ebp - 4: unused
michael@0 150 # ebp + 0: caller's ebp
michael@0 151 # ebp + 4: return address
michael@0 152 # ebp + 8: a argument
michael@0 153 # ebp + 12: a_len argument
michael@0 154 # ebp + 16: b argument
michael@0 155 # ebp + 20: c argument
michael@0 156 # registers:
michael@0 157 # eax: current word of a, then low half of the running sum
michael@0 158 # ebx: carry (also used briefly to hold the current word of c)
michael@0 159 # ecx: a_len (words still to process)
michael@0 160 # edx: b, then high half of the running sum
michael@0 161 # esi: a ptr
michael@0 162 # edi: c ptr
michael@0 163 .globl _s_mpv_mul_d_add
michael@0 164 .type _s_mpv_mul_d_add,@function
michael@0 165 _s_mpv_mul_d_add:
michael@0 166 GET is_sse,%eax # eax = stored sse2 selection state
michael@0 167 cmp $0,%eax
michael@0 168 je _s_mpv_mul_d_add_x86 # 0: use the plain x86 path
michael@0 169 jg _s_mpv_mul_d_add_sse2 # positive: use the sse2 path
michael@0 170 call _s_mpi_is_sse2 # negative: not determined yet, ask now
michael@0 171 PUT %eax,is_sse # remember the answer for later calls
michael@0 172 cmp $0,%eax
michael@0 173 jg _s_mpv_mul_d_add_sse2 # otherwise fall through to the x86 path
michael@0 174 _s_mpv_mul_d_add_x86:
michael@0 175 push %ebp
michael@0 176 mov %esp,%ebp
michael@0 177 sub $28,%esp # reserve 28 bytes; this path never references them
michael@0 178 push %edi # save the registers used below
michael@0 179 push %esi
michael@0 180 push %ebx
michael@0 181 movl $0,%ebx # carry = 0
michael@0 182 mov 12(%ebp),%ecx # ecx = a_len
michael@0 183 mov 20(%ebp),%edi # edi = c
michael@0 184 cmp $0,%ecx
michael@0 185 je 11f # jmp if a_len == 0
michael@0 186 mov 8(%ebp),%esi # esi = a
michael@0 187 cld # lodsl/stosl below must step forward
michael@0 188 10:
michael@0 189 lodsl # eax = [ds:esi]; esi += 4
michael@0 190 mov 16(%ebp),%edx # edx = b
michael@0 191 mull %edx # edx:eax = Phi:Plo = a_i * b
michael@0 192
michael@0 193 add %ebx,%eax # add carry (%ebx) to edx:eax
michael@0 194 adc $0,%edx
michael@0 195 mov 0(%edi),%ebx # add in current word from *c
michael@0 196 add %ebx,%eax
michael@0 197 adc $0,%edx
michael@0 198 mov %edx,%ebx # high half of product becomes next carry
michael@0 199
michael@0 200 stosl # [es:edi] = eax; edi += 4;
michael@0 201 dec %ecx # --a_len
michael@0 202 jnz 10b # jmp if a_len != 0
michael@0 203 11:
michael@0 204 mov %ebx,0(%edi) # *c = carry (stored, not added)
michael@0 205 pop %ebx # restore in reverse order of the pushes
michael@0 206 pop %esi
michael@0 207 pop %edi
michael@0 208 leave
michael@0 209 ret
michael@0 210 nop
michael@0 211 _s_mpv_mul_d_add_sse2: # same result as the x86 path; the running 64-bit sum lives in mm2
michael@0 212 push %ebp
michael@0 213 mov %esp,%ebp
michael@0 214 push %edi # caller's edi, at ebp - 4 in this path
michael@0 215 push %esi # caller's esi, at ebp - 8 in this path
michael@0 216 psubq %mm2,%mm2 # carry = 0
michael@0 217 mov 12(%ebp),%ecx # ecx = a_len
michael@0 218 movd 16(%ebp),%mm1 # mm1 = b
michael@0 219 mov 20(%ebp),%edi # edi = c
michael@0 220 cmp $0,%ecx
michael@0 221 je 16f # jmp if a_len == 0
michael@0 222 mov 8(%ebp),%esi # esi = a
michael@0 223 cld # note: no string instruction follows in this path
michael@0 224 15:
michael@0 225 movd 0(%esi),%mm0 # mm0 = *a
michael@0 226 add $4,%esi # a++
michael@0 227 pmuludq %mm1,%mm0 # mm0 = b * *a (64-bit product)
michael@0 228 paddq %mm0,%mm2 # add the carry
michael@0 229 movd 0(%edi),%mm0 # mm0 = current word from *c
michael@0 230 paddq %mm0,%mm2 # add the current word from *c
michael@0 231 movd %mm2,0(%edi) # store the 32bit result
michael@0 232 add $4,%edi # c++
michael@0 233 psrlq $32, %mm2 # save the carry (upper 32 bits of the sum)
michael@0 234 dec %ecx # --a_len
michael@0 235 jnz 15b # jmp if a_len != 0
michael@0 236 16:
michael@0 237 movd %mm2,0(%edi) # *c = carry (stored, not added)
michael@0 238 emms # reset MMX state before returning
michael@0 239 pop %esi
michael@0 240 pop %edi
michael@0 241 leave
michael@0 242 ret
michael@0 243 nop
michael@0 244 # _s_mpv_mul_d_add_prop(a, a_len, b, c): c[i] += a[i] * b with carry for each word, then the last carry is added into c[a_len] and propagated upward while it overflows.
michael@0 245 # ebp - 8: caller's esi (sse2 path; the x86 path saves it at ebp - 36)
michael@0 246 # ebp - 4: caller's edi (sse2 path; the x86 path saves it at ebp - 32)
michael@0 247 # ebp + 0: caller's ebp
michael@0 248 # ebp + 4: return address
michael@0 249 # ebp + 8: a argument
michael@0 250 # ebp + 12: a_len argument
michael@0 251 # ebp + 16: b argument
michael@0 252 # ebp + 20: c argument
michael@0 253 # registers:
michael@0 254 # eax: current word of a, then low half of the running sum
michael@0 255 # ebx: carry (also used briefly to hold the current word of c)
michael@0 256 # ecx: a_len (words still to process)
michael@0 257 # edx: b, then high half of the running sum
michael@0 258 # esi: a ptr
michael@0 259 # edi: c ptr
michael@0 260 .globl _s_mpv_mul_d_add_prop
michael@0 261 .type _s_mpv_mul_d_add_prop,@function
michael@0 262 _s_mpv_mul_d_add_prop:
michael@0 263 GET is_sse,%eax # eax = stored sse2 selection state
michael@0 264 cmp $0,%eax
michael@0 265 je _s_mpv_mul_d_add_prop_x86 # 0: use the plain x86 path
michael@0 266 jg _s_mpv_mul_d_add_prop_sse2 # positive: use the sse2 path
michael@0 267 call _s_mpi_is_sse2 # negative: not determined yet, ask now
michael@0 268 PUT %eax,is_sse # remember the answer for later calls
michael@0 269 cmp $0,%eax
michael@0 270 jg _s_mpv_mul_d_add_prop_sse2 # otherwise fall through to the x86 path
michael@0 271 _s_mpv_mul_d_add_prop_x86:
michael@0 272 push %ebp
michael@0 273 mov %esp,%ebp
michael@0 274 sub $28,%esp # reserve 28 bytes; this path never references them
michael@0 275 push %edi # save the registers used below
michael@0 276 push %esi
michael@0 277 push %ebx
michael@0 278 movl $0,%ebx # carry = 0
michael@0 279 mov 12(%ebp),%ecx # ecx = a_len
michael@0 280 mov 20(%ebp),%edi # edi = c
michael@0 281 cmp $0,%ecx
michael@0 282 je 21f # jmp if a_len == 0
michael@0 283 cld # lodsl/stosl below must step forward
michael@0 284 mov 8(%ebp),%esi # esi = a
michael@0 285 20:
michael@0 286 lodsl # eax = [ds:esi]; esi += 4
michael@0 287 mov 16(%ebp),%edx # edx = b
michael@0 288 mull %edx # edx:eax = Phi:Plo = a_i * b
michael@0 289
michael@0 290 add %ebx,%eax # add carry (%ebx) to edx:eax
michael@0 291 adc $0,%edx
michael@0 292 mov 0(%edi),%ebx # add in current word from *c
michael@0 293 add %ebx,%eax
michael@0 294 adc $0,%edx
michael@0 295 mov %edx,%ebx # high half of product becomes next carry
michael@0 296
michael@0 297 stosl # [es:edi] = eax; edi += 4;
michael@0 298 dec %ecx # --a_len
michael@0 299 jnz 20b # jmp if a_len != 0
michael@0 300 21:
michael@0 301 cmp $0,%ebx # is carry zero?
michael@0 302 jz 23f # nothing left to propagate
michael@0 303 mov 0(%edi),%eax # add in current word from *c
michael@0 304 add %ebx,%eax
michael@0 305 stosl # [es:edi] = eax; edi += 4; CF from the add is untouched
michael@0 306 jnc 23f # done unless that add overflowed
michael@0 307 22:
michael@0 308 mov 0(%edi),%eax # add in current word from *c
michael@0 309 adc $0,%eax # add the carry out of the previous word
michael@0 310 stosl # [es:edi] = eax; edi += 4;
michael@0 311 jc 22b # keep propagating while a word overflows
michael@0 312 23:
michael@0 313 pop %ebx # restore in reverse order of the pushes
michael@0 314 pop %esi
michael@0 315 pop %edi
michael@0 316 leave
michael@0 317 ret
michael@0 318 nop
michael@0 319 _s_mpv_mul_d_add_prop_sse2: # same result as the x86 path; the running 64-bit sum lives in mm2
michael@0 320 push %ebp
michael@0 321 mov %esp,%ebp
michael@0 322 push %edi # caller's edi, at ebp - 4
michael@0 323 push %esi # caller's esi, at ebp - 8
michael@0 324 push %ebx # caller's ebx, at ebp - 12
michael@0 325 psubq %mm2,%mm2 # carry = 0
michael@0 326 mov 12(%ebp),%ecx # ecx = a_len
michael@0 327 movd 16(%ebp),%mm1 # mm1 = b
michael@0 328 mov 20(%ebp),%edi # edi = c
michael@0 329 cmp $0,%ecx
michael@0 330 je 26f # jmp if a_len == 0
michael@0 331 mov 8(%ebp),%esi # esi = a
michael@0 332 cld # stosl in the carry propagation below must step forward
michael@0 333 25:
michael@0 334 movd 0(%esi),%mm0 # mm0 = *a
michael@0 335 movd 0(%edi),%mm3 # fetch the sum (current word from *c)
michael@0 336 add $4,%esi # a++
michael@0 337 pmuludq %mm1,%mm0 # mm0 = b * *a (64-bit product)
michael@0 338 paddq %mm0,%mm2 # add the carry
michael@0 339 paddq %mm3,%mm2 # add *c
michael@0 340 movd %mm2,0(%edi) # store the 32bit result
michael@0 341 add $4,%edi # c++
michael@0 342 psrlq $32, %mm2 # save the carry (upper 32 bits of the sum)
michael@0 343 dec %ecx # --a_len
michael@0 344 jnz 25b # jmp if a_len != 0
michael@0 345 26:
michael@0 346 movd %mm2,%ebx # ebx = final carry
michael@0 347 cmp $0,%ebx # is carry zero?
michael@0 348 jz 28f # nothing left to propagate
michael@0 349 mov 0(%edi),%eax # add in current word from *c
michael@0 350 add %ebx, %eax
michael@0 351 stosl # [es:edi] = eax; edi += 4; CF from the add is untouched
michael@0 352 jnc 28f # done unless that add overflowed
michael@0 353 27:
michael@0 354 mov 0(%edi),%eax # add in current word from *c
michael@0 355 adc $0,%eax # add the carry out of the previous word
michael@0 356 stosl # [es:edi] = eax; edi += 4;
michael@0 357 jc 27b # keep propagating while a word overflows
michael@0 358 28:
michael@0 359 emms # reset MMX state before returning
michael@0 360 pop %ebx
michael@0 361 pop %esi
michael@0 362 pop %edi
michael@0 363 leave
michael@0 364 ret
michael@0 365 nop
michael@0 366 # _s_mpv_sqr_add_prop(pa, a_len, ps): for each word pa[i], add pa[i] * pa[i] plus carry into the word pair
michael@0 367 # ps[2i], ps[2i+1]; the last carry is then added into the following words of ps while it overflows.
michael@0 368 # ebp - 20: caller's esi (x86 path; caller's ebx is pushed below it, at ebp - 24)
michael@0 369 # ebp - 16: caller's edi (x86 path)
michael@0 370 # ebp - 12: unused (ebp - 12 .. ebp - 4 are reserved by sub $12 in the x86 path)
michael@0 371 # ebp - 8: unused (carry is kept in ebx)
michael@0 372 # ebp - 4: unused (a_len is kept in ecx)
michael@0 373 # ebp + 0: caller's ebp
michael@0 374 # ebp + 4: return address
michael@0 375 # ebp + 8: pa argument
michael@0 376 # ebp + 12: a_len argument
michael@0 377 # ebp + 16: ps argument
michael@0 378 # ebp + 20: (not read by this function)
michael@0 379 # registers:
michael@0 380 # eax: current word of pa, then the word being stored to ps
michael@0 381 # ebx: carry (also used briefly to hold words read from ps)
michael@0 382 # ecx: a_len (words still to process)
michael@0 383 # edx: high half of the square / running sum
michael@0 384 # esi: pa ptr
michael@0 385 # edi: ps ptr
michael@0 386
michael@0 387 .globl _s_mpv_sqr_add_prop
michael@0 388 .type _s_mpv_sqr_add_prop,@function
michael@0 389 _s_mpv_sqr_add_prop:
michael@0 390 GET is_sse,%eax # eax = stored sse2 selection state
michael@0 391 cmp $0,%eax
michael@0 392 je _s_mpv_sqr_add_prop_x86 # 0: use the plain x86 path
michael@0 393 jg _s_mpv_sqr_add_prop_sse2 # positive: use the sse2 path
michael@0 394 call _s_mpi_is_sse2 # negative: not determined yet, ask now
michael@0 395 PUT %eax,is_sse # remember the answer for later calls
michael@0 396 cmp $0,%eax
michael@0 397 jg _s_mpv_sqr_add_prop_sse2 # otherwise fall through to the x86 path
michael@0 398 _s_mpv_sqr_add_prop_x86:
michael@0 399 push %ebp
michael@0 400 mov %esp,%ebp
michael@0 401 sub $12,%esp # reserve 12 bytes; this path never references them
michael@0 402 push %edi # save the registers used below
michael@0 403 push %esi
michael@0 404 push %ebx
michael@0 405 movl $0,%ebx # carry = 0
michael@0 406 mov 12(%ebp),%ecx # a_len
michael@0 407 mov 16(%ebp),%edi # edi = ps
michael@0 408 cmp $0,%ecx
michael@0 409 je 31f # jump if a_len == 0
michael@0 410 cld # lodsl/stosl below must step forward
michael@0 411 mov 8(%ebp),%esi # esi = pa
michael@0 412 30:
michael@0 413 lodsl # %eax = [ds:esi]; esi += 4;
michael@0 414 mull %eax # edx:eax = a_i * a_i
michael@0 415
michael@0 416 add %ebx,%eax # add "carry"
michael@0 417 adc $0,%edx
michael@0 418 mov 0(%edi),%ebx # ebx = low word from result
michael@0 419 add %ebx,%eax # add low word from result
michael@0 420 mov 4(%edi),%ebx # ebx = high word from result; mov leaves CF from the add intact
michael@0 421 stosl # [es:edi] = %eax; edi += 4;
michael@0 422 adc %ebx,%edx # add high word from result
michael@0 423 movl $0,%ebx # zero ebx with mov so CF survives for the adc below
michael@0 424 mov %edx,%eax
michael@0 425 adc $0,%ebx # carry = carry out of the high word
michael@0 426 stosl # [es:edi] = %eax; edi += 4;
michael@0 427 dec %ecx # --a_len
michael@0 428 jnz 30b # jmp if a_len != 0
michael@0 429 31:
michael@0 430 cmp $0,%ebx # is carry zero?
michael@0 431 jz 34f # nothing left to propagate
michael@0 432 mov 0(%edi),%eax # add in current word from *ps
michael@0 433 add %ebx,%eax
michael@0 434 stosl # [es:edi] = eax; edi += 4; CF from the add is untouched
michael@0 435 jnc 34f # done unless that add overflowed
michael@0 436 32:
michael@0 437 mov 0(%edi),%eax # add in current word from *ps
michael@0 438 adc $0,%eax # add the carry out of the previous word
michael@0 439 stosl # [es:edi] = eax; edi += 4;
michael@0 440 jc 32b # keep propagating while a word overflows
michael@0 441 34:
michael@0 442 pop %ebx # restore in reverse order of the pushes
michael@0 443 pop %esi
michael@0 444 pop %edi
michael@0 445 leave
michael@0 446 ret
michael@0 447 nop
michael@0 448 _s_mpv_sqr_add_prop_sse2: # same result as the x86 path; the running 64-bit sum lives in mm2
michael@0 449 push %ebp
michael@0 450 mov %esp,%ebp
michael@0 451 push %edi # caller's edi, at ebp - 4
michael@0 452 push %esi # caller's esi, at ebp - 8
michael@0 453 push %ebx # caller's ebx, at ebp - 12
michael@0 454 psubq %mm2,%mm2 # carry = 0
michael@0 455 mov 12(%ebp),%ecx # ecx = a_len
michael@0 456 mov 16(%ebp),%edi # edi = ps
michael@0 457 cmp $0,%ecx
michael@0 458 je 36f # jmp if a_len == 0
michael@0 459 mov 8(%ebp),%esi # esi = a
michael@0 460 cld # stosl in the carry propagation below must step forward
michael@0 461 35:
michael@0 462 movd 0(%esi),%mm0 # mm0 = *a
michael@0 463 movd 0(%edi),%mm3 # fetch the sum (low word from ps)
michael@0 464 add $4,%esi # a++
michael@0 465 pmuludq %mm0,%mm0 # mm0 = sqr(a)
michael@0 466 paddq %mm0,%mm2 # add the carry
michael@0 467 paddq %mm3,%mm2 # add the low word
michael@0 468 movd 4(%edi),%mm3 # mm3 = high word from ps
michael@0 469 movd %mm2,0(%edi) # store the 32bit result
michael@0 470 psrlq $32, %mm2 # drop the low word that was just stored
michael@0 471 paddq %mm3,%mm2 # add the high word
michael@0 472 movd %mm2,4(%edi) # store the 32bit result
michael@0 473 psrlq $32, %mm2 # save the carry.
michael@0 474 add $8,%edi # advance ps by the two words just written
michael@0 475 dec %ecx # --a_len
michael@0 476 jnz 35b # jmp if a_len != 0
michael@0 477 36:
michael@0 478 movd %mm2,%ebx # ebx = final carry
michael@0 479 cmp $0,%ebx # is carry zero?
michael@0 480 jz 38f # nothing left to propagate
michael@0 481 mov 0(%edi),%eax # add in current word from *ps
michael@0 482 add %ebx, %eax
michael@0 483 stosl # [es:edi] = eax; edi += 4; CF from the add is untouched
michael@0 484 jnc 38f # done unless that add overflowed
michael@0 485 37:
michael@0 486 mov 0(%edi),%eax # add in current word from *ps
michael@0 487 adc $0,%eax # add the carry out of the previous word
michael@0 488 stosl # [es:edi] = eax; edi += 4;
michael@0 489 jc 37b # keep propagating while a word overflows
michael@0 490 38:
michael@0 491 emms # reset MMX state before returning
michael@0 492 pop %ebx
michael@0 493 pop %esi
michael@0 494 pop %edi
michael@0 495 leave
michael@0 496 ret
michael@0 497 nop
michael@0 498 # _s_mpv_div_2dx1d: one 64-by-32-bit unsigned divide; stores quotient to *qp, remainder to *rp, returns 0 in eax.
michael@0 499 #
michael@0 500 # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
michael@0 501 # so its high bit is 1. This code is from NSPR.
michael@0 502 #
michael@0 503 # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
michael@0 504 # mp_digit *qp, mp_digit *rp)
michael@0 505
michael@0 506 # esp + 0: Caller's ebx
michael@0 507 # esp + 4: return address
michael@0 508 # esp + 8: Nhi argument
michael@0 509 # esp + 12: Nlo argument
michael@0 510 # esp + 16: divisor argument
michael@0 511 # esp + 20: qp argument
michael@0 512 # esp + 24: rp argument
michael@0 513 # registers:
michael@0 514 # eax: Nlo, then the quotient, then the zero return value
michael@0 515 # ebx: divisor, then qp, then rp
michael@0 516 # ecx: not used
michael@0 517 # edx: Nhi, then the remainder
michael@0 518 # esi: not used
michael@0 519 # edi: not used
michael@0 520 #
michael@0 521
michael@0 522 .globl _s_mpv_div_2dx1d
michael@0 523 .type _s_mpv_div_2dx1d,@function
michael@0 524 _s_mpv_div_2dx1d:
michael@0 525 push %ebx # ebx is used as scratch below; offsets above are relative to esp after this push
michael@0 526 mov 8(%esp),%edx # edx = Nhi
michael@0 527 mov 12(%esp),%eax # eax = Nlo
michael@0 528 mov 16(%esp),%ebx # ebx = divisor
michael@0 529 div %ebx # eax = quotient, edx = remainder; NOTE(review): div faults if the quotient needs more than 32 bits - presumably callers ensure Nhi < divisor, confirm
michael@0 530 mov 20(%esp),%ebx # ebx = qp
michael@0 531 mov %eax,0(%ebx) # *qp = quotient
michael@0 532 mov 24(%esp),%ebx # ebx = rp
michael@0 533 mov %edx,0(%ebx) # *rp = remainder
michael@0 534 xor %eax,%eax # return zero
michael@0 535 pop %ebx
michael@0 536 ret
michael@0 537 nop
michael@0 538

mercurial