michael@0: #
michael@0: # This Source Code Form is subject to the terms of the Mozilla Public
michael@0: # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0: # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0: 
michael@0: .data
michael@0: .align 4
michael@0:  # is_sse: cached CPU-capability flag consulted by each multiply entry point.
michael@0:  # -1 means to call _s_mpi_is_sse2 to determine if we support sse2
michael@0:  #    instructions (the entry point then stores the answer back here).
michael@0:  #  0 means to use x86 instructions
michael@0:  #  1 means to use sse2 instructions (any value > 0 selects the sse2 path)
michael@0: .type	is_sse,@object
michael@0: .size	is_sse,4
michael@0: is_sse: .long	-1 		# initial state: not probed yet
michael@0: 
michael@0: #
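michael@0: # sigh, handle the difference between -fPIC and not PIC.
michael@0: # The intent was to default to pic, since this file seems to be
michael@0: # exclusively linux right now (solaris uses mpi_i86pc.s and windows uses
michael@0: # mpi_x86_asm.c).  NOTE: as the file stands, the pic (GOTOFF) variants of
michael@0: # GET/PUT below are commented out, so only the non-pic macros that use
michael@0: # absolute addresses are actually in effect.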
michael@0: #
michael@0: #.ifndef NO_PIC
michael@0: #.macro GET   var,reg
michael@0: #    movl   \var@GOTOFF(%ebx),\reg
michael@0: #.endm
michael@0: #.macro PUT   reg,var
michael@0: #    movl   \reg,\var@GOTOFF(%ebx)
michael@0: #.endm
michael@0: #.else
michael@0: .macro GET   var,reg
michael@0:     movl   \var,\reg		# GET: load the 32-bit value stored at var into reg (absolute address)
michael@0: .endm
michael@0: .macro PUT   reg,var
michael@0:     movl   \reg,\var		# PUT: store reg into the 32-bit location var (absolute address)
michael@0: .endm
michael@0: #.endif
michael@0: 
michael@0: .text
michael@0: 
michael@0: 
michael@0:  #
michael@0:  # _s_mpv_mul_d(a, a_len, b, c)
michael@0:  #
michael@0:  # Multiplies the a_len 32-bit words at a by the single word b.
michael@0:  # Writes a_len + 1 words at c: c[0 .. a_len-1] get the low words and
michael@0:  # c[a_len] gets the final carry (so c[0] = 0 when a_len == 0).
michael@0:  # eax is not set to anything meaningful on return (presumably the C
michael@0:  # prototype is void; confirm against the caller's declaration).
michael@0:  #
michael@0:  # Entry dispatch on is_sse:
michael@0:  #    > 0   use the sse2 loop
michael@0:  #    = 0   use the plain x86 loop
michael@0:  #    < 0   call _s_mpi_is_sse2, cache its result in is_sse, then choose.
michael@0:  #          The probe is called before any frame is built, so the
michael@0:  #          stacked arguments are still where the loops expect them.
michael@0:  #
michael@0:  #  ebp - 12:	caller's ebx (x86 loop only)
michael@0:  #  ebp - 8:	caller's esi
michael@0:  #  ebp - 4:	caller's edi
michael@0:  #  ebp + 0:	caller's ebp
michael@0:  #  ebp + 4:	return address
michael@0:  #  ebp + 8:	a	argument
michael@0:  #  ebp + 12:	a_len	argument
michael@0:  #  ebp + 16:	b	argument
michael@0:  #  ebp + 20:	c	argument
michael@0:  #  registers (x86 loop):
michael@0:  #	eax:	a word, then low half of product
michael@0:  #	ebx:	carry
michael@0:  #	ecx:	a_len (words still to do)
michael@0:  #	edx:	high half of product
michael@0:  #	esi:	a ptr
michael@0:  #	edi:	c ptr
michael@0:  #  registers (sse2 loop):
michael@0:  #	mm0:	product	mm1:	b	mm2:	carry
michael@0:  #	ecx:	a_len	esi:	a ptr	edi:	c ptr
michael@0: .globl	_s_mpv_mul_d
michael@0: .type	_s_mpv_mul_d,@function
michael@0: _s_mpv_mul_d:
michael@0:     GET    is_sse,%eax
michael@0:     test   %eax,%eax
michael@0:     jz     _s_mpv_mul_d_x86		# 0: sse2 known to be absent
michael@0:     jg     _s_mpv_mul_d_sse2		# >0: sse2 known to be present
michael@0:     call   _s_mpi_is_sse2		# <0: not probed yet
michael@0:     PUT    %eax,is_sse
michael@0:     test   %eax,%eax
michael@0:     jg     _s_mpv_mul_d_sse2
michael@0: _s_mpv_mul_d_x86:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     xor    %ebx,%ebx		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     mov    20(%ebp),%edi	# edi = c
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lmul_d_x86_done	# a_len == 0: only the carry word is written
michael@0:     mov    8(%ebp),%esi		# esi = a
michael@0:     cld				# lodsl/stosl must step upward
michael@0: .Lmul_d_x86_loop:
michael@0:     lodsl			# eax = *a++
michael@0:     mull   16(%ebp)		# edx:eax = a_i * b
michael@0:     add    %ebx,%eax		# fold the incoming carry into edx:eax
michael@0:     adc    $0,%edx
michael@0:     mov    %edx,%ebx		# high half becomes the next carry
michael@0:     stosl			# *c++ = eax
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lmul_d_x86_loop
michael@0: .Lmul_d_x86_done:
michael@0:     mov    %ebx,0(%edi)		# *c = carry
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: _s_mpv_mul_d_sse2:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     pxor   %mm2,%mm2		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     movd   16(%ebp),%mm1	# mm1 = b
michael@0:     mov    20(%ebp),%edi	# edi = c
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lmul_d_sse2_done	# a_len == 0
michael@0:     mov    8(%ebp),%esi		# esi = a
michael@0:     cld				# kept: leaves DF clear as the x86 loop does
michael@0: .Lmul_d_sse2_loop:
michael@0:     movd   0(%esi),%mm0		# mm0 = *a
michael@0:     add    $4,%esi		# ++a
michael@0:     pmuludq %mm1,%mm0		# mm0 = a_i * b (64 bits)
michael@0:     paddq  %mm0,%mm2		# mm2 = product + carry
michael@0:     movd   %mm2,0(%edi)		# *c = low 32 bits
michael@0:     add    $4,%edi		# ++c
michael@0:     psrlq  $32,%mm2		# carry = high 32 bits
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lmul_d_sse2_loop
michael@0: .Lmul_d_sse2_done:
michael@0:     movd   %mm2,0(%edi)		# *c = carry
michael@0:     emms				# hand the x87 state back clean
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: .size	_s_mpv_mul_d,.-_s_mpv_mul_d
michael@0: 
michael@0:  #
michael@0:  # _s_mpv_mul_d_add(a, a_len, b, c)
michael@0:  #
michael@0:  # Multiply-accumulate: for each of the a_len words, c[i] receives the
michael@0:  # low 32 bits of a[i] * b + c[i] + carry.  After the loop the final
michael@0:  # carry is STORED (not added) at c[a_len].
michael@0:  # eax is not set to anything meaningful on return (presumably the C
michael@0:  # prototype is void; confirm against the caller's declaration).
michael@0:  #
michael@0:  # Entry dispatch on is_sse:
michael@0:  #    > 0   use the sse2 loop
michael@0:  #    = 0   use the plain x86 loop
michael@0:  #    < 0   call _s_mpi_is_sse2, cache its result in is_sse, then choose.
michael@0:  #          The probe is called before any frame is built, so the
michael@0:  #          stacked arguments are still where the loops expect them.
michael@0:  #
michael@0:  #  ebp - 12:	caller's ebx (x86 loop only)
michael@0:  #  ebp - 8:	caller's esi
michael@0:  #  ebp - 4:	caller's edi
michael@0:  #  ebp + 0:	caller's ebp
michael@0:  #  ebp + 4:	return address
michael@0:  #  ebp + 8:	a	argument
michael@0:  #  ebp + 12:	a_len	argument
michael@0:  #  ebp + 16:	b	argument
michael@0:  #  ebp + 20:	c	argument
michael@0:  #  registers (x86 loop):
michael@0:  #	eax:	a word, then low half of the running sum
michael@0:  #	ebx:	carry
michael@0:  #	ecx:	a_len (words still to do)
michael@0:  #	edx:	high half of the running sum
michael@0:  #	esi:	a ptr
michael@0:  #	edi:	c ptr
michael@0:  #  registers (sse2 loop):
michael@0:  #	mm0:	product, then c word	mm1:	b	mm2:	carry
michael@0:  #	ecx:	a_len	esi:	a ptr	edi:	c ptr
michael@0: .globl	_s_mpv_mul_d_add
michael@0: .type	_s_mpv_mul_d_add,@function
michael@0: _s_mpv_mul_d_add:
michael@0:     GET    is_sse,%eax
michael@0:     test   %eax,%eax
michael@0:     jz     _s_mpv_mul_d_add_x86		# 0: sse2 known to be absent
michael@0:     jg     _s_mpv_mul_d_add_sse2	# >0: sse2 known to be present
michael@0:     call   _s_mpi_is_sse2		# <0: not probed yet
michael@0:     PUT    %eax,is_sse
michael@0:     test   %eax,%eax
michael@0:     jg     _s_mpv_mul_d_add_sse2
michael@0: _s_mpv_mul_d_add_x86:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     xor    %ebx,%ebx		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     mov    20(%ebp),%edi	# edi = c
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lmul_d_add_x86_done	# a_len == 0: only the carry word is written
michael@0:     mov    8(%ebp),%esi		# esi = a
michael@0:     cld				# lodsl/stosl must step upward
michael@0: .Lmul_d_add_x86_loop:
michael@0:     lodsl			# eax = *a++
michael@0:     mull   16(%ebp)		# edx:eax = a_i * b
michael@0:     add    %ebx,%eax		# fold the incoming carry into edx:eax
michael@0:     adc    $0,%edx
michael@0:     add    0(%edi),%eax		# add in the current word from *c
michael@0:     adc    $0,%edx
michael@0:     mov    %edx,%ebx		# high half becomes the next carry
michael@0:     stosl			# *c++ = eax
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lmul_d_add_x86_loop
michael@0: .Lmul_d_add_x86_done:
michael@0:     mov    %ebx,0(%edi)		# *c = carry
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: _s_mpv_mul_d_add_sse2:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     pxor   %mm2,%mm2		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     movd   16(%ebp),%mm1	# mm1 = b
michael@0:     mov    20(%ebp),%edi	# edi = c
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lmul_d_add_sse2_done	# a_len == 0
michael@0:     mov    8(%ebp),%esi		# esi = a
michael@0:     cld				# kept: leaves DF clear as the x86 loop does
michael@0: .Lmul_d_add_sse2_loop:
michael@0:     movd   0(%esi),%mm0		# mm0 = *a
michael@0:     add    $4,%esi		# ++a
michael@0:     pmuludq %mm1,%mm0		# mm0 = a_i * b (64 bits)
michael@0:     paddq  %mm0,%mm2		# mm2 = product + carry
michael@0:     movd   0(%edi),%mm0		# mm0 = current word from *c
michael@0:     paddq  %mm0,%mm2		# mm2 += *c
michael@0:     movd   %mm2,0(%edi)		# *c = low 32 bits
michael@0:     add    $4,%edi		# ++c
michael@0:     psrlq  $32,%mm2		# carry = high 32 bits
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lmul_d_add_sse2_loop
michael@0: .Lmul_d_add_sse2_done:
michael@0:     movd   %mm2,0(%edi)		# *c = carry
michael@0:     emms				# hand the x87 state back clean
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: .size	_s_mpv_mul_d_add,.-_s_mpv_mul_d_add
michael@0: 
michael@0:  #
michael@0:  # _s_mpv_mul_d_add_prop(a, a_len, b, c)
michael@0:  #
michael@0:  # Multiply-accumulate with carry propagation: for each of the a_len
michael@0:  # words, c[i] receives the low 32 bits of a[i] * b + c[i] + carry.
michael@0:  # A non-zero final carry is then ADDED to c[a_len], and any carry out
michael@0:  # of that addition keeps rippling into the following words of c until
michael@0:  # an addition produces no carry.  No length is checked while rippling.
michael@0:  # eax is not set to anything meaningful on return (presumably the C
michael@0:  # prototype is void; confirm against the caller's declaration).
michael@0:  #
michael@0:  # Entry dispatch on is_sse:
michael@0:  #    > 0   use the sse2 loop
michael@0:  #    = 0   use the plain x86 loop
michael@0:  #    < 0   call _s_mpi_is_sse2, cache its result in is_sse, then choose.
michael@0:  #          The probe is called before any frame is built, so the
michael@0:  #          stacked arguments are still where the loops expect them.
michael@0:  #
michael@0:  #  ebp - 12:	caller's ebx
michael@0:  #  ebp - 8:	caller's esi
michael@0:  #  ebp - 4:	caller's edi
michael@0:  #  ebp + 0:	caller's ebp
michael@0:  #  ebp + 4:	return address
michael@0:  #  ebp + 8:	a	argument
michael@0:  #  ebp + 12:	a_len	argument
michael@0:  #  ebp + 16:	b	argument
michael@0:  #  ebp + 20:	c	argument
michael@0:  #  registers (x86 loop):
michael@0:  #	eax:	a word, then low half of the running sum
michael@0:  #	ebx:	carry
michael@0:  #	ecx:	a_len (words still to do)
michael@0:  #	edx:	high half of the running sum
michael@0:  #	esi:	a ptr
michael@0:  #	edi:	c ptr
michael@0:  #  registers (sse2 loop):
michael@0:  #	mm0:	product	mm1:	b	mm2:	carry	mm3:	c word
michael@0:  #	ecx:	a_len	esi:	a ptr	edi:	c ptr
michael@0:  #	ebx:	final carry (ripple phase)
michael@0: .globl	_s_mpv_mul_d_add_prop
michael@0: .type	_s_mpv_mul_d_add_prop,@function
michael@0: _s_mpv_mul_d_add_prop:
michael@0:     GET    is_sse,%eax
michael@0:     test   %eax,%eax
michael@0:     jz     _s_mpv_mul_d_add_prop_x86	# 0: sse2 known to be absent
michael@0:     jg     _s_mpv_mul_d_add_prop_sse2	# >0: sse2 known to be present
michael@0:     call   _s_mpi_is_sse2		# <0: not probed yet
michael@0:     PUT    %eax,is_sse
michael@0:     test   %eax,%eax
michael@0:     jg     _s_mpv_mul_d_add_prop_sse2
michael@0: _s_mpv_mul_d_add_prop_x86:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     xor    %ebx,%ebx		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     mov    20(%ebp),%edi	# edi = c
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lmul_d_add_prop_x86_tail	# a_len == 0
michael@0:     mov    8(%ebp),%esi		# esi = a
michael@0:     cld				# lodsl/stosl must step upward
michael@0: .Lmul_d_add_prop_x86_loop:
michael@0:     lodsl			# eax = *a++
michael@0:     mull   16(%ebp)		# edx:eax = a_i * b
michael@0:     add    %ebx,%eax		# fold the incoming carry into edx:eax
michael@0:     adc    $0,%edx
michael@0:     add    0(%edi),%eax		# add in the current word from *c
michael@0:     adc    $0,%edx
michael@0:     mov    %edx,%ebx		# high half becomes the next carry
michael@0:     stosl			# *c++ = eax
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lmul_d_add_prop_x86_loop
michael@0: .Lmul_d_add_prop_x86_tail:
michael@0:     test   %ebx,%ebx		# is carry zero?
michael@0:     jz     .Lmul_d_add_prop_x86_ret
michael@0:     mov    0(%edi),%eax		# *c += carry
michael@0:     add    %ebx,%eax
michael@0:     stosl			# store and ++c; stosl leaves CF alone
michael@0:     jnc    .Lmul_d_add_prop_x86_ret
michael@0: .Lmul_d_add_prop_x86_ripple:
michael@0:     mov    0(%edi),%eax		# CF is set here: *c += 1
michael@0:     adc    $0,%eax
michael@0:     stosl			# store and ++c; stosl leaves CF alone
michael@0:     jc     .Lmul_d_add_prop_x86_ripple
michael@0: .Lmul_d_add_prop_x86_ret:
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: _s_mpv_mul_d_add_prop_sse2:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     pxor   %mm2,%mm2		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     movd   16(%ebp),%mm1	# mm1 = b
michael@0:     mov    20(%ebp),%edi	# edi = c
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lmul_d_add_prop_sse2_tail	# a_len == 0
michael@0:     mov    8(%ebp),%esi		# esi = a
michael@0:     cld				# the ripple phase below uses stosl
michael@0: .Lmul_d_add_prop_sse2_loop:
michael@0:     movd   0(%esi),%mm0		# mm0 = *a
michael@0:     movd   0(%edi),%mm3		# mm3 = current word from *c
michael@0:     add    $4,%esi		# ++a
michael@0:     pmuludq %mm1,%mm0		# mm0 = a_i * b (64 bits)
michael@0:     paddq  %mm0,%mm2		# mm2 = product + carry
michael@0:     paddq  %mm3,%mm2		# mm2 += *c
michael@0:     movd   %mm2,0(%edi)		# *c = low 32 bits
michael@0:     add    $4,%edi		# ++c
michael@0:     psrlq  $32,%mm2		# carry = high 32 bits
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lmul_d_add_prop_sse2_loop
michael@0: .Lmul_d_add_prop_sse2_tail:
michael@0:     movd   %mm2,%ebx		# ebx = final carry
michael@0:     test   %ebx,%ebx		# is carry zero?
michael@0:     jz     .Lmul_d_add_prop_sse2_ret
michael@0:     mov    0(%edi),%eax		# *c += carry
michael@0:     add    %ebx,%eax
michael@0:     stosl			# store and ++c; stosl leaves CF alone
michael@0:     jnc    .Lmul_d_add_prop_sse2_ret
michael@0: .Lmul_d_add_prop_sse2_ripple:
michael@0:     mov    0(%edi),%eax		# CF is set here: *c += 1
michael@0:     adc    $0,%eax
michael@0:     stosl			# store and ++c; stosl leaves CF alone
michael@0:     jc     .Lmul_d_add_prop_sse2_ripple
michael@0: .Lmul_d_add_prop_sse2_ret:
michael@0:     emms				# hand the x87 state back clean
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: .size	_s_mpv_mul_d_add_prop,.-_s_mpv_mul_d_add_prop
michael@0: 
michael@0: 
michael@0:  #
michael@0:  # _s_mpv_sqr_add_prop(pa, a_len, ps)
michael@0:  #
michael@0:  # For each of the a_len words at pa, adds the 64-bit square pa[i]^2
michael@0:  # (plus the running carry) into the word pair ps[2i], ps[2i+1].
michael@0:  # A non-zero carry left after the loop is then added to the next word
michael@0:  # of ps, and any carry out keeps rippling upward until an addition
michael@0:  # produces no carry.  No length is checked while rippling.
michael@0:  # eax is not set to anything meaningful on return (presumably the C
michael@0:  # prototype is void; confirm against the caller's declaration).
michael@0:  #
michael@0:  # Entry dispatch on is_sse:
michael@0:  #    > 0   use the sse2 loop
michael@0:  #    = 0   use the plain x86 loop
michael@0:  #    < 0   call _s_mpi_is_sse2, cache its result in is_sse, then choose.
michael@0:  #          The probe is called before any frame is built, so the
michael@0:  #          stacked arguments are still where the loops expect them.
michael@0:  #
michael@0:  #  ebp - 12:	caller's ebx
michael@0:  #  ebp - 8:	caller's esi
michael@0:  #  ebp - 4:	caller's edi
michael@0:  #  ebp + 0:	caller's ebp
michael@0:  #  ebp + 4:	return address
michael@0:  #  ebp + 8:	pa	argument
michael@0:  #  ebp + 12:	a_len	argument
michael@0:  #  ebp + 16:	ps	argument
michael@0:  #  registers (x86 loop):
michael@0:  #	eax:	pa word, then the word being stored
michael@0:  #	ebx:	carry (0 or 1 between iterations)
michael@0:  #	ecx:	a_len (words still to do)
michael@0:  #	edx:	high half of the square
michael@0:  #	esi:	pa ptr
michael@0:  #	edi:	ps ptr
michael@0:  #  registers (sse2 loop):
michael@0:  #	mm0:	square	mm2:	carry	mm3:	ps word
michael@0:  #	ecx:	a_len	esi:	pa ptr	edi:	ps ptr
michael@0:  #	ebx:	final carry (ripple phase)
michael@0: .globl	_s_mpv_sqr_add_prop
michael@0: .type	_s_mpv_sqr_add_prop,@function
michael@0: _s_mpv_sqr_add_prop:
michael@0:     GET    is_sse,%eax
michael@0:     test   %eax,%eax
michael@0:     jz     _s_mpv_sqr_add_prop_x86	# 0: sse2 known to be absent
michael@0:     jg     _s_mpv_sqr_add_prop_sse2	# >0: sse2 known to be present
michael@0:     call   _s_mpi_is_sse2		# <0: not probed yet
michael@0:     PUT    %eax,is_sse
michael@0:     test   %eax,%eax
michael@0:     jg     _s_mpv_sqr_add_prop_sse2
michael@0: _s_mpv_sqr_add_prop_x86:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     xor    %ebx,%ebx		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     mov    16(%ebp),%edi	# edi = ps
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lsqr_add_prop_x86_tail	# a_len == 0
michael@0:     mov    8(%ebp),%esi		# esi = pa
michael@0:     cld				# lodsl/stosl must step upward
michael@0: .Lsqr_add_prop_x86_loop:
michael@0:     lodsl			# eax = *pa++
michael@0:     mull   %eax			# edx:eax = pa_i squared
michael@0:     add    %ebx,%eax		# fold the incoming carry into edx:eax
michael@0:     adc    $0,%edx
michael@0:     add    0(%edi),%eax		# add low word from result
michael@0:     stosl			# store low word; edi now at the high word, CF untouched
michael@0:     adc    0(%edi),%edx		# add high word from result plus CF
michael@0:     movl   $0,%ebx		# mov, not xor: CF must reach the adc below
michael@0:     mov    %edx,%eax
michael@0:     adc    $0,%ebx		# carry = CF out of the high word
michael@0:     stosl			# store high word
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lsqr_add_prop_x86_loop
michael@0: .Lsqr_add_prop_x86_tail:
michael@0:     test   %ebx,%ebx		# is carry zero?
michael@0:     jz     .Lsqr_add_prop_x86_ret
michael@0:     mov    0(%edi),%eax		# *ps += carry
michael@0:     add    %ebx,%eax
michael@0:     stosl			# store and ++ps; stosl leaves CF alone
michael@0:     jnc    .Lsqr_add_prop_x86_ret
michael@0: .Lsqr_add_prop_x86_ripple:
michael@0:     mov    0(%edi),%eax		# CF is set here: *ps += 1
michael@0:     adc    $0,%eax
michael@0:     stosl			# store and ++ps; stosl leaves CF alone
michael@0:     jc     .Lsqr_add_prop_x86_ripple
michael@0: .Lsqr_add_prop_x86_ret:
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: _s_mpv_sqr_add_prop_sse2:
michael@0:     push   %ebp
michael@0:     mov    %esp,%ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     pxor   %mm2,%mm2		# carry = 0
michael@0:     mov    12(%ebp),%ecx	# ecx = a_len
michael@0:     mov    16(%ebp),%edi	# edi = ps
michael@0:     test   %ecx,%ecx
michael@0:     jz     .Lsqr_add_prop_sse2_tail	# a_len == 0
michael@0:     mov    8(%ebp),%esi		# esi = pa
michael@0:     cld				# the ripple phase below uses stosl
michael@0: .Lsqr_add_prop_sse2_loop:
michael@0:     movd   0(%esi),%mm0		# mm0 = *pa
michael@0:     movd   0(%edi),%mm3		# mm3 = low word from result
michael@0:     add    $4,%esi		# ++pa
michael@0:     pmuludq %mm0,%mm0		# mm0 = pa_i squared (64 bits)
michael@0:     paddq  %mm0,%mm2		# mm2 = square + carry
michael@0:     paddq  %mm3,%mm2		# add the low word
michael@0:     movd   4(%edi),%mm3		# mm3 = high word from result
michael@0:     movd   %mm2,0(%edi)		# store the low 32 bits
michael@0:     psrlq  $32,%mm2
michael@0:     paddq  %mm3,%mm2		# add the high word
michael@0:     movd   %mm2,4(%edi)		# store the next 32 bits
michael@0:     psrlq  $32,%mm2		# what is left is the carry
michael@0:     add    $8,%edi		# ps += 2 words
michael@0:     dec    %ecx			# --a_len
michael@0:     jnz    .Lsqr_add_prop_sse2_loop
michael@0: .Lsqr_add_prop_sse2_tail:
michael@0:     movd   %mm2,%ebx		# ebx = final carry
michael@0:     test   %ebx,%ebx		# is carry zero?
michael@0:     jz     .Lsqr_add_prop_sse2_ret
michael@0:     mov    0(%edi),%eax		# *ps += carry
michael@0:     add    %ebx,%eax
michael@0:     stosl			# store and ++ps; stosl leaves CF alone
michael@0:     jnc    .Lsqr_add_prop_sse2_ret
michael@0: .Lsqr_add_prop_sse2_ripple:
michael@0:     mov    0(%edi),%eax		# CF is set here: *ps += 1
michael@0:     adc    $0,%eax
michael@0:     stosl			# store and ++ps; stosl leaves CF alone
michael@0:     jc     .Lsqr_add_prop_sse2_ripple
michael@0: .Lsqr_add_prop_sse2_ret:
michael@0:     emms				# hand the x87 state back clean
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: .size	_s_mpv_sqr_add_prop,.-_s_mpv_sqr_add_prop
michael@0: 
michael@0:  #
michael@0:  # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
michael@0:  # so its high bit is 1.   This code is from NSPR.
michael@0:  #
michael@0:  # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
michael@0:  # 		          mp_digit *qp, mp_digit *rp)
michael@0:  #
michael@0:  # Stores the 32-bit quotient at *qp and the remainder at *rp, then
michael@0:  # returns 0 in eax.  qp and rp are dereferenced unconditionally.
michael@0:  # The quotient must fit in 32 bits (Nhi < divisor); otherwise the
michael@0:  # div instruction raises a divide error instead of returning.
michael@0:  #
michael@0:  # Stack layout after the push of ebx:
michael@0:  #  esp +  0:	caller's ebx
michael@0:  #  esp +  4:	return address
michael@0:  #  esp +  8:	Nhi	argument
michael@0:  #  esp + 12:	Nlo	argument
michael@0:  #  esp + 16:	divisor	argument
michael@0:  #  esp + 20:	qp	argument
michael@0:  #  esp + 24:	rp	argument
michael@0:  #  registers:
michael@0:  #	eax:	Nlo, then quotient, then the zero return value
michael@0:  #	ebx:	qp, then rp (saved and restored)
michael@0:  #	edx:	Nhi, then remainder
michael@0:  #
michael@0: .globl	_s_mpv_div_2dx1d
michael@0: .type	_s_mpv_div_2dx1d,@function
michael@0: _s_mpv_div_2dx1d:
michael@0:        push   %ebx
michael@0:        mov    8(%esp),%edx		# edx = Nhi
michael@0:        mov    12(%esp),%eax		# eax = Nlo
michael@0:        divl   16(%esp)		# eax = edx:eax / divisor, edx = remainder
michael@0:        mov    20(%esp),%ebx		# ebx = qp
michael@0:        mov    %eax,0(%ebx)		# *qp = quotient
michael@0:        mov    24(%esp),%ebx		# ebx = rp
michael@0:        mov    %edx,0(%ebx)		# *rp = remainder
michael@0:        xor    %eax,%eax		# return zero
michael@0:        pop    %ebx
michael@0:        ret
michael@0:        nop
michael@0: .size	_s_mpv_div_2dx1d,.-_s_mpv_div_2dx1d
michael@0: