michael@0: # This Source Code Form is subject to the terms of the Mozilla Public
michael@0: # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0: # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0: 
michael@0:  # Symbol-name and directive shims, selected by the DARWIN build macro.
michael@0:  # With DARWIN defined, each routine name below is remapped to the same
michael@0:  # name with a leading underscore, and TYPE_FUNCTION expands to nothing
michael@0:  # (presumably because that assembler has no .type directive - confirm).
michael@0:  # Otherwise the names are used unchanged and TYPE_FUNCTION(x) emits
michael@0:  # a .type directive that marks x as a function symbol.
michael@0: #ifdef DARWIN
michael@0: #define s_mpv_mul_d          _s_mpv_mul_d
michael@0: #define s_mpv_mul_d_add      _s_mpv_mul_d_add
michael@0: #define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
michael@0: #define s_mpv_sqr_add_prop   _s_mpv_sqr_add_prop
michael@0: #define s_mpv_div_2dx1d      _s_mpv_div_2dx1d
michael@0: #define TYPE_FUNCTION(x)
michael@0: #else
michael@0: #define TYPE_FUNCTION(x) .type x, @function
michael@0: #endif
michael@0: 
michael@0:  # Everything that follows is assembled into the code section.
michael@0: .text
michael@0: 
michael@0:  #  s_mpv_mul_d(a, a_len, b, c):  c[0 .. a_len] = a[0 .. a_len-1] * b
michael@0:  #
michael@0:  #  Multiplies the a_len 32-bit words at a (least significant word first)
michael@0:  #  by the 32-bit word b.  The a_len low result words are stored at c and
michael@0:  #  the final carry word is stored at c[a_len], so c needs room for
michael@0:  #  a_len + 1 words.  With a_len == 0 the only effect is c[0] = 0.
michael@0:  #  Arguments are read from the stack; eax and edx are not touched.
michael@0:  #  MMX registers mm0-mm2 are used and emms is executed before returning.
michael@0:  #  No string instruction is used, so the direction flag is left alone.
michael@0:  #
michael@0:  #  ebp - 8:    caller's esi
michael@0:  #  ebp - 4:    caller's edi
michael@0:  #  ebp + 0:    caller's ebp
michael@0:  #  ebp + 4:    return address
michael@0:  #  ebp + 8:    a       argument
michael@0:  #  ebp + 12:   a_len   argument
michael@0:  #  ebp + 16:   b       argument
michael@0:  #  ebp + 20:   c       argument
michael@0:  #  registers:
michael@0:  #      ecx:    a_len (words still to process)
michael@0:  #      esi:    a ptr
michael@0:  #      edi:    c ptr
michael@0:  #      mm0:    current 64-bit product
michael@0:  #      mm1:    b
michael@0:  #      mm2:    running carry
michael@0: .globl s_mpv_mul_d
michael@0: .private_extern s_mpv_mul_d
michael@0: TYPE_FUNCTION(s_mpv_mul_d)
michael@0: s_mpv_mul_d:
michael@0:     push   %ebp
michael@0:     mov    %esp, %ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     pxor   %mm2, %mm2           # carry = 0
michael@0:     mov    12(%ebp), %ecx       # ecx = a_len
michael@0:     movd   16(%ebp), %mm1       # mm1 = b, upper half zeroed
michael@0:     mov    20(%ebp), %edi       # edi = c
michael@0:     test   %ecx, %ecx
michael@0:     jz     2f                   # a_len == 0: only the carry word is written
michael@0:     mov    8(%ebp), %esi        # esi = a
michael@0: 1:
michael@0:     movd   0(%esi), %mm0        # mm0 = *a
michael@0:     add    $4, %esi             # a++
michael@0:     pmuludq %mm1, %mm0          # mm0 = b * *a, full 64-bit product
michael@0:     paddq  %mm0, %mm2           # mm2 = carry + product (cannot overflow 64 bits)
michael@0:     movd   %mm2, 0(%edi)        # *c = low 32 bits
michael@0:     add    $4, %edi             # c++
michael@0:     psrlq  $32, %mm2            # carry = high 32 bits
michael@0:     dec    %ecx                 # --a_len
michael@0:     jnz    1b                   # loop while a_len != 0
michael@0: 2:
michael@0:     movd   %mm2, 0(%edi)        # *c = carry
michael@0:     emms                        # reset the x87 tag word after MMX use
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: 
michael@0:  #  s_mpv_mul_d_add(a, a_len, b, c):  c[0 .. a_len-1] += a[0 .. a_len-1] * b
michael@0:  #
michael@0:  #  Multiplies the a_len 32-bit words at a (least significant word first)
michael@0:  #  by the 32-bit word b and adds the products into the words at c.
michael@0:  #  The final carry word is stored (not added) at c[a_len], so c needs
michael@0:  #  room for a_len + 1 words.  With a_len == 0 the only effect is c[0] = 0.
michael@0:  #  Arguments are read from the stack; eax and edx are not touched.
michael@0:  #  MMX registers mm0-mm2 are used and emms is executed before returning.
michael@0:  #  No string instruction is used, so the direction flag is left alone.
michael@0:  #
michael@0:  #  ebp - 8:    caller's esi
michael@0:  #  ebp - 4:    caller's edi
michael@0:  #  ebp + 0:    caller's ebp
michael@0:  #  ebp + 4:    return address
michael@0:  #  ebp + 8:    a       argument
michael@0:  #  ebp + 12:   a_len   argument
michael@0:  #  ebp + 16:   b       argument
michael@0:  #  ebp + 20:   c       argument
michael@0:  #  registers:
michael@0:  #      ecx:    a_len (words still to process)
michael@0:  #      esi:    a ptr
michael@0:  #      edi:    c ptr
michael@0:  #      mm0:    current product, then the current word of c
michael@0:  #      mm1:    b
michael@0:  #      mm2:    running carry
michael@0: .globl s_mpv_mul_d_add
michael@0: .private_extern s_mpv_mul_d_add
michael@0: TYPE_FUNCTION(s_mpv_mul_d_add)
michael@0: s_mpv_mul_d_add:
michael@0:     push   %ebp
michael@0:     mov    %esp, %ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     pxor   %mm2, %mm2           # carry = 0
michael@0:     mov    12(%ebp), %ecx       # ecx = a_len
michael@0:     movd   16(%ebp), %mm1       # mm1 = b, upper half zeroed
michael@0:     mov    20(%ebp), %edi       # edi = c
michael@0:     test   %ecx, %ecx
michael@0:     jz     2f                   # a_len == 0: only the carry word is written
michael@0:     mov    8(%ebp), %esi        # esi = a
michael@0: 1:
michael@0:     movd   0(%esi), %mm0        # mm0 = *a
michael@0:     add    $4, %esi             # a++
michael@0:     pmuludq %mm1, %mm0          # mm0 = b * *a, full 64-bit product
michael@0:     paddq  %mm0, %mm2           # mm2 = carry + product
michael@0:     movd   0(%edi), %mm0        # mm0 = *c
michael@0:     paddq  %mm0, %mm2           # mm2 += *c (sum still fits in 64 bits)
michael@0:     movd   %mm2, 0(%edi)        # *c = low 32 bits
michael@0:     add    $4, %edi             # c++
michael@0:     psrlq  $32, %mm2            # carry = high 32 bits
michael@0:     dec    %ecx                 # --a_len
michael@0:     jnz    1b                   # loop while a_len != 0
michael@0: 2:
michael@0:     movd   %mm2, 0(%edi)        # *c = carry
michael@0:     emms                        # reset the x87 tag word after MMX use
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: 
michael@0:  #  s_mpv_mul_d_add_prop(a, a_len, b, c):  c += a[0 .. a_len-1] * b
michael@0:  #
michael@0:  #  Adds the product of the a_len 32-bit words at a (least significant
michael@0:  #  word first) and the 32-bit word b into the words at c.  A nonzero
michael@0:  #  carry out of the last product is added into c[a_len] and rippled
michael@0:  #  upward through as many further words of c as keep carrying; the
michael@0:  #  ripple has no length limit of its own.
michael@0:  #  MMX registers mm0-mm3 are used and emms is executed before returning.
michael@0:  #
michael@0:  #  ebp - 12:   caller's ebx
michael@0:  #  ebp - 8:    caller's esi
michael@0:  #  ebp - 4:    caller's edi
michael@0:  #  ebp + 0:    caller's ebp
michael@0:  #  ebp + 4:    return address
michael@0:  #  ebp + 8:    a       argument
michael@0:  #  ebp + 12:   a_len   argument
michael@0:  #  ebp + 16:   b       argument
michael@0:  #  ebp + 20:   c       argument
michael@0:  #  registers:
michael@0:  #      eax:    word of c being updated during carry propagation
michael@0:  #      ebx:    carry out of the multiply loop
michael@0:  #      ecx:    a_len (words still to process)
michael@0:  #      esi:    a ptr
michael@0:  #      edi:    c ptr
michael@0:  #      mm0:    current product    mm1:  b
michael@0:  #      mm2:    running carry      mm3:  current word of c
michael@0: .globl s_mpv_mul_d_add_prop
michael@0: .private_extern s_mpv_mul_d_add_prop
michael@0: TYPE_FUNCTION(s_mpv_mul_d_add_prop)
michael@0: s_mpv_mul_d_add_prop:
michael@0:     push   %ebp
michael@0:     mov    %esp, %ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     mov    20(%ebp), %edi       # edi = c
michael@0:     mov    12(%ebp), %ecx       # ecx = a_len
michael@0:     movd   16(%ebp), %mm1       # mm1 = b
michael@0:     pxor   %mm2, %mm2           # running carry = 0
michael@0:     test   %ecx, %ecx
michael@0:     jz     2f                   # empty input: carry stays zero
michael@0:     cld                         # stosl below must step edi upward
michael@0:     mov    8(%ebp), %esi        # esi = a
michael@0: 1:
michael@0:     movd   0(%edi), %mm3        # mm3 = *c
michael@0:     movd   0(%esi), %mm0        # mm0 = *a
michael@0:     pmuludq %mm1, %mm0          # mm0 = b * *a
michael@0:     add    $4, %esi             # a++
michael@0:     paddq  %mm3, %mm2           # carry += *c
michael@0:     paddq  %mm0, %mm2           # carry += product
michael@0:     movd   %mm2, 0(%edi)        # *c = low word of the sum
michael@0:     psrlq  $32, %mm2            # carry = high word of the sum
michael@0:     add    $4, %edi             # c++
michael@0:     dec    %ecx                 # one word fewer to go
michael@0:     jnz    1b
michael@0: 2:
michael@0:     movd   %mm2, %ebx           # ebx = carry left over from the loop
michael@0:     test   %ebx, %ebx
michael@0:     jz     4f                   # nothing to propagate
michael@0:     mov    0(%edi), %eax
michael@0:     add    %ebx, %eax           # *c + carry, CF = overflow
michael@0:     stosl                       # *c++ = eax (flags are not changed)
michael@0:     jnc    4f
michael@0: 3:
michael@0:     mov    0(%edi), %eax        # next word of c
michael@0:     adc    $0, %eax             # absorb the pending carry bit
michael@0:     stosl                       # *c++ = eax (flags are not changed)
michael@0:     jc     3b                   # keep going while it overflows
michael@0: 4:
michael@0:     emms
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: 
michael@0:  #  s_mpv_sqr_add_prop(pa, a_len, ps):  ps[2i .. 2i+1] += pa[i] * pa[i]
michael@0:  #
michael@0:  #  For each of the a_len 32-bit words at pa, adds its 64-bit square into
michael@0:  #  the next pair of words at ps, carrying from one pair into the next.
michael@0:  #  A nonzero carry out of the last pair is added into ps[2 * a_len] and
michael@0:  #  rippled upward through as many further words as keep carrying; the
michael@0:  #  ripple has no length limit of its own.
michael@0:  #  MMX registers mm0, mm2 and mm3 are used; emms is executed on exit.
michael@0:  #
michael@0:  #  ebp - 12:   caller's ebx
michael@0:  #  ebp - 8:    caller's esi
michael@0:  #  ebp - 4:    caller's edi
michael@0:  #  ebp + 0:    caller's ebp
michael@0:  #  ebp + 4:    return address
michael@0:  #  ebp + 8:    pa      argument
michael@0:  #  ebp + 12:   a_len   argument
michael@0:  #  ebp + 16:   ps      argument
michael@0:  #  registers:
michael@0:  #      eax:    word of ps being updated during carry propagation
michael@0:  #      ebx:    carry out of the squaring loop
michael@0:  #      ecx:    a_len (words still to process)
michael@0:  #      esi:    pa ptr
michael@0:  #      edi:    ps ptr
michael@0:  #      mm0:    current word, then its square
michael@0:  #      mm2:    running carry      mm3:  current word of ps
michael@0: .globl s_mpv_sqr_add_prop
michael@0: .private_extern s_mpv_sqr_add_prop
michael@0: TYPE_FUNCTION(s_mpv_sqr_add_prop)
michael@0: s_mpv_sqr_add_prop:
michael@0:     push   %ebp
michael@0:     mov    %esp, %ebp
michael@0:     push   %edi
michael@0:     push   %esi
michael@0:     push   %ebx
michael@0:     mov    16(%ebp), %edi       # edi = ps
michael@0:     mov    12(%ebp), %ecx       # ecx = a_len
michael@0:     pxor   %mm2, %mm2           # running carry = 0
michael@0:     test   %ecx, %ecx
michael@0:     jz     2f                   # empty input: carry stays zero
michael@0:     cld                         # stosl below must step edi upward
michael@0:     mov    8(%ebp), %esi        # esi = pa
michael@0: 1:
michael@0:     movd   0(%esi), %mm0        # mm0 = *pa
michael@0:     add    $4, %esi             # pa++
michael@0:     movd   0(%edi), %mm3        # mm3 = ps[0]
michael@0:     pmuludq %mm0, %mm0          # mm0 = *pa squared, 64 bits
michael@0:     paddq  %mm3, %mm2           # carry += ps[0]
michael@0:     movd   4(%edi), %mm3        # mm3 = ps[1]
michael@0:     paddq  %mm0, %mm2           # carry += square
michael@0:     movd   %mm2, 0(%edi)        # ps[0] = low word of the sum
michael@0:     psrlq  $32, %mm2            # keep the high word
michael@0:     paddq  %mm3, %mm2           # high word += ps[1]
michael@0:     movd   %mm2, 4(%edi)        # ps[1] = low word of that
michael@0:     add    $8, %edi             # ps += 2
michael@0:     psrlq  $32, %mm2            # carry into the next pair
michael@0:     dec    %ecx                 # one word fewer to go
michael@0:     jnz    1b
michael@0: 2:
michael@0:     movd   %mm2, %ebx           # ebx = carry left over from the loop
michael@0:     test   %ebx, %ebx
michael@0:     jz     4f                   # nothing to propagate
michael@0:     mov    0(%edi), %eax
michael@0:     add    %ebx, %eax           # *ps + carry, CF = overflow
michael@0:     stosl                       # *ps++ = eax (flags are not changed)
michael@0:     jnc    4f
michael@0: 3:
michael@0:     mov    0(%edi), %eax        # next word of ps
michael@0:     adc    $0, %eax             # absorb the pending carry bit
michael@0:     stosl                       # *ps++ = eax (flags are not changed)
michael@0:     jc     3b                   # keep going while it overflows
michael@0: 4:
michael@0:     emms
michael@0:     pop    %ebx
michael@0:     pop    %esi
michael@0:     pop    %edi
michael@0:     leave
michael@0:     ret
michael@0:     nop
michael@0: 
michael@0:  #
michael@0:  # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
michael@0:  # so its high bit is 1.   This code is from NSPR.
michael@0:  #
michael@0:  # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
michael@0:  #                        mp_digit *qp, mp_digit *rp)
michael@0:  #
michael@0:  # Stores the 32-bit quotient at *qp and the remainder at *rp, then
michael@0:  # returns zero in eax.  Neither pointer is checked for null.  The div
michael@0:  # instruction faults when the quotient does not fit in 32 bits, so the
michael@0:  # caller must pass Nhi < divisor (which also rules out a zero divisor).
michael@0:  #
michael@0:  # Only caller-saved registers are used, so nothing is pushed:
michael@0:  #
michael@0:  #  esp +  0:   return address
michael@0:  #  esp +  4:   Nhi     argument
michael@0:  #  esp +  8:   Nlo     argument
michael@0:  #  esp + 12:   divisor argument
michael@0:  #  esp + 16:   qp      argument
michael@0:  #  esp + 20:   rp      argument
michael@0:  #  registers:
michael@0:  #      eax:    Nlo, then quotient, then the zero return value
michael@0:  #      ecx:    qp, then rp
michael@0:  #      edx:    Nhi, then remainder
michael@0:  #
michael@0: .globl s_mpv_div_2dx1d
michael@0: .private_extern s_mpv_div_2dx1d
michael@0: TYPE_FUNCTION(s_mpv_div_2dx1d)
michael@0: s_mpv_div_2dx1d:
michael@0:        mov    4(%esp), %edx     # edx = Nhi (high half of the dividend)
michael@0:        mov    8(%esp), %eax     # eax = Nlo (low half of the dividend)
michael@0:        divl   12(%esp)          # eax = quotient, edx = remainder
michael@0:        mov    16(%esp), %ecx    # ecx = qp
michael@0:        mov    %eax, 0(%ecx)     # *qp = quotient
michael@0:        mov    20(%esp), %ecx    # ecx = rp
michael@0:        mov    %edx, 0(%ecx)     # *rp = remainder
michael@0:        xor    %eax, %eax        # return zero
michael@0:        ret
michael@0:        nop
michael@0: 
michael@0: #ifndef DARWIN
michael@0:  # Magic indicating no need for an executable stack: an empty
michael@0:  # .note.GNU-stack section with no flags is emitted, then .previous
michael@0:  # switches back to the section that was active before it.
michael@0:  # This is skipped when DARWIN is defined.
michael@0: .section .note.GNU-stack, "", @progbits
michael@0: .previous
michael@0: #endif