# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifdef DARWIN
#define s_mpv_mul_d          _s_mpv_mul_d
#define s_mpv_mul_d_add      _s_mpv_mul_d_add
#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
#define s_mpv_sqr_add_prop   _s_mpv_sqr_add_prop
#define s_mpv_div_2dx1d      _s_mpv_div_2dx1d
#define TYPE_FUNCTION(x)
#else
#define TYPE_FUNCTION(x) .type x, @function
#endif

#
# Multi-word unsigned arithmetic helpers for 32-bit x86 (AT&T syntax).
#
# Every routine reads its arguments from the stack and works on 32-bit
# words.  The multiply routines keep a 64-bit running sum in the MMX
# registers using psubq/paddq/pmuludq (these forms require SSE2) and
# execute emms before returning.
#
# No routine uses string instructions, so none of them reads or changes
# the direction flag.
#
# NOTE(review): .private_extern is emitted unconditionally below; confirm
# that the assembler used for non-Darwin builds accepts it.
#

.text

#
# s_mpv_mul_d
#
# Multiplies the a_len words at a by the single word b.  Writes a_len
# result words to c, followed by one more word holding the final carry.
# When a_len is zero only that carry word (zero) is written.
#
# Stack layout after the prologue:
#   ebp -  8:  caller's esi
#   ebp -  4:  caller's edi
#   ebp +  0:  caller's ebp
#   ebp +  4:  return address
#   ebp +  8:  a argument
#   ebp + 12:  a_len argument
#   ebp + 16:  b argument
#   ebp + 20:  c argument
# Registers:
#   ecx: words remaining
#   esi: a ptr
#   edi: c ptr
#   mm0: current product
#   mm1: b
#   mm2: running 64-bit sum; its high half is the carry
#
.globl s_mpv_mul_d
.private_extern s_mpv_mul_d
TYPE_FUNCTION(s_mpv_mul_d)
s_mpv_mul_d:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        psubq   %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: just store the carry
        mov     8(%ebp), %esi           # esi = a
1:
        movd    0(%esi), %mm0           # mm0 = *a
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a, full 64-bit product
        paddq   %mm0, %mm2              # sum = carry + product
        movd    %mm2, 0(%edi)           # *c = low 32 bits of the sum
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high 32 bits of the sum
        dec     %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, 0(%edi)           # *c = carry
        emms                            # leave the MMX state clean
        pop     %esi
        pop     %edi
        leave
        ret
        nop                             # padding, never executed

#
# s_mpv_mul_d_add
#
# Same as s_mpv_mul_d, except that each product is added to the word
# already stored at c before being written back.  The final carry is
# stored to (not added into) the word following the last result word.
#
# Stack layout after the prologue:
#   ebp -  8:  caller's esi
#   ebp -  4:  caller's edi
#   ebp +  0:  caller's ebp
#   ebp +  4:  return address
#   ebp +  8:  a argument
#   ebp + 12:  a_len argument
#   ebp + 16:  b argument
#   ebp + 20:  c argument
# Registers:
#   ecx: words remaining
#   esi: a ptr
#   edi: c ptr
#   mm0: current product, then current word of c
#   mm1: b
#   mm2: running 64-bit sum; its high half is the carry
#
.globl s_mpv_mul_d_add
.private_extern s_mpv_mul_d_add
TYPE_FUNCTION(s_mpv_mul_d_add)
s_mpv_mul_d_add:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        psubq   %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: just store the carry
        mov     8(%ebp), %esi           # esi = a
1:
        movd    0(%esi), %mm0           # mm0 = *a
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a, full 64-bit product
        paddq   %mm0, %mm2              # sum = carry + product
        movd    0(%edi), %mm0           # mm0 = *c
        paddq   %mm0, %mm2              # sum += *c
        movd    %mm2, 0(%edi)           # *c = low 32 bits of the sum
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high 32 bits of the sum
        dec     %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, 0(%edi)           # *c = carry
        emms                            # leave the MMX state clean
        pop     %esi
        pop     %edi
        leave
        ret
        nop                             # padding, never executed

#
# s_mpv_mul_d_add_prop
#
# Same multiply-accumulate loop as s_mpv_mul_d_add, but the final carry
# is added into the following words of c and propagated upward.
#
# The propagation loop has no length bound: it keeps updating successive
# words of c until an addition produces no carry, so c must be long
# enough to absorb it.  Nothing is written when a_len is zero.
#
# Stack layout after the prologue:
#   ebp - 12:  caller's ebx
#   ebp -  8:  caller's esi
#   ebp -  4:  caller's edi
#   ebp +  0:  caller's ebp
#   ebp +  4:  return address
#   ebp +  8:  a argument
#   ebp + 12:  a_len argument
#   ebp + 16:  b argument
#   ebp + 20:  c argument
# Registers:
#   eax: scratch word during carry propagation
#   ebx: final carry out of the multiply loop
#   ecx: words remaining
#   esi: a ptr
#   edi: c ptr
#   mm0: current product
#   mm1: b
#   mm2: running 64-bit sum; its high half is the carry
#   mm3: current word of c
#
.globl s_mpv_mul_d_add_prop
.private_extern s_mpv_mul_d_add_prop
TYPE_FUNCTION(s_mpv_mul_d_add_prop)
s_mpv_mul_d_add_prop:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        push    %ebx
        psubq   %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: carry stays zero
        mov     8(%ebp), %esi           # esi = a
1:
        movd    0(%esi), %mm0           # mm0 = *a
        movd    0(%edi), %mm3           # mm3 = *c
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a, full 64-bit product
        paddq   %mm0, %mm2              # sum = carry + product
        paddq   %mm3, %mm2              # sum += *c
        movd    %mm2, 0(%edi)           # *c = low 32 bits of the sum
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high 32 bits of the sum
        dec     %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, %ebx              # ebx = final carry
        test    %ebx, %ebx
        jz      4f                      # nothing to propagate
        mov     0(%edi), %eax
        add     %ebx, %eax              # *c + carry, CF = carry out
        mov     %eax, 0(%edi)           # mov leaves CF untouched
        lea     4(%edi), %edi           # c++ without disturbing CF
        jnc     4f
3:
        mov     0(%edi), %eax           # current word from *c
        adc     $0, %eax                # CF is set on entry: adds one
        mov     %eax, 0(%edi)
        lea     4(%edi), %edi           # c++ without disturbing CF
        jc      3b                      # keep going while it overflows
4:
        emms                            # leave the MMX state clean
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop                             # padding, never executed

#
# s_mpv_sqr_add_prop
#
# For each of the a_len words at pa, adds the 64-bit square of that word
# into the next two words at ps, carrying from one pair into the next.
# The carry left after the last pair is added into the following words
# of ps and propagated upward.
#
# The propagation loop has no length bound: it keeps updating successive
# words of ps until an addition produces no carry, so ps must be long
# enough to absorb it.  Nothing is written when a_len is zero.
#
# Stack layout after the prologue:
#   ebp - 12:  caller's ebx
#   ebp -  8:  caller's esi
#   ebp -  4:  caller's edi
#   ebp +  0:  caller's ebp
#   ebp +  4:  return address
#   ebp +  8:  pa argument
#   ebp + 12:  a_len argument
#   ebp + 16:  ps argument
# Registers:
#   eax: scratch word during carry propagation
#   ebx: final carry out of the squaring loop
#   ecx: words remaining
#   esi: pa ptr
#   edi: ps ptr
#   mm0: current square
#   mm2: running 64-bit sum; its high half is the carry
#   mm3: current word of ps
#
.globl s_mpv_sqr_add_prop
.private_extern s_mpv_sqr_add_prop
TYPE_FUNCTION(s_mpv_sqr_add_prop)
s_mpv_sqr_add_prop:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        push    %ebx
        psubq   %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        mov     16(%ebp), %edi          # edi = ps
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: carry stays zero
        mov     8(%ebp), %esi           # esi = pa
1:
        movd    0(%esi), %mm0           # mm0 = *pa
        movd    0(%edi), %mm3           # mm3 = ps[0]
        add     $4, %esi                # pa++
        pmuludq %mm0, %mm0              # mm0 = sqr(*pa), 64 bits
        paddq   %mm0, %mm2              # sum = carry + square
        paddq   %mm3, %mm2              # sum += ps[0]
        movd    4(%edi), %mm3           # mm3 = ps[1]
        movd    %mm2, 0(%edi)           # ps[0] = low 32 bits of the sum
        psrlq   $32, %mm2               # keep the high 32 bits
        paddq   %mm3, %mm2              # sum = high half + ps[1]
        movd    %mm2, 4(%edi)           # ps[1] = low 32 bits of the sum
        psrlq   $32, %mm2               # carry into the next pair
        add     $8, %edi                # ps += 2
        dec     %ecx                    # --a_len
        jnz     1b                      # loop while a_len != 0
2:
        movd    %mm2, %ebx              # ebx = final carry
        test    %ebx, %ebx
        jz      4f                      # nothing to propagate
        mov     0(%edi), %eax
        add     %ebx, %eax              # *ps + carry, CF = carry out
        mov     %eax, 0(%edi)           # mov leaves CF untouched
        lea     4(%edi), %edi           # ps++ without disturbing CF
        jnc     4f
3:
        mov     0(%edi), %eax           # current word from *ps
        adc     $0, %eax                # CF is set on entry: adds one
        mov     %eax, 0(%edi)
        lea     4(%edi), %edi           # ps++ without disturbing CF
        jc      3b                      # keep going while it overflows
4:
        emms                            # leave the MMX state clean
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop                             # padding, never executed

#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1.  This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                        mp_digit *qp, mp_digit *rp)
#
# Stores the quotient through qp and the remainder through rp, and
# always returns zero in eax.
#
# No range check is made here.  The div instruction raises a divide
# error when the quotient does not fit in 32 bits, which happens
# whenever Nhi >= divisor, so the caller must guarantee Nhi < divisor.
#
# Stack layout after the push of ebx:
#   esp +  0:  caller's ebx
#   esp +  4:  return address
#   esp +  8:  Nhi argument
#   esp + 12:  Nlo argument
#   esp + 16:  divisor argument
#   esp + 20:  qp argument
#   esp + 24:  rp argument
# Registers:
#   eax: Nlo, then the quotient, then the zero return value
#   ebx: divisor, then qp, then rp
#   edx: Nhi, then the remainder
#
.globl s_mpv_div_2dx1d
.private_extern s_mpv_div_2dx1d
TYPE_FUNCTION(s_mpv_div_2dx1d)
s_mpv_div_2dx1d:
        push    %ebx
        mov     8(%esp), %edx           # edx = Nhi
        mov     12(%esp), %eax          # eax = Nlo
        mov     16(%esp), %ebx          # ebx = divisor
        div     %ebx                    # eax = quotient, edx = remainder
        mov     20(%esp), %ebx          # ebx = qp
        mov     %eax, 0(%ebx)           # *qp = quotient
        mov     24(%esp), %ebx          # ebx = rp
        mov     %edx, 0(%ebx)           # *rp = remainder
        xor     %eax, %eax              # return zero
        pop     %ebx
        ret
        nop                             # padding, never executed

#ifndef DARWIN
# Magic indicating no need for an executable stack
.section .note.GNU-stack, "", @progbits
.previous
#endif