/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Multi-precision digit primitives for 64-bit MIPS.
 *
 * Digits are loaded with lwu and stored with sw, i.e. they are 32 bits wide.
 * Products and running sums are kept in 64-bit registers: the low word of a
 * sum is the result digit (ACCUM) and the high word is the carry (CARRYOUT).
 *
 * The file is assembled with ".set noreorder", so the instruction that
 * follows every branch or jump is its delay slot and always executes.
 * Instruction order is hand-scheduled around those slots and around the
 * HI/LO result of dmultu; do not reorder without re-checking both.
 *
 * NOTE(review): the header name on the include line was missing from the
 * copy under review.  <regdef.h> is assumed because the code relies on
 * symbolic register names (a0-a7, t0-t3, ra, zero) - confirm against the
 * build.
 */
#include <regdef.h>
        .set    noreorder
        .set    noat

        .section        .text, 1, 0x00000006, 4, 4
.text:
        .section        .text

/*
 * void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
 *                      mp_digit *c)
 *
 * c[0..a_len-1] += a[0..a_len-1] * b, then the final carry is STORED (not
 * added) into c[a_len].  Does nothing when a_len is 0.
 *
 * Registers on entry:  a0 = a, a1 = a_len, a2 = b, a3 = c
 * Scratch:             a4, a5  digits of a        a6, a7  digits of c
 *                      t0, t1  64-bit sums        t2      carry
 *                      t3      loop test
 *
 * The main loop handles two digits per pass and pre-loads the next digits
 * of a.  That pre-load can read one digit past the end of a; the value is
 * not used on the path where that happens.
 */
        .ent    s_mpv_mul_d_add
        .globl  s_mpv_mul_d_add

s_mpv_mul_d_add:
        beq     a1,zero,.L.1            # if (a_len == 0) return
        move    t2,zero                 # (delay slot) cy = 0
        dsll32  a2,a2,0                 # zero-extend b: the caller may pass it
        dsrl32  a2,a2,0                 #   with the upper 32 bits set
        lwu     a4,0(a0)                # a0 = a[0]
        dmultu  a2,a4                   # w0 = (mp_word)b * a0
        addiu   a1,a1,-1                # --a_len
        beq     a1,zero,.L.2            # only one digit in total
        sltiu   t3,a1,2                 # (delay slot) t3 = (a_len < 2)
        bne     t3,zero,.L.3            # fewer than two left: skip the loop
        lwu     a5,4(a0)                # (delay slot) a1 = a[1]
.L.4:                                   # while (a_len >= 2)
        addiu   a1,a1,-2                # a_len -= 2
        lwu     a6,0(a3)                # c0 = c[0]
        mflo    t0                      # w0 = product
        daddu   t0,t0,t2                # w0 += cy
        daddu   t0,t0,a6                # w0 += c0
        dmultu  a2,a5                   # w1 = (mp_word)b * a1
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        lwu     a4,8(a0)                # a0 = a[2]
        addiu   a0,a0,8                 # a += 2
        lwu     a7,4(a3)                # c1 = c[1]
        mflo    t1                      # w1 = product
        daddu   t1,t1,t2                # w1 += cy
        daddu   t1,t1,a7                # w1 += c1
        dmultu  a2,a4                   # w0 = (mp_word)b * a0
        dsrl32  t2,t1,0                 # cy = CARRYOUT(w1)
        sw      t1,4(a3)                # c[1] = ACCUM(w1)
        addiu   a3,a3,8                 # c += 2
        sltiu   t3,a1,2                 # t3 = (a_len < 2)
        beq     t3,zero,.L.4            # loop while a_len >= 2
        lwu     a5,4(a0)                # (delay slot) a1 = a[1]
.L.3:                                   # one or two digits still pending
        lwu     a6,0(a3)                # c0 = c[0]
        mflo    t0                      # w0 = product
        beq     a1,zero,.L.5            # exactly one digit pending
        daddu   t0,t0,t2                # (delay slot) w0 += cy
        dmultu  a2,a5                   # w1 = (mp_word)b * a1
        daddu   t0,t0,a6                # w0 += c0
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        lwu     a7,4(a3)                # c1 = c[1]
        mflo    t1                      # w1 = product
        daddu   t1,t1,t2                # w1 += cy
        daddu   t1,t1,a7                # w1 += c1
        sw      t1,4(a3)                # c[1] = ACCUM(w1)
        dsrl32  t2,t1,0                 # cy = CARRYOUT(w1)
        b       .L.6
        addiu   a3,a3,4                 # (delay slot) c += 1, so 4(a3) is c[2]
.L.5:
        daddu   t0,t0,a6                # w0 += c0
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        b       .L.6
        dsrl32  t2,t0,0                 # (delay slot) cy = CARRYOUT(w0)
.L.2:                                   # a_len was exactly 1
        lwu     a6,0(a3)                # c0 = c[0]
        mflo    t0                      # w0 = product
        daddu   t0,t0,a6                # w0 += c0
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
.L.6:
        jr      ra
        sw      t2,4(a3)                # (delay slot) c[1] = cy
.L.1:
        jr      ra
        nop

        .end    s_mpv_mul_d_add

/*
 * void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
 *                           mp_digit *c)
 *
 * c[0..a_len-1] += a[0..a_len-1] * b, then the final carry is ADDED into
 * c[a_len] and rippled upward through c for as long as a carry remains.
 * Does nothing when a_len is 0.
 *
 * Register use is the same as in s_mpv_mul_d_add.  Every path into .M.6
 * leaves a3 pointing at the first digit that has not been updated yet.
 */
        .ent    s_mpv_mul_d_add_prop
        .globl  s_mpv_mul_d_add_prop

s_mpv_mul_d_add_prop:
        beq     a1,zero,.M.1            # if (a_len == 0) return
        move    t2,zero                 # (delay slot) cy = 0
        dsll32  a2,a2,0                 # zero-extend b: the caller may pass it
        dsrl32  a2,a2,0                 #   with the upper 32 bits set
        lwu     a4,0(a0)                # a0 = a[0]
        dmultu  a2,a4                   # w0 = (mp_word)b * a0
        addiu   a1,a1,-1                # --a_len
        beq     a1,zero,.M.2            # only one digit in total
        sltiu   t3,a1,2                 # (delay slot) t3 = (a_len < 2)
        bne     t3,zero,.M.3            # fewer than two left: skip the loop
        lwu     a5,4(a0)                # (delay slot) a1 = a[1]
.M.4:                                   # while (a_len >= 2)
        addiu   a1,a1,-2                # a_len -= 2
        lwu     a6,0(a3)                # c0 = c[0]
        mflo    t0                      # w0 = product
        daddu   t0,t0,t2                # w0 += cy
        daddu   t0,t0,a6                # w0 += c0
        dmultu  a2,a5                   # w1 = (mp_word)b * a1
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        lwu     a4,8(a0)                # a0 = a[2]
        addiu   a0,a0,8                 # a += 2
        lwu     a7,4(a3)                # c1 = c[1]
        mflo    t1                      # w1 = product
        daddu   t1,t1,t2                # w1 += cy
        daddu   t1,t1,a7                # w1 += c1
        dmultu  a2,a4                   # w0 = (mp_word)b * a0
        dsrl32  t2,t1,0                 # cy = CARRYOUT(w1)
        sw      t1,4(a3)                # c[1] = ACCUM(w1)
        addiu   a3,a3,8                 # c += 2
        sltiu   t3,a1,2                 # t3 = (a_len < 2)
        beq     t3,zero,.M.4            # loop while a_len >= 2
        lwu     a5,4(a0)                # (delay slot) a1 = a[1]
.M.3:                                   # one or two digits still pending
        lwu     a6,0(a3)                # c0 = c[0]
        mflo    t0                      # w0 = product
        beq     a1,zero,.M.5            # exactly one digit pending
        daddu   t0,t0,t2                # (delay slot) w0 += cy
        dmultu  a2,a5                   # w1 = (mp_word)b * a1
        daddu   t0,t0,a6                # w0 += c0
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        lwu     a7,4(a3)                # c1 = c[1]
        mflo    t1                      # w1 = product
        daddu   t1,t1,t2                # w1 += cy
        daddu   t1,t1,a7                # w1 += c1
        sw      t1,4(a3)                # c[1] = ACCUM(w1)
        dsrl32  t2,t1,0                 # cy = CARRYOUT(w1)
        b       .M.6
        addiu   a3,a3,8                 # (delay slot) c += 2
.M.5:
        daddu   t0,t0,a6                # w0 += c0
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        b       .M.6
        addiu   a3,a3,4                 # (delay slot) c += 1
.M.2:                                   # a_len was exactly 1
        lwu     a6,0(a3)                # c0 = c[0]
        mflo    t0                      # w0 = product
        daddu   t0,t0,a6                # w0 += c0
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        addiu   a3,a3,4                 # c += 1
.M.6:
        beq     t2,zero,.M.1            # while (cy)
        nop
.M.7:
        lwu     a6,0(a3)                # w = (mp_word)*c + cy
        daddu   t2,t2,a6
        sw      t2,0(a3)                # *c = ACCUM(w)
        dsrl32  t2,t2,0                 # cy = CARRYOUT(w)
        bne     t2,zero,.M.7
        addiu   a3,a3,4                 # (delay slot) ++c
.M.1:
        jr      ra
        nop

        .end    s_mpv_mul_d_add_prop

/*
 * void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
 *                  mp_digit *c)
 *
 * c[0..a_len-1] = a[0..a_len-1] * b and c[a_len] = final carry.  The old
 * contents of c are not read.  Does nothing when a_len is 0.
 *
 * Registers on entry:  a0 = a, a1 = a_len, a2 = b, a3 = c
 * Scratch:             a4, a5  digits of a        t0, t1  64-bit sums
 *                      t2      carry              t3      loop test
 */
        .ent    s_mpv_mul_d
        .globl  s_mpv_mul_d

s_mpv_mul_d:
        beq     a1,zero,.N.1            # if (a_len == 0) return
        move    t2,zero                 # (delay slot) cy = 0
        dsll32  a2,a2,0                 # zero-extend b: the caller may pass it
        dsrl32  a2,a2,0                 #   with the upper 32 bits set
        lwu     a4,0(a0)                # a0 = a[0]
        dmultu  a2,a4                   # w0 = (mp_word)b * a0
        addiu   a1,a1,-1                # --a_len
        beq     a1,zero,.N.2            # only one digit in total
        sltiu   t3,a1,2                 # (delay slot) t3 = (a_len < 2)
        bne     t3,zero,.N.3            # fewer than two left: skip the loop
        lwu     a5,4(a0)                # (delay slot) a1 = a[1]
.N.4:                                   # while (a_len >= 2)
        addiu   a1,a1,-2                # a_len -= 2
        mflo    t0                      # w0 = product
        daddu   t0,t0,t2                # w0 += cy
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        dmultu  a2,a5                   # w1 = (mp_word)b * a1
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        lwu     a4,8(a0)                # a0 = a[2]
        addiu   a0,a0,8                 # a += 2
        mflo    t1                      # w1 = product
        daddu   t1,t1,t2                # w1 += cy
        dsrl32  t2,t1,0                 # cy = CARRYOUT(w1)
        dmultu  a2,a4                   # w0 = (mp_word)b * a0
        sw      t1,4(a3)                # c[1] = ACCUM(w1)
        addiu   a3,a3,8                 # c += 2
        sltiu   t3,a1,2                 # t3 = (a_len < 2)
        beq     t3,zero,.N.4            # loop while a_len >= 2
        lwu     a5,4(a0)                # (delay slot) a1 = a[1]
.N.3:                                   # one or two digits still pending
        mflo    t0                      # w0 = product
        beq     a1,zero,.N.5            # exactly one digit pending
        daddu   t0,t0,t2                # (delay slot) w0 += cy
        dmultu  a2,a5                   # w1 = (mp_word)b * a1
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        mflo    t1                      # w1 = product
        daddu   t1,t1,t2                # w1 += cy
        sw      t1,4(a3)                # c[1] = ACCUM(w1)
        dsrl32  t2,t1,0                 # cy = CARRYOUT(w1)
        b       .N.6
        addiu   a3,a3,4                 # (delay slot) c += 1, so 4(a3) is c[2]
.N.5:
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        b       .N.6
        dsrl32  t2,t0,0                 # (delay slot) cy = CARRYOUT(w0)
.N.2:                                   # a_len was exactly 1
        mflo    t0                      # w0 = product
        sw      t0,0(a3)                # c[0] = ACCUM(w0)
        dsrl32  t2,t0,0                 # cy = CARRYOUT(w0)
.N.6:
        jr      ra
        sw      t2,4(a3)                # (delay slot) c[1] = cy
.N.1:
        jr      ra
        nop

        .end    s_mpv_mul_d

/*
 * void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
 *
 * For each digit a[i], adds a[i]*a[i] plus the running carry into the
 * 64-bit value held in sqrs[2i] (low word) and sqrs[2i+1] (high word).
 * A carry left over after the last digit is added into the following
 * digits of sqrs until it dies out.
 *
 * Registers:  a0 = a, a1 = a_len, a2 = sqrs
 *             a3      current digit of a
 *             a4      square, then the 64-bit sum
 *             a5, a6  digit pair loaded from sqrs / intermediate sum
 *             a7, t0  carry-out flags (t0 is the carry into the next step)
 *
 * Changes relative to the previous revision:
 *  - a_len == 0 now returns immediately, like the other routines in this
 *    file.  Previously the length was decremented to 0xffffffff and the
 *    loop ran on.
 *  - The trailing carry loop used to compute sqrs[k] + carry without ever
 *    storing it, tested for carry with a 64-bit overflow check that can
 *    never fire on a zero-extended 32-bit digit, and then stored the stale
 *    high word over the next digit.  It now uses the same load / add /
 *    store / shift sequence as the carry loop in s_mpv_mul_d_add_prop.
 *
 * NOTE(review): the carry loop writes past sqrs[2*a_len-1] when a carry
 * remains; presumably callers guarantee that room exists - confirm.
 */
        .ent    s_mpv_sqr_add_prop
        .globl  s_mpv_sqr_add_prop

s_mpv_sqr_add_prop:
        beq     a1,zero,.P.9            # nothing to do for an empty input
        move    a7,zero                 # (delay slot)
        move    t0,zero                 # carry = 0
        lwu     a3,0(a0)                # a_i = a[0]
        addiu   a1,a1,-1                # --a_len
        dmultu  a3,a3                   # start a_i * a_i
        beq     a1,zero,.P.3            # single digit: go straight to the tail
        addiu   a0,a0,4                 # (delay slot) ++a
.P.2:
        lwu     a5,0(a2)                # low word of the sqrs pair
        lwu     a6,4(a2)                # high word of the sqrs pair
        addiu   a2,a2,8                 # sqrs += 2
        dsll32  a6,a6,0
        daddu   a5,a5,a6                # a5 = 64-bit value of the pair
        lwu     a3,0(a0)                # next digit of a
        addiu   a0,a0,4                 # ++a
        mflo    a4                      # square of the previous digit
        daddu   a6,a5,a4                # pair + square
        sltu    a7,a6,a5                # a7 = carry out of that addition
        dmultu  a3,a3                   # start the next square
        daddu   a4,a6,t0                # + carry in
        sltu    t0,a4,a6                # carry out of that addition
        add     t0,t0,a7                # combined carry for the next pair
        sw      a4,-8(a2)               # store low word
        addiu   a1,a1,-1                # --a_len
        dsrl32  a4,a4,0                 # high word
        bne     a1,zero,.P.2            # loop while digits remain
        sw      a4,-4(a2)               # (delay slot) store high word
.P.3:                                   # last digit: no further square to start
        lwu     a5,0(a2)                # low word of the sqrs pair
        lwu     a6,4(a2)                # high word of the sqrs pair
        addiu   a2,a2,8                 # sqrs += 2
        dsll32  a6,a6,0
        daddu   a5,a5,a6                # a5 = 64-bit value of the pair
        mflo    a4                      # square of the last digit
        daddu   a6,a5,a4                # pair + square
        sltu    a7,a6,a5                # a7 = carry out of that addition
        daddu   a4,a6,t0                # + carry in
        sltu    t0,a4,a6                # carry out of that addition
        add     t0,t0,a7                # final carry
        sw      a4,-8(a2)               # store low word
        dsrl32  a4,a4,0                 # high word
        beq     t0,zero,.P.9            # no carry left: done
        sw      a4,-4(a2)               # (delay slot) store high word on both paths
.P.8:                                   # propagate the final carry
        lwu     a5,0(a2)                # w = (mp_word)*sqrs + carry
        daddu   a5,a5,t0
        sw      a5,0(a2)                # *sqrs = ACCUM(w)
        dsrl32  t0,a5,0                 # carry = CARRYOUT(w)
        bne     t0,zero,.P.8            # loop if carry persists
        addiu   a2,a2,4                 # (delay slot) ++sqrs
.P.9:
        jr      ra
        nop

        .end    s_mpv_sqr_add_prop