security/nss/lib/freebl/mpi/mpi_mips.s

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/mpi_mips.s	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,472 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +#include <regdef.h>
     1.8 +        .set    noreorder	# branch delay slots in this file are filled by hand; the assembler must not reorder
     1.9 +        .set    noat		# keep the assembler from implicitly using the AT register
    1.10 +
    1.11 +        .section        .text, 1, 0x00000006, 4, 4	# NOTE(review): numeric arguments look like section type/flags/entry size/alignment - confirm against the assembler manual
    1.12 +.text:
    1.13 +        .section        .text
    1.14 +
    1.15 +        .ent    s_mpv_mul_d_add
    1.16 +        .globl  s_mpv_mul_d_add
    1.17 +
    1.18 +s_mpv_mul_d_add: 
    1.19 + #/* c[0..a_len-1] += a[0..a_len-1] * b, then the final carry is STORED (not added) in c[a_len]. Digits are 32 bits wide. */
    1.20 + #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, 
    1.21 + #			      mp_digit *c)
    1.22 + #{
    1.23 + #  mp_digit   a0, a1;	regs a4, a5
    1.24 + #  mp_digit   c0, c1;  regs a6, a7
    1.25 + #  mp_digit   cy = 0;  reg t2
    1.26 + #  mp_word    w0, w1;  regs t0, t1   (arguments arrive as a=a0, a_len=a1, b=a2, c=a3; t3 is scratch)
    1.27 + #
    1.28 + #  if (a_len) {
    1.29 +	beq	a1,zero,.L.1	# a_len == 0: return without touching c
    1.30 +	move	t2,zero		# cy = 0 (branch delay slot, runs on both paths)
    1.31 +	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
    1.32 +	dsrl32	a2,a2,0		# This clears the upper 32 bits.
    1.33 + #    a0 = a[0];
    1.34 +	lwu	a4,0(a0)
    1.35 + #    w0 = ((mp_word)b * a0);
    1.36 +	dmultu	a2,a4		# both operands are below 2^32, so the whole product is in LO
    1.37 + #    if (--a_len) {
    1.38 +	addiu	a1,a1,-1
    1.39 +	beq	a1,zero,.L.2	# only one digit: take the short path
    1.40 + #      while (a_len >= 2) {
    1.41 +	sltiu	t3,a1,2		# (delay slot) t3 = (a_len < 2)
    1.42 +	bne	t3,zero,.L.3	# exactly one digit left: skip the two-digit loop
    1.43 + #	  a1     = a[1];
    1.44 +	lwu	a5,4(a0)	# (delay slot) loaded on both paths
    1.45 +.L.4:
    1.46 + #	  a_len -= 2;
    1.47 +        addiu	a1,a1,-2
    1.48 + #	  c0     = c[0];
    1.49 +	lwu	a6,0(a3)
    1.50 + #	  w0    += cy;
    1.51 +	mflo	t0
    1.52 +	daddu	t0,t0,t2
    1.53 + #	  w0    += c0;
    1.54 +	daddu	t0,t0,a6
    1.55 + #	  w1     = (mp_word)b * a1; 
    1.56 +	dmultu	a2,a5			# LO was already copied to t0, so the next multiply can start here
    1.57 + #	  cy     = CARRYOUT(w0);
    1.58 +	dsrl32	t2,t0,0
    1.59 + #	  c[0]   = ACCUM(w0);
    1.60 +	sw	t0,0(a3)
    1.61 + #	  a0     = a[2];
    1.62 +	lwu	a4,8(a0)
    1.63 + #	  a     += 2;
    1.64 +	addiu	a0,a0,8
    1.65 + #	  c1     = c[1];
    1.66 +	lwu	a7,4(a3)
    1.67 + #	  w1    += cy;
    1.68 +	mflo	t1
    1.69 +	daddu	t1,t1,t2
    1.70 + #	  w1    += c1;
    1.71 +	daddu	t1,t1,a7
    1.72 + #	  w0     = (mp_word)b * a0;
    1.73 +	dmultu	a2,a4			# product for the digit handled after this iteration
    1.74 + #	  cy     = CARRYOUT(w1);
    1.75 +	dsrl32	t2,t1,0
    1.76 + #	  c[1]   = ACCUM(w1);
    1.77 +	sw	t1,4(a3)
    1.78 + #	  c     += 2;
    1.79 +	addiu	a3,a3,8
    1.80 +	sltiu	t3,a1,2
    1.81 +	beq	t3,zero,.L.4
    1.82 + #	  a1     = a[1];
    1.83 +	lwu	a5,4(a0)	# (delay slot) NOTE(review): also runs on loop exit; if a_len is then 0 this reads the word just past the last digit of a (value unused on that path) - confirm callers tolerate it
    1.84 + #      }
    1.85 +.L.3:
    1.86 + #      c0       = c[0];
    1.87 +	lwu	a6,0(a3)
    1.88 + #      w0      += cy;
    1.89 + #      if (a_len) {
    1.90 +	mflo	t0
    1.91 +	beq	a1,zero,.L.5
    1.92 +	daddu	t0,t0,t2	# (delay slot) w0 += cy on both paths
    1.93 + #	  w1     = (mp_word)b * a1; 
    1.94 +	dmultu	a2,a5
    1.95 + #	  w0    += c0;
    1.96 +	daddu	t0,t0,a6		#
    1.97 + #	  cy     = CARRYOUT(w0);
    1.98 +	dsrl32	t2,t0,0
    1.99 + #	  c[0]   = ACCUM(w0);
   1.100 +	sw	t0,0(a3)
   1.101 + #	  c1     = c[1];
   1.102 +	lwu	a7,4(a3)
   1.103 + #	  w1    += cy;
   1.104 +	mflo	t1
   1.105 +	daddu	t1,t1,t2
   1.106 + #	  w1    += c1;
   1.107 +	daddu	t1,t1,a7
   1.108 + #	  c[1]   = ACCUM(w1);
   1.109 +	sw	t1,4(a3)
   1.110 + #	  cy     = CARRYOUT(w1);
   1.111 +	dsrl32	t2,t1,0
   1.112 + #	  c     += 1;
   1.113 +	b	.L.6
   1.114 +	addiu	a3,a3,4		# (delay slot) so that 4(a3) at .L.6 is the digit after the two just written
   1.115 + #      } else {
   1.116 +.L.5:
   1.117 + #	  w0    += c0;
   1.118 +	daddu	t0,t0,a6
   1.119 + #	  c[0]   = ACCUM(w0);
   1.120 +	sw	t0,0(a3)
   1.121 + #	  cy     = CARRYOUT(w0);
   1.122 +	b	.L.6
   1.123 +	dsrl32	t2,t0,0		# (delay slot)
   1.124 + #      }
   1.125 + #    } else {
   1.126 +.L.2:
   1.127 + #      c0     = c[0];
   1.128 +	lwu	a6,0(a3)
   1.129 + #      w0    += c0;
   1.130 +	mflo	t0
   1.131 +	daddu	t0,t0,a6
   1.132 + #      c[0]   = ACCUM(w0);
   1.133 +	sw	t0,0(a3)
   1.134 + #      cy     = CARRYOUT(w0);
   1.135 +	dsrl32	t2,t0,0
   1.136 + #    }
   1.137 +.L.6:
   1.138 + #    c[1] = cy;
   1.139 +	jr	ra
   1.140 +	sw	t2,4(a3)	# (delay slot) overwrites the digit after the last product digit with the carry
   1.141 + #  }
   1.142 +.L.1:
   1.143 +	jr	ra
   1.144 +	nop
   1.145 + #}
   1.146 + #
   1.147 +        .end    s_mpv_mul_d_add
   1.148 +
   1.149 +        .ent    s_mpv_mul_d_add_prop
   1.150 +        .globl  s_mpv_mul_d_add_prop
   1.151 +
   1.152 +s_mpv_mul_d_add_prop: 
   1.153 + #/* c[0..a_len-1] += a[0..a_len-1] * b, then the final carry is ADDED into c[a_len] and rippled upward until it dies out. Digits are 32 bits wide. */
   1.154 + #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, 
   1.155 + #			      mp_digit *c)
   1.156 + #{
   1.157 + #  mp_digit   a0, a1;	regs a4, a5
   1.158 + #  mp_digit   c0, c1;  regs a6, a7
   1.159 + #  mp_digit   cy = 0;  reg t2
   1.160 + #  mp_word    w0, w1;  regs t0, t1   (arguments arrive as a=a0, a_len=a1, b=a2, c=a3; t3 is scratch)
   1.161 + #
   1.162 + #  if (a_len) {
   1.163 +	beq	a1,zero,.M.1	# a_len == 0: return without touching c
   1.164 +	move	t2,zero		# cy = 0 (branch delay slot, runs on both paths)
   1.165 +	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
   1.166 +	dsrl32	a2,a2,0		# This clears the upper 32 bits.
   1.167 + #    a0 = a[0];
   1.168 +	lwu	a4,0(a0)
   1.169 + #    w0 = ((mp_word)b * a0);
   1.170 +	dmultu	a2,a4		# both operands are below 2^32, so the whole product is in LO
   1.171 + #    if (--a_len) {
   1.172 +	addiu	a1,a1,-1
   1.173 +	beq	a1,zero,.M.2	# only one digit: take the short path
   1.174 + #      while (a_len >= 2) {
   1.175 +	sltiu	t3,a1,2		# (delay slot) t3 = (a_len < 2)
   1.176 +	bne	t3,zero,.M.3	# exactly one digit left: skip the two-digit loop
   1.177 + #	  a1     = a[1];
   1.178 +	lwu	a5,4(a0)	# (delay slot) loaded on both paths
   1.179 +.M.4:
   1.180 + #	  a_len -= 2;
   1.181 +        addiu	a1,a1,-2
   1.182 + #	  c0     = c[0];
   1.183 +	lwu	a6,0(a3)
   1.184 + #	  w0    += cy;
   1.185 +	mflo	t0
   1.186 +	daddu	t0,t0,t2
   1.187 + #	  w0    += c0;
   1.188 +	daddu	t0,t0,a6
   1.189 + #	  w1     = (mp_word)b * a1; 
   1.190 +	dmultu	a2,a5			# LO was already copied to t0, so the next multiply can start here
   1.191 + #	  cy     = CARRYOUT(w0);
   1.192 +	dsrl32	t2,t0,0
   1.193 + #	  c[0]   = ACCUM(w0);
   1.194 +	sw	t0,0(a3)
   1.195 + #	  a0     = a[2];
   1.196 +	lwu	a4,8(a0)
   1.197 + #	  a     += 2;
   1.198 +	addiu	a0,a0,8
   1.199 + #	  c1     = c[1];
   1.200 +	lwu	a7,4(a3)
   1.201 + #	  w1    += cy;
   1.202 +	mflo	t1
   1.203 +	daddu	t1,t1,t2
   1.204 + #	  w1    += c1;
   1.205 +	daddu	t1,t1,a7
   1.206 + #	  w0     = (mp_word)b * a0;
   1.207 +	dmultu	a2,a4			# product for the digit handled after this iteration
   1.208 + #	  cy     = CARRYOUT(w1);
   1.209 +	dsrl32	t2,t1,0
   1.210 + #	  c[1]   = ACCUM(w1);
   1.211 +	sw	t1,4(a3)
   1.212 + #	  c     += 2;
   1.213 +	addiu	a3,a3,8
   1.214 +	sltiu	t3,a1,2
   1.215 +	beq	t3,zero,.M.4
   1.216 + #	  a1     = a[1];
   1.217 +	lwu	a5,4(a0)	# (delay slot) NOTE(review): also runs on loop exit; if a_len is then 0 this reads the word just past the last digit of a (value unused on that path) - confirm callers tolerate it
   1.218 + #      }
   1.219 +.M.3:
   1.220 + #      c0       = c[0];
   1.221 +	lwu	a6,0(a3)
   1.222 + #      w0      += cy;
   1.223 + #      if (a_len) {
   1.224 +	mflo	t0
   1.225 +	beq	a1,zero,.M.5
   1.226 +	daddu	t0,t0,t2	# (delay slot) w0 += cy on both paths
   1.227 + #	  w1     = (mp_word)b * a1; 
   1.228 +	dmultu	a2,a5
   1.229 + #	  w0    += c0;
   1.230 +	daddu	t0,t0,a6		#
   1.231 + #	  cy     = CARRYOUT(w0);
   1.232 +	dsrl32	t2,t0,0
   1.233 + #	  c[0]   = ACCUM(w0);
   1.234 +	sw	t0,0(a3)
   1.235 + #	  c1     = c[1];
   1.236 +	lwu	a7,4(a3)
   1.237 + #	  w1    += cy;
   1.238 +	mflo	t1
   1.239 +	daddu	t1,t1,t2
   1.240 + #	  w1    += c1;
   1.241 +	daddu	t1,t1,a7
   1.242 + #	  c[1]   = ACCUM(w1);
   1.243 +	sw	t1,4(a3)
   1.244 + #	  cy     = CARRYOUT(w1);
   1.245 +	dsrl32	t2,t1,0
   1.246 + #	  c     += 2;   (the code advances 8 bytes: c[0] and c[1] are both final here)
   1.247 +	b	.M.6
   1.248 +	addiu	a3,a3,8		# (delay slot) c now points at the first digit the carry must be added to
   1.249 + #      } else {
   1.250 +.M.5:
   1.251 + #	  w0    += c0;
   1.252 +	daddu	t0,t0,a6
   1.253 + #	  c[0]   = ACCUM(w0);
   1.254 +	sw	t0,0(a3)
   1.255 + #	  cy     = CARRYOUT(w0);
   1.256 +	dsrl32	t2,t0,0
   1.257 +	b	.M.6
   1.258 +	addiu	a3,a3,4		# (delay slot) c += 1
   1.259 + #      }
   1.260 + #    } else {
   1.261 +.M.2:
   1.262 + #      c0     = c[0];
   1.263 +	lwu	a6,0(a3)
   1.264 + #      w0    += c0;
   1.265 +	mflo	t0
   1.266 +	daddu	t0,t0,a6
   1.267 + #      c[0]   = ACCUM(w0);
   1.268 +	sw	t0,0(a3)
   1.269 + #      cy     = CARRYOUT(w0);
   1.270 +	dsrl32	t2,t0,0
   1.271 +	addiu	a3,a3,4		# c += 1
   1.272 + #    }
   1.273 +.M.6:
   1.274 +
   1.275 + #    while (cy) {
   1.276 +	beq	t2,zero,.M.1	# no carry left: done
   1.277 +	nop
   1.278 +.M.7:
   1.279 + #      mp_word w = (mp_word)*c + cy;
   1.280 +	lwu	a6,0(a3)
   1.281 +	daddu	t2,t2,a6	# zero-extended digit plus carry: at most a 33-bit value
   1.282 + #      *c++ = ACCUM(w);
   1.283 +	sw	t2,0(a3)
   1.284 + #      cy = CARRYOUT(w);
   1.285 +	dsrl32	t2,t2,0
   1.286 +	bne	t2,zero,.M.7	# keep going while a carry remains; there is no bound check on c
   1.287 +	addiu	a3,a3,4		# (delay slot) c++
   1.288 +
   1.289 + #  }
   1.290 +.M.1:
   1.291 +	jr	ra
   1.292 +	nop
   1.293 + #}
   1.294 + #
   1.295 +        .end    s_mpv_mul_d_add_prop
   1.296 +
   1.297 +        .ent    s_mpv_mul_d
   1.298 +        .globl  s_mpv_mul_d
   1.299 +
   1.300 +s_mpv_mul_d: 
   1.301 + #/* c[0..a_len-1] = a[0..a_len-1] * b (c is overwritten, never read), then c[a_len] = final carry. Digits are 32 bits wide. */
   1.302 + #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, 
   1.303 + #			      mp_digit *c)
   1.304 + #{
   1.305 + #  mp_digit   a0, a1;	regs a4, a5
   1.306 + #  mp_digit   cy = 0;  reg t2
   1.307 + #  mp_word    w0, w1;  regs t0, t1   (arguments arrive as a=a0, a_len=a1, b=a2, c=a3; t3 is scratch)
   1.308 + #
   1.309 + #  if (a_len) {
   1.310 +	beq	a1,zero,.N.1	# a_len == 0: return without touching c
   1.311 +	move	t2,zero		# cy = 0 (branch delay slot, runs on both paths)
   1.312 +	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
   1.313 +	dsrl32	a2,a2,0		# This clears the upper 32 bits.
   1.314 + #    a0 = a[0];
   1.315 +	lwu	a4,0(a0)
   1.316 + #    w0 = ((mp_word)b * a0);
   1.317 +	dmultu	a2,a4		# both operands are below 2^32, so the whole product is in LO
   1.318 + #    if (--a_len) {
   1.319 +	addiu	a1,a1,-1
   1.320 +	beq	a1,zero,.N.2	# only one digit: take the short path
   1.321 + #      while (a_len >= 2) {
   1.322 +	sltiu	t3,a1,2		# (delay slot) t3 = (a_len < 2)
   1.323 +	bne	t3,zero,.N.3	# exactly one digit left: skip the two-digit loop
   1.324 + #	  a1     = a[1];
   1.325 +	lwu	a5,4(a0)	# (delay slot) loaded on both paths
   1.326 +.N.4:
   1.327 + #	  a_len -= 2;
   1.328 +        addiu	a1,a1,-2
   1.329 + #	  w0    += cy;
   1.330 +	mflo	t0
   1.331 +	daddu	t0,t0,t2
   1.332 + #	  cy     = CARRYOUT(w0);
   1.333 +	dsrl32	t2,t0,0
   1.334 + #	  w1     = (mp_word)b * a1; 
   1.335 +	dmultu	a2,a5	
   1.336 + #	  c[0]   = ACCUM(w0);
   1.337 +	sw	t0,0(a3)
   1.338 + #	  a0     = a[2];
   1.339 +	lwu	a4,8(a0)
   1.340 + #	  a     += 2;
   1.341 +	addiu	a0,a0,8
   1.342 + #	  w1    += cy;
   1.343 +	mflo	t1
   1.344 +	daddu	t1,t1,t2
   1.345 + #	  cy     = CARRYOUT(w1);
   1.346 +	dsrl32	t2,t1,0
   1.347 + #	  w0     = (mp_word)b * a0;
   1.348 +	dmultu	a2,a4	
   1.349 + #	  c[1]   = ACCUM(w1);
   1.350 +	sw	t1,4(a3)
   1.351 + #	  c     += 2;
   1.352 +	addiu	a3,a3,8
   1.353 +	sltiu	t3,a1,2
   1.354 +	beq	t3,zero,.N.4
   1.355 + #	  a1     = a[1];
   1.356 +	lwu	a5,4(a0)	# (delay slot) NOTE(review): also runs on loop exit; if a_len is then 0 this reads the word just past the last digit of a (value unused on that path) - confirm callers tolerate it
   1.357 + #      }
   1.358 +.N.3:
   1.359 + #      w0      += cy;
   1.360 + #      if (a_len) {
   1.361 +	mflo	t0
   1.362 +	beq	a1,zero,.N.5
   1.363 +	daddu	t0,t0,t2	# (delay slot) w0 += cy on both paths
   1.364 + #	  w1     = (mp_word)b * a1; 
   1.365 +	dmultu	a2,a5			#
   1.366 + #	  cy     = CARRYOUT(w0);
   1.367 +	dsrl32	t2,t0,0
   1.368 + #	  c[0]   = ACCUM(w0);
   1.369 +	sw	t0,0(a3)
   1.370 + #	  w1    += cy;
   1.371 +	mflo	t1
   1.372 +	daddu	t1,t1,t2
   1.373 + #	  c[1]   = ACCUM(w1);
   1.374 +	sw	t1,4(a3)
   1.375 + #	  cy     = CARRYOUT(w1);
   1.376 +	dsrl32	t2,t1,0
   1.377 + #	  c     += 1;
   1.378 +	b	.N.6
   1.379 +	addiu	a3,a3,4		# (delay slot) so that 4(a3) at .N.6 is the digit after the two just written
   1.380 + #      } else {
   1.381 +.N.5:
   1.382 + #	  c[0]   = ACCUM(w0);
   1.383 +	sw	t0,0(a3)
   1.384 + #	  cy     = CARRYOUT(w0);
   1.385 +	b	.N.6
   1.386 +	dsrl32	t2,t0,0		# (delay slot)
   1.387 + #      }
   1.388 + #    } else {
   1.389 +.N.2:
   1.390 +	mflo	t0
   1.391 + #      c[0]   = ACCUM(w0);
   1.392 +	sw	t0,0(a3)
   1.393 + #      cy     = CARRYOUT(w0);
   1.394 +	dsrl32	t2,t0,0
   1.395 + #    }
   1.396 +.N.6:
   1.397 + #    c[1] = cy;
   1.398 +	jr	ra
   1.399 +	sw	t2,4(a3)	# (delay slot) store the carry in the digit after the last product digit
   1.400 + #  }
   1.401 +.N.1:
   1.402 +	jr	ra
   1.403 +	nop
   1.404 + #}
   1.405 + #
   1.406 +        .end    s_mpv_mul_d
   1.407 +
   1.408 +
   1.409 +        .ent    s_mpv_sqr_add_prop
   1.410 +        .globl  s_mpv_sqr_add_prop
   1.411 + #void   s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
   1.412 + #	For each digit a[i], adds a[i]*a[i] into the 64-bit pair sqrs[2i],sqrs[2i+1], then ripples any final carry upward through sqrs.  Digits are 32 bits wide.  There is no a_len == 0 guard.  Registers:
   1.413 + #	a0		*a
   1.414 + #	a1		a_len
   1.415 + #	a2		*sqr
   1.416 + #	a3		digit from *a, a_i
   1.417 + #	a4		square of digit from a, then the sum/digit waiting to be stored
   1.418 + #	a5,a6		next 2 digits in sqr (a5 = the pair as one 64-bit value, a6 = pair + square)
   1.419 + #	a7,t0		carry (a7 = from adding the square, t0 = carry into the next pair)
   1.420 +s_mpv_sqr_add_prop:
   1.421 +	move	a7,zero
   1.422 +	move	t0,zero
   1.423 +	lwu	a3,0(a0)	# a[0] is read unconditionally: callers have to pass a_len >= 1
   1.424 +	addiu	a1,a1,-1	# --a_len
   1.425 +	dmultu	a3,a3		# LO = a[0]^2 (fits in 64 bits, operand is below 2^32)
   1.426 +	beq	a1,zero,.P.3	# jump if we've already done the only sqr
   1.427 +	addiu	a0,a0,4		# ++a
   1.428 +.P.2:
   1.429 +        lwu	a5,0(a2)
   1.430 +        lwu	a6,4(a2)
   1.431 +	addiu	a2,a2,8		# sqrs += 2;
   1.432 +	dsll32	a6,a6,0
   1.433 +	daddu	a5,a5,a6	# a5 = (sqrs[1] << 32) + sqrs[0]
   1.434 +	lwu	a3,0(a0)	# next digit of a
   1.435 +	addiu	a0,a0,4		# ++a
   1.436 +	mflo	a4		# square of the previous digit
   1.437 +	daddu	a6,a5,a4
   1.438 +	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
   1.439 +	dmultu	a3,a3		# start the next square while this pair is finished
   1.440 +	daddu	a4,a6,t0	# add the carry coming from the previous pair
   1.441 +	sltu	t0,a4,a6	# 64-bit overflow of that addition
   1.442 +	add	t0,t0,a7	# t0 = carry into the next pair
   1.443 +	sw	a4,-8(a2)	# low digit of the pair
   1.444 +	addiu	a1,a1,-1	# --a_len
   1.445 +	dsrl32	a4,a4,0		# high digit of the pair
   1.446 +	bne	a1,zero,.P.2	# loop if a_len > 0
   1.447 +	sw	a4,-4(a2)	# (delay slot) store the high digit
   1.448 +.P.3:
   1.449 +        lwu	a5,0(a2)
   1.450 +        lwu	a6,4(a2)
   1.451 +	addiu	a2,a2,8		# sqrs += 2;
   1.452 +	dsll32	a6,a6,0
   1.453 +	daddu	a5,a5,a6	# a5 = (sqrs[1] << 32) + sqrs[0]
   1.454 +	mflo	a4		# square of the last digit
   1.455 +	daddu	a6,a5,a4
   1.456 +	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
   1.457 +	daddu	a4,a6,t0
   1.458 +	sltu	t0,a4,a6
   1.459 +	add	t0,t0,a7	# t0 = carry out of the last pair
   1.460 +	sw	a4,-8(a2)
   1.461 +	beq	t0,zero,.P.9	# jump if no carry
   1.462 +	dsrl32	a4,a4,0		# (delay slot) a4 = high digit of the last pair
   1.463 +.P.8:
   1.464 +	sw	a4,-4(a2)	# store the digit computed on the previous pass
   1.465 +	/* propagate final carry: a4 = next digit + carry, new carry = bit 32 of that sum */
   1.466 +	lwu	a5,0(a2)
   1.467 +	daddu	a4,a5,t0	# 33-bit sum; its low word is written by the next sw of a4
   1.468 +	dsrl32	t0,a4,0		# carry out of the 32-bit digit (a 64-bit sltu can never see it)
   1.469 +	bne	t0,zero,.P.8	# loop if carry persists
   1.470 +	addiu	a2,a2,4		# sqrs++
   1.471 +.P.9:
   1.472 +	jr	ra
   1.473 +	sw	a4,-4(a2)	# (delay slot) store the last pending digit
   1.474 +
   1.475 +        .end    s_mpv_sqr_add_prop

mercurial