security/nss/lib/freebl/mpi/mpi_mips.s

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4 #include <regdef.h>
michael@0 5 .set noreorder
michael@0 6 .set noat
michael@0 7
michael@0 8 .section .text, 1, 0x00000006, 4, 4
michael@0 9 .text:
michael@0 10 .section .text
michael@0 11
michael@0 12 .ent s_mpv_mul_d_add
michael@0 13 .globl s_mpv_mul_d_add
michael@0 14
michael@0 15 s_mpv_mul_d_add:
michael@0 16 #/* c += a * b : multiply the a_len 32-bit digits at a by the digit b, add the products into c, and store the final carry in the digit after the last one updated */
michael@0 17 #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
michael@0 18 # mp_digit *c)
michael@0 19 #{
michael@0 20 # mp_digit a0, a1; regs a4, a5
michael@0 21 # mp_digit c0, c1; regs a6, a7
michael@0 22 # mp_digit cy = 0; reg t2
michael@0 23 # mp_word w0, w1; regs t0, t1
michael@0 24 # (arguments arrive as a in a0, a_len in a1, b in a2, c in a3; t3 is scratch for the loop tests)
michael@0 25 # if (a_len) {
michael@0 26 beq a1,zero,.L.1 # a_len == 0: return without touching c
michael@0 27 move t2,zero # cy = 0 (branch delay slot; the file is assembled with .set noreorder)
michael@0 28 dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
michael@0 29 dsrl32 a2,a2,0 # This clears the upper 32 bits.
michael@0 30 # a0 = a[0];
michael@0 31 lwu a4,0(a0) # zero-extending load: both factors are < 2^32, so the product fits in LO
michael@0 32 # w0 = ((mp_word)b * a0);
michael@0 33 dmultu a2,a4 # product is collected later with mflo
michael@0 34 # if (--a_len) {
michael@0 35 addiu a1,a1,-1
michael@0 36 beq a1,zero,.L.2 # a single digit only: take the short path
michael@0 37 # while (a_len >= 2) {
michael@0 38 sltiu t3,a1,2 # t3 = (a_len < 2); sits in the delay slot of the beq above
michael@0 39 bne t3,zero,.L.3 # fewer than 2 digits remain: skip the unrolled loop
michael@0 40 # a1 = a[1];
michael@0 41 lwu a5,4(a0) # delay slot: executed whether or not the branch is taken
michael@0 42 .L.4:
michael@0 43 # a_len -= 2;
michael@0 44 addiu a1,a1,-2
michael@0 45 # c0 = c[0];
michael@0 46 lwu a6,0(a3)
michael@0 47 # w0 += cy;
michael@0 48 mflo t0
michael@0 49 daddu t0,t0,t2
michael@0 50 # w0 += c0;
michael@0 51 daddu t0,t0,a6 # cannot overflow 64 bits: (2^32-1)^2 + 2*(2^32-1) = 2^64-1
michael@0 52 # w1 = (mp_word)b * a1;
michael@0 53 dmultu a2,a5 # issued after mflo has read the previous product
michael@0 54 # cy = CARRYOUT(w0);
michael@0 55 dsrl32 t2,t0,0
michael@0 56 # c[0] = ACCUM(w0);
michael@0 57 sw t0,0(a3) # sw writes only the low 32 bits of t0
michael@0 58 # a0 = a[2];
michael@0 59 lwu a4,8(a0)
michael@0 60 # a += 2;
michael@0 61 addiu a0,a0,8
michael@0 62 # c1 = c[1];
michael@0 63 lwu a7,4(a3)
michael@0 64 # w1 += cy;
michael@0 65 mflo t1
michael@0 66 daddu t1,t1,t2
michael@0 67 # w1 += c1;
michael@0 68 daddu t1,t1,a7
michael@0 69 # w0 = (mp_word)b * a0;
michael@0 70 dmultu a2,a4 # product for the next iteration (or for the tail at .L.3)
michael@0 71 # cy = CARRYOUT(w1);
michael@0 72 dsrl32 t2,t1,0
michael@0 73 # c[1] = ACCUM(w1);
michael@0 74 sw t1,4(a3)
michael@0 75 # c += 2;
michael@0 76 addiu a3,a3,8
michael@0 77 sltiu t3,a1,2 # t3 = (a_len < 2)
michael@0 78 beq t3,zero,.L.4 # loop while a_len >= 2
michael@0 79 # a1 = a[1];
michael@0 80 lwu a5,4(a0) # delay slot, also runs on loop exit. NOTE(review): when a_len is 0 here this loads the word after the last of the a_len digits (the value is then unused) - confirm that read is always safe
michael@0 81 # }
michael@0 82 .L.3: # here a_len is 0 or 1 and the product for the current digit is pending in LO
michael@0 83 # c0 = c[0];
michael@0 84 lwu a6,0(a3)
michael@0 85 # w0 += cy;
michael@0 86 # if (a_len) {
michael@0 87 mflo t0
michael@0 88 beq a1,zero,.L.5
michael@0 89 daddu t0,t0,t2 # delay slot: w0 += cy happens on both paths
michael@0 90 # w1 = (mp_word)b * a1;
michael@0 91 dmultu a2,a5
michael@0 92 # w0 += c0;
michael@0 93 daddu t0,t0,a6 # overlaps with the multiply started above
michael@0 94 # cy = CARRYOUT(w0);
michael@0 95 dsrl32 t2,t0,0
michael@0 96 # c[0] = ACCUM(w0);
michael@0 97 sw t0,0(a3)
michael@0 98 # c1 = c[1];
michael@0 99 lwu a7,4(a3)
michael@0 100 # w1 += cy;
michael@0 101 mflo t1
michael@0 102 daddu t1,t1,t2
michael@0 103 # w1 += c1;
michael@0 104 daddu t1,t1,a7
michael@0 105 # c[1] = ACCUM(w1);
michael@0 106 sw t1,4(a3)
michael@0 107 # cy = CARRYOUT(w1);
michael@0 108 dsrl32 t2,t1,0
michael@0 109 # c += 1;
michael@0 110 b .L.6
michael@0 111 addiu a3,a3,4 # delay slot: afterwards 4(a3) at .L.6 is the digit following the two just stored
michael@0 112 # } else {
michael@0 113 .L.5:
michael@0 114 # w0 += c0;
michael@0 115 daddu t0,t0,a6
michael@0 116 # c[0] = ACCUM(w0);
michael@0 117 sw t0,0(a3)
michael@0 118 # cy = CARRYOUT(w0);
michael@0 119 b .L.6
michael@0 120 dsrl32 t2,t0,0 # delay slot
michael@0 121 # }
michael@0 122 # } else {
michael@0 123 .L.2: # a_len was 1: only a[0] * b is pending in LO
michael@0 124 # c0 = c[0];
michael@0 125 lwu a6,0(a3)
michael@0 126 # w0 += c0;
michael@0 127 mflo t0
michael@0 128 daddu t0,t0,a6
michael@0 129 # c[0] = ACCUM(w0);
michael@0 130 sw t0,0(a3)
michael@0 131 # cy = CARRYOUT(w0);
michael@0 132 dsrl32 t2,t0,0
michael@0 133 # }
michael@0 134 .L.6:
michael@0 135 # c[1] = cy;
michael@0 136 jr ra
michael@0 137 sw t2,4(a3) # delay slot: the carry digit is stored, overwriting (not adding to) that digit of c
michael@0 138 # }
michael@0 139 .L.1:
michael@0 140 jr ra
michael@0 141 nop # delay slot
michael@0 142 #}
michael@0 143 #
michael@0 144 .end s_mpv_mul_d_add
michael@0 145
michael@0 146 .ent s_mpv_mul_d_add_prop
michael@0 147 .globl s_mpv_mul_d_add_prop
michael@0 148
michael@0 149 s_mpv_mul_d_add_prop:
michael@0 150 #/* c += a * b : multiply the a_len 32-bit digits at a by the digit b, add the products into c, then keep adding the carry into the following digits of c until it is zero */
michael@0 151 #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
michael@0 152 # mp_digit *c)
michael@0 153 #{
michael@0 154 # mp_digit a0, a1; regs a4, a5
michael@0 155 # mp_digit c0, c1; regs a6, a7
michael@0 156 # mp_digit cy = 0; reg t2
michael@0 157 # mp_word w0, w1; regs t0, t1
michael@0 158 # (arguments arrive as a in a0, a_len in a1, b in a2, c in a3; t3 is scratch for the loop tests)
michael@0 159 # if (a_len) {
michael@0 160 beq a1,zero,.M.1 # a_len == 0: return without touching c
michael@0 161 move t2,zero # cy = 0 (branch delay slot; the file is assembled with .set noreorder)
michael@0 162 dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
michael@0 163 dsrl32 a2,a2,0 # This clears the upper 32 bits.
michael@0 164 # a0 = a[0];
michael@0 165 lwu a4,0(a0) # zero-extending load: both factors are < 2^32, so the product fits in LO
michael@0 166 # w0 = ((mp_word)b * a0);
michael@0 167 dmultu a2,a4 # product is collected later with mflo
michael@0 168 # if (--a_len) {
michael@0 169 addiu a1,a1,-1
michael@0 170 beq a1,zero,.M.2 # a single digit only: take the short path
michael@0 171 # while (a_len >= 2) {
michael@0 172 sltiu t3,a1,2 # t3 = (a_len < 2); sits in the delay slot of the beq above
michael@0 173 bne t3,zero,.M.3 # fewer than 2 digits remain: skip the unrolled loop
michael@0 174 # a1 = a[1];
michael@0 175 lwu a5,4(a0) # delay slot: executed whether or not the branch is taken
michael@0 176 .M.4:
michael@0 177 # a_len -= 2;
michael@0 178 addiu a1,a1,-2
michael@0 179 # c0 = c[0];
michael@0 180 lwu a6,0(a3)
michael@0 181 # w0 += cy;
michael@0 182 mflo t0
michael@0 183 daddu t0,t0,t2
michael@0 184 # w0 += c0;
michael@0 185 daddu t0,t0,a6 # cannot overflow 64 bits: (2^32-1)^2 + 2*(2^32-1) = 2^64-1
michael@0 186 # w1 = (mp_word)b * a1;
michael@0 187 dmultu a2,a5 # issued after mflo has read the previous product
michael@0 188 # cy = CARRYOUT(w0);
michael@0 189 dsrl32 t2,t0,0
michael@0 190 # c[0] = ACCUM(w0);
michael@0 191 sw t0,0(a3) # sw writes only the low 32 bits of t0
michael@0 192 # a0 = a[2];
michael@0 193 lwu a4,8(a0)
michael@0 194 # a += 2;
michael@0 195 addiu a0,a0,8
michael@0 196 # c1 = c[1];
michael@0 197 lwu a7,4(a3)
michael@0 198 # w1 += cy;
michael@0 199 mflo t1
michael@0 200 daddu t1,t1,t2
michael@0 201 # w1 += c1;
michael@0 202 daddu t1,t1,a7
michael@0 203 # w0 = (mp_word)b * a0;
michael@0 204 dmultu a2,a4 # product for the next iteration (or for the tail at .M.3)
michael@0 205 # cy = CARRYOUT(w1);
michael@0 206 dsrl32 t2,t1,0
michael@0 207 # c[1] = ACCUM(w1);
michael@0 208 sw t1,4(a3)
michael@0 209 # c += 2;
michael@0 210 addiu a3,a3,8
michael@0 211 sltiu t3,a1,2 # t3 = (a_len < 2)
michael@0 212 beq t3,zero,.M.4 # loop while a_len >= 2
michael@0 213 # a1 = a[1];
michael@0 214 lwu a5,4(a0) # delay slot, also runs on loop exit. NOTE(review): when a_len is 0 here this loads the word after the last of the a_len digits (the value is then unused) - confirm that read is always safe
michael@0 215 # }
michael@0 216 .M.3: # here a_len is 0 or 1 and the product for the current digit is pending in LO
michael@0 217 # c0 = c[0];
michael@0 218 lwu a6,0(a3)
michael@0 219 # w0 += cy;
michael@0 220 # if (a_len) {
michael@0 221 mflo t0
michael@0 222 beq a1,zero,.M.5
michael@0 223 daddu t0,t0,t2 # delay slot: w0 += cy happens on both paths
michael@0 224 # w1 = (mp_word)b * a1;
michael@0 225 dmultu a2,a5
michael@0 226 # w0 += c0;
michael@0 227 daddu t0,t0,a6 # overlaps with the multiply started above
michael@0 228 # cy = CARRYOUT(w0);
michael@0 229 dsrl32 t2,t0,0
michael@0 230 # c[0] = ACCUM(w0);
michael@0 231 sw t0,0(a3)
michael@0 232 # c1 = c[1];
michael@0 233 lwu a7,4(a3)
michael@0 234 # w1 += cy;
michael@0 235 mflo t1
michael@0 236 daddu t1,t1,t2
michael@0 237 # w1 += c1;
michael@0 238 daddu t1,t1,a7
michael@0 239 # c[1] = ACCUM(w1);
michael@0 240 sw t1,4(a3)
michael@0 241 # cy = CARRYOUT(w1);
michael@0 242 dsrl32 t2,t1,0
michael@0 243 # c += 2;
michael@0 244 b .M.6
michael@0 245 addiu a3,a3,8 # delay slot: step past both digits just stored so carry propagation starts at the next one
michael@0 246 # } else {
michael@0 247 .M.5:
michael@0 248 # w0 += c0;
michael@0 249 daddu t0,t0,a6
michael@0 250 # c[0] = ACCUM(w0);
michael@0 251 sw t0,0(a3)
michael@0 252 # cy = CARRYOUT(w0);
michael@0 253 dsrl32 t2,t0,0
michael@0 254 b .M.6
michael@0 255 addiu a3,a3,4 # delay slot: c += 1
michael@0 256 # }
michael@0 257 # } else {
michael@0 258 .M.2: # a_len was 1: only a[0] * b is pending in LO
michael@0 259 # c0 = c[0];
michael@0 260 lwu a6,0(a3)
michael@0 261 # w0 += c0;
michael@0 262 mflo t0
michael@0 263 daddu t0,t0,a6
michael@0 264 # c[0] = ACCUM(w0);
michael@0 265 sw t0,0(a3)
michael@0 266 # cy = CARRYOUT(w0);
michael@0 267 dsrl32 t2,t0,0
michael@0 268 addiu a3,a3,4 # c += 1
michael@0 269 # }
michael@0 270 .M.6: # a3 now points at the first digit of c not yet updated; t2 holds the carry
michael@0 271
michael@0 272 # while (cy) {
michael@0 273 beq t2,zero,.M.1 # no carry left: done
michael@0 274 nop # delay slot
michael@0 275 .M.7:
michael@0 276 # mp_word w = (mp_word)*c + cy;
michael@0 277 lwu a6,0(a3)
michael@0 278 daddu t2,t2,a6 # t2 doubles as w
michael@0 279 # *c++ = ACCUM(w);
michael@0 280 sw t2,0(a3)
michael@0 281 # cy = CARRYOUT(w);
michael@0 282 dsrl32 t2,t2,0
michael@0 283 bne t2,zero,.M.7 # repeat while a carry remains; the loop has no other bound
michael@0 284 addiu a3,a3,4 # delay slot: the c++ belonging to the store above
michael@0 285
michael@0 286 # }
michael@0 287 .M.1:
michael@0 288 jr ra
michael@0 289 nop # delay slot
michael@0 290 #}
michael@0 291 #
michael@0 292 .end s_mpv_mul_d_add_prop
michael@0 293
michael@0 294 .ent s_mpv_mul_d
michael@0 295 .globl s_mpv_mul_d
michael@0 296
michael@0 297 s_mpv_mul_d:
michael@0 298 #/* c = a * b : multiply the a_len 32-bit digits at a by the digit b, write the products to c, and store the final carry in the digit after the last one written */
michael@0 299 #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
michael@0 300 # mp_digit *c)
michael@0 301 #{
michael@0 302 # mp_digit a0, a1; regs a4, a5
michael@0 303 # mp_digit cy = 0; reg t2
michael@0 304 # mp_word w0, w1; regs t0, t1
michael@0 305 # (arguments arrive as a in a0, a_len in a1, b in a2, c in a3; t3 is scratch for the loop tests)
michael@0 306 # if (a_len) {
michael@0 307 beq a1,zero,.N.1 # a_len == 0: return without touching c
michael@0 308 move t2,zero # cy = 0 (branch delay slot; the file is assembled with .set noreorder)
michael@0 309 dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
michael@0 310 dsrl32 a2,a2,0 # This clears the upper 32 bits.
michael@0 311 # a0 = a[0];
michael@0 312 lwu a4,0(a0) # zero-extending load: both factors are < 2^32, so the product fits in LO
michael@0 313 # w0 = ((mp_word)b * a0);
michael@0 314 dmultu a2,a4 # product is collected later with mflo
michael@0 315 # if (--a_len) {
michael@0 316 addiu a1,a1,-1
michael@0 317 beq a1,zero,.N.2 # a single digit only: take the short path
michael@0 318 # while (a_len >= 2) {
michael@0 319 sltiu t3,a1,2 # t3 = (a_len < 2); sits in the delay slot of the beq above
michael@0 320 bne t3,zero,.N.3 # fewer than 2 digits remain: skip the unrolled loop
michael@0 321 # a1 = a[1];
michael@0 322 lwu a5,4(a0) # delay slot: executed whether or not the branch is taken
michael@0 323 .N.4:
michael@0 324 # a_len -= 2;
michael@0 325 addiu a1,a1,-2
michael@0 326 # w0 += cy;
michael@0 327 mflo t0
michael@0 328 daddu t0,t0,t2 # cannot overflow 64 bits: (2^32-1)^2 + (2^32-1) < 2^64
michael@0 329 # cy = CARRYOUT(w0);
michael@0 330 dsrl32 t2,t0,0
michael@0 331 # w1 = (mp_word)b * a1;
michael@0 332 dmultu a2,a5 # issued after mflo has read the previous product
michael@0 333 # c[0] = ACCUM(w0);
michael@0 334 sw t0,0(a3) # sw writes only the low 32 bits of t0
michael@0 335 # a0 = a[2];
michael@0 336 lwu a4,8(a0)
michael@0 337 # a += 2;
michael@0 338 addiu a0,a0,8
michael@0 339 # w1 += cy;
michael@0 340 mflo t1
michael@0 341 daddu t1,t1,t2
michael@0 342 # cy = CARRYOUT(w1);
michael@0 343 dsrl32 t2,t1,0
michael@0 344 # w0 = (mp_word)b * a0;
michael@0 345 dmultu a2,a4 # product for the next iteration (or for the tail at .N.3)
michael@0 346 # c[1] = ACCUM(w1);
michael@0 347 sw t1,4(a3)
michael@0 348 # c += 2;
michael@0 349 addiu a3,a3,8
michael@0 350 sltiu t3,a1,2 # t3 = (a_len < 2)
michael@0 351 beq t3,zero,.N.4 # loop while a_len >= 2
michael@0 352 # a1 = a[1];
michael@0 353 lwu a5,4(a0) # delay slot, also runs on loop exit. NOTE(review): when a_len is 0 here this loads the word after the last of the a_len digits (the value is then unused) - confirm that read is always safe
michael@0 354 # }
michael@0 355 .N.3: # here a_len is 0 or 1 and the product for the current digit is pending in LO
michael@0 356 # w0 += cy;
michael@0 357 # if (a_len) {
michael@0 358 mflo t0
michael@0 359 beq a1,zero,.N.5
michael@0 360 daddu t0,t0,t2 # delay slot: w0 += cy happens on both paths
michael@0 361 # w1 = (mp_word)b * a1;
michael@0 362 dmultu a2,a5 # last product
michael@0 363 # cy = CARRYOUT(w0);
michael@0 364 dsrl32 t2,t0,0
michael@0 365 # c[0] = ACCUM(w0);
michael@0 366 sw t0,0(a3)
michael@0 367 # w1 += cy;
michael@0 368 mflo t1
michael@0 369 daddu t1,t1,t2
michael@0 370 # c[1] = ACCUM(w1);
michael@0 371 sw t1,4(a3)
michael@0 372 # cy = CARRYOUT(w1);
michael@0 373 dsrl32 t2,t1,0
michael@0 374 # c += 1;
michael@0 375 b .N.6
michael@0 376 addiu a3,a3,4 # delay slot: afterwards 4(a3) at .N.6 is the digit following the two just stored
michael@0 377 # } else {
michael@0 378 .N.5:
michael@0 379 # c[0] = ACCUM(w0);
michael@0 380 sw t0,0(a3)
michael@0 381 # cy = CARRYOUT(w0);
michael@0 382 b .N.6
michael@0 383 dsrl32 t2,t0,0 # delay slot
michael@0 384 # }
michael@0 385 # } else {
michael@0 386 .N.2: # a_len was 1: only a[0] * b is pending in LO
michael@0 387 mflo t0
michael@0 388 # c[0] = ACCUM(w0);
michael@0 389 sw t0,0(a3)
michael@0 390 # cy = CARRYOUT(w0);
michael@0 391 dsrl32 t2,t0,0
michael@0 392 # }
michael@0 393 .N.6:
michael@0 394 # c[1] = cy;
michael@0 395 jr ra
michael@0 396 sw t2,4(a3) # delay slot: store the carry as the most significant digit of the result
michael@0 397 # }
michael@0 398 .N.1:
michael@0 399 jr ra
michael@0 400 nop # delay slot
michael@0 401 #}
michael@0 402 #
michael@0 403 .end s_mpv_mul_d
michael@0 404
michael@0 405
michael@0 406 .ent s_mpv_sqr_add_prop
michael@0 407 .globl s_mpv_sqr_add_prop
michael@0 408 #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
michael@0 409 # Adds the square of each 32-bit digit a[i] into the digit pair sqrs[2i],sqrs[2i+1], carrying from pair to pair, then adds any final carry into the digits that follow. a_len must be >= 1: a[0] is loaded and a_len decremented before a_len is first tested. registers
michael@0 410 # a0 *a
michael@0 411 # a1 a_len
michael@0 412 # a2 *sqr
michael@0 413 # a3 digit from *a, a_i
michael@0 414 # a4 square of digit from a
michael@0 415 # a5,a6 next 2 digits in sqr
michael@0 416 # a7,t0 carry
michael@0 417 s_mpv_sqr_add_prop:
michael@0 418 move a7,zero
michael@0 419 move t0,zero # no carry into the first digit pair
michael@0 420 lwu a3,0(a0) # a_i = a[0], zero-extended
michael@0 421 addiu a1,a1,-1 # --a_len
michael@0 422 dmultu a3,a3 # square fits in LO because a_i < 2^32
michael@0 423 beq a1,zero,.P.3 # jump if we've already done the only sqr
michael@0 424 addiu a0,a0,4 # ++a (branch delay slot; the file is assembled with .set noreorder)
michael@0 425 .P.2:
michael@0 426 lwu a5,0(a2) # low digit of the pair
michael@0 427 lwu a6,4(a2) # high digit of the pair
michael@0 428 addiu a2,a2,8 # sqrs += 2;
michael@0 429 dsll32 a6,a6,0
michael@0 430 daddu a5,a5,a6 # a5 = the pair as one 64-bit value
michael@0 431 lwu a3,0(a0) # next a_i
michael@0 432 addiu a0,a0,4 # ++a
michael@0 433 mflo a4 # square of the previous a_i
michael@0 434 daddu a6,a5,a4
michael@0 435 sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
michael@0 436 dmultu a3,a3 # start the next square now that LO has been read
michael@0 437 daddu a4,a6,t0 # add the carry from the previous pair
michael@0 438 sltu t0,a4,a6 # overflow of that second addition
michael@0 439 add t0,t0,a7 # t0 = carry out of this pair
michael@0 440 sw a4,-8(a2) # store low digit (sw writes the low 32 bits)
michael@0 441 addiu a1,a1,-1 # --a_len
michael@0 442 dsrl32 a4,a4,0 # a4 = high digit
michael@0 443 bne a1,zero,.P.2 # loop if a_len > 0
michael@0 444 sw a4,-4(a2) # delay slot: store high digit
michael@0 445 .P.3: # last digit pair: its square is already pending in LO
michael@0 446 lwu a5,0(a2)
michael@0 447 lwu a6,4(a2)
michael@0 448 addiu a2,a2,8 # sqrs += 2;
michael@0 449 dsll32 a6,a6,0
michael@0 450 daddu a5,a5,a6
michael@0 451 mflo a4
michael@0 452 daddu a6,a5,a4
michael@0 453 sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
michael@0 454 daddu a4,a6,t0
michael@0 455 sltu t0,a4,a6
michael@0 456 add t0,t0,a7 # t0 = carry out of the last pair
michael@0 457 sw a4,-8(a2)
michael@0 458 beq t0,zero,.P.9 # jump if no carry
michael@0 459 dsrl32 a4,a4,0 # delay slot: a4 = high digit, still to be stored at -4(a2)
michael@0 460 .P.8: # invariant: a4 holds the digit that belongs at -4(a2), t0 the carry into 0(a2)
michael@0 461 sw a4,-4(a2)
michael@0 462 /* propagate final carry */
michael@0 463 lwu a5,0(a2)
michael@0 464 daddu a4,a5,t0 # a4 = next digit + carry; its low 32 bits are stored by the sw at .P.8 or .P.9
michael@0 465 dsrl32 t0,a4,0 # carry out is bit 32 of the sum (a 64-bit sltu can never see it)
michael@0 466 bne t0,zero,.P.8 # loop if carry persists
michael@0 467 addiu a2,a2,4 # sqrs++
michael@0 468 .P.9:
michael@0 469 jr ra
michael@0 470 sw a4,-4(a2) # delay slot: store the last pending digit
michael@0 471
michael@0 472 .end s_mpv_sqr_add_prop

mercurial