Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <regdef.h>
.set noreorder
.set noat
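# Note on reading this file: with .set noreorder above, the assembler does
# not reorder instructions or fill branch delay slots, so the instruction
# written immediately after every branch or jump executes whether or not the
# branch is taken.  For example, in
#
#         beq a1,zero,.L.1
#         move t2,zero # cy = 0
#
# the move is the delay slot and runs on both paths.  Many delay slots below
# carry real work (loads, pointer increments), which is why some instructions
# appear after the branch that logically follows them.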

.section .text, 1, 0x00000006, 4, 4
.text:
.section .text

.ent s_mpv_mul_d_add
.globl s_mpv_mul_d_add

s_mpv_mul_d_add:
#/* c += a * b */
#void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit c0, c1; regs a6, a7
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
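# Reference sketch of the loop implemented below (illustrative, not taken
# verbatim from the C sources; it assumes a 32-bit mp_digit and a 64-bit
# mp_word, with ACCUM(w) meaning the low 32 bits of w and CARRYOUT(w) the
# high 32 bits, as the pseudocode in this file uses them):
#
#   mp_digit cy = 0;
#   while (a_len--) {
#       mp_word w = (mp_word)b * *a++ + *c + cy;
#       *c++ = ACCUM(w);
#       cy = CARRYOUT(w);
#   }
#   *c = cy;
#
# The code below unrolls this two digits per pass and issues each dmultu
# early so the multiply overlaps the adds and stores of the previous digit.
#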
# if (a_len) {
        beq a1,zero,.L.1
        move t2,zero # cy = 0
        dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
        dsrl32 a2,a2,0 # This clears the upper 32 bits.
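# Illustrative example of the dsll32/dsrl32 pair above (the reason given here
# is an assumption: with a 32-bit mp_digit on a 64-bit ABI, "b" may arrive
# sign-extended in a2): if b = 0x80000001 it shows up as 0xffffffff80000001;
# shifting left then right by 32 leaves 0x0000000080000001, so dmultu
# computes the intended 32x32 -> 64-bit product.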
# a0 = a[0];
        lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
        dmultu a2,a4
# if (--a_len) {
        addiu a1,a1,-1
        beq a1,zero,.L.2
# while (a_len >= 2) {
        sltiu t3,a1,2
        bne t3,zero,.L.3
# a1 = a[1];
        lwu a5,4(a0)
.L.4:
# a_len -= 2;
        addiu a1,a1,-2
# c0 = c[0];
        lwu a6,0(a3)
# w0 += cy;
        mflo t0
        daddu t0,t0,t2
# w0 += c0;
        daddu t0,t0,a6
# w1 = (mp_word)b * a1;
        dmultu a2,a5 #
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# a0 = a[2];
        lwu a4,8(a0)
# a += 2;
        addiu a0,a0,8
# c1 = c[1];
        lwu a7,4(a3)
# w1 += cy;
        mflo t1
        daddu t1,t1,t2
# w1 += c1;
        daddu t1,t1,a7
# w0 = (mp_word)b * a0;
        dmultu a2,a4 #
# cy = CARRYOUT(w1);
        dsrl32 t2,t1,0
# c[1] = ACCUM(w1);
        sw t1,4(a3)
# c += 2;
        addiu a3,a3,8
        sltiu t3,a1,2
        beq t3,zero,.L.4
# a1 = a[1];
        lwu a5,4(a0)
# }
.L.3:
# c0 = c[0];
        lwu a6,0(a3)
# w0 += cy;
# if (a_len) {
        mflo t0
        beq a1,zero,.L.5
        daddu t0,t0,t2
# w1 = (mp_word)b * a1;
        dmultu a2,a5
# w0 += c0;
        daddu t0,t0,a6 #
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# c1 = c[1];
        lwu a7,4(a3)
# w1 += cy;
        mflo t1
        daddu t1,t1,t2
# w1 += c1;
        daddu t1,t1,a7
# c[1] = ACCUM(w1);
        sw t1,4(a3)
# cy = CARRYOUT(w1);
        dsrl32 t2,t1,0
# c += 1;
        b .L.6
        addiu a3,a3,4
# } else {
.L.5:
# w0 += c0;
        daddu t0,t0,a6
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# cy = CARRYOUT(w0);
        b .L.6
        dsrl32 t2,t0,0
# }
# } else {
.L.2:
# c0 = c[0];
        lwu a6,0(a3)
# w0 += c0;
        mflo t0
        daddu t0,t0,a6
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# }
.L.6:
# c[1] = cy;
        jr ra
        sw t2,4(a3)
# }
.L.1:
        jr ra
        nop
#}
#
.end s_mpv_mul_d_add

.ent s_mpv_mul_d_add_prop
.globl s_mpv_mul_d_add_prop

s_mpv_mul_d_add_prop:
#/* c += a * b */
#void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit c0, c1; regs a6, a7
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
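# Reference sketch (same assumptions as the sketch in s_mpv_mul_d_add above).
# The only difference from s_mpv_mul_d_add is the tail: instead of writing
# the final carry into one extra digit, it is propagated through as many
# further digits of c as needed:
#
#   mp_digit cy = 0;
#   while (a_len--) {
#       mp_word w = (mp_word)b * *a++ + *c + cy;
#       *c++ = ACCUM(w);
#       cy = CARRYOUT(w);
#   }
#   while (cy) {
#       mp_word w = (mp_word)*c + cy;
#       *c++ = ACCUM(w);
#       cy = CARRYOUT(w);
#   }
#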
# if (a_len) {
        beq a1,zero,.M.1
        move t2,zero # cy = 0
        dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
        dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
        lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
        dmultu a2,a4
# if (--a_len) {
        addiu a1,a1,-1
        beq a1,zero,.M.2
# while (a_len >= 2) {
        sltiu t3,a1,2
        bne t3,zero,.M.3
# a1 = a[1];
        lwu a5,4(a0)
.M.4:
# a_len -= 2;
        addiu a1,a1,-2
# c0 = c[0];
        lwu a6,0(a3)
# w0 += cy;
        mflo t0
        daddu t0,t0,t2
# w0 += c0;
        daddu t0,t0,a6
# w1 = (mp_word)b * a1;
        dmultu a2,a5 #
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# a0 = a[2];
        lwu a4,8(a0)
# a += 2;
        addiu a0,a0,8
# c1 = c[1];
        lwu a7,4(a3)
# w1 += cy;
        mflo t1
        daddu t1,t1,t2
# w1 += c1;
        daddu t1,t1,a7
# w0 = (mp_word)b * a0;
        dmultu a2,a4 #
# cy = CARRYOUT(w1);
        dsrl32 t2,t1,0
# c[1] = ACCUM(w1);
        sw t1,4(a3)
# c += 2;
        addiu a3,a3,8
        sltiu t3,a1,2
        beq t3,zero,.M.4
# a1 = a[1];
        lwu a5,4(a0)
# }
.M.3:
# c0 = c[0];
        lwu a6,0(a3)
# w0 += cy;
# if (a_len) {
        mflo t0
        beq a1,zero,.M.5
        daddu t0,t0,t2
# w1 = (mp_word)b * a1;
        dmultu a2,a5
# w0 += c0;
        daddu t0,t0,a6 #
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# c1 = c[1];
        lwu a7,4(a3)
# w1 += cy;
        mflo t1
        daddu t1,t1,t2
# w1 += c1;
        daddu t1,t1,a7
# c[1] = ACCUM(w1);
        sw t1,4(a3)
# cy = CARRYOUT(w1);
        dsrl32 t2,t1,0
# c += 2;
        b .M.6
        addiu a3,a3,8
# } else {
.M.5:
# w0 += c0;
        daddu t0,t0,a6
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
        b .M.6
        addiu a3,a3,4
# }
# } else {
.M.2:
# c0 = c[0];
        lwu a6,0(a3)
# w0 += c0;
        mflo t0
        daddu t0,t0,a6
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
        addiu a3,a3,4
# }
.M.6:

# while (cy) {
        beq t2,zero,.M.1
        nop
.M.7:
# mp_word w = (mp_word)*c + cy;
        lwu a6,0(a3)
        daddu t2,t2,a6
# *c++ = ACCUM(w);
        sw t2,0(a3)
# cy = CARRYOUT(w);
        dsrl32 t2,t2,0
        bne t2,zero,.M.7
        addiu a3,a3,4

# }
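# Worked example for the carry propagation above (illustrative values only):
# with cy = 1 and c = { 0xffffffff, 0xffffffff, 0x00000005, ... } the first
# two digits wrap to 0 and the carry survives; 0x00000005 then becomes
# 0x00000006 with CARRYOUT(w) = 0 and the loop exits.  After the first
# iteration the carry is at most 1, so the loop always stops at the first
# digit that does not wrap around.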
.M.1:
        jr ra
        nop
#}
#
.end s_mpv_mul_d_add_prop

.ent s_mpv_mul_d
.globl s_mpv_mul_d

s_mpv_mul_d:
#/* c = a * b */
#void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
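# Reference sketch (same assumptions as above).  Unlike s_mpv_mul_d_add this
# routine overwrites c instead of accumulating into it:
#
#   mp_digit cy = 0;
#   while (a_len--) {
#       mp_word w = (mp_word)b * *a++ + cy;
#       *c++ = ACCUM(w);
#       cy = CARRYOUT(w);
#   }
#   *c = cy;
#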
# if (a_len) {
        beq a1,zero,.N.1
        move t2,zero # cy = 0
        dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
        dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
        lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
        dmultu a2,a4
# if (--a_len) {
        addiu a1,a1,-1
        beq a1,zero,.N.2
# while (a_len >= 2) {
        sltiu t3,a1,2
        bne t3,zero,.N.3
# a1 = a[1];
        lwu a5,4(a0)
.N.4:
# a_len -= 2;
        addiu a1,a1,-2
# w0 += cy;
        mflo t0
        daddu t0,t0,t2
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# w1 = (mp_word)b * a1;
        dmultu a2,a5
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# a0 = a[2];
        lwu a4,8(a0)
# a += 2;
        addiu a0,a0,8
# w1 += cy;
        mflo t1
        daddu t1,t1,t2
# cy = CARRYOUT(w1);
        dsrl32 t2,t1,0
# w0 = (mp_word)b * a0;
        dmultu a2,a4
# c[1] = ACCUM(w1);
        sw t1,4(a3)
# c += 2;
        addiu a3,a3,8
        sltiu t3,a1,2
        beq t3,zero,.N.4
# a1 = a[1];
        lwu a5,4(a0)
# }
.N.3:
# w0 += cy;
# if (a_len) {
        mflo t0
        beq a1,zero,.N.5
        daddu t0,t0,t2
# w1 = (mp_word)b * a1;
        dmultu a2,a5 #
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# w1 += cy;
        mflo t1
        daddu t1,t1,t2
# c[1] = ACCUM(w1);
        sw t1,4(a3)
# cy = CARRYOUT(w1);
        dsrl32 t2,t1,0
# c += 1;
        b .N.6
        addiu a3,a3,4
# } else {
.N.5:
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# cy = CARRYOUT(w0);
        b .N.6
        dsrl32 t2,t0,0
# }
# } else {
.N.2:
        mflo t0
# c[0] = ACCUM(w0);
        sw t0,0(a3)
# cy = CARRYOUT(w0);
        dsrl32 t2,t0,0
# }
.N.6:
# c[1] = cy;
        jr ra
        sw t2,4(a3)
# }
.N.1:
        jr ra
        nop
#}
#
.end s_mpv_mul_d


.ent s_mpv_sqr_add_prop
.globl s_mpv_sqr_add_prop
#void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
# registers
# a0 *a
# a1 a_len
# a2 *sqr
# a3 digit from *a, a_i
# a4 square of digit from a
# a5,a6 next 2 digits in sqr
# a7,t0 carry
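# Reference sketch of the loop below (illustrative only; same 32-bit
# mp_digit / 64-bit mp_word and ACCUM/CARRYOUT assumptions as above): each
# 64-bit square a_i * a_i is added, together with the running carry, into
# the 64-bit value held in the next two 32-bit digits of sqrs, and the carry
# out of that sum is passed on to the following pair:
#
#   mp_digit cy = 0; /* 0, 1 or 2 */
#   while (a_len--) {
#       mp_word sq = (mp_word)*a * *a; a++;
#       mp_word acc = ((mp_word)sqrs[1] << 32) | sqrs[0];
#       mp_word sum = acc + sq; /* a wrap here contributes 1 to cy */
#       mp_word tot = sum + cy; /* a wrap here contributes 1 to cy */
#       mp_digit new_cy = (sum < acc) + (tot < sum);
#       sqrs[0] = ACCUM(tot);
#       sqrs[1] = CARRYOUT(tot);
#       sqrs += 2;
#       cy = new_cy;
#   }
#   /* any carry left after the last square is propagated further into sqrs */
#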
s_mpv_sqr_add_prop:
        move a7,zero
        move t0,zero # carry = 0
        lwu a3,0(a0) # a_i = a[0]
        addiu a1,a1,-1 # --a_len
        dmultu a3,a3 # start a_i * a_i
        beq a1,zero,.P.3 # jump if we've already done the only sqr
        addiu a0,a0,4 # ++a
.P.2:
        lwu a5,0(a2) # low digit of sqr
        lwu a6,4(a2) # high digit of sqr
        addiu a2,a2,8 # sqrs += 2;
        dsll32 a6,a6,0
        daddu a5,a5,a6 # a5 = both digits as one 64-bit value
        lwu a3,0(a0) # next a_i
        addiu a0,a0,4 # ++a
        mflo a4 # a4 = a_i * a_i from the previous dmultu
        daddu a6,a5,a4 # add the square into the two sqr digits
        sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
        dmultu a3,a3 # start the next square
        daddu a4,a6,t0 # add the incoming carry
        sltu t0,a4,a6 # detect overflow from the carry add
        add t0,t0,a7 # total carry out
        sw a4,-8(a2) # store low 32 bits back to sqr
        addiu a1,a1,-1 # --a_len
        dsrl32 a4,a4,0 # high 32 bits
        bne a1,zero,.P.2 # loop if a_len > 0
        sw a4,-4(a2) # store high 32 bits back to sqr
.P.3:
        lwu a5,0(a2) # low digit of sqr
        lwu a6,4(a2) # high digit of sqr
        addiu a2,a2,8 # sqrs += 2;
        dsll32 a6,a6,0
        daddu a5,a5,a6 # a5 = both digits as one 64-bit value
        mflo a4 # a4 = final a_i * a_i
        daddu a6,a5,a4 # add the square into the two sqr digits
        sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
        daddu a4,a6,t0 # add the incoming carry
        sltu t0,a4,a6 # detect overflow from the carry add
        add t0,t0,a7 # total carry out
        sw a4,-8(a2) # store low 32 bits back to sqr
        beq t0,zero,.P.9 # jump if no carry
        dsrl32 a4,a4,0 # high 32 bits
.P.8:
        sw a4,-4(a2)
        /* propagate final carry */
        lwu a5,0(a2)
        daddu a6,a5,t0
        sltu t0,a6,a5
        bne t0,zero,.P.8 # loop if carry persists
        addiu a2,a2,4 # sqrs++
.P.9:
        jr ra
        sw a4,-4(a2)

.end s_mpv_sqr_add_prop