security/nss/lib/freebl/mpi/mpi_x86_asm.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
michael@0 3 *
michael@0 4 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 5 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 7
michael@0 8 #include "mpi-priv.h"
michael@0 9
michael@0 10 static int is_sse = -1; /* dispatch cache: < 0 not probed yet, 0 take the x86 paths, > 0 take the SSE2 paths */
michael@0 11 extern unsigned long s_mpi_is_sse2(); /* defined outside this file; the multiply and square routines below store its result in is_sse on first use. NOTE(review): "()" is an old-style declaration without a prototype */
michael@0 12
michael@0 13 /*
 * s_mpv_mul_d: multiply the a_len 32-bit digits at a by the single digit b
 * and write a_len + 1 digits to c; the last digit written is the final
 * carry. c is overwritten, not accumulated into. When a_len is 0 only
 * c[0] is written, with 0.
 *
 * Dispatch: is_sse == 0 jumps to the plain x86 loop, is_sse > 0 jumps to the
 * SSE2 loop, and is_sse < 0 (the initial -1) first calls s_mpi_is_sse2() and
 * caches the result in is_sse.
 *
 * The stack table below describes the x86 path after its prologue. That
 * path also pushes ebx, which lands at ebp - 40, and the 28 bytes from
 * ebp - 28 to ebp - 4 are reserved by "sub esp,28" but never accessed.
 * The SSE2 path reserves no locals: it saves edi at ebp - 4 and esi at
 * ebp - 8. The argument offsets (ebp + 8 and up) are the same on both paths.
 *
 * Inline comments that say "ax" mean the full 32-bit eax: stosd stores a
 * dword.
 *
michael@0 14 * ebp - 36: caller's esi
michael@0 15 * ebp - 32: caller's edi
michael@0 16 * ebp - 28:
michael@0 17 * ebp - 24:
michael@0 18 * ebp - 20:
michael@0 19 * ebp - 16:
michael@0 20 * ebp - 12:
michael@0 21 * ebp - 8:
michael@0 22 * ebp - 4:
michael@0 23 * ebp + 0: caller's ebp
michael@0 24 * ebp + 4: return address
michael@0 25 * ebp + 8: a argument
michael@0 26 * ebp + 12: a_len argument
michael@0 27 * ebp + 16: b argument
michael@0 28 * ebp + 20: c argument
michael@0 29 * registers:
michael@0 30 * eax: is_sse during dispatch; then the current digit of a and the low half of the product (x86 path)
michael@0 31 * ebx: carry (x86 path only)
michael@0 32 * ecx: a_len
michael@0 33 * edx: b, then the high half of the product (x86 path)
michael@0 34 * esi: a ptr
michael@0 35 * edi: c ptr
 * mm0: current digit of a, then the 64-bit product (SSE2 path)
 * mm1: b (SSE2 path)
 * mm2: running 64-bit sum; after the shift its low 32 bits are the carry (SSE2 path)
michael@0 36 */
michael@0 37 __declspec(naked) void
michael@0 38 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 39 {
michael@0 40 __asm {
michael@0 41 mov eax, is_sse /* dispatch on the cached flag */
michael@0 42 cmp eax, 0
michael@0 43 je s_mpv_mul_d_x86 /* 0: plain x86 loop */
michael@0 44 jg s_mpv_mul_d_sse2 /* > 0: SSE2 loop */
michael@0 45 call s_mpi_is_sse2 /* < 0: not probed yet, ask once */
michael@0 46 mov is_sse, eax /* cache the answer for later calls */
michael@0 47 cmp eax, 0
michael@0 48 jg s_mpv_mul_d_sse2 /* otherwise fall through to the x86 loop */
michael@0 49 s_mpv_mul_d_x86:
michael@0 50 push ebp
michael@0 51 mov ebp,esp
michael@0 52 sub esp,28 /* reserves 28 bytes of locals that this path never touches */
michael@0 53 push edi /* save the caller's edi, esi and ebx */
michael@0 54 push esi
michael@0 55 push ebx
michael@0 56 mov ebx,0 ; carry = 0
michael@0 57 mov ecx,[ebp+12] ; ecx = a_len
michael@0 58 mov edi,[ebp+20] /* edi = c */
michael@0 59 cmp ecx,0
michael@0 60 je L_2 ; jmp if a_len == 0
michael@0 61 mov esi,[ebp+8] ; esi = a
michael@0 62 cld /* make lodsd and stosd step forward */
michael@0 63 L_1:
michael@0 64 lodsd ; eax = [ds:esi]; esi += 4
michael@0 65 mov edx,[ebp+16] ; edx = b
michael@0 66 mul edx ; edx:eax = Phi:Plo = a_i * b
michael@0 67
michael@0 68 add eax,ebx ; add carry (ebx) to edx:eax
michael@0 69 adc edx,0
michael@0 70 mov ebx,edx ; high half of product becomes next carry
michael@0 71
michael@0 72 stosd ; [es:edi] = ax; edi += 4;
michael@0 73 dec ecx ; --a_len
michael@0 74 jnz L_1 ; jmp if a_len != 0
michael@0 75 L_2:
michael@0 76 mov [edi],ebx ; *c = carry
michael@0 77 pop ebx
michael@0 78 pop esi
michael@0 79 pop edi
michael@0 80 leave /* esp = ebp, pop ebp: also drops the 28 reserved bytes */
michael@0 81 ret
michael@0 82 nop /* never executed: it follows ret */
michael@0 83 s_mpv_mul_d_sse2:
michael@0 84 push ebp
michael@0 85 mov ebp, esp
michael@0 86 push edi /* save the caller's edi and esi; ebx is not used on this path */
michael@0 87 push esi
michael@0 88 psubq mm2, mm2 ; carry = 0
michael@0 89 mov ecx, [ebp+12] ; ecx = a_len
michael@0 90 movd mm1, [ebp+16] ; mm1 = b
michael@0 91 mov edi, [ebp+20] /* edi = c */
michael@0 92 cmp ecx, 0
michael@0 93 je L_6 ; jmp if a_len == 0
michael@0 94 mov esi, [ebp+8] ; esi = a
michael@0 95 cld /* not needed on this path: no string instruction follows */
michael@0 96 L_5:
michael@0 97 movd mm0, [esi] ; mm0 = *a++
michael@0 98 add esi, 4
michael@0 99 pmuludq mm0, mm1 ; mm0 = b * *a++
michael@0 100 paddq mm2, mm0 ; add the carry
michael@0 101 movd [edi], mm2 ; store the 32bit result
michael@0 102 add edi, 4
michael@0 103 psrlq mm2, 32 ; save the carry
michael@0 104 dec ecx ; --a_len
michael@0 105 jnz L_5 ; jmp if a_len != 0
michael@0 106 L_6:
michael@0 107 movd [edi], mm2 ; *c = carry
michael@0 108 emms /* leave MMX state so that x87 code can run after the return */
michael@0 109 pop esi
michael@0 110 pop edi
michael@0 111 leave
michael@0 112 ret
michael@0 113 nop /* never executed: it follows ret */
michael@0 114 }
michael@0 115 }
michael@0 116
michael@0 117 /*
 * s_mpv_mul_d_add: multiply the a_len 32-bit digits at a by the single digit
 * b and add the products into c[0 .. a_len-1]. The final carry is then
 * stored into c[a_len], replacing whatever was there. When a_len is 0 only
 * c[0] is written, with 0.
 *
 * Dispatch: is_sse == 0 jumps to the plain x86 loop, is_sse > 0 jumps to the
 * SSE2 loop, and is_sse < 0 (the initial -1) first calls s_mpi_is_sse2() and
 * caches the result in is_sse.
 *
 * The stack table below describes the x86 path after its prologue. That
 * path also pushes ebx, which lands at ebp - 40, and the 28 bytes from
 * ebp - 28 to ebp - 4 are reserved by "sub esp,28" but never accessed.
 * The SSE2 path reserves no locals: it saves edi at ebp - 4 and esi at
 * ebp - 8. The argument offsets (ebp + 8 and up) are the same on both paths.
 *
 * Inline comments that say "ax" mean the full 32-bit eax: stosd stores a
 * dword.
 *
michael@0 118 * ebp - 36: caller's esi
michael@0 119 * ebp - 32: caller's edi
michael@0 120 * ebp - 28:
michael@0 121 * ebp - 24:
michael@0 122 * ebp - 20:
michael@0 123 * ebp - 16:
michael@0 124 * ebp - 12:
michael@0 125 * ebp - 8:
michael@0 126 * ebp - 4:
michael@0 127 * ebp + 0: caller's ebp
michael@0 128 * ebp + 4: return address
michael@0 129 * ebp + 8: a argument
michael@0 130 * ebp + 12: a_len argument
michael@0 131 * ebp + 16: b argument
michael@0 132 * ebp + 20: c argument
michael@0 133 * registers:
michael@0 134 * eax: is_sse during dispatch; then the current digit of a and the low half of the running sum (x86 path)
michael@0 135 * ebx: carry; also used to fetch the current digit of c (x86 path only)
michael@0 136 * ecx: a_len
michael@0 137 * edx: b, then the high half of the running sum (x86 path)
michael@0 138 * esi: a ptr
michael@0 139 * edi: c ptr
 * mm0: current digit of a, then the 64-bit product, then the current digit of c (SSE2 path)
 * mm1: b (SSE2 path)
 * mm2: running 64-bit sum; after the shift its low 32 bits are the carry (SSE2 path)
michael@0 140 */
michael@0 141 __declspec(naked) void
michael@0 142 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 143 {
michael@0 144 __asm {
michael@0 145 mov eax, is_sse /* dispatch on the cached flag */
michael@0 146 cmp eax, 0
michael@0 147 je s_mpv_mul_d_add_x86 /* 0: plain x86 loop */
michael@0 148 jg s_mpv_mul_d_add_sse2 /* > 0: SSE2 loop */
michael@0 149 call s_mpi_is_sse2 /* < 0: not probed yet, ask once */
michael@0 150 mov is_sse, eax /* cache the answer for later calls */
michael@0 151 cmp eax, 0
michael@0 152 jg s_mpv_mul_d_add_sse2 /* otherwise fall through to the x86 loop */
michael@0 153 s_mpv_mul_d_add_x86:
michael@0 154 push ebp
michael@0 155 mov ebp,esp
michael@0 156 sub esp,28 /* reserves 28 bytes of locals that this path never touches */
michael@0 157 push edi /* save the caller's edi, esi and ebx */
michael@0 158 push esi
michael@0 159 push ebx
michael@0 160 mov ebx,0 ; carry = 0
michael@0 161 mov ecx,[ebp+12] ; ecx = a_len
michael@0 162 mov edi,[ebp+20] /* edi = c */
michael@0 163 cmp ecx,0
michael@0 164 je L_11 ; jmp if a_len == 0
michael@0 165 mov esi,[ebp+8] ; esi = a
michael@0 166 cld /* make lodsd and stosd step forward */
michael@0 167 L_10:
michael@0 168 lodsd ; eax = [ds:esi]; esi += 4
michael@0 169 mov edx,[ebp+16] ; edx = b
michael@0 170 mul edx ; edx:eax = Phi:Plo = a_i * b
michael@0 171
michael@0 172 add eax,ebx ; add carry (ebx) to edx:eax
michael@0 173 adc edx,0
michael@0 174 mov ebx,[edi] ; add in current word from *c
michael@0 175 add eax,ebx
michael@0 176 adc edx,0
michael@0 177 mov ebx,edx ; high half of product becomes next carry
michael@0 178
michael@0 179 stosd ; [es:edi] = ax; edi += 4;
michael@0 180 dec ecx ; --a_len
michael@0 181 jnz L_10 ; jmp if a_len != 0
michael@0 182 L_11:
michael@0 183 mov [edi],ebx ; *c = carry
michael@0 184 pop ebx
michael@0 185 pop esi
michael@0 186 pop edi
michael@0 187 leave /* esp = ebp, pop ebp: also drops the 28 reserved bytes */
michael@0 188 ret
michael@0 189 nop /* never executed: it follows ret */
michael@0 190 s_mpv_mul_d_add_sse2:
michael@0 191 push ebp
michael@0 192 mov ebp, esp
michael@0 193 push edi /* save the caller's edi and esi; ebx is not used on this path */
michael@0 194 push esi
michael@0 195 psubq mm2, mm2 ; carry = 0
michael@0 196 mov ecx, [ebp+12] ; ecx = a_len
michael@0 197 movd mm1, [ebp+16] ; mm1 = b
michael@0 198 mov edi, [ebp+20] /* edi = c */
michael@0 199 cmp ecx, 0
michael@0 200 je L_16 ; jmp if a_len == 0
michael@0 201 mov esi, [ebp+8] ; esi = a
michael@0 202 cld /* not needed on this path: no string instruction follows */
michael@0 203 L_15:
michael@0 204 movd mm0, [esi] ; mm0 = *a++
michael@0 205 add esi, 4
michael@0 206 pmuludq mm0, mm1 ; mm0 = b * *a++
michael@0 207 paddq mm2, mm0 ; add the carry
michael@0 208 movd mm0, [edi] /* mm0 = current digit of c */
michael@0 209 paddq mm2, mm0 ; add the carry /* correction: this second paddq adds the digit of c just loaded */
michael@0 210 movd [edi], mm2 ; store the 32bit result
michael@0 211 add edi, 4
michael@0 212 psrlq mm2, 32 ; save the carry
michael@0 213 dec ecx ; --a_len
michael@0 214 jnz L_15 ; jmp if a_len != 0
michael@0 215 L_16:
michael@0 216 movd [edi], mm2 ; *c = carry
michael@0 217 emms /* leave MMX state so that x87 code can run after the return */
michael@0 218 pop esi
michael@0 219 pop edi
michael@0 220 leave
michael@0 221 ret
michael@0 222 nop /* never executed: it follows ret */
michael@0 223 }
michael@0 224 }
michael@0 225
michael@0 226 /*
 * s_mpv_mul_d_add_prop: multiply the a_len 32-bit digits at a by the single
 * digit b and add the products into c[0 .. a_len-1]. A non-zero final carry
 * is added into c[a_len], and any carry out of that addition keeps rippling
 * into the following digits of c until an addition no longer carries.
 * No upper bound is checked during that ripple, so c must be long enough to
 * absorb it; that is left to the caller. When a_len is 0 nothing is written.
 *
 * Dispatch: is_sse == 0 jumps to the plain x86 loop, is_sse > 0 jumps to the
 * SSE2 loop, and is_sse < 0 (the initial -1) first calls s_mpi_is_sse2() and
 * caches the result in is_sse.
 *
 * The stack table below describes the x86 path after its prologue. That
 * path also pushes ebx, which lands at ebp - 40, and the 28 bytes from
 * ebp - 28 to ebp - 4 are reserved by "sub esp,28" but never accessed.
 * The SSE2 path reserves no locals: it saves edi at ebp - 4, esi at ebp - 8
 * and ebx at ebp - 12. The argument offsets (ebp + 8 and up) are the same on
 * both paths.
 *
 * Inline comments that say "ax" mean the full 32-bit eax: stosd stores a
 * dword.
 *
michael@0 227 * ebp - 36: caller's esi
michael@0 228 * ebp - 32: caller's edi
michael@0 229 * ebp - 28:
michael@0 230 * ebp - 24:
michael@0 231 * ebp - 20:
michael@0 232 * ebp - 16:
michael@0 233 * ebp - 12:
michael@0 234 * ebp - 8:
michael@0 235 * ebp - 4:
michael@0 236 * ebp + 0: caller's ebp
michael@0 237 * ebp + 4: return address
michael@0 238 * ebp + 8: a argument
michael@0 239 * ebp + 12: a_len argument
michael@0 240 * ebp + 16: b argument
michael@0 241 * ebp + 20: c argument
michael@0 242 * registers:
michael@0 243 * eax: is_sse during dispatch; then the digit being computed or updated
michael@0 244 * ebx: carry (on the SSE2 path only after the multiply loop, copied from mm2)
michael@0 245 * ecx: a_len
michael@0 246 * edx: b, then the high half of the running sum (x86 path)
michael@0 247 * esi: a ptr
michael@0 248 * edi: c ptr
 * mm0: current digit of a, then the 64-bit product (SSE2 path)
 * mm1: b (SSE2 path)
 * mm2: running 64-bit sum; after the shift its low 32 bits are the carry (SSE2 path)
 * mm3: current digit of c (SSE2 path)
michael@0 249 */
michael@0 250 __declspec(naked) void
michael@0 251 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 252 {
michael@0 253 __asm {
michael@0 254 mov eax, is_sse /* dispatch on the cached flag */
michael@0 255 cmp eax, 0
michael@0 256 je s_mpv_mul_d_add_prop_x86 /* 0: plain x86 loop */
michael@0 257 jg s_mpv_mul_d_add_prop_sse2 /* > 0: SSE2 loop */
michael@0 258 call s_mpi_is_sse2 /* < 0: not probed yet, ask once */
michael@0 259 mov is_sse, eax /* cache the answer for later calls */
michael@0 260 cmp eax, 0
michael@0 261 jg s_mpv_mul_d_add_prop_sse2 /* otherwise fall through to the x86 loop */
michael@0 262 s_mpv_mul_d_add_prop_x86:
michael@0 263 push ebp
michael@0 264 mov ebp,esp
michael@0 265 sub esp,28 /* reserves 28 bytes of locals that this path never touches */
michael@0 266 push edi /* save the caller's edi, esi and ebx */
michael@0 267 push esi
michael@0 268 push ebx
michael@0 269 mov ebx,0 ; carry = 0
michael@0 270 mov ecx,[ebp+12] ; ecx = a_len
michael@0 271 mov edi,[ebp+20] /* edi = c */
michael@0 272 cmp ecx,0
michael@0 273 je L_21 ; jmp if a_len == 0
michael@0 274 cld /* make lodsd and stosd step forward */
michael@0 275 mov esi,[ebp+8] ; esi = a
michael@0 276 L_20:
michael@0 277 lodsd ; eax = [ds:esi]; esi += 4
michael@0 278 mov edx,[ebp+16] ; edx = b
michael@0 279 mul edx ; edx:eax = Phi:Plo = a_i * b
michael@0 280
michael@0 281 add eax,ebx ; add carry (ebx) to edx:eax
michael@0 282 adc edx,0
michael@0 283 mov ebx,[edi] ; add in current word from *c
michael@0 284 add eax,ebx
michael@0 285 adc edx,0
michael@0 286 mov ebx,edx ; high half of product becomes next carry
michael@0 287
michael@0 288 stosd ; [es:edi] = ax; edi += 4;
michael@0 289 dec ecx ; --a_len
michael@0 290 jnz L_20 ; jmp if a_len != 0
michael@0 291 L_21:
michael@0 292 cmp ebx,0 ; is carry zero?
michael@0 293 jz L_23
michael@0 294 mov eax,[edi] ; add in current word from *c
michael@0 295 add eax,ebx
michael@0 296 stosd ; [es:edi] = ax; edi += 4;
michael@0 297 jnc L_23 /* stosd leaves the flags alone, so this still tests the add above */
michael@0 298 L_22:
michael@0 299 mov eax,[edi] ; add in current word from *c
michael@0 300 adc eax,0
michael@0 301 stosd ; [es:edi] = ax; edi += 4;
michael@0 302 jc L_22 /* keep rippling while the adc carries out; no length limit is checked */
michael@0 303 L_23:
michael@0 304 pop ebx
michael@0 305 pop esi
michael@0 306 pop edi
michael@0 307 leave /* esp = ebp, pop ebp: also drops the 28 reserved bytes */
michael@0 308 ret
michael@0 309 nop /* never executed: it follows ret */
michael@0 310 s_mpv_mul_d_add_prop_sse2:
michael@0 311 push ebp
michael@0 312 mov ebp, esp
michael@0 313 push edi /* save the caller's edi, esi and ebx */
michael@0 314 push esi
michael@0 315 push ebx
michael@0 316 psubq mm2, mm2 ; carry = 0
michael@0 317 mov ecx, [ebp+12] ; ecx = a_len
michael@0 318 movd mm1, [ebp+16] ; mm1 = b
michael@0 319 mov edi, [ebp+20] /* edi = c */
michael@0 320 cmp ecx, 0
michael@0 321 je L_26 ; jmp if a_len == 0
michael@0 322 mov esi, [ebp+8] ; esi = a
michael@0 323 cld /* needed by the stosd instructions in the carry ripple below */
michael@0 324 L_25:
michael@0 325 movd mm0, [esi] ; mm0 = *a++
michael@0 326 movd mm3, [edi] ; fetch the sum
michael@0 327 add esi, 4
michael@0 328 pmuludq mm0, mm1 ; mm0 = b * *a++
michael@0 329 paddq mm2, mm0 ; add the carry
michael@0 330 paddq mm2, mm3 ; add *c++
michael@0 331 movd [edi], mm2 ; store the 32bit result
michael@0 332 add edi, 4
michael@0 333 psrlq mm2, 32 ; save the carry
michael@0 334 dec ecx ; --a_len
michael@0 335 jnz L_25 ; jmp if a_len != 0
michael@0 336 L_26:
michael@0 337 movd ebx, mm2 /* ebx = final carry; the ripple below is done with integer instructions */
michael@0 338 cmp ebx, 0 ; is carry zero?
michael@0 339 jz L_28
michael@0 340 mov eax, [edi] /* eax = c[a_len] */
michael@0 341 add eax, ebx /* add the carry in */
michael@0 342 stosd /* store it back and advance edi; flags are left untouched */
michael@0 343 jnc L_28
michael@0 344 L_27:
michael@0 345 mov eax, [edi] ; add in current word from *c
michael@0 346 adc eax, 0
michael@0 347 stosd ; [es:edi] = ax; edi += 4;
michael@0 348 jc L_27 /* keep rippling while the adc carries out; no length limit is checked */
michael@0 349 L_28:
michael@0 350 emms /* leave MMX state so that x87 code can run after the return */
michael@0 351 pop ebx
michael@0 352 pop esi
michael@0 353 pop edi
michael@0 354 leave
michael@0 355 ret
michael@0 356 nop /* never executed: it follows ret */
michael@0 357 }
michael@0 358 }
michael@0 359
michael@0 360 /*
 * s_mpv_sqr_add_prop: for each of the a_len 32-bit digits a[i], add the
 * 64-bit square a[i] * a[i] into the digit pair sqrs[2i], sqrs[2i + 1],
 * carrying from one pair into the next. A carry left after the last pair is
 * added into the following digit and keeps rippling upward until an addition
 * no longer carries. No upper bound is checked during that ripple, so sqrs
 * must be long enough to absorb it; that is left to the caller. When a_len
 * is 0 nothing is written.
 *
 * Dispatch: is_sse == 0 jumps to the plain x86 loop, is_sse > 0 jumps to the
 * SSE2 loop, and is_sse < 0 (the initial -1) first calls s_mpi_is_sse2() and
 * caches the result in is_sse.
 *
 * The stack table below describes the x86 path after its prologue. That
 * path also pushes ebx, which lands at ebp - 24. The SSE2 path reserves no
 * locals: it saves edi at ebp - 4, esi at ebp - 8 and ebx at ebp - 12.
 * The argument offsets (ebp + 8 and up) are the same on both paths.
 *
 * Inline comments that say "ax", "si" or "di" mean the 32-bit eax, esi and
 * edi: lodsd and stosd move a full dword.
 *
michael@0 361 * ebp - 20: caller's esi
michael@0 362 * ebp - 16: caller's edi
michael@0 363 * ebp - 12: (reserved by "sub esp,12", never accessed)
michael@0 364 * ebp - 8: (reserved, never accessed; the carry is kept in ebx)
michael@0 365 * ebp - 4: (reserved, never accessed; a_len is kept in ecx)
michael@0 366 * ebp + 0: caller's ebp
michael@0 367 * ebp + 4: return address
michael@0 368 * ebp + 8: pa argument (named a in the signature)
michael@0 369 * ebp + 12: a_len argument
michael@0 370 * ebp + 16: ps argument (named sqrs in the signature)
michael@0 371 * ebp + 20: (no fourth argument)
michael@0 372 * registers:
michael@0 373 * eax: is_sse during dispatch; then the digit being squared, stored or updated
michael@0 374 * ebx: carry; also used to fetch the two digits of sqrs (x86 path)
michael@0 375 * ecx: a_len
michael@0 376 * edx: high half of the square and of the running sum (x86 path)
michael@0 377 * esi: a ptr
michael@0 378 * edi: sqrs ptr
 * mm0: current digit of a, then its 64-bit square (SSE2 path)
 * mm2: running 64-bit sum; after each shift its low 32 bits are the carry (SSE2 path)
 * mm3: current digit of sqrs, low then high (SSE2 path)
michael@0 379 */
michael@0 380 __declspec(naked) void
michael@0 381 s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
michael@0 382 {
michael@0 383 __asm {
michael@0 384 mov eax, is_sse /* dispatch on the cached flag */
michael@0 385 cmp eax, 0
michael@0 386 je s_mpv_sqr_add_prop_x86 /* 0: plain x86 loop */
michael@0 387 jg s_mpv_sqr_add_prop_sse2 /* > 0: SSE2 loop */
michael@0 388 call s_mpi_is_sse2 /* < 0: not probed yet, ask once */
michael@0 389 mov is_sse, eax /* cache the answer for later calls */
michael@0 390 cmp eax, 0
michael@0 391 jg s_mpv_sqr_add_prop_sse2 /* otherwise fall through to the x86 loop */
michael@0 392 s_mpv_sqr_add_prop_x86:
michael@0 393 push ebp
michael@0 394 mov ebp,esp
michael@0 395 sub esp,12 /* reserves 12 bytes of locals that this path never touches */
michael@0 396 push edi /* save the caller's edi, esi and ebx */
michael@0 397 push esi
michael@0 398 push ebx
michael@0 399 mov ebx,0 ; carry = 0
michael@0 400 mov ecx,[ebp+12] ; a_len
michael@0 401 mov edi,[ebp+16] ; edi = ps
michael@0 402 cmp ecx,0
michael@0 403 je L_31 ; jump if a_len == 0
michael@0 404 cld /* make lodsd and stosd step forward */
michael@0 405 mov esi,[ebp+8] ; esi = pa
michael@0 406 L_30:
michael@0 407 lodsd ; eax = [ds:si]; si += 4;
michael@0 408 mul eax /* edx:eax = a_i * a_i */
michael@0 409
michael@0 410 add eax,ebx ; add "carry"
michael@0 411 adc edx,0
michael@0 412 mov ebx,[edi]
michael@0 413 add eax,ebx ; add low word from result
michael@0 414 mov ebx,[edi+4] /* fetch the high word before stosd overwrites nothing of it; mov keeps the carry flag from the add */
michael@0 415 stosd ; [es:di] = eax; di += 4;
michael@0 416 adc edx,ebx ; add high word from result
michael@0 417 mov ebx,0 /* mov rather than xor: the carry flag from the adc must survive */
michael@0 418 mov eax,edx
michael@0 419 adc ebx,0 /* ebx = carry out of this digit pair (0 or 1) */
michael@0 420 stosd ; [es:di] = eax; di += 4;
michael@0 421 dec ecx ; --a_len
michael@0 422 jnz L_30 ; jmp if a_len != 0
michael@0 423 L_31:
michael@0 424 cmp ebx,0 ; is carry zero?
michael@0 425 jz L_34
michael@0 426 mov eax,[edi] ; add in current word from *c
michael@0 427 add eax,ebx
michael@0 428 stosd ; [es:edi] = ax; edi += 4;
michael@0 429 jnc L_34 /* stosd leaves the flags alone, so this still tests the add above */
michael@0 430 L_32:
michael@0 431 mov eax,[edi] ; add in current word from *c
michael@0 432 adc eax,0
michael@0 433 stosd ; [es:edi] = ax; edi += 4;
michael@0 434 jc L_32 /* keep rippling while the adc carries out; no length limit is checked */
michael@0 435 L_34:
michael@0 436 pop ebx
michael@0 437 pop esi
michael@0 438 pop edi
michael@0 439 leave /* esp = ebp, pop ebp: also drops the 12 reserved bytes */
michael@0 440 ret
michael@0 441 nop /* never executed: it follows ret */
michael@0 442 s_mpv_sqr_add_prop_sse2:
michael@0 443 push ebp
michael@0 444 mov ebp, esp
michael@0 445 push edi /* save the caller's edi, esi and ebx */
michael@0 446 push esi
michael@0 447 push ebx
michael@0 448 psubq mm2, mm2 ; carry = 0
michael@0 449 mov ecx, [ebp+12] ; ecx = a_len
michael@0 450 mov edi, [ebp+16] /* edi = sqrs */
michael@0 451 cmp ecx, 0
michael@0 452 je L_36 ; jmp if a_len == 0
michael@0 453 mov esi, [ebp+8] ; esi = a
michael@0 454 cld /* needed by the stosd instructions in the carry ripple below */
michael@0 455 L_35:
michael@0 456 movd mm0, [esi] ; mm0 = *a
michael@0 457 movd mm3, [edi] ; fetch the sum
michael@0 458 add esi, 4
michael@0 459 pmuludq mm0, mm0 ; mm0 = sqr(a)
michael@0 460 paddq mm2, mm0 ; add the carry
michael@0 461 paddq mm2, mm3 ; add the low word
michael@0 462 movd mm3, [edi+4] /* mm3 = high digit of this pair */
michael@0 463 movd [edi], mm2 ; store the 32bit result
michael@0 464 psrlq mm2, 32 /* keep the upper 32 bits: high half of the square plus any carry */
michael@0 465 paddq mm2, mm3 ; add the high word
michael@0 466 movd [edi+4], mm2 ; store the 32bit result
michael@0 467 psrlq mm2, 32 ; save the carry.
michael@0 468 add edi, 8
michael@0 469 dec ecx ; --a_len
michael@0 470 jnz L_35 ; jmp if a_len != 0
michael@0 471 L_36:
michael@0 472 movd ebx, mm2 /* ebx = final carry; the ripple below is done with integer instructions */
michael@0 473 cmp ebx, 0 ; is carry zero?
michael@0 474 jz L_38
michael@0 475 mov eax, [edi] /* eax = next digit of sqrs */
michael@0 476 add eax, ebx /* add the carry in */
michael@0 477 stosd /* store it back and advance edi; flags are left untouched */
michael@0 478 jnc L_38
michael@0 479 L_37:
michael@0 480 mov eax, [edi] ; add in current word from *c
michael@0 481 adc eax, 0
michael@0 482 stosd ; [es:edi] = ax; edi += 4;
michael@0 483 jc L_37 /* keep rippling while the adc carries out; no length limit is checked */
michael@0 484 L_38:
michael@0 485 emms /* leave MMX state so that x87 code can run after the return */
michael@0 486 pop ebx
michael@0 487 pop esi
michael@0 488 pop edi
michael@0 489 leave
michael@0 490 ret
michael@0 491 nop /* never executed: it follows ret */
michael@0 492 }
michael@0 493 }
michael@0 494
michael@0 495 /*
michael@0 496 * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
michael@0 497 * so its high bit is 1. This code is from NSPR.
michael@0 498 *
 * The quotient is stored through qp and the remainder through rp; the
 * function then always returns 0 (presumably MP_OKAY; confirm against the
 * mp_err definitions, which are not visible here).
 *
 * Nothing is validated: a single "div" does the work, and the processor
 * raises a divide error if the divisor is 0 or if the quotient does not fit
 * in 32 bits, i.e. unless Nhi < divisor. Callers must guarantee that.
 *
 * No frame pointer is set up and is_sse is not consulted. The offsets below
 * are relative to esp after the initial "push ebx".
 *
michael@0 499 * Stack layout and register use for function s_mpv_div_2dx1d:
michael@0 500 *
michael@0 501 * esp + 0: Caller's ebx
michael@0 502 * esp + 4: return address
michael@0 503 * esp + 8: Nhi argument
michael@0 504 * esp + 12: Nlo argument
michael@0 505 * esp + 16: divisor argument
michael@0 506 * esp + 20: qp argument
michael@0 507 * esp + 24: rp argument
michael@0 508 * registers:
michael@0 509 * eax: Nlo, then the quotient; zeroed last to form the return value
michael@0 510 * ebx: divisor, then qp, then rp (caller's value is saved and restored)
michael@0 511 * ecx: (not used)
michael@0 512 * edx: Nhi, then the remainder
michael@0 513 * esi: (not used)
michael@0 514 * edi: (not used)
michael@0 515 */
michael@0 516 __declspec(naked) mp_err
michael@0 517 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
michael@0 518 mp_digit *qp, mp_digit *rp)
michael@0 519 {
michael@0 520 __asm {
michael@0 521 push ebx /* save the caller's ebx; the arguments now start at esp + 8 */
michael@0 522 mov edx,[esp+8] /* edx = Nhi */
michael@0 523 mov eax,[esp+12] /* eax = Nlo */
michael@0 524 mov ebx,[esp+16] /* ebx = divisor */
michael@0 525 div ebx /* eax = quotient, edx = remainder of edx:eax by ebx; faults unless Nhi < divisor */
michael@0 526 mov ebx,[esp+20] /* ebx = qp */
michael@0 527 mov [ebx],eax /* store the quotient through qp */
michael@0 528 mov ebx,[esp+24] /* ebx = rp */
michael@0 529 mov [ebx],edx /* store the remainder through rp */
michael@0 530 xor eax,eax ; return zero
michael@0 531 pop ebx
michael@0 532 ret
michael@0 533 nop /* never executed: it follows ret */
michael@0 534 }
michael@0 535 }

mercurial