The Tor Browser: security/nss/lib/freebl/mpi/mpi_x86_os2.s

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

1 #

     2 # This Source Code Form is subject to the terms of the Mozilla Public

     3 # License, v. 2.0. If a copy of the MPL was not distributed with this

     4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

     6 .data

     7 .align 4

     8  #

     9  # -1 means to call _s_mpi_is_sse to determine if we support sse

    10  #    instructions.

    11  #  0 means to use x86 instructions

    12  #  1 means to use sse2 instructions

    13 .type	is_sse,@object

    14 .size	is_sse,4

    15 is_sse: .long	-1

    17 #

    18 # sigh, handle the difference between -fPIC and not PIC

    19 # default to pic, since this file seems to be exclusively

    20 # linux right now (solaris uses mpi_i86pc.s and windows uses

    21 # mpi_x86_asm.c)

    22 #

    23 #.ifndef NO_PIC

    24 #.macro GET   var,reg

    25 #    movl   \var@GOTOFF(%ebx),\reg

    26 #.endm

    27 #.macro PUT   reg,var

    28 #    movl   \reg,\var@GOTOFF(%ebx)

    29 #.endm

    30 #.else

    31 .macro GET   var,reg

    32     movl   \var,\reg

    33 .endm

    34 .macro PUT   reg,var

    35     movl   \reg,\var

    36 .endm

    37 #.endif

    39 .text

    42  #  ebp - 36:	caller's esi

    43  #  ebp - 32:	caller's edi

    44  #  ebp - 28:

    45  #  ebp - 24:

    46  #  ebp - 20:

    47  #  ebp - 16:

    48  #  ebp - 12:

    49  #  ebp - 8:

    50  #  ebp - 4:

    51  #  ebp + 0:	caller's ebp

    52  #  ebp + 4:	return address

    53  #  ebp + 8:	a	argument

    54  #  ebp + 12:	a_len	argument

    55  #  ebp + 16:	b	argument

    56  #  ebp + 20:	c	argument

    57  #  registers:

    58  # 	eax:

    59  #	ebx:	carry

    60  #	ecx:	a_len

    61  #	edx:

    62  #	esi:	a ptr

    63  #	edi:	c ptr

    64 .globl	_s_mpv_mul_d

    65 .type	_s_mpv_mul_d,@function

    66 _s_mpv_mul_d:

    67     GET    is_sse,%eax

    68     cmp    $0,%eax

    69     je     _s_mpv_mul_d_x86

    70     jg     _s_mpv_mul_d_sse2

    71     call   _s_mpi_is_sse2

    72     PUT    %eax,is_sse

    73     cmp    $0,%eax

    74     jg     _s_mpv_mul_d_sse2

    75 _s_mpv_mul_d_x86:

    76     push   %ebp

    77     mov    %esp,%ebp

    78     sub    $28,%esp

    79     push   %edi

    80     push   %esi

    81     push   %ebx

    82     movl   $0,%ebx		# carry = 0

    83     mov    12(%ebp),%ecx	# ecx = a_len

    84     mov    20(%ebp),%edi

    85     cmp    $0,%ecx

    86     je     2f			# jmp if a_len == 0

    87     mov    8(%ebp),%esi		# esi = a

    88     cld

    89 1:

    90     lodsl			# eax = [ds:esi]; esi += 4

    91     mov    16(%ebp),%edx	# edx = b

    92     mull   %edx			# edx:eax = Phi:Plo = a_i * b

    94     add    %ebx,%eax		# add carry (%ebx) to edx:eax

    95     adc    $0,%edx

    96     mov    %edx,%ebx		# high half of product becomes next carry

    98     stosl			# [es:edi] = ax; edi += 4;

    99     dec    %ecx			# --a_len

   100     jnz    1b			# jmp if a_len != 0

   101 2:

   102     mov    %ebx,0(%edi)		# *c = carry

   103     pop    %ebx

   104     pop    %esi

   105     pop    %edi

   106     leave

   107     ret

   108     nop

   109 _s_mpv_mul_d_sse2:

   110     push   %ebp

   111     mov    %esp,%ebp

   112     push   %edi

   113     push   %esi

   114     psubq  %mm2,%mm2		# carry = 0

   115     mov    12(%ebp),%ecx	# ecx = a_len

   116     movd   16(%ebp),%mm1	# mm1 = b

   117     mov    20(%ebp),%edi

   118     cmp    $0,%ecx

   119     je     6f			# jmp if a_len == 0

   120     mov    8(%ebp),%esi		# esi = a

   121     cld

   122 5:

   123     movd   0(%esi),%mm0         # mm0 = *a++

   124     add    $4,%esi

   125     pmuludq %mm1,%mm0           # mm0 = b * *a++

   126     paddq  %mm0,%mm2            # add the carry

   127     movd   %mm2,0(%edi)         # store the 32bit result

   128     add    $4,%edi

   129     psrlq  $32, %mm2		# save the carry

   130     dec    %ecx			# --a_len

   131     jnz    5b			# jmp if a_len != 0

   132 6:

   133     movd   %mm2,0(%edi)		# *c = carry

   134     emms

   135     pop    %esi

   136     pop    %edi

   137     leave

   138     ret

   139     nop

   141  #  ebp - 36:	caller's esi

   142  #  ebp - 32:	caller's edi

   143  #  ebp - 28:

   144  #  ebp - 24:

   145  #  ebp - 20:

   146  #  ebp - 16:

   147  #  ebp - 12:

   148  #  ebp - 8:

   149  #  ebp - 4:

   150  #  ebp + 0:	caller's ebp

   151  #  ebp + 4:	return address

   152  #  ebp + 8:	a	argument

   153  #  ebp + 12:	a_len	argument

   154  #  ebp + 16:	b	argument

   155  #  ebp + 20:	c	argument

   156  #  registers:

   157  # 	eax:

   158  #	ebx:	carry

   159  #	ecx:	a_len

   160  #	edx:

   161  #	esi:	a ptr

   162  #	edi:	c ptr

   163 .globl	_s_mpv_mul_d_add

   164 .type	_s_mpv_mul_d_add,@function

   165 _s_mpv_mul_d_add:

   166     GET    is_sse,%eax

   167     cmp    $0,%eax

   168     je     _s_mpv_mul_d_add_x86

   169     jg     _s_mpv_mul_d_add_sse2

   170     call   _s_mpi_is_sse2

   171     PUT    %eax,is_sse

   172     cmp    $0,%eax

   173     jg     _s_mpv_mul_d_add_sse2

   174 _s_mpv_mul_d_add_x86:

   175     push   %ebp

   176     mov    %esp,%ebp

   177     sub    $28,%esp

   178     push   %edi

   179     push   %esi

   180     push   %ebx

   181     movl   $0,%ebx		# carry = 0

   182     mov    12(%ebp),%ecx	# ecx = a_len

   183     mov    20(%ebp),%edi

   184     cmp    $0,%ecx

   185     je     11f			# jmp if a_len == 0

   186     mov    8(%ebp),%esi		# esi = a

   187     cld

   188 10:

   189     lodsl			# eax = [ds:esi]; esi += 4

   190     mov    16(%ebp),%edx	# edx = b

   191     mull   %edx			# edx:eax = Phi:Plo = a_i * b

   193     add    %ebx,%eax		# add carry (%ebx) to edx:eax

   194     adc    $0,%edx

   195     mov    0(%edi),%ebx		# add in current word from *c

   196     add    %ebx,%eax

   197     adc    $0,%edx

   198     mov    %edx,%ebx		# high half of product becomes next carry

   200     stosl			# [es:edi] = ax; edi += 4;

   201     dec    %ecx			# --a_len

   202     jnz    10b			# jmp if a_len != 0

   203 11:

   204     mov    %ebx,0(%edi)		# *c = carry

   205     pop    %ebx

   206     pop    %esi

   207     pop    %edi

   208     leave

   209     ret

   210     nop

   211 _s_mpv_mul_d_add_sse2:

   212     push   %ebp

   213     mov    %esp,%ebp

   214     push   %edi

   215     push   %esi

   216     psubq  %mm2,%mm2		# carry = 0

   217     mov    12(%ebp),%ecx	# ecx = a_len

   218     movd   16(%ebp),%mm1	# mm1 = b

   219     mov    20(%ebp),%edi

   220     cmp    $0,%ecx

   221     je     16f			# jmp if a_len == 0

   222     mov    8(%ebp),%esi		# esi = a

   223     cld

   224 15:

   225     movd   0(%esi),%mm0         # mm0 = *a++

   226     add    $4,%esi

   227     pmuludq %mm1,%mm0           # mm0 = b * *a++

   228     paddq  %mm0,%mm2            # add the carry

   229     movd   0(%edi),%mm0

   230     paddq  %mm0,%mm2            # add the carry

   231     movd   %mm2,0(%edi)         # store the 32bit result

   232     add    $4,%edi

   233     psrlq  $32, %mm2		# save the carry

   234     dec    %ecx			# --a_len

   235     jnz    15b			# jmp if a_len != 0

   236 16:

   237     movd   %mm2,0(%edi)		# *c = carry

   238     emms

   239     pop    %esi

   240     pop    %edi

   241     leave

   242     ret

   243     nop

   245  #  ebp - 8:	caller's esi

   246  #  ebp - 4:	caller's edi

   247  #  ebp + 0:	caller's ebp

   248  #  ebp + 4:	return address

   249  #  ebp + 8:	a	argument

   250  #  ebp + 12:	a_len	argument

   251  #  ebp + 16:	b	argument

   252  #  ebp + 20:	c	argument

   253  #  registers:

   254  # 	eax:

   255  #	ebx:	carry

   256  #	ecx:	a_len

   257  #	edx:

   258  #	esi:	a ptr

   259  #	edi:	c ptr

   260 .globl	_s_mpv_mul_d_add_prop

   261 .type	_s_mpv_mul_d_add_prop,@function

   262 _s_mpv_mul_d_add_prop:

   263     GET    is_sse,%eax

   264     cmp    $0,%eax

   265     je     _s_mpv_mul_d_add_prop_x86

   266     jg     _s_mpv_mul_d_add_prop_sse2

   267     call   _s_mpi_is_sse2

   268     PUT    %eax,is_sse

   269     cmp    $0,%eax

   270     jg     _s_mpv_mul_d_add_prop_sse2

   271 _s_mpv_mul_d_add_prop_x86:

   272     push   %ebp

   273     mov    %esp,%ebp

   274     sub    $28,%esp

   275     push   %edi

   276     push   %esi

   277     push   %ebx

   278     movl   $0,%ebx		# carry = 0

   279     mov    12(%ebp),%ecx	# ecx = a_len

   280     mov    20(%ebp),%edi

   281     cmp    $0,%ecx

   282     je     21f			# jmp if a_len == 0

   283     cld

   284     mov    8(%ebp),%esi		# esi = a

   285 20:

   286     lodsl			# eax = [ds:esi]; esi += 4

   287     mov    16(%ebp),%edx	# edx = b

   288     mull   %edx			# edx:eax = Phi:Plo = a_i * b

   290     add    %ebx,%eax		# add carry (%ebx) to edx:eax

   291     adc    $0,%edx

   292     mov    0(%edi),%ebx		# add in current word from *c

   293     add    %ebx,%eax

   294     adc    $0,%edx

   295     mov    %edx,%ebx		# high half of product becomes next carry

   297     stosl			# [es:edi] = ax; edi += 4;

   298     dec    %ecx			# --a_len

   299     jnz    20b			# jmp if a_len != 0

   300 21:

   301     cmp    $0,%ebx		# is carry zero?

   302     jz     23f

   303     mov    0(%edi),%eax		# add in current word from *c

   304     add	   %ebx,%eax

   305     stosl			# [es:edi] = ax; edi += 4;

   306     jnc    23f

   307 22:

   308     mov    0(%edi),%eax		# add in current word from *c

   309     adc	   $0,%eax

   310     stosl			# [es:edi] = ax; edi += 4;

   311     jc     22b

   312 23:

   313     pop    %ebx

   314     pop    %esi

   315     pop    %edi

   316     leave

   317     ret

   318     nop

   319 _s_mpv_mul_d_add_prop_sse2:

   320     push   %ebp

   321     mov    %esp,%ebp

   322     push   %edi

   323     push   %esi

   324     push   %ebx

   325     psubq  %mm2,%mm2		# carry = 0

   326     mov    12(%ebp),%ecx	# ecx = a_len

   327     movd   16(%ebp),%mm1	# mm1 = b

   328     mov    20(%ebp),%edi

   329     cmp    $0,%ecx

   330     je     26f			# jmp if a_len == 0

   331     mov    8(%ebp),%esi		# esi = a

   332     cld

   333 25:

   334     movd   0(%esi),%mm0         # mm0 = *a++

   335     movd   0(%edi),%mm3		# fetch the sum

   336     add    $4,%esi

   337     pmuludq %mm1,%mm0           # mm0 = b * *a++

   338     paddq  %mm0,%mm2            # add the carry

   339     paddq  %mm3,%mm2            # add *c++

   340     movd   %mm2,0(%edi)         # store the 32bit result

   341     add    $4,%edi

   342     psrlq  $32, %mm2		# save the carry

   343     dec    %ecx			# --a_len

   344     jnz    25b			# jmp if a_len != 0

   345 26:

   346     movd   %mm2,%ebx

   347     cmp    $0,%ebx		# is carry zero?

   348     jz     28f

   349     mov    0(%edi),%eax

   350     add    %ebx, %eax

   351     stosl

   352     jnc    28f

   353 27:

   354     mov    0(%edi),%eax		# add in current word from *c

   355     adc	   $0,%eax

   356     stosl			# [es:edi] = ax; edi += 4;

   357     jc     27b

   358 28:

   359     emms

   360     pop    %ebx

   361     pop    %esi

   362     pop    %edi

   363     leave

   364     ret

   365     nop

   368  #  ebp - 20:	caller's esi

   369  #  ebp - 16:	caller's edi

   370  #  ebp - 12:

   371  #  ebp - 8:	carry

   372  #  ebp - 4:	a_len	local

   373  #  ebp + 0:	caller's ebp

   374  #  ebp + 4:	return address

   375  #  ebp + 8:	pa	argument

   376  #  ebp + 12:	a_len	argument

   377  #  ebp + 16:	ps	argument

   378  #  ebp + 20:

   379  #  registers:

   380  # 	eax:

   381  #	ebx:	carry

   382  #	ecx:	a_len

   383  #	edx:

   384  #	esi:	a ptr

   385  #	edi:	c ptr

   387 .globl	_s_mpv_sqr_add_prop

   388 .type	_s_mpv_sqr_add_prop,@function

   389 _s_mpv_sqr_add_prop:

   390      GET   is_sse,%eax

   391      cmp    $0,%eax

   392      je     _s_mpv_sqr_add_prop_x86

   393      jg     _s_mpv_sqr_add_prop_sse2

   394      call   _s_mpi_is_sse2

   395      PUT    %eax,is_sse

   396      cmp    $0,%eax

   397      jg     _s_mpv_sqr_add_prop_sse2

   398 _s_mpv_sqr_add_prop_x86:

   399      push   %ebp

   400      mov    %esp,%ebp

   401      sub    $12,%esp

   402      push   %edi

   403      push   %esi

   404      push   %ebx

   405      movl   $0,%ebx		# carry = 0

   406      mov    12(%ebp),%ecx	# a_len

   407      mov    16(%ebp),%edi	# edi = ps

   408      cmp    $0,%ecx

   409      je     31f			# jump if a_len == 0

   410      cld

   411      mov    8(%ebp),%esi	# esi = pa

   412 30:

   413      lodsl			# %eax = [ds:si]; si += 4;

   414      mull   %eax

   416      add    %ebx,%eax		# add "carry"

   417      adc    $0,%edx

   418      mov    0(%edi),%ebx

   419      add    %ebx,%eax		# add low word from result

   420      mov    4(%edi),%ebx

   421      stosl			# [es:di] = %eax; di += 4;

   422      adc    %ebx,%edx		# add high word from result

   423      movl   $0,%ebx

   424      mov    %edx,%eax

   425      adc    $0,%ebx

   426      stosl			# [es:di] = %eax; di += 4;

   427      dec    %ecx		# --a_len

   428      jnz    30b			# jmp if a_len != 0

   429 31:

   430     cmp    $0,%ebx		# is carry zero?

   431     jz     34f

   432     mov    0(%edi),%eax		# add in current word from *c

   433     add	   %ebx,%eax

   434     stosl			# [es:edi] = ax; edi += 4;

   435     jnc    34f

   436 32:

   437     mov    0(%edi),%eax		# add in current word from *c

   438     adc	   $0,%eax

   439     stosl			# [es:edi] = ax; edi += 4;

   440     jc     32b

   441 34:

   442     pop    %ebx

   443     pop    %esi

   444     pop    %edi

   445     leave

   446     ret

   447     nop

   448 _s_mpv_sqr_add_prop_sse2:

   449     push   %ebp

   450     mov    %esp,%ebp

   451     push   %edi

   452     push   %esi

   453     push   %ebx

   454     psubq  %mm2,%mm2		# carry = 0

   455     mov    12(%ebp),%ecx	# ecx = a_len

   456     mov    16(%ebp),%edi

   457     cmp    $0,%ecx

   458     je     36f			# jmp if a_len == 0

   459     mov    8(%ebp),%esi		# esi = a

   460     cld

   461 35:

   462     movd   0(%esi),%mm0        # mm0 = *a

   463     movd   0(%edi),%mm3	       # fetch the sum

   464     add	   $4,%esi

   465     pmuludq %mm0,%mm0          # mm0 = sqr(a)

   466     paddq  %mm0,%mm2           # add the carry

   467     paddq  %mm3,%mm2           # add the low word

   468     movd   4(%edi),%mm3

   469     movd   %mm2,0(%edi)        # store the 32bit result

   470     psrlq  $32, %mm2

   471     paddq  %mm3,%mm2           # add the high word

   472     movd   %mm2,4(%edi)        # store the 32bit result

   473     psrlq  $32, %mm2	       # save the carry.

   474     add    $8,%edi

   475     dec    %ecx			# --a_len

   476     jnz    35b			# jmp if a_len != 0

   477 36:

   478     movd   %mm2,%ebx

   479     cmp    $0,%ebx		# is carry zero?

   480     jz     38f

   481     mov    0(%edi),%eax

   482     add    %ebx, %eax

   483     stosl

   484     jnc    38f

   485 37:

   486     mov    0(%edi),%eax		# add in current word from *c

   487     adc	   $0,%eax

   488     stosl			# [es:edi] = ax; edi += 4;

   489     jc     37b

   490 38:

   491     emms

   492     pop    %ebx

   493     pop    %esi

   494     pop    %edi

   495     leave

   496     ret

   497     nop

   499  #

   500  # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized

   501  # so its high bit is 1.   This code is from NSPR.

   502  #

   503  # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,

   504  # 		          mp_digit *qp, mp_digit *rp)

   506  #  esp +  0:   Caller's ebx

   507  #  esp +  4:	return address

   508  #  esp +  8:	Nhi	argument

   509  #  esp + 12:	Nlo	argument

   510  #  esp + 16:	divisor	argument

   511  #  esp + 20:	qp	argument

   512  #  esp + 24:   rp	argument

   513  #  registers:

   514  # 	eax:

   515  #	ebx:	carry

   516  #	ecx:	a_len

   517  #	edx:

   518  #	esi:	a ptr

   519  #	edi:	c ptr

   520  #

   522 .globl	_s_mpv_div_2dx1d

   523 .type	_s_mpv_div_2dx1d,@function

   524 _s_mpv_div_2dx1d:

   525        push   %ebx

   526        mov    8(%esp),%edx

   527        mov    12(%esp),%eax

   528        mov    16(%esp),%ebx

   529        div    %ebx

   530        mov    20(%esp),%ebx

   531        mov    %eax,0(%ebx)

   532        mov    24(%esp),%ebx

   533        mov    %edx,0(%ebx)

   534        xor    %eax,%eax		# return zero

   535        pop    %ebx

   536        ret

   537        nop

The Tor Browser / file revision

security/nss/lib/freebl/mpi/mpi_x86_os2.s@b8a032363ba2

security/nss/lib/freebl/mpi/mpi_x86_os2.s