security/nss/lib/freebl/mpi/mpi_x86.s

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 #
     2 # This Source Code Form is subject to the terms of the Mozilla Public
     3 # License, v. 2.0. If a copy of the MPL was not distributed with this
     4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     6 .data
     7 .align 4
     8  #
     9  # is_sse caches which implementation the entry points below dispatch to.
    10  # -1 (any negative value) means to call s_mpi_is_sse2 to determine if
        #    we support sse2 instructions, and to store its result here.
    11  #  0 means to use x86 instructions
    12  #  1 (any positive value) means to use sse2 instructions
    13 .type	is_sse,@object
    14 .size	is_sse,4
    15 is_sse: .long	-1 		# initial state: not probed yet
    17 #
    18 # GET var,reg loads the 32-bit variable var into reg; PUT reg,var
    19 # stores reg into var.  Two definitions are needed to handle the
    20 # difference between -fPIC and not PIC.  PIC is the default, since this
    21 # file seems to be exclusively linux right now (solaris uses
    22 # mpi_i86pc.s and windows uses mpi_x86_asm.c); define NO_PIC to select
       # the absolute-address forms.
       #
       # The PIC forms address var as a @GOTOFF offset from %ebx and do not
       # load %ebx themselves.
       # NOTE(review): this relies on every caller arriving with the GOT
       # address in %ebx; nothing in this file establishes it -- confirm.
       #
    23 .ifndef NO_PIC
       # PIC: var is reached relative to the GOT address held in %ebx.
    24 .macro GET   var,reg
    25     movl   \var@GOTOFF(%ebx),\reg
    26 .endm
    27 .macro PUT   reg,var
    28     movl   \reg,\var@GOTOFF(%ebx)
    29 .endm
    30 .else
       # non-PIC: var is reached through its absolute address.
    31 .macro GET   var,reg
    32     movl   \var,\reg
    33 .endm
    34 .macro PUT   reg,var
    35     movl   \reg,\var
    36 .endm
    37 .endif
    39 .text
    #
    # s_mpv_mul_d: multiply the a_len-word vector a by the single word b.
    #
    #   for i in 0 .. a_len-1:
    #       c[i]  = low  32 bits of (a[i] * b + carry)
    #       carry = high 32 bits of (a[i] * b + carry)
    #   c[a_len] = carry
    #
    # c therefore receives a_len + 1 words (one word even if a_len == 0).
    #
    # The entry point only dispatches on is_sse:
    #    0: use s_mpv_mul_d_x86 (integer mull)
    #   >0: use s_mpv_mul_d_sse2
    #   <0: call s_mpi_is_sse2, store its result in is_sse, then dispatch
    #       on that result.
    # Either implementation is reached with the stack exactly as it was on
    # entry, so each one builds its own frame.
    #
    # In PIC builds GET/PUT address is_sse relative to %ebx, which this code
    # does not load.
    # NOTE(review): that relies on the caller arriving with the GOT address
    # in %ebx -- confirm for every call path.
    #
    # Frame of s_mpv_mul_d_x86:
    #  ebp - 12:	caller's ebx
    #  ebp - 8:	caller's esi
    #  ebp - 4:	caller's edi
    #  ebp + 0:	caller's ebp
    #  ebp + 4:	return address
    #  ebp + 8:	a	argument
    #  ebp + 12:	a_len	argument
    #  ebp + 16:	b	argument
    #  ebp + 20:	c	argument
    #  registers:
    #	eax:	a[i], then low word of the product
    #	ebx:	carry
    #	ecx:	a_len (counts down)
    #	edx:	b, then high word of the product
    #	esi:	a ptr
    #	edi:	c ptr
    #
.globl	s_mpv_mul_d
.type	s_mpv_mul_d,@function
s_mpv_mul_d:
    GET    is_sse,%eax
    test   %eax,%eax
    je     s_mpv_mul_d_x86		# is_sse == 0: plain x86
    jg     s_mpv_mul_d_sse2		# is_sse  > 0: sse2
    # is_sse < 0: not probed yet.  With a 16-byte aligned caller, esp is
    # 16n+12 here (return address pushed); drop 12 more bytes so the stack
    # is 16-byte aligned at the call, as the i386 psABI expects.
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp			# arguments are back at their entry offsets
    PUT    %eax,is_sse			# remember the answer for later calls
    test   %eax,%eax
    jg     s_mpv_mul_d_sse2
s_mpv_mul_d_x86:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    xor    %ebx,%ebx		# carry = 0
    mov    12(%ebp),%ecx	# ecx = a_len
    mov    20(%ebp),%edi	# edi = c
    test   %ecx,%ecx
    je     2f			# jmp if a_len == 0
    mov    8(%ebp),%esi		# esi = a
    cld				# lodsl/stosl must walk upwards
1:
    lodsl			# eax = [ds:esi]; esi += 4
    mov    16(%ebp),%edx	# edx = b
    mull   %edx			# edx:eax = Phi:Plo = a_i * b
    add    %ebx,%eax		# add carry (%ebx) to edx:eax
    adc    $0,%edx
    mov    %edx,%ebx		# high half of product becomes next carry
    stosl			# [es:edi] = eax; edi += 4;
    dec    %ecx			# --a_len
    jnz    1b			# jmp if a_len != 0
2:
    mov    %ebx,0(%edi)		# *c = carry
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop
   109 s_mpv_mul_d_sse2:
           # SSE2 variant of s_mpv_mul_d, reached by a jump from the
           # s_mpv_mul_d entry point with the stack as it was on entry
           # (ebp + 8: a, + 12: a_len, + 16: b, + 20: c once the frame is set up).
           # mm1 holds b; mm2 holds the 64-bit running value whose low half is
           # the next result word and whose high half is the carry.
   110     push   %ebp
   111     mov    %esp,%ebp
   112     push   %edi
   113     push   %esi
   114     psubq  %mm2,%mm2		# carry = 0 (mm2 - mm2)
   115     mov    12(%ebp),%ecx	# ecx = a_len
   116     movd   16(%ebp),%mm1	# mm1 = b
   117     mov    20(%ebp),%edi		# edi = c
   118     cmp    $0,%ecx
   119     je     6f			# jmp if a_len == 0
   120     mov    8(%ebp),%esi		# esi = a
   121     cld				# no string instruction is used in this variant
   122 5:
   123     movd   0(%esi),%mm0         # mm0 = *a (zero-extended to 64 bits)
   124     add    $4,%esi		# a++
   125     pmuludq %mm1,%mm0           # mm0 = b * *a (64-bit product)
   126     paddq  %mm0,%mm2            # add the carry
   127     movd   %mm2,0(%edi)         # store the 32bit result
   128     add    $4,%edi		# c++
   129     psrlq  $32, %mm2		# save the carry (high 32 bits)
   130     dec    %ecx			# --a_len
   131     jnz    5b			# jmp if a_len != 0
   132 6:
   133     movd   %mm2,0(%edi)		# *c = carry
   134     emms				# leave MMX state before returning
   135     pop    %esi
   136     pop    %edi
   137     leave  
   138     ret    
   139     nop
   141  #  ebp - 36:	caller's esi
   142  #  ebp - 32:	caller's edi
   143  #  ebp - 28:	
   144  #  ebp - 24:	
   145  #  ebp - 20:	
   146  #  ebp - 16:	
   147  #  ebp - 12:	
   148  #  ebp - 8:	
   149  #  ebp - 4:	
   150  #  ebp + 0:	caller's ebp
   151  #  ebp + 4:	return address
   152  #  ebp + 8:	a	argument
   153  #  ebp + 12:	a_len	argument
   154  #  ebp + 16:	b	argument
   155  #  ebp + 20:	c	argument
   156  #  registers:
   157  # 	eax:
   158  #	ebx:	carry
   159  #	ecx:	a_len
   160  #	edx:
   161  #	esi:	a ptr
   162  #	edi:	c ptr
   163 .globl	s_mpv_mul_d_add
   164 .type	s_mpv_mul_d_add,@function
   165 s_mpv_mul_d_add:
   166     GET    is_sse,%eax
   167     cmp    $0,%eax
   168     je     s_mpv_mul_d_add_x86
   169     jg     s_mpv_mul_d_add_sse2
   170     call   s_mpi_is_sse2
   171     PUT    %eax,is_sse
   172     cmp    $0,%eax
   173     jg     s_mpv_mul_d_add_sse2
   174 s_mpv_mul_d_add_x86:
   175     push   %ebp
   176     mov    %esp,%ebp
   177     sub    $28,%esp
   178     push   %edi
   179     push   %esi
   180     push   %ebx
   181     movl   $0,%ebx		# carry = 0
   182     mov    12(%ebp),%ecx	# ecx = a_len
   183     mov    20(%ebp),%edi
   184     cmp    $0,%ecx
   185     je     11f			# jmp if a_len == 0
   186     mov    8(%ebp),%esi		# esi = a
   187     cld
   188 10:
   189     lodsl			# eax = [ds:esi]; esi += 4
   190     mov    16(%ebp),%edx	# edx = b
   191     mull   %edx			# edx:eax = Phi:Plo = a_i * b
   193     add    %ebx,%eax		# add carry (%ebx) to edx:eax
   194     adc    $0,%edx
   195     mov    0(%edi),%ebx		# add in current word from *c
   196     add    %ebx,%eax		
   197     adc    $0,%edx
   198     mov    %edx,%ebx		# high half of product becomes next carry
   200     stosl			# [es:edi] = ax; edi += 4;
   201     dec    %ecx			# --a_len
   202     jnz    10b			# jmp if a_len != 0
   203 11:
   204     mov    %ebx,0(%edi)		# *c = carry
   205     pop    %ebx
   206     pop    %esi
   207     pop    %edi
   208     leave  
   209     ret    
   210     nop
   211 s_mpv_mul_d_add_sse2:
           # SSE2 variant of s_mpv_mul_d_add, reached by a jump from the
           # s_mpv_mul_d_add entry point with the stack as it was on entry
           # (ebp + 8: a, + 12: a_len, + 16: b, + 20: c once the frame is set up).
           # mm1 holds b; mm2 holds the 64-bit running value whose low half is
           # the next result word and whose high half is the carry.
   212     push   %ebp
   213     mov    %esp,%ebp
   214     push   %edi
   215     push   %esi
   216     psubq  %mm2,%mm2		# carry = 0 (mm2 - mm2)
   217     mov    12(%ebp),%ecx	# ecx = a_len
   218     movd   16(%ebp),%mm1	# mm1 = b
   219     mov    20(%ebp),%edi		# edi = c
   220     cmp    $0,%ecx
   221     je     16f			# jmp if a_len == 0
   222     mov    8(%ebp),%esi		# esi = a
   223     cld				# no string instruction is used in this variant
   224 15:
   225     movd   0(%esi),%mm0         # mm0 = *a (zero-extended to 64 bits)
   226     add    $4,%esi		# a++
   227     pmuludq %mm1,%mm0           # mm0 = b * *a (64-bit product)
   228     paddq  %mm0,%mm2            # add the carry
   229     movd   0(%edi),%mm0		# mm0 = *c
   230     paddq  %mm0,%mm2            # add the current word of c; the sum still fits in 64 bits
   231     movd   %mm2,0(%edi)         # store the 32bit result
   232     add    $4,%edi		# c++
   233     psrlq  $32, %mm2		# save the carry (high 32 bits)
   234     dec    %ecx			# --a_len
   235     jnz    15b			# jmp if a_len != 0
   236 16:
   237     movd   %mm2,0(%edi)		# *c = carry (stored, not added)
   238     emms				# leave MMX state before returning
   239     pop    %esi
   240     pop    %edi
   241     leave  
   242     ret    
   243     nop
   245  #  ebp - 8:	caller's esi
   246  #  ebp - 4:	caller's edi
   247  #  ebp + 0:	caller's ebp
   248  #  ebp + 4:	return address
   249  #  ebp + 8:	a	argument
   250  #  ebp + 12:	a_len	argument
   251  #  ebp + 16:	b	argument
   252  #  ebp + 20:	c	argument
   253  #  registers:
   254  # 	eax:
   255  #	ebx:	carry
   256  #	ecx:	a_len
   257  #	edx:
   258  #	esi:	a ptr
   259  #	edi:	c ptr
   260 .globl	s_mpv_mul_d_add_prop
   261 .type	s_mpv_mul_d_add_prop,@function
   262 s_mpv_mul_d_add_prop:
   263     GET    is_sse,%eax
   264     cmp    $0,%eax
   265     je     s_mpv_mul_d_add_prop_x86
   266     jg     s_mpv_mul_d_add_prop_sse2
   267     call   s_mpi_is_sse2
   268     PUT    %eax,is_sse
   269     cmp    $0,%eax
   270     jg     s_mpv_mul_d_add_prop_sse2
   271 s_mpv_mul_d_add_prop_x86:
   272     push   %ebp
   273     mov    %esp,%ebp
   274     sub    $28,%esp
   275     push   %edi
   276     push   %esi
   277     push   %ebx
   278     movl   $0,%ebx		# carry = 0
   279     mov    12(%ebp),%ecx	# ecx = a_len
   280     mov    20(%ebp),%edi
   281     cmp    $0,%ecx
   282     je     21f			# jmp if a_len == 0
   283     cld
   284     mov    8(%ebp),%esi		# esi = a
   285 20:
   286     lodsl			# eax = [ds:esi]; esi += 4
   287     mov    16(%ebp),%edx	# edx = b
   288     mull   %edx			# edx:eax = Phi:Plo = a_i * b
   290     add    %ebx,%eax		# add carry (%ebx) to edx:eax
   291     adc    $0,%edx
   292     mov    0(%edi),%ebx		# add in current word from *c
   293     add    %ebx,%eax		
   294     adc    $0,%edx
   295     mov    %edx,%ebx		# high half of product becomes next carry
   297     stosl			# [es:edi] = ax; edi += 4;
   298     dec    %ecx			# --a_len
   299     jnz    20b			# jmp if a_len != 0
   300 21:
   301     cmp    $0,%ebx		# is carry zero?
   302     jz     23f
   303     mov    0(%edi),%eax		# add in current word from *c
   304     add	   %ebx,%eax
   305     stosl			# [es:edi] = ax; edi += 4;
   306     jnc    23f
   307 22:
   308     mov    0(%edi),%eax		# add in current word from *c
   309     adc	   $0,%eax
   310     stosl			# [es:edi] = ax; edi += 4;
   311     jc     22b
   312 23:
   313     pop    %ebx
   314     pop    %esi
   315     pop    %edi
   316     leave  
   317     ret    
   318     nop
   319 s_mpv_mul_d_add_prop_sse2:
           # SSE2 variant of s_mpv_mul_d_add_prop, reached by a jump from the
           # s_mpv_mul_d_add_prop entry point with the stack as it was on entry
           # (ebp + 8: a, + 12: a_len, + 16: b, + 20: c once the frame is set up).
           # mm1 holds b; mm2 holds the 64-bit running value whose low half is
           # the next result word and whose high half is the carry.  The final
           # carry is propagated with integer instructions.
   320     push   %ebp
   321     mov    %esp,%ebp
   322     push   %edi
   323     push   %esi
   324     push   %ebx
   325     psubq  %mm2,%mm2		# carry = 0 (mm2 - mm2)
   326     mov    12(%ebp),%ecx	# ecx = a_len
   327     movd   16(%ebp),%mm1	# mm1 = b
   328     mov    20(%ebp),%edi		# edi = c
   329     cmp    $0,%ecx
   330     je     26f			# jmp if a_len == 0
   331     mov    8(%ebp),%esi		# esi = a
   332     cld				# the stosl instructions below must increment edi
   333 25:
   334     movd   0(%esi),%mm0         # mm0 = *a (zero-extended to 64 bits)
   335     movd   0(%edi),%mm3		# fetch the sum: mm3 = *c
   336     add    $4,%esi		# a++
   337     pmuludq %mm1,%mm0           # mm0 = b * *a (64-bit product)
   338     paddq  %mm0,%mm2            # add the carry
   339     paddq  %mm3,%mm2            # add *c; the sum still fits in 64 bits
   340     movd   %mm2,0(%edi)         # store the 32bit result
   341     add    $4,%edi		# c++
   342     psrlq  $32, %mm2		# save the carry (high 32 bits)
   343     dec    %ecx			# --a_len
   344     jnz    25b			# jmp if a_len != 0
   345 26:
   346     movd   %mm2,%ebx		# ebx = final carry
   347     cmp    $0,%ebx		# is carry zero?
   348     jz     28f
   349     mov    0(%edi),%eax		# add the carry to the current word of *c
   350     add    %ebx, %eax
   351     stosl			# [es:edi] = eax; edi += 4; (CF untouched)
   352     jnc    28f
   353 27:
   354     mov    0(%edi),%eax		# add in current word from *c
   355     adc	   $0,%eax		# CF is set on every pass through here
   356     stosl			# [es:edi] = eax; edi += 4; (CF untouched)
   357     jc     27b
   358 28:
   359     emms				# leave MMX state before returning
   360     pop    %ebx
   361     pop    %esi
   362     pop    %edi
   363     leave  
   364     ret    
   365     nop
   368  #  ebp - 20:	caller's esi
   369  #  ebp - 16:	caller's edi
   370  #  ebp - 12:	
   371  #  ebp - 8:	carry
   372  #  ebp - 4:	a_len	local
   373  #  ebp + 0:	caller's ebp
   374  #  ebp + 4:	return address
   375  #  ebp + 8:	pa	argument
   376  #  ebp + 12:	a_len	argument
   377  #  ebp + 16:	ps	argument
   378  #  ebp + 20:	
   379  #  registers:
   380  # 	eax:
   381  #	ebx:	carry
   382  #	ecx:	a_len
   383  #	edx:
   384  #	esi:	a ptr
   385  #	edi:	c ptr
   387 .globl	s_mpv_sqr_add_prop
   388 .type	s_mpv_sqr_add_prop,@function
   389 s_mpv_sqr_add_prop:
   390      GET   is_sse,%eax
   391      cmp    $0,%eax
   392      je     s_mpv_sqr_add_prop_x86
   393      jg     s_mpv_sqr_add_prop_sse2
   394      call   s_mpi_is_sse2
   395      PUT    %eax,is_sse
   396      cmp    $0,%eax
   397      jg     s_mpv_sqr_add_prop_sse2
   398 s_mpv_sqr_add_prop_x86:
   399      push   %ebp
   400      mov    %esp,%ebp
   401      sub    $12,%esp
   402      push   %edi
   403      push   %esi
   404      push   %ebx
   405      movl   $0,%ebx		# carry = 0
   406      mov    12(%ebp),%ecx	# a_len
   407      mov    16(%ebp),%edi	# edi = ps
   408      cmp    $0,%ecx
   409      je     31f			# jump if a_len == 0
   410      cld
   411      mov    8(%ebp),%esi	# esi = pa
   412 30:
   413      lodsl			# %eax = [ds:si]; si += 4;
   414      mull   %eax
   416      add    %ebx,%eax		# add "carry"
   417      adc    $0,%edx
   418      mov    0(%edi),%ebx
   419      add    %ebx,%eax		# add low word from result
   420      mov    4(%edi),%ebx
   421      stosl			# [es:di] = %eax; di += 4;
   422      adc    %ebx,%edx		# add high word from result
   423      movl   $0,%ebx
   424      mov    %edx,%eax
   425      adc    $0,%ebx
   426      stosl			# [es:di] = %eax; di += 4;
   427      dec    %ecx		# --a_len
   428      jnz    30b			# jmp if a_len != 0
   429 31:
   430     cmp    $0,%ebx		# is carry zero?
   431     jz     34f
   432     mov    0(%edi),%eax		# add in current word from *c
   433     add	   %ebx,%eax
   434     stosl			# [es:edi] = ax; edi += 4;
   435     jnc    34f
   436 32:
   437     mov    0(%edi),%eax		# add in current word from *c
   438     adc	   $0,%eax
   439     stosl			# [es:edi] = ax; edi += 4;
   440     jc     32b
   441 34:
   442     pop    %ebx
   443     pop    %esi
   444     pop    %edi
   445     leave  
   446     ret    
   447     nop
   448 s_mpv_sqr_add_prop_sse2:
           # SSE2 variant of s_mpv_sqr_add_prop, reached by a jump from the
           # s_mpv_sqr_add_prop entry point with the stack as it was on entry
           # (ebp + 8: pa, + 12: a_len, + 16: ps once the frame is set up).
           # Each pass squares one word of pa and adds it into two words of ps;
           # mm2 carries the overflow from one pass to the next.  The final
           # carry is propagated with integer instructions.
   449     push   %ebp
   450     mov    %esp,%ebp
   451     push   %edi
   452     push   %esi
   453     push   %ebx
   454     psubq  %mm2,%mm2		# carry = 0 (mm2 - mm2)
   455     mov    12(%ebp),%ecx	# ecx = a_len
   456     mov    16(%ebp),%edi		# edi = ps
   457     cmp    $0,%ecx
   458     je     36f			# jmp if a_len == 0
   459     mov    8(%ebp),%esi		# esi = a
   460     cld				# the stosl instructions below must increment edi
   461 35:
   462     movd   0(%esi),%mm0        # mm0 = *a (zero-extended to 64 bits)
   463     movd   0(%edi),%mm3	       # fetch the sum: mm3 = low word of the result
   464     add	   $4,%esi		# a++
   465     pmuludq %mm0,%mm0          # mm0 = sqr(a) (64-bit product)
   466     paddq  %mm0,%mm2           # add the carry
   467     paddq  %mm3,%mm2           # add the low word
   468     movd   4(%edi),%mm3		# mm3 = high word of the result
   469     movd   %mm2,0(%edi)        # store the 32bit result (low word)
   470     psrlq  $32, %mm2		# keep the upper half of the sum
   471     paddq  %mm3,%mm2           # add the high word
   472     movd   %mm2,4(%edi)        # store the 32bit result (high word)
   473     psrlq  $32, %mm2	       # save the carry.
   474     add    $8,%edi		# advance ps by two words
   475     dec    %ecx			# --a_len
   476     jnz    35b			# jmp if a_len != 0
   477 36:
   478     movd   %mm2,%ebx		# ebx = final carry
   479     cmp    $0,%ebx		# is carry zero?
   480     jz     38f
   481     mov    0(%edi),%eax		# add the carry to the current word of *ps
   482     add    %ebx, %eax
   483     stosl			# [es:edi] = eax; edi += 4; (CF untouched)
   484     jnc    38f
   485 37:
   486     mov    0(%edi),%eax		# add in current word from *ps
   487     adc	   $0,%eax		# CF is set on every pass through here
   488     stosl			# [es:edi] = eax; edi += 4; (CF untouched)
   489     jc     37b
   490 38:
   491     emms				# leave MMX state before returning
   492     pop    %ebx
   493     pop    %esi
   494     pop    %edi
   495     leave  
   496     ret    
   497     nop
   499  #
   500  # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
   501  # so its high bit is 1.   This code is from NSPR.
        # The quotient is stored in *qp, the remainder in *rp, and the
        # function always returns zero.
        # The div instruction raises a divide error unless the quotient fits
        # in 32 bits, i.e. unless Nhi < divisor; this code does not check.
   502  #
   503  # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
   504  # 		          mp_digit *qp, mp_digit *rp)
        #  stack, after the push of ebx:
   506  #  esp +  0:   Caller's ebx
   507  #  esp +  4:	return address
   508  #  esp +  8:	Nhi	argument
   509  #  esp + 12:	Nlo	argument
   510  #  esp + 16:	divisor	argument
   511  #  esp + 20:	qp	argument
   512  #  esp + 24:   rp	argument
   513  #  registers:
   514  # 	eax:	Nlo, then the quotient, then the return value (0)
   515  #	ebx:	divisor, then qp, then rp
   516  #	ecx:	not used
   517  #	edx:	Nhi, then the remainder
   518  #	esi:	not used
   519  #	edi:	not used
   520  # 
   522 .globl	s_mpv_div_2dx1d
   523 .type	s_mpv_div_2dx1d,@function
   524 s_mpv_div_2dx1d:
   525        push   %ebx
   526        mov    8(%esp),%edx		# edx = Nhi
   527        mov    12(%esp),%eax		# eax = Nlo
   528        mov    16(%esp),%ebx		# ebx = divisor
   529        div    %ebx			# eax = edx:eax / ebx; edx = remainder
   530        mov    20(%esp),%ebx		# ebx = qp
   531        mov    %eax,0(%ebx)		# *qp = quotient
   532        mov    24(%esp),%ebx		# ebx = rp
   533        mov    %edx,0(%ebx)		# *rp = remainder
   534        xor    %eax,%eax		# return zero
   535        pop    %ebx
   536        ret    
   537        nop
   539  # Magic indicating no need for an executable stack: an empty
        # .note.GNU-stack section with no flags set.
   540 .section .note.GNU-stack, "", @progbits
   541 .previous			# switch back to the section that was active before

mercurial