security/nss/lib/freebl/mpi/mpi_x86_asm.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  *  mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
     3  * 
     4  * This Source Code Form is subject to the terms of the Mozilla Public
     5  * License, v. 2.0. If a copy of the MPL was not distributed with this
     6  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     8 #include "mpi-priv.h"
    10 static int is_sse = -1; /* cached path selector read by every dispatch stub below: < 0 = s_mpi_is_sse2() not called yet, 0 = take the integer path, > 0 = take the SSE2 path */
    11 extern unsigned long s_mpi_is_sse2(); /* defined outside this file; the dispatch stubs store its return value in is_sse */
    13 /*
    14  *   ebp - 36:	caller's esi
    15  *   ebp - 32:	caller's edi
    16  *   ebp - 28:	
    17  *   ebp - 24:	
    18  *   ebp - 20:	
    19  *   ebp - 16:	
    20  *   ebp - 12:	
    21  *   ebp - 8:	
    22  *   ebp - 4:	
    23  *   ebp + 0:	caller's ebp
    24  *   ebp + 4:	return address
    25  *   ebp + 8:	a	argument
    26  *   ebp + 12:	a_len	argument
    27  *   ebp + 16:	b	argument
    28  *   ebp + 20:	c	argument
    29  *   registers:
    30  *  	eax:
    31  * 	ebx:	carry
    32  * 	ecx:	a_len
    33  * 	edx:
    34  * 	esi:	a ptr
    35  * 	edi:	c ptr
    36  */
    37 __declspec(naked) void
    38 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
    39 {
    40   __asm {
    41     mov    eax, is_sse
    42     cmp    eax, 0
    43     je     s_mpv_mul_d_x86
    44     jg     s_mpv_mul_d_sse2
    45     call   s_mpi_is_sse2
    46     mov    is_sse, eax
    47     cmp    eax, 0
    48     jg     s_mpv_mul_d_sse2
    49 s_mpv_mul_d_x86:
    50     push   ebp
    51     mov    ebp,esp
    52     sub    esp,28
    53     push   edi
    54     push   esi
    55     push   ebx
    56     mov    ebx,0		; carry = 0
    57     mov    ecx,[ebp+12]		; ecx = a_len
    58     mov    edi,[ebp+20]
    59     cmp    ecx,0
    60     je     L_2			; jmp if a_len == 0
    61     mov    esi,[ebp+8]		; esi = a
    62     cld
    63 L_1:
    64     lodsd			; eax = [ds:esi]; esi += 4
    65     mov    edx,[ebp+16]		; edx = b
    66     mul    edx			; edx:eax = Phi:Plo = a_i * b
    68     add    eax,ebx		; add carry (ebx) to edx:eax
    69     adc    edx,0
    70     mov    ebx,edx		; high half of product becomes next carry
    72     stosd			; [es:edi] = ax; edi += 4;
    73     dec    ecx			; --a_len
    74     jnz    L_1			; jmp if a_len != 0
    75 L_2:
    76     mov    [edi],ebx		; *c = carry
    77     pop    ebx
    78     pop    esi
    79     pop    edi
    80     leave  
    81     ret    
    82     nop
    83 s_mpv_mul_d_sse2:
    84     push   ebp
    85     mov    ebp, esp
    86     push   edi
    87     push   esi
    88     psubq  mm2, mm2		; carry = 0
    89     mov    ecx, [ebp+12]	; ecx = a_len
    90     movd   mm1, [ebp+16]	; mm1 = b
    91     mov    edi, [ebp+20]
    92     cmp    ecx, 0
    93     je     L_6			; jmp if a_len == 0
    94     mov    esi, [ebp+8]		; esi = a
    95     cld
    96 L_5:
    97     movd   mm0, [esi]		; mm0 = *a++
    98     add    esi, 4
    99     pmuludq mm0, mm1		; mm0 = b * *a++
   100     paddq  mm2, mm0		; add the carry
   101     movd   [edi], mm2		; store the 32bit result
   102     add    edi, 4
   103     psrlq  mm2, 32		; save the carry
   104     dec    ecx			; --a_len
   105     jnz    L_5			; jmp if a_len != 0
   106 L_6:
   107     movd   [edi], mm2		; *c = carry
   108     emms
   109     pop    esi
   110     pop    edi
   111     leave  
   112     ret    
   113     nop
   114   }
   115 }
   117 /*
   118  *   ebp - 36:	caller's esi
   119  *   ebp - 32:	caller's edi
   120  *   ebp - 28:	
   121  *   ebp - 24:	
   122  *   ebp - 20:	
   123  *   ebp - 16:	
   124  *   ebp - 12:	
   125  *   ebp - 8:	
   126  *   ebp - 4:	
   127  *   ebp + 0:	caller's ebp
   128  *   ebp + 4:	return address
   129  *   ebp + 8:	a	argument
   130  *   ebp + 12:	a_len	argument
   131  *   ebp + 16:	b	argument
   132  *   ebp + 20:	c	argument
   133  *   registers:
   134  *  	eax:
   135  * 	ebx:	carry
   136  * 	ecx:	a_len
   137  * 	edx:
   138  * 	esi:	a ptr
   139  * 	edi:	c ptr
   140  */
   141 __declspec(naked) void
   142 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
   143 {
   144   __asm {
   145     mov    eax, is_sse
   146     cmp    eax, 0
   147     je     s_mpv_mul_d_add_x86
   148     jg     s_mpv_mul_d_add_sse2
   149     call   s_mpi_is_sse2
   150     mov    is_sse, eax
   151     cmp    eax, 0
   152     jg     s_mpv_mul_d_add_sse2
   153 s_mpv_mul_d_add_x86:
   154     push   ebp
   155     mov    ebp,esp
   156     sub    esp,28
   157     push   edi
   158     push   esi
   159     push   ebx
   160     mov    ebx,0		; carry = 0
   161     mov    ecx,[ebp+12]		; ecx = a_len
   162     mov    edi,[ebp+20]
   163     cmp    ecx,0
   164     je     L_11			; jmp if a_len == 0
   165     mov    esi,[ebp+8]		; esi = a
   166     cld
   167 L_10:
   168     lodsd			; eax = [ds:esi]; esi += 4
   169     mov    edx,[ebp+16]		; edx = b
   170     mul    edx			; edx:eax = Phi:Plo = a_i * b
   172     add    eax,ebx		; add carry (ebx) to edx:eax
   173     adc    edx,0
   174     mov    ebx,[edi]		; add in current word from *c
   175     add    eax,ebx		
   176     adc    edx,0
   177     mov    ebx,edx		; high half of product becomes next carry
   179     stosd			; [es:edi] = ax; edi += 4;
   180     dec    ecx			; --a_len
   181     jnz    L_10			; jmp if a_len != 0
   182 L_11:
   183     mov    [edi],ebx		; *c = carry
   184     pop    ebx
   185     pop    esi
   186     pop    edi
   187     leave  
   188     ret    
   189     nop
   190 s_mpv_mul_d_add_sse2:
   191     push   ebp
   192     mov    ebp, esp
   193     push   edi
   194     push   esi
   195     psubq  mm2, mm2		; carry = 0
   196     mov    ecx, [ebp+12]	; ecx = a_len
   197     movd   mm1, [ebp+16]	; mm1 = b
   198     mov    edi, [ebp+20]
   199     cmp    ecx, 0
   200     je     L_16			; jmp if a_len == 0
   201     mov    esi, [ebp+8]		; esi = a
   202     cld
   203 L_15:
   204     movd   mm0, [esi]		; mm0 = *a++
   205     add    esi, 4
   206     pmuludq mm0, mm1		; mm0 = b * *a++
   207     paddq  mm2, mm0		; add the carry
   208     movd   mm0, [edi]
   209     paddq  mm2, mm0		; add the carry
   210     movd   [edi], mm2		; store the 32bit result
   211     add    edi, 4
   212     psrlq  mm2, 32		; save the carry
   213     dec    ecx			; --a_len
   214     jnz    L_15			; jmp if a_len != 0
   215 L_16:
   216     movd   [edi], mm2		; *c = carry
   217     emms
   218     pop    esi
   219     pop    edi
   220     leave  
   221     ret    
   222     nop
   223   }
   224 }
   226 /*
   227  *   ebp - 36:	caller's esi
   228  *   ebp - 32:	caller's edi
   229  *   ebp - 28:	
   230  *   ebp - 24:	
   231  *   ebp - 20:	
   232  *   ebp - 16:	
   233  *   ebp - 12:	
   234  *   ebp - 8:	
   235  *   ebp - 4:	
   236  *   ebp + 0:	caller's ebp
   237  *   ebp + 4:	return address
   238  *   ebp + 8:	a	argument
   239  *   ebp + 12:	a_len	argument
   240  *   ebp + 16:	b	argument
   241  *   ebp + 20:	c	argument
   242  *   registers:
   243  *  	eax:
   244  * 	ebx:	carry
   245  * 	ecx:	a_len
   246  * 	edx:
   247  * 	esi:	a ptr
   248  * 	edi:	c ptr
   249  */
   250 __declspec(naked) void
   251 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
   252 {
   253   __asm {
   254     mov    eax, is_sse
   255     cmp    eax, 0
   256     je     s_mpv_mul_d_add_prop_x86
   257     jg     s_mpv_mul_d_add_prop_sse2
   258     call   s_mpi_is_sse2
   259     mov    is_sse, eax
   260     cmp    eax, 0
   261     jg     s_mpv_mul_d_add_prop_sse2
   262 s_mpv_mul_d_add_prop_x86:
   263     push   ebp
   264     mov    ebp,esp
   265     sub    esp,28
   266     push   edi
   267     push   esi
   268     push   ebx
   269     mov    ebx,0		; carry = 0
   270     mov    ecx,[ebp+12]		; ecx = a_len
   271     mov    edi,[ebp+20]
   272     cmp    ecx,0
   273     je     L_21			; jmp if a_len == 0
   274     cld
   275     mov    esi,[ebp+8]		; esi = a
   276 L_20:
   277     lodsd			; eax = [ds:esi]; esi += 4
   278     mov    edx,[ebp+16]		; edx = b
   279     mul    edx			; edx:eax = Phi:Plo = a_i * b
   281     add    eax,ebx		; add carry (ebx) to edx:eax
   282     adc    edx,0
   283     mov    ebx,[edi]		; add in current word from *c
   284     add    eax,ebx		
   285     adc    edx,0
   286     mov    ebx,edx		; high half of product becomes next carry
   288     stosd			; [es:edi] = ax; edi += 4;
   289     dec    ecx			; --a_len
   290     jnz    L_20			; jmp if a_len != 0
   291 L_21:
   292     cmp    ebx,0		; is carry zero?
   293     jz     L_23
   294     mov    eax,[edi]		; add in current word from *c
   295     add    eax,ebx
   296     stosd			; [es:edi] = ax; edi += 4;
   297     jnc    L_23
   298 L_22:
   299     mov    eax,[edi]		; add in current word from *c
   300     adc    eax,0
   301     stosd			; [es:edi] = ax; edi += 4;
   302     jc     L_22
   303 L_23:
   304     pop    ebx
   305     pop    esi
   306     pop    edi
   307     leave  
   308     ret    
   309     nop
   310 s_mpv_mul_d_add_prop_sse2:
   311     push   ebp
   312     mov    ebp, esp
   313     push   edi
   314     push   esi
   315     push   ebx
   316     psubq  mm2, mm2		; carry = 0
   317     mov    ecx, [ebp+12]	; ecx = a_len
   318     movd   mm1, [ebp+16]	; mm1 = b
   319     mov    edi, [ebp+20]
   320     cmp    ecx, 0
   321     je     L_26			; jmp if a_len == 0
   322     mov    esi, [ebp+8]		; esi = a
   323     cld
   324 L_25:
   325     movd   mm0, [esi]		; mm0 = *a++
   326     movd   mm3, [edi]		; fetch the sum
   327     add    esi, 4
   328     pmuludq mm0, mm1		; mm0 = b * *a++
   329     paddq  mm2, mm0		; add the carry
   330     paddq  mm2, mm3		; add *c++
   331     movd   [edi], mm2		; store the 32bit result
   332     add    edi, 4
   333     psrlq  mm2, 32		; save the carry
   334     dec    ecx			; --a_len
   335     jnz    L_25			; jmp if a_len != 0
   336 L_26:
   337     movd   ebx, mm2
   338     cmp    ebx, 0		; is carry zero?
   339     jz     L_28
   340     mov    eax, [edi]
   341     add    eax, ebx
   342     stosd
   343     jnc    L_28
   344 L_27:
   345     mov    eax, [edi]		; add in current word from *c
   346     adc	   eax, 0
   347     stosd			; [es:edi] = ax; edi += 4;
   348     jc     L_27
   349 L_28:
   350     emms
   351     pop    ebx
   352     pop    esi
   353     pop    edi
   354     leave  
   355     ret    
   356     nop
   357   }
   358 }
   360 /*
   361  *   ebp - 20:	caller's esi
   362  *   ebp - 16:	caller's edi
   363  *   ebp - 12:	
   364  *   ebp - 8:	carry
   365  *   ebp - 4:	a_len	local
   366  *   ebp + 0:	caller's ebp
   367  *   ebp + 4:	return address
   368  *   ebp + 8:	pa	argument
   369  *   ebp + 12:	a_len	argument
   370  *   ebp + 16:	ps	argument
   371  *   ebp + 20:	
   372  *   registers:
   373  *  	eax:
   374  * 	ebx:	carry
   375  * 	ecx:	a_len
   376  * 	edx:
   377  * 	esi:	a ptr
   378  * 	edi:	c ptr
   379  */
   380 __declspec(naked) void
   381 s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
   382 {
   383   __asm {
   384      mov    eax, is_sse
   385      cmp    eax, 0
   386      je     s_mpv_sqr_add_prop_x86
   387      jg     s_mpv_sqr_add_prop_sse2
   388      call   s_mpi_is_sse2
   389      mov    is_sse, eax
   390      cmp    eax, 0
   391      jg     s_mpv_sqr_add_prop_sse2
   392 s_mpv_sqr_add_prop_x86:
   393      push   ebp
   394      mov    ebp,esp
   395      sub    esp,12
   396      push   edi
   397      push   esi
   398      push   ebx
   399      mov    ebx,0		; carry = 0
   400      mov    ecx,[ebp+12]	; a_len
   401      mov    edi,[ebp+16]	; edi = ps
   402      cmp    ecx,0
   403      je     L_31		; jump if a_len == 0
   404      cld
   405      mov    esi,[ebp+8]		; esi = pa
   406 L_30:
   407      lodsd			; eax = [ds:si]; si += 4;
   408      mul    eax
   410      add    eax,ebx		; add "carry"
   411      adc    edx,0
   412      mov    ebx,[edi]
   413      add    eax,ebx		; add low word from result
   414      mov    ebx,[edi+4]
   415      stosd			; [es:di] = eax; di += 4;
   416      adc    edx,ebx		; add high word from result
   417      mov    ebx,0
   418      mov    eax,edx
   419      adc    ebx,0
   420      stosd			; [es:di] = eax; di += 4;
   421      dec    ecx			; --a_len
   422      jnz    L_30		; jmp if a_len != 0
   423 L_31:
   424     cmp    ebx,0		; is carry zero?
   425     jz     L_34
   426     mov    eax,[edi]		; add in current word from *c
   427     add    eax,ebx
   428     stosd			; [es:edi] = ax; edi += 4;
   429     jnc    L_34
   430 L_32:
   431     mov    eax,[edi]		; add in current word from *c
   432     adc    eax,0
   433     stosd			; [es:edi] = ax; edi += 4;
   434     jc     L_32
   435 L_34:
   436     pop    ebx
   437     pop    esi
   438     pop    edi
   439     leave  
   440     ret    
   441     nop
   442 s_mpv_sqr_add_prop_sse2:
   443     push   ebp
   444     mov    ebp, esp
   445     push   edi
   446     push   esi
   447     push   ebx
   448     psubq  mm2, mm2		; carry = 0
   449     mov    ecx, [ebp+12]	; ecx = a_len
   450     mov    edi, [ebp+16]
   451     cmp    ecx, 0
   452     je     L_36		; jmp if a_len == 0
   453     mov    esi, [ebp+8]		; esi = a
   454     cld
   455 L_35:
   456     movd   mm0, [esi]		; mm0 = *a
   457     movd   mm3, [edi]		; fetch the sum
   458     add	   esi, 4
   459     pmuludq mm0, mm0		; mm0 = sqr(a)
   460     paddq  mm2, mm0		; add the carry
   461     paddq  mm2, mm3		; add the low word
   462     movd   mm3, [edi+4]
   463     movd   [edi], mm2		; store the 32bit result
   464     psrlq  mm2, 32	
   465     paddq  mm2, mm3		; add the high word
   466     movd   [edi+4], mm2		; store the 32bit result
   467     psrlq  mm2, 32		; save the carry.
   468     add    edi, 8
   469     dec    ecx			; --a_len
   470     jnz    L_35			; jmp if a_len != 0
   471 L_36:
   472     movd   ebx, mm2
   473     cmp    ebx, 0		; is carry zero?
   474     jz     L_38
   475     mov    eax, [edi]
   476     add    eax, ebx
   477     stosd
   478     jnc    L_38
   479 L_37:
   480     mov    eax, [edi]		; add in current word from *c
   481     adc	   eax, 0
   482     stosd			; [es:edi] = ax; edi += 4;
   483     jc     L_37
   484 L_38:
   485     emms
   486     pop    ebx
   487     pop    esi
   488     pop    edi
   489     leave  
   490     ret    
   491     nop
   492   }
   493 }
   495 /* 
   496  *  Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
   497  *  so its high bit is 1.   This code is from NSPR.
   498  *
   499  *  Dump of assembler code for function s_mpv_div_2dx1d:
   500  *  
   501  *   esp +  0:   Caller's ebx
   502  *   esp +  4:	return address
   503  *   esp +  8:	Nhi	argument
   504  *   esp + 12:	Nlo	argument
   505  *   esp + 16:	divisor	argument
   506  *   esp + 20:	qp	argument
   507  *   esp + 24:   rp	argument
   508  *   registers:
   509  *  	eax:
   510  * 	ebx:	carry
   511  * 	ecx:	a_len
   512  * 	edx:
   513  * 	esi:	a ptr
   514  * 	edi:	c ptr
   515  */  
   516 __declspec(naked) mp_err
   517 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
   518 		mp_digit *qp, mp_digit *rp)
   519 {
   520   __asm {
   521        push   ebx
   522        mov    edx,[esp+8]
   523        mov    eax,[esp+12]
   524        mov    ebx,[esp+16]
   525        div    ebx
   526        mov    ebx,[esp+20]
   527        mov    [ebx],eax
   528        mov    ebx,[esp+24]
   529        mov    [ebx],edx
   530        xor    eax,eax		; return zero
   531        pop    ebx
   532        ret    
   533        nop
   534   }
   535 }

mercurial