security/nss/lib/freebl/mpi/mpi_sse2.s

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 #ifdef DARWIN
     6 #define s_mpv_mul_d          _s_mpv_mul_d
     7 #define s_mpv_mul_d_add      _s_mpv_mul_d_add
     8 #define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
     9 #define s_mpv_sqr_add_prop   _s_mpv_sqr_add_prop
    10 #define s_mpv_div_2dx1d      _s_mpv_div_2dx1d
    11 #define TYPE_FUNCTION(x)
    12 #else
    13 #define TYPE_FUNCTION(x) .type x, @function
    14 #endif
    16 .text
    18  #  ebp - 8:    caller's esi
    19  #  ebp - 4:    caller's edi
    20  #  ebp + 0:    caller's ebp
    21  #  ebp + 4:    return address
    22  #  ebp + 8:    a       argument
    23  #  ebp + 12:   a_len   argument
    24  #  ebp + 16:   b       argument
    25  #  ebp + 20:   c       argument
    26  #  registers:
    27  #      ebx:
    28  #      ecx:    a_len
    29  #      esi:    a ptr
    30  #      edi:    c ptr
    31 .globl s_mpv_mul_d
    32 .private_extern s_mpv_mul_d
    33 TYPE_FUNCTION(s_mpv_mul_d)
    34 s_mpv_mul_d:
    35     push   %ebp
    36     mov    %esp, %ebp
    37     push   %edi
    38     push   %esi
    39     psubq  %mm2, %mm2           # carry = 0
    40     mov    12(%ebp), %ecx       # ecx = a_len
    41     movd   16(%ebp), %mm1       # mm1 = b
    42     mov    20(%ebp), %edi
    43     cmp    $0, %ecx
    44     je     2f                   # jmp if a_len == 0
    45     mov    8(%ebp), %esi        # esi = a
    46     cld
    47 1:
    48     movd   0(%esi), %mm0        # mm0 = *a++
    49     add    $4, %esi
    50     pmuludq %mm1, %mm0          # mm0 = b * *a++
    51     paddq  %mm0, %mm2           # add the carry
    52     movd   %mm2, 0(%edi)        # store the 32bit result
    53     add    $4, %edi
    54     psrlq  $32, %mm2            # save the carry
    55     dec    %ecx                 # --a_len
    56     jnz    1b                   # jmp if a_len != 0
    57 2:
    58     movd   %mm2, 0(%edi)        # *c = carry
    59     emms
    60     pop    %esi
    61     pop    %edi
    62     leave  
    63     ret    
    64     nop
    66  #  ebp - 8:    caller's esi
    67  #  ebp - 4:    caller's edi
    68  #  ebp + 0:    caller's ebp
    69  #  ebp + 4:    return address
    70  #  ebp + 8:    a       argument
    71  #  ebp + 12:   a_len   argument
    72  #  ebp + 16:   b       argument
    73  #  ebp + 20:   c       argument
    74  #  registers:
    75  #      ebx:
    76  #      ecx:    a_len
    77  #      esi:    a ptr
    78  #      edi:    c ptr
    79 .globl s_mpv_mul_d_add
    80 .private_extern s_mpv_mul_d_add
    81 TYPE_FUNCTION(s_mpv_mul_d_add)
    82 s_mpv_mul_d_add:
    83     push   %ebp
    84     mov    %esp, %ebp
    85     push   %edi
    86     push   %esi
    87     psubq  %mm2, %mm2           # carry = 0
    88     mov    12(%ebp), %ecx       # ecx = a_len
    89     movd   16(%ebp), %mm1       # mm1 = b
    90     mov    20(%ebp), %edi
    91     cmp    $0, %ecx
    92     je     2f                   # jmp if a_len == 0
    93     mov    8(%ebp), %esi        # esi = a
    94     cld
    95 1:
    96     movd   0(%esi), %mm0        # mm0 = *a++
    97     add    $4, %esi
    98     pmuludq %mm1, %mm0          # mm0 = b * *a++
    99     paddq  %mm0, %mm2           # add the carry
   100     movd   0(%edi), %mm0
   101     paddq  %mm0, %mm2           # add the carry
   102     movd   %mm2, 0(%edi)        # store the 32bit result
   103     add    $4, %edi
   104     psrlq  $32, %mm2            # save the carry
   105     dec    %ecx                 # --a_len
   106     jnz    1b                   # jmp if a_len != 0
   107 2:
   108     movd   %mm2, 0(%edi)        # *c = carry
   109     emms
   110     pop    %esi
   111     pop    %edi
   112     leave  
   113     ret    
   114     nop
   116  #  ebp - 12:   caller's ebx
   117  #  ebp - 8:    caller's esi
   118  #  ebp - 4:    caller's edi
   119  #  ebp + 0:    caller's ebp
   120  #  ebp + 4:    return address
   121  #  ebp + 8:    a       argument
   122  #  ebp + 12:   a_len   argument
   123  #  ebp + 16:   b       argument
   124  #  ebp + 20:   c       argument
   125  #  registers:
   126  #      eax:
   127  #      ebx:    carry
   128  #      ecx:    a_len
   129  #      esi:    a ptr
   130  #      edi:    c ptr
   131 .globl s_mpv_mul_d_add_prop
   132 .private_extern s_mpv_mul_d_add_prop
   133 TYPE_FUNCTION(s_mpv_mul_d_add_prop)
   134 s_mpv_mul_d_add_prop:
   135     push   %ebp
   136     mov    %esp, %ebp
   137     push   %edi
   138     push   %esi
   139     push   %ebx
   140     psubq  %mm2, %mm2           # carry = 0
   141     mov    12(%ebp), %ecx       # ecx = a_len
   142     movd   16(%ebp), %mm1       # mm1 = b
   143     mov    20(%ebp), %edi
   144     cmp    $0, %ecx
   145     je     2f                   # jmp if a_len == 0
   146     mov    8(%ebp), %esi        # esi = a
   147     cld
   148 1:
   149     movd   0(%esi), %mm0        # mm0 = *a++
   150     movd   0(%edi), %mm3        # fetch the sum
   151     add    $4, %esi
   152     pmuludq %mm1, %mm0          # mm0 = b * *a++
   153     paddq  %mm0, %mm2           # add the carry
   154     paddq  %mm3, %mm2           # add *c++
   155     movd   %mm2, 0(%edi)        # store the 32bit result
   156     add    $4, %edi
   157     psrlq  $32, %mm2            # save the carry
   158     dec    %ecx                 # --a_len
   159     jnz    1b                   # jmp if a_len != 0
   160 2:
   161     movd   %mm2, %ebx
   162     cmp    $0, %ebx             # is carry zero?
   163     jz     4f
   164     mov    0(%edi), %eax
   165     add    %ebx, %eax
   166     stosl
   167     jnc    4f
   168 3:
   169     mov    0(%edi), %eax        # add in current word from *c
   170     adc    $0, %eax
   171     stosl                       # [es:edi] = ax; edi += 4;
   172     jc     3b
   173 4:
   174     emms
   175     pop    %ebx
   176     pop    %esi
   177     pop    %edi
   178     leave  
   179     ret    
   180     nop
   182  #  ebp - 12:   caller's ebx
   183  #  ebp - 8:    caller's esi
   184  #  ebp - 4:    caller's edi
   185  #  ebp + 0:    caller's ebp
   186  #  ebp + 4:    return address
   187  #  ebp + 8:    pa      argument
   188  #  ebp + 12:   a_len   argument
   189  #  ebp + 16:   ps      argument
   190  #  registers:
   191  #      eax:
   192  #      ebx:    carry
   193  #      ecx:    a_len
   194  #      esi:    a ptr
   195  #      edi:    c ptr
   196 .globl s_mpv_sqr_add_prop
   197 .private_extern s_mpv_sqr_add_prop
   198 TYPE_FUNCTION(s_mpv_sqr_add_prop)
   199 s_mpv_sqr_add_prop:
   200     push   %ebp
   201     mov    %esp, %ebp
   202     push   %edi
   203     push   %esi
   204     push   %ebx
   205     psubq  %mm2, %mm2           # carry = 0
   206     mov    12(%ebp), %ecx       # ecx = a_len
   207     mov    16(%ebp), %edi
   208     cmp    $0, %ecx
   209     je     2f                   # jmp if a_len == 0
   210     mov    8(%ebp), %esi        # esi = a
   211     cld
   212 1:
   213     movd   0(%esi), %mm0        # mm0 = *a
   214     movd   0(%edi), %mm3        # fetch the sum
   215     add    $4, %esi
   216     pmuludq %mm0, %mm0          # mm0 = sqr(a)
   217     paddq  %mm0, %mm2           # add the carry
   218     paddq  %mm3, %mm2           # add the low word
   219     movd   4(%edi), %mm3
   220     movd   %mm2, 0(%edi)        # store the 32bit result
   221     psrlq  $32, %mm2
   222     paddq  %mm3, %mm2           # add the high word
   223     movd   %mm2, 4(%edi)        # store the 32bit result
   224     psrlq  $32, %mm2            # save the carry.
   225     add    $8, %edi
   226     dec    %ecx                 # --a_len
   227     jnz    1b                   # jmp if a_len != 0
   228 2:
   229     movd   %mm2, %ebx
   230     cmp    $0, %ebx             # is carry zero?
   231     jz     4f
   232     mov    0(%edi), %eax
   233     add    %ebx, %eax
   234     stosl
   235     jnc    4f
   236 3:
   237     mov    0(%edi), %eax        # add in current word from *c
   238     adc    $0, %eax
   239     stosl                       #  [es:edi] = ax; edi += 4;
   240     jc     3b
   241 4:
   242     emms
   243     pop    %ebx
   244     pop    %esi
   245     pop    %edi
   246     leave  
   247     ret    
   248     nop
   250  #
   251  # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
   252  # so its high bit is 1.   This code is from NSPR.
   253  #
   254  # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
   255  #                        mp_digit *qp, mp_digit *rp)
   257  #  esp +  0:   Caller's ebx
   258  #  esp +  4:   return address
   259  #  esp +  8:   Nhi     argument
   260  #  esp + 12:   Nlo     argument
   261  #  esp + 16:   divisor argument
   262  #  esp + 20:   qp      argument
   263  #  esp + 24:   rp      argument
   264  #  registers:
   265  #      eax:
   266  #      ebx:    carry
   267  #      ecx:    a_len
   268  #      edx:
   269  #      esi:    a ptr
   270  #      edi:    c ptr
   271  # 
   272 .globl s_mpv_div_2dx1d
   273 .private_extern s_mpv_div_2dx1d
   274 TYPE_FUNCTION(s_mpv_div_2dx1d)
   275 s_mpv_div_2dx1d:
   276        push   %ebx
   277        mov    8(%esp), %edx
   278        mov    12(%esp), %eax
   279        mov    16(%esp), %ebx
   280        div    %ebx
   281        mov    20(%esp), %ebx
   282        mov    %eax, 0(%ebx)
   283        mov    24(%esp), %ebx
   284        mov    %edx, 0(%ebx)
   285        xor    %eax, %eax        # return zero
   286        pop    %ebx
   287        ret    
   288        nop
   290 #ifndef DARWIN
   291  # Magic indicating no need for an executable stack
   292 .section .note.GNU-stack, "", @progbits
   293 .previous
   294 #endif

mercurial