security/nss/lib/freebl/mpi/mpi_sse2.s

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
# Build configuration.  On DARWIN the entry points are renamed to their
# underscore-prefixed forms and TYPE_FUNCTION expands to nothing; on every
# other target TYPE_FUNCTION emits a .type directive marking the symbol as
# a function.
#if !defined(DARWIN)
#define TYPE_FUNCTION(x) .type x, @function
#else
#define TYPE_FUNCTION(x)
#define s_mpv_mul_d _s_mpv_mul_d
#define s_mpv_mul_d_add _s_mpv_mul_d_add
#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
#endif

.text
michael@0 17
# s_mpv_mul_d(a, a_len, b, c)
#
# Multiplies the a_len-word vector at a by the single word b and stores the
# result at c.  Words are 32 bits wide and are processed from the lowest
# address upwards, the carry moving towards higher addresses:
#     c[i]     = low 32 bits of (a[i] * b + carry)     for i < a_len
#     c[a_len] = final carry
# so c must have room for a_len + 1 words.  With a_len == 0 only c[0] is
# written (with zero).
#
# Stack frame after the prologue (cdecl, all arguments on the stack):
# ebp - 8: caller esi
# ebp - 4: caller edi
# ebp + 0: caller ebp
# ebp + 4: return address
# ebp + 8: a argument
# ebp + 12: a_len argument
# ebp + 16: b argument
# ebp + 20: c argument
# Registers:
# ecx: words of a still to process
# esi: a ptr
# edi: c ptr
# mm0: current 64-bit product
# mm1: b, zero-extended to 64 bits
# mm2: running 64-bit sum; its high half is the carry into the next word
.globl s_mpv_mul_d
.private_extern s_mpv_mul_d
TYPE_FUNCTION(s_mpv_mul_d)
s_mpv_mul_d:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        pxor    %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: just store the zero carry
        mov     8(%ebp), %esi           # esi = a
1:
        movd    (%esi), %mm0            # mm0 = *a
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a (full 64-bit product)
        paddq   %mm0, %mm2              # product + carry from previous word
        movd    %mm2, (%edi)            # *c = low 32 bits
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # keep the high 32 bits as the carry
        dec     %ecx                    # --a_len
        jnz     1b
2:
        movd    %mm2, (%edi)            # c[a_len] = final carry
        emms                            # leave MMX state so x87 code can run
        pop     %esi
        pop     %edi
        leave
        ret
        nop
michael@0 65
# s_mpv_mul_d_add(a, a_len, b, c)
#
# Multiply-accumulate: multiplies the a_len-word vector at a by the single
# word b and adds the product into the words already stored at c.  Words are
# 32 bits wide and are processed from the lowest address upwards:
#     c[i]     = low 32 bits of (a[i] * b + c[i] + carry)   for i < a_len
#     c[a_len] = final carry   (stored, NOT added to the previous contents)
# so c must have room for a_len + 1 words.  With a_len == 0 only c[0] is
# written (with zero).
#
# Stack frame after the prologue (cdecl, all arguments on the stack):
# ebp - 8: caller esi
# ebp - 4: caller edi
# ebp + 0: caller ebp
# ebp + 4: return address
# ebp + 8: a argument
# ebp + 12: a_len argument
# ebp + 16: b argument
# ebp + 20: c argument
# Registers:
# ecx: words of a still to process
# esi: a ptr
# edi: c ptr
# mm0: current 64-bit product, then the current word of c
# mm1: b, zero-extended to 64 bits
# mm2: running 64-bit sum; its high half is the carry into the next word
.globl s_mpv_mul_d_add
.private_extern s_mpv_mul_d_add
TYPE_FUNCTION(s_mpv_mul_d_add)
s_mpv_mul_d_add:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        pxor    %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: just store the zero carry
        mov     8(%ebp), %esi           # esi = a
1:
        movd    (%esi), %mm0            # mm0 = *a
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a (full 64-bit product)
        paddq   %mm0, %mm2              # product + carry from previous word
        movd    (%edi), %mm0            # mm0 = *c (existing sum word)
        paddq   %mm0, %mm2              # ... + *c
        movd    %mm2, (%edi)            # *c = low 32 bits
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # keep the high 32 bits as the carry
        dec     %ecx                    # --a_len
        jnz     1b
2:
        movd    %mm2, (%edi)            # c[a_len] = final carry
        emms                            # leave MMX state so x87 code can run
        pop     %esi
        pop     %edi
        leave
        ret
        nop
michael@0 115
# s_mpv_mul_d_add_prop(a, a_len, b, c)
#
# Multiply-accumulate with carry propagation: multiplies the a_len-word
# vector at a by the single word b, adds the product into c[0..a_len-1], and
# then ADDS the final carry into c[a_len], rippling any further carry into
# c[a_len+1], c[a_len+2], ... until an addition produces no carry out.
# Words are 32 bits wide and are processed from the lowest address upwards.
#
# The ripple loop has no length bound: the caller must guarantee that c is
# long enough for the carry to die out inside the buffer.
#
# Stack frame after the prologue (cdecl, all arguments on the stack):
# ebp - 8: caller esi
# ebp - 4: caller edi
# ebp + 0: caller ebp
# ebp + 4: return address
# ebp + 8: a argument
# ebp + 12: a_len argument
# ebp + 16: b argument
# ebp + 20: c argument
# Registers (only caller-saved scratch besides the saved esi/edi):
# eax: final carry out of the multiply loop
# ecx: words of a still to process
# esi: a ptr
# edi: c ptr
# mm0: current 64-bit product
# mm1: b, zero-extended to 64 bits
# mm2: running 64-bit sum; its high half is the carry into the next word
# mm3: current word of c
.globl s_mpv_mul_d_add_prop
.private_extern s_mpv_mul_d_add_prop
TYPE_FUNCTION(s_mpv_mul_d_add_prop)
s_mpv_mul_d_add_prop:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        pxor    %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      4f                      # a_len == 0: carry is zero, nothing to do
        mov     8(%ebp), %esi           # esi = a
1:
        movd    (%esi), %mm0            # mm0 = *a
        movd    (%edi), %mm3            # mm3 = *c (existing sum word)
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = b * *a (full 64-bit product)
        paddq   %mm0, %mm2              # product + carry from previous word
        paddq   %mm3, %mm2              # ... + *c
        movd    %mm2, (%edi)            # *c = low 32 bits
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # keep the high 32 bits as the carry
        dec     %ecx                    # --a_len
        jnz     1b
2:
        movd    %mm2, %eax              # eax = carry out of the last word
        test    %eax, %eax
        jz      4f                      # no carry to propagate
        add     %eax, (%edi)            # c[a_len] += carry
        jnc     4f                      # absorbed without overflow
3:
        lea     4(%edi), %edi           # next word; lea leaves CF intact
        adcl    $0, (%edi)              # add the carry bit into it
        jc      3b                      # keep rippling while it overflows
4:
        emms                            # leave MMX state so x87 code can run
        pop     %esi
        pop     %edi
        leave
        ret
        nop
michael@0 181
# s_mpv_sqr_add_prop(pa, a_len, ps)
#
# Squares each 32-bit word of the a_len-word vector at pa and adds the
# 64-bit square of pa[i] into the word pair ps[2*i], ps[2*i+1], carrying
# from each pair into the next.  The carry left after the last pair is then
# ADDED into ps[2*a_len] and rippled into the following words until an
# addition produces no carry out.  Words are processed from the lowest
# address upwards.
#
# The ripple loop has no length bound: the caller must guarantee that ps is
# long enough for the carry to die out inside the buffer.
#
# Stack frame after the prologue (cdecl, all arguments on the stack):
# ebp - 8: caller esi
# ebp - 4: caller edi
# ebp + 0: caller ebp
# ebp + 4: return address
# ebp + 8: pa argument
# ebp + 12: a_len argument
# ebp + 16: ps argument
# Registers (only caller-saved scratch besides the saved esi/edi):
# eax: final carry out of the squaring loop
# ecx: words of pa still to process
# esi: pa ptr
# edi: ps ptr
# mm0: current word of pa, then its 64-bit square
# mm2: running 64-bit sum; its high half is the carry into the next word
# mm3: current word of ps
.globl s_mpv_sqr_add_prop
.private_extern s_mpv_sqr_add_prop
TYPE_FUNCTION(s_mpv_sqr_add_prop)
s_mpv_sqr_add_prop:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        pxor    %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        mov     16(%ebp), %edi          # edi = ps
        test    %ecx, %ecx
        jz      4f                      # a_len == 0: carry is zero, nothing to do
        mov     8(%ebp), %esi           # esi = pa
1:
        movd    (%esi), %mm0            # mm0 = *pa
        movd    (%edi), %mm3            # mm3 = low word of the ps pair
        add     $4, %esi                # pa++
        pmuludq %mm0, %mm0              # mm0 = (*pa)^2, full 64 bits
        paddq   %mm0, %mm2              # square + carry from previous pair
        paddq   %mm3, %mm2              # ... + low word of the pair
        movd    4(%edi), %mm3           # mm3 = high word of the ps pair
        movd    %mm2, (%edi)            # store the new low word
        psrlq   $32, %mm2               # move on to the high half
        paddq   %mm3, %mm2              # ... + high word of the pair
        movd    %mm2, 4(%edi)           # store the new high word
        psrlq   $32, %mm2               # what is left is the carry
        add     $8, %edi                # ps += 2 words
        dec     %ecx                    # --a_len
        jnz     1b
2:
        movd    %mm2, %eax              # eax = carry out of the last pair
        test    %eax, %eax
        jz      4f                      # no carry to propagate
        add     %eax, (%edi)            # ps[2*a_len] += carry
        jnc     4f                      # absorbed without overflow
3:
        lea     4(%edi), %edi           # next word; lea leaves CF intact
        adcl    $0, (%edi)              # add the carry bit into it
        jc      3b                      # keep rippling while it overflows
4:
        emms                            # leave MMX state so x87 code can run
        pop     %esi
        pop     %edi
        leave
        ret
        nop
michael@0 249
#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1. This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
# mp_digit *qp, mp_digit *rp)
#
# Stores the 32-bit quotient at *qp and the 32-bit remainder at *rp, and
# always returns zero.
#
# Precondition: the quotient must fit in 32 bits, i.e. Nhi < divisor.  The
# x86 div instruction raises a divide error when the divisor is zero or the
# quotient overflows; this routine performs no check of its own.
#
# Frameless leaf routine; nothing is pushed, so on entry:
# esp + 0: return address
# esp + 4: Nhi argument
# esp + 8: Nlo argument
# esp + 12: divisor argument
# esp + 16: qp argument
# esp + 20: rp argument
# Registers (caller-saved scratch only):
# eax: Nlo, then the quotient, finally the zero return value
# ecx: qp, then rp
# edx: Nhi, then the remainder
#
.globl s_mpv_div_2dx1d
.private_extern s_mpv_div_2dx1d
TYPE_FUNCTION(s_mpv_div_2dx1d)
s_mpv_div_2dx1d:
        mov     4(%esp), %edx           # edx = Nhi
        mov     8(%esp), %eax           # eax = Nlo
        divl    12(%esp)                # eax = quotient, edx = remainder
        mov     16(%esp), %ecx          # ecx = qp
        mov     %eax, (%ecx)            # *qp = quotient
        mov     20(%esp), %ecx          # ecx = rp
        mov     %edx, (%ecx)            # *rp = remainder
        xor     %eax, %eax              # return zero
        ret
        nop
michael@0 289
#if !defined(DARWIN)
# Emit an empty .note.GNU-stack section, which tells the linker that this
# object does not need an executable stack, then switch back to the
# previously active section.
.section .note.GNU-stack, "", @progbits
.previous
#endif

mercurial