media/libvpx/vp8/encoder/x86/encodeopt.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
;-----------------------------------------------------------------------
; int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
; SSE2: sum of squared differences over one 4x4 block (16 int16 coeffs).
; In:   arg(0) = coeff_ptr (points at 16 shorts)
;       arg(1) = dcoef_ptr (points at 16 shorts)
; Out:  rax = sum((coeff[i] - dcoef[i])^2) for i = 0..15
; NOTE(review): movdqa requires both pointers 16-byte aligned --
; presumably guaranteed by the caller's buffers; confirm on the C side.
;-----------------------------------------------------------------------
michael@0 14 ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
michael@0 15 global sym(vp8_block_error_xmm) PRIVATE
michael@0 16 sym(vp8_block_error_xmm):
michael@0 17 push rbp
michael@0 18 mov rbp, rsp
michael@0 19 SHADOW_ARGS_TO_STACK 2 ; macro hides SysV/Win64 arg-passing differences
michael@0 20 push rsi
michael@0 21 push rdi
michael@0 22 ; end prologue
michael@0 23
michael@0 24 mov rsi, arg(0) ;coeff_ptr
michael@0 25 mov rdi, arg(1) ;dcoef_ptr
michael@0 26
michael@0 27 movdqa xmm0, [rsi] ; coeff[0..7]
michael@0 28 movdqa xmm1, [rdi] ; dcoef[0..7]
michael@0 29
michael@0 30 movdqa xmm2, [rsi+16] ; coeff[8..15]
michael@0 31 movdqa xmm3, [rdi+16] ; dcoef[8..15]
michael@0 32
michael@0 33 psubw xmm0, xmm1 ; d[0..7] = coeff - dcoef
michael@0 34 psubw xmm2, xmm3 ; d[8..15]
michael@0 35
michael@0 36 pmaddwd xmm0, xmm0 ; square and pair-sum: 4 dwords of d(2i)^2 + d(2i+1)^2
michael@0 37 pmaddwd xmm2, xmm2
michael@0 38
michael@0 39 paddd xmm0, xmm2 ; 4 dword partial sums
michael@0 40
michael@0 41 pxor xmm5, xmm5 ; zero, for widening the dword lanes
michael@0 42 movdqa xmm1, xmm0
michael@0 43
michael@0 44 punpckldq xmm0, xmm5 ; spread low two dwords into qword-wide lanes
michael@0 45 punpckhdq xmm1, xmm5 ; spread high two dwords likewise
michael@0 46
michael@0 47 paddd xmm0, xmm1 ; add partial sums (upper halves of lanes are zero)
michael@0 48 movdqa xmm1, xmm0
michael@0 49
michael@0 50 psrldq xmm0, 8 ; bring upper partial sum down to the low lane
michael@0 51 paddd xmm0, xmm1 ; final total in the low lane
michael@0 52
michael@0 53 movq rax, xmm0 ; return value (rax maps to eax on 32-bit builds)
michael@0 54
michael@0 55 pop rdi
michael@0 56 pop rsi
michael@0 57 ; begin epilog
michael@0 58 UNSHADOW_ARGS
michael@0 59 pop rbp
michael@0 60 ret
michael@0 61
;-----------------------------------------------------------------------
; int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
; MMX: sum of squared differences over one 4x4 block (16 int16 coeffs),
; processed as four groups of 4 shorts. Same result as the xmm variant.
; In:   arg(0) = coeff_ptr, arg(1) = dcoef_ptr
; Out:  rax = sum((coeff[i] - dcoef[i])^2), i = 0..15
; NOTE(review): no emms here -- presumably the caller restores the FPU
; state after MMX use; confirm against the C-side wrappers.
;-----------------------------------------------------------------------
michael@0 62 ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
michael@0 63 global sym(vp8_block_error_mmx) PRIVATE
michael@0 64 sym(vp8_block_error_mmx):
michael@0 65 push rbp
michael@0 66 mov rbp, rsp
michael@0 67 SHADOW_ARGS_TO_STACK 2
michael@0 68 push rsi
michael@0 69 push rdi
michael@0 70 ; end prolog
michael@0 71
michael@0 72
michael@0 73 mov rsi, arg(0) ;coeff_ptr
michael@0 74 pxor mm7, mm7 ; constant zero
michael@0 75
michael@0 76 mov rdi, arg(1) ;dcoef_ptr
michael@0 77 movq mm3, [rsi] ; coeff[0..3]
michael@0 78
michael@0 79 movq mm4, [rdi] ; dcoef[0..3]
michael@0 80 movq mm5, [rsi+8] ; coeff[4..7]
michael@0 81
michael@0 82 movq mm6, [rdi+8] ; dcoef[4..7]
michael@0 83 pxor mm1, mm1 ; from movd mm1, dc ; dc =0
michael@0 84
michael@0 85 movq mm2, mm7 ; mm2 = 0
michael@0 86 psubw mm5, mm6 ; d[4..7]
michael@0 87
michael@0 88 por mm1, mm2 ; mm1 still 0 (vestige of the old dc path above)
michael@0 89 pmaddwd mm5, mm5 ; d4^2+d5^2, d6^2+d7^2
michael@0 90
michael@0 91 pcmpeqw mm1, mm7 ; 0 == 0 wordwise -> mask of all ones
michael@0 92 psubw mm3, mm4 ; d[0..3]
michael@0 93
michael@0 94 pand mm1, mm3 ; all-ones mask: keeps every diff (dc always included)
michael@0 95 pmaddwd mm1, mm1 ; d0^2+d1^2, d2^2+d3^2
michael@0 96
michael@0 97 paddd mm1, mm5 ; partial sums for coeffs 0..7
michael@0 98 movq mm3, [rsi+16] ; coeff[8..11]
michael@0 99
michael@0 100 movq mm4, [rdi+16] ; dcoef[8..11]
michael@0 101 movq mm5, [rsi+24] ; coeff[12..15]
michael@0 102
michael@0 103 movq mm6, [rdi+24] ; dcoef[12..15]
michael@0 104 psubw mm5, mm6 ; d[12..15]
michael@0 105
michael@0 106 pmaddwd mm5, mm5
michael@0 107 psubw mm3, mm4 ; d[8..11]
michael@0 108
michael@0 109 pmaddwd mm3, mm3
michael@0 110 paddd mm3, mm5 ; partial sums for coeffs 8..15
michael@0 111
michael@0 112 paddd mm1, mm3 ; two dword partial sums
michael@0 113 movq mm0, mm1
michael@0 114
michael@0 115 psrlq mm1, 32 ; high dword down to low
michael@0 116 paddd mm0, mm1 ; final total in low dword
michael@0 117
michael@0 118 movq rax, mm0 ; return value
michael@0 119
michael@0 120 pop rdi
michael@0 121 pop rsi
michael@0 122 ; begin epilog
michael@0 123 UNSHADOW_ARGS
michael@0 124 pop rbp
michael@0 125 ret
michael@0 126
michael@0 127
;-----------------------------------------------------------------------
; int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc)
; MMX: sum of squared differences over 16 consecutive 4x4 blocks
; (16 iterations x 16 int16 = 256 coefficients).
; In:   arg(0) = coeff_ptr, arg(1) = dcoef_ptr,
;       arg(2) = dc -- builds a word mask via pcmpeqw-with-zero: word
;       positions where dc is zero become all-ones. With a nonzero dc
;       the first difference word of each block (the DC coefficient)
;       is therefore zeroed out of the sum.
; Out:  rax = masked sum of squared differences
;-----------------------------------------------------------------------
michael@0 128 ;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
michael@0 129 global sym(vp8_mbblock_error_mmx_impl) PRIVATE
michael@0 130 sym(vp8_mbblock_error_mmx_impl):
michael@0 131 push rbp
michael@0 132 mov rbp, rsp
michael@0 133 SHADOW_ARGS_TO_STACK 3
michael@0 134 push rsi
michael@0 135 push rdi
michael@0 136 ; end prolog
michael@0 137
michael@0 138
michael@0 139 mov rsi, arg(0) ;coeff_ptr
michael@0 140 pxor mm7, mm7 ; constant zero
michael@0 141
michael@0 142 mov rdi, arg(1) ;dcoef_ptr
michael@0 143 pxor mm2, mm2 ; accumulator
michael@0 144
michael@0 145 movd mm1, dword ptr arg(2) ;dc
michael@0 146 por mm1, mm2 ; no-op or (keeps pairing symmetric with mmx variant)
michael@0 147
michael@0 148 pcmpeqw mm1, mm7 ; word mask: FFFF where dc word == 0
michael@0 149 mov rcx, 16 ; 16 blocks
michael@0 150
michael@0 151 .mberror_loop_mmx:
michael@0 152 movq mm3, [rsi] ; coeff[0..3] of this block
michael@0 153 movq mm4, [rdi] ; dcoef[0..3]
michael@0 154
michael@0 155 movq mm5, [rsi+8] ; coeff[4..7]
michael@0 156 movq mm6, [rdi+8] ; dcoef[4..7]
michael@0 157
michael@0 158
michael@0 159 psubw mm5, mm6 ; d[4..7]
michael@0 160 pmaddwd mm5, mm5
michael@0 161
michael@0 162 psubw mm3, mm4 ; d[0..3]
michael@0 163 pand mm3, mm1 ; apply dc mask (may zero the DC diff)
michael@0 164
michael@0 165 pmaddwd mm3, mm3
michael@0 166 paddd mm2, mm5
michael@0 167
michael@0 168 paddd mm2, mm3
michael@0 169 movq mm3, [rsi+16] ; coeff[8..11]
michael@0 170
michael@0 171 movq mm4, [rdi+16]
michael@0 172 movq mm5, [rsi+24] ; coeff[12..15]
michael@0 173
michael@0 174 movq mm6, [rdi+24]
michael@0 175 psubw mm5, mm6
michael@0 176
michael@0 177 pmaddwd mm5, mm5
michael@0 178 psubw mm3, mm4
michael@0 179
michael@0 180 pmaddwd mm3, mm3
michael@0 181 paddd mm2, mm5
michael@0 182
michael@0 183 paddd mm2, mm3
michael@0 184 add rsi, 32 ; next block (16 shorts)
michael@0 185
michael@0 186 add rdi, 32
michael@0 187 sub rcx, 1
michael@0 188
michael@0 189 jnz .mberror_loop_mmx
michael@0 190
michael@0 191 movq mm0, mm2
michael@0 192 psrlq mm2, 32 ; high dword down
michael@0 193
michael@0 194 paddd mm0, mm2 ; final total in low dword
michael@0 195 movq rax, mm0 ; return value
michael@0 196
michael@0 197 pop rdi
michael@0 198 pop rsi
michael@0 199 ; begin epilog
michael@0 200 UNSHADOW_ARGS
michael@0 201 pop rbp
michael@0 202 ret
michael@0 203
michael@0 204
;-----------------------------------------------------------------------
; int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc)
; SSE2 counterpart of vp8_mbblock_error_mmx_impl: SSD over 16 blocks
; (16 iterations x 16 int16 = 256 coefficients), with a dc-derived word
; mask (pcmpeqw vs zero) that zeroes each block's DC difference when dc
; is nonzero. Uses xmm6, hence SAVE_XMM 6 (xmm6+ are callee-saved on
; Win64).
; In:   arg(0)=coeff_ptr, arg(1)=dcoef_ptr (16-byte aligned for movdqa),
;       arg(2)=dc
; Out:  rax = masked sum of squared differences
;-----------------------------------------------------------------------
michael@0 205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
michael@0 206 global sym(vp8_mbblock_error_xmm_impl) PRIVATE
michael@0 207 sym(vp8_mbblock_error_xmm_impl):
michael@0 208 push rbp
michael@0 209 mov rbp, rsp
michael@0 210 SHADOW_ARGS_TO_STACK 3
michael@0 211 SAVE_XMM 6 ; preserve xmm6 where the ABI requires it
michael@0 212 push rsi
michael@0 213 push rdi
michael@0 214 ; end prolog
michael@0 215
michael@0 216
michael@0 217 mov rsi, arg(0) ;coeff_ptr
michael@0 218 pxor xmm6, xmm6 ; constant zero
michael@0 219
michael@0 220 mov rdi, arg(1) ;dcoef_ptr
michael@0 221 pxor xmm4, xmm4 ; accumulator
michael@0 222
michael@0 223 movd xmm5, dword ptr arg(2) ;dc
michael@0 224 por xmm5, xmm4 ; no-op or (mirrors the mmx variant)
michael@0 225
michael@0 226 pcmpeqw xmm5, xmm6 ; word mask: FFFF where dc word == 0
michael@0 227 mov rcx, 16 ; 16 blocks
michael@0 228
michael@0 229 .mberror_loop:
michael@0 230 movdqa xmm0, [rsi] ; coeff[0..7] of this block
michael@0 231 movdqa xmm1, [rdi] ; dcoef[0..7]
michael@0 232
michael@0 233 movdqa xmm2, [rsi+16] ; coeff[8..15]
michael@0 234 movdqa xmm3, [rdi+16] ; dcoef[8..15]
michael@0 235
michael@0 236
michael@0 237 psubw xmm2, xmm3 ; d[8..15]
michael@0 238 pmaddwd xmm2, xmm2
michael@0 239
michael@0 240 psubw xmm0, xmm1 ; d[0..7]
michael@0 241 pand xmm0, xmm5 ; apply dc mask (may zero the DC diff)
michael@0 242
michael@0 243 pmaddwd xmm0, xmm0
michael@0 244 add rsi, 32 ; next block (16 shorts)
michael@0 245
michael@0 246 add rdi, 32
michael@0 247
michael@0 248 sub rcx, 1
michael@0 249 paddd xmm4, xmm2
michael@0 250
michael@0 251 paddd xmm4, xmm0
michael@0 252 jnz .mberror_loop
michael@0 253
michael@0 254 movdqa xmm0, xmm4
michael@0 255 punpckldq xmm0, xmm6 ; widen low dwords with zero
michael@0 256
michael@0 257 punpckhdq xmm4, xmm6 ; widen high dwords
michael@0 258 paddd xmm0, xmm4
michael@0 259
michael@0 260 movdqa xmm1, xmm0
michael@0 261 psrldq xmm0, 8 ; upper partial sum down
michael@0 262
michael@0 263 paddd xmm0, xmm1 ; final total in low lane
michael@0 264 movq rax, xmm0 ; return value
michael@0 265
michael@0 266 pop rdi
michael@0 267 pop rsi
michael@0 268 ; begin epilog
michael@0 269 RESTORE_XMM
michael@0 270 UNSHADOW_ARGS
michael@0 271 pop rbp
michael@0 272 ret
michael@0 273
michael@0 274
;-----------------------------------------------------------------------
; int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr)
; MMX: plain sum of squared differences over 128 int16 values
; (16 iterations x 8 shorts) -- the chroma (U+V) coefficient blocks.
; In:   arg(0) = s_ptr, arg(1) = d_ptr
; Out:  rax = sum((s[i] - d[i])^2), i = 0..127
;-----------------------------------------------------------------------
michael@0 275 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
michael@0 276 global sym(vp8_mbuverror_mmx_impl) PRIVATE
michael@0 277 sym(vp8_mbuverror_mmx_impl):
michael@0 278 push rbp
michael@0 279 mov rbp, rsp
michael@0 280 SHADOW_ARGS_TO_STACK 2
michael@0 281 push rsi
michael@0 282 push rdi
michael@0 283 ; end prolog
michael@0 284
michael@0 285
michael@0 286 mov rsi, arg(0) ;s_ptr
michael@0 287 mov rdi, arg(1) ;d_ptr
michael@0 288
michael@0 289 mov rcx, 16 ; 16 iterations of 8 shorts
michael@0 290 pxor mm7, mm7 ; accumulator
michael@0 291
michael@0 292 .mbuverror_loop_mmx:
michael@0 293
michael@0 294 movq mm1, [rsi] ; s[0..3]
michael@0 295 movq mm2, [rdi] ; d[0..3]
michael@0 296
michael@0 297 psubw mm1, mm2
michael@0 298 pmaddwd mm1, mm1 ; squared-diff pair sums
michael@0 299
michael@0 300
michael@0 301 movq mm3, [rsi+8] ; s[4..7]
michael@0 302 movq mm4, [rdi+8] ; d[4..7]
michael@0 303
michael@0 304 psubw mm3, mm4
michael@0 305 pmaddwd mm3, mm3
michael@0 306
michael@0 307
michael@0 308 paddd mm7, mm1
michael@0 309 paddd mm7, mm3
michael@0 310
michael@0 311
michael@0 312 add rsi, 16 ; advance 8 shorts
michael@0 313 add rdi, 16
michael@0 314
michael@0 315 dec rcx
michael@0 316 jnz .mbuverror_loop_mmx
michael@0 317
michael@0 318 movq mm0, mm7
michael@0 319 psrlq mm7, 32 ; high dword down
michael@0 320
michael@0 321 paddd mm0, mm7 ; final total in low dword
michael@0 322 movq rax, mm0 ; return value
michael@0 323
michael@0 324 pop rdi
michael@0 325 pop rsi
michael@0 326 ; begin epilog
michael@0 327 UNSHADOW_ARGS
michael@0 328 pop rbp
michael@0 329 ret
michael@0 330
michael@0 331
;-----------------------------------------------------------------------
; int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr)
; SSE2 counterpart of vp8_mbuverror_mmx_impl: SSD over 128 int16 values
; (16 iterations x 8 shorts).
; In:   arg(0) = s_ptr, arg(1) = d_ptr (16-byte aligned for movdqa --
;       assumed guaranteed by callers)
; Out:  rax = sum((s[i] - d[i])^2), i = 0..127
;-----------------------------------------------------------------------
michael@0 332 ;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
michael@0 333 global sym(vp8_mbuverror_xmm_impl) PRIVATE
michael@0 334 sym(vp8_mbuverror_xmm_impl):
michael@0 335 push rbp
michael@0 336 mov rbp, rsp
michael@0 337 SHADOW_ARGS_TO_STACK 2
michael@0 338 push rsi
michael@0 339 push rdi
michael@0 340 ; end prolog
michael@0 341
michael@0 342
michael@0 343 mov rsi, arg(0) ;s_ptr
michael@0 344 mov rdi, arg(1) ;d_ptr
michael@0 345
michael@0 346 mov rcx, 16 ; 16 iterations of 8 shorts
michael@0 347 pxor xmm3, xmm3 ; accumulator
michael@0 348
michael@0 349 .mbuverror_loop:
michael@0 350
michael@0 351 movdqa xmm1, [rsi] ; s[0..7]
michael@0 352 movdqa xmm2, [rdi] ; d[0..7]
michael@0 353
michael@0 354 psubw xmm1, xmm2
michael@0 355 pmaddwd xmm1, xmm1 ; squared-diff pair sums (4 dwords)
michael@0 356
michael@0 357 paddd xmm3, xmm1
michael@0 358
michael@0 359 add rsi, 16 ; advance 8 shorts
michael@0 360 add rdi, 16
michael@0 361
michael@0 362 dec rcx
michael@0 363 jnz .mbuverror_loop
michael@0 364
michael@0 365 pxor xmm0, xmm0 ; zero, for widening
michael@0 366 movdqa xmm1, xmm3
michael@0 367
michael@0 368 movdqa xmm2, xmm1
michael@0 369 punpckldq xmm1, xmm0 ; widen low dwords
michael@0 370
michael@0 371 punpckhdq xmm2, xmm0 ; widen high dwords
michael@0 372 paddd xmm1, xmm2
michael@0 373
michael@0 374 movdqa xmm2, xmm1
michael@0 375
michael@0 376 psrldq xmm1, 8 ; upper partial sum down
michael@0 377 paddd xmm1, xmm2 ; final total in low lane
michael@0 378
michael@0 379 movq rax, xmm1 ; return value
michael@0 380
michael@0 381 pop rdi
michael@0 382 pop rsi
michael@0 383 ; begin epilog
michael@0 384 UNSHADOW_ARGS
michael@0 385 pop rbp
michael@0 386 ret

mercurial