media/libvpx/vp8/common/x86/sad_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
;unsigned int vp8_sad16x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences (SAD) between the 16x16 byte block at src_ptr
; and the 16x16 byte block at ref_ptr. Consecutive rows are src_stride /
; ref_stride bytes apart. The total is returned in rax.
;
; Register roles inside the loop:
;   rsi / rdi   current source / reference row
;   rax / rdx   source / reference stride (sign-extended)
;   rcx         source address one past the 16th row (loop end marker)
;   xmm6        running sums, one per 64-bit half (merged after the loop)
;
; sym(), arg(), SHADOW_ARGS_TO_STACK, SAVE_XMM, RESTORE_XMM and UNSHADOW_ARGS
; are macros from vpx_ports/x86_abi_support.asm, which is not visible here.
global sym(vp8_sad16x16_wmt) PRIVATE
sym(vp8_sad16x16_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    SAVE_XMM 6
    push        rsi
    push        rdi
    ; end prolog

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        pxor            xmm6,       xmm6             ; clear the accumulator

        ; rcx = src_ptr + 16 * src_stride, built in two steps because the
        ; addressing mode only scales by up to 8
        lea             rcx,        [rsi+rax*8]
        lea             rcx,        [rcx+rax*8]

.sad16x16_row_pair:

        ; ---- first row of the pair ----
        ; The two 8-byte halves are interleaved byte-wise. Source and
        ; reference get the same shuffle, so psadbw still pairs matching bytes.
        movq            xmm0,       QWORD PTR [rsi]
        movq            xmm2,       QWORD PTR [rsi+8]
        punpcklbw       xmm0,       xmm2

        movq            xmm1,       QWORD PTR [rdi]
        movq            xmm3,       QWORD PTR [rdi+8]
        punpcklbw       xmm1,       xmm3

        psadbw          xmm0,       xmm1
        paddw           xmm6,       xmm0

        ; ---- second row of the pair ----
        movq            xmm4,       QWORD PTR [rsi+rax]
        movq            xmm2,       QWORD PTR [rsi+rax+8]
        punpcklbw       xmm4,       xmm2

        movq            xmm5,       QWORD PTR [rdi+rdx]
        movq            xmm3,       QWORD PTR [rdi+rdx+8]
        punpcklbw       xmm5,       xmm3

        psadbw          xmm4,       xmm5
        paddw           xmm6,       xmm4

        ; step both pointers down two rows
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        cmp             rsi,        rcx
        jne             .sad16x16_row_pair

        ; psadbw leaves one partial sum in each 64-bit half; add them
        movq            xmm0,       xmm6
        psrldq          xmm6,       8
        paddw           xmm0,       xmm6

        movq            rax,        xmm0             ; return value

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 86
;unsigned int vp8_sad8x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_sad)
;
; Sum of absolute differences between an 8-byte-wide, 16-row block at
; src_ptr and one at ref_ptr, handling two rows per loop iteration.
;
; Early exit: before each pair of rows the running total is compared
; (unsigned) with max_sad. Once it is greater, the routine returns the
; partial total accumulated so far instead of finishing the block.
;
; Register roles inside the loop:
;   rsi / rdi   current source / reference row
;   rbx / rdx   source / reference stride (sign-extended)
;   rcx         source address one past the 16th row (loop end marker)
;   mm7         running total
;   rax         copy of the running total; also the return value
;
; NOTE(review): MMX registers are used and no emms is issued in this
; routine; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad8x16_wmt) PRIVATE
sym(vp8_sad8x16_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        pxor            mm7,        mm7              ; running total = 0

        ; rcx = src_ptr + 16 * src_stride
        lea             rcx,        [rsi+rbx*8]
        lea             rcx,        [rcx+rbx*8]

.sad8x16_row_pair:

        ; stop as soon as the running total exceeds max_sad;
        ; rax already holds the value that will be returned
        movq            rax,        mm7
        cmp             eax,        arg(4)
        ja              .sad8x16_done

        ; first row of the pair
        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]
        psadbw          mm0,        mm1
        paddw           mm7,        mm0

        ; second row of the pair
        movq            mm2,        QWORD PTR [rsi+rbx]
        movq            mm3,        QWORD PTR [rdi+rdx]
        psadbw          mm2,        mm3
        paddw           mm7,        mm2

        ; step both pointers down two rows
        lea             rsi,        [rsi+rbx*2]
        lea             rdi,        [rdi+rdx*2]

        cmp             rsi,        rcx
        jne             .sad8x16_row_pair

        movq            rax,        mm7              ; full-block total

.sad8x16_done:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 149
michael@0 150
;unsigned int vp8_sad8x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_sad)
;
; Sum of absolute differences between an 8x8 byte block at src_ptr and one
; at ref_ptr, one row per loop iteration.
;
; The code shadows five arguments and reads arg(4) as an early-exit
; threshold, so the fifth parameter is listed above (named max_sad to match
; vp8_sad8x16_wmt). Before each row the running total is compared (unsigned)
; with it; once the total is greater, the partial total is returned.
;
; Register roles inside the loop:
;   rsi / rdi   current source / reference row
;   rbx / rdx   source / reference stride (sign-extended)
;   rcx         source address one past the 8th row (loop end marker)
;   mm7         running total
;   rax         copy of the running total; also the return value
;
; NOTE(review): MMX registers are used and no emms is issued in this
; routine; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad8x8_wmt) PRIVATE
sym(vp8_sad8x8_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        pxor            mm7,        mm7              ; running total = 0
        lea             rcx,        [rsi+rbx*8]      ; end = src_ptr + 8 rows

.sad8x8_row:

        ; stop as soon as the running total exceeds the threshold;
        ; rax already holds the value that will be returned
        movq            rax,        mm7
        cmp             eax,        arg(4)
        ja              .sad8x8_done

        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]
        psadbw          mm0,        mm1
        paddw           mm7,        mm0

        ; next row in both blocks
        add             rsi,        rbx
        add             rdi,        rdx

        cmp             rsi,        rcx
        jne             .sad8x8_row

        movq            rax,        mm7              ; full-block total

.sad8x8_done:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 203
;unsigned int vp8_sad4x4_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between a 4x4 byte block at src_ptr and one at
; ref_ptr. Straight-line code: two 4-byte rows are interleaved into a single
; 8-byte register so one psadbw covers both. Source and reference get the
; same interleave, so matching bytes are still compared with each other.
;
; rax / rdx hold the source / reference stride until rax is overwritten
; with the result.
;
; NOTE(review): MMX registers are used and no emms is issued in this
; routine; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad4x4_wmt) PRIVATE
sym(vp8_sad4x4_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; ---- rows 0 and 1 ----
        movd            mm0,        DWORD PTR [rsi]
        movd            mm2,        DWORD PTR [rsi+rax]
        punpcklbw       mm0,        mm2

        movd            mm1,        DWORD PTR [rdi]
        movd            mm3,        DWORD PTR [rdi+rdx]
        punpcklbw       mm1,        mm3

        psadbw          mm0,        mm1

        ; move both pointers down to row 2
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        ; ---- rows 2 and 3 ----
        movd            mm4,        DWORD PTR [rsi]
        movd            mm6,        DWORD PTR [rsi+rax]
        punpcklbw       mm4,        mm6

        movd            mm5,        DWORD PTR [rdi]
        movd            mm7,        DWORD PTR [rdi+rdx]
        punpcklbw       mm5,        mm7

        psadbw          mm4,        mm5

        ; combine the two halves and return the total
        paddw           mm0,        mm4
        movq            rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 257
michael@0 258
;unsigned int vp8_sad16x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_sad)
;
; Sum of absolute differences between a 16-byte-wide, 8-row block at src_ptr
; and one at ref_ptr. Each loop iteration handles two rows, and each row is
; processed as two 8-byte halves.
;
; The code shadows five arguments and reads arg(4) as an early-exit
; threshold, so the fifth parameter is listed above (named max_sad to match
; vp8_sad8x16_wmt). Before each pair of rows the running total is compared
; (unsigned) with it; once the total is greater, the partial total is
; returned.
;
; Register roles inside the loop:
;   rsi / rdi   current source / reference row
;   rbx / rdx   source / reference stride (sign-extended)
;   rcx         source address one past the 8th row (loop end marker)
;   mm7         running total
;   rax         copy of the running total; also the return value
;
; NOTE(review): MMX registers are used and no emms is issued in this
; routine; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad16x8_wmt) PRIVATE
sym(vp8_sad16x8_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        pxor            mm7,        mm7              ; running total = 0
        lea             rcx,        [rsi+rbx*8]      ; end = src_ptr + 8 rows

.sad16x8_row_pair:

        ; stop as soon as the running total exceeds the threshold;
        ; rax already holds the value that will be returned
        movq            rax,        mm7
        cmp             eax,        arg(4)
        ja              .sad16x8_done

        ; ---- first row: left half, right half ----
        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]
        psadbw          mm0,        mm1

        movq            mm2,        QWORD PTR [rsi+8]
        movq            mm3,        QWORD PTR [rdi+8]
        psadbw          mm2,        mm3

        paddw           mm0,        mm2              ; mm0 = SAD of first row

        ; ---- second row: left half, right half ----
        movq            mm4,        QWORD PTR [rsi+rbx]
        movq            mm5,        QWORD PTR [rdi+rdx]
        psadbw          mm4,        mm5

        movq            mm1,        QWORD PTR [rsi+rbx+8]
        movq            mm3,        QWORD PTR [rdi+rdx+8]
        psadbw          mm1,        mm3

        paddw           mm4,        mm1              ; mm4 = SAD of second row

        ; fold both rows into the running total
        paddw           mm7,        mm0
        paddw           mm7,        mm4

        ; step both pointers down two rows
        lea             rsi,        [rsi+rbx*2]
        lea             rdi,        [rdi+rdx*2]

        cmp             rsi,        rcx
        jne             .sad16x8_row_pair

        movq            rax,        mm7              ; full-block total

.sad16x8_done:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 331
;void vp8_copy32xn_sse2(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int height);
;
; Copies `height` rows of 32 bytes from src_ptr to dst_ptr. Consecutive rows
; are src_stride / dst_stride bytes apart. Rows are copied four at a time
; while at least four remain, then one at a time.
;
; Alignment: loads use movdqu, so the source may be unaligned. Stores use
; movdqa, so every destination row address (dst_ptr and dst_ptr plus any
; multiple of dst_stride) must be 16-byte aligned or the store faults.
;
; height < 4 skips the unrolled loop and height <= 0 copies nothing.
; Previously the unrolled loop ran unconditionally, so any height below 4
; copied four rows and then left a negative count for the single-row loop.
;
; Register roles:
;   rsi / rdi   current source / destination row
;   rax / rdx   source / destination stride (sign-extended)
;   rcx         rows still to copy
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;dst_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;dst_stride
        movsxd          rcx,        dword ptr arg(4) ;height

        ; fewer than four rows requested: the unrolled loop must not run
        cmp             rcx,        4
        jl              .block_copy_sse2_tail

.block_copy_sse2_loopx4:
        ; load four rows (8 x 16 bytes) before storing any of them
        movdqu          xmm0,       XMMWORD PTR [rsi]
        movdqu          xmm1,       XMMWORD PTR [rsi + 16]
        movdqu          xmm2,       XMMWORD PTR [rsi + rax]
        movdqu          xmm3,       XMMWORD PTR [rsi + rax + 16]

        lea             rsi,        [rsi+rax*2]

        movdqu          xmm4,       XMMWORD PTR [rsi]
        movdqu          xmm5,       XMMWORD PTR [rsi + 16]
        movdqu          xmm6,       XMMWORD PTR [rsi + rax]
        movdqu          xmm7,       XMMWORD PTR [rsi + rax + 16]

        lea             rsi,        [rsi+rax*2]

        movdqa          XMMWORD PTR [rdi], xmm0
        movdqa          XMMWORD PTR [rdi + 16], xmm1
        movdqa          XMMWORD PTR [rdi + rdx], xmm2
        movdqa          XMMWORD PTR [rdi + rdx + 16], xmm3

        lea             rdi,        [rdi+rdx*2]

        movdqa          XMMWORD PTR [rdi], xmm4
        movdqa          XMMWORD PTR [rdi + 16], xmm5
        movdqa          XMMWORD PTR [rdi + rdx], xmm6
        movdqa          XMMWORD PTR [rdi + rdx + 16], xmm7

        lea             rdi,        [rdi+rdx*2]

        sub             rcx,        4
        cmp             rcx,        4
        jge             .block_copy_sse2_loopx4

.block_copy_sse2_tail:
        ; here rcx < 4; zero or negative means there is nothing left to copy
        test            rcx,        rcx
        jle             .copy_is_done

.block_copy_sse2_loop:
        ; copy the remaining 1..3 rows one at a time
        movdqu          xmm0,       XMMWORD PTR [rsi]
        movdqu          xmm1,       XMMWORD PTR [rsi + 16]
        lea             rsi,        [rsi+rax]

        movdqa          XMMWORD PTR [rdi], xmm0
        movdqa          XMMWORD PTR [rdi + 16], xmm1
        lea             rdi,        [rdi+rdx]

        sub             rcx,        1
        jne             .block_copy_sse2_loop

.copy_is_done:
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

mercurial