media/libvpx/vp9/encoder/x86/vp9_sad_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
michael@0 14 global sym(vp9_sad16x16_mmx) PRIVATE
michael@0 15 global sym(vp9_sad8x16_mmx) PRIVATE
michael@0 16 global sym(vp9_sad8x8_mmx) PRIVATE
michael@0 17 global sym(vp9_sad4x4_mmx) PRIVATE
michael@0 18 global sym(vp9_sad16x8_mmx) PRIVATE
michael@0 19
;-----------------------------------------------------------------------------
; unsigned int vp9_sad16x16_mmx(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute byte differences between a 16-byte-wide source block and a
; 16-byte-wide reference block, walked row by row until the source pointer
; has advanced by 16 * src_stride.
;
; Register roles:
;   rsi, rdi : current source / reference row
;   rax, rdx : sign-extended source / reference stride
;   rcx      : source address at which the row loop stops
;   mm6      : all zeroes, used to widen bytes to words and words to dwords
;   mm7      : four 16-bit running partial sums
;
; Result: the total is formed with 16-bit adds (paddw) in the low word of mm7
; and then moved to rax.
;
; This routine does not execute emms.
; NOTE(review): the loop exits on rsi == rcx, so a src_stride of 0 would stop
; after a single row.
;-----------------------------------------------------------------------------
sym(vp9_sad16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src_ptr
    movsxd      rax, dword ptr arg(1)           ; src_stride
    mov         rdi, arg(2)                     ; ref_ptr
    movsxd      rdx, dword ptr arg(3)           ; ref_stride

    ; rcx = src_ptr + 16 * src_stride, built as two steps of 8 * stride
    lea         rcx, [rsi+rax*8]
    lea         rcx, [rcx+rax*8]

    pxor        mm6, mm6                        ; zero for unpacking
    pxor        mm7, mm7                        ; clear the accumulator

.sad16x16_row:
    ; fetch 16 source bytes and 16 reference bytes
    movq        mm0, QWORD PTR [rsi]
    movq        mm1, QWORD PTR [rdi]
    movq        mm2, QWORD PTR [rsi+8]
    movq        mm3, QWORD PTR [rdi+8]

    ; bytes 0..7: |src - ref| = sat(src - ref) | sat(ref - src)
    movq        mm4, mm0
    psubusb     mm0, mm1
    psubusb     mm1, mm4
    por         mm0, mm1

    ; bytes 8..15: same computation
    movq        mm5, mm2
    psubusb     mm2, mm3
    psubusb     mm3, mm5
    por         mm2, mm3

    ; widen each group of eight byte differences to two groups of four words
    movq        mm1, mm0
    punpcklbw   mm0, mm6
    punpckhbw   mm1, mm6

    movq        mm3, mm2
    punpcklbw   mm2, mm6
    punpckhbw   mm3, mm6

    ; fold this row into the running word sums
    paddw       mm0, mm2
    paddw       mm1, mm3
    paddw       mm7, mm0
    paddw       mm7, mm1

    ; step both pointers to the next row and test for the end address
    add         rsi, rax
    add         rdi, rdx
    cmp         rsi, rcx
    jne         .sad16x16_row

    ; horizontal reduction of the four word lanes of mm7
    movq        mm0, mm7
    punpcklwd   mm0, mm6                        ; lanes 0,1 -> dwords
    punpckhwd   mm7, mm6                        ; lanes 2,3 -> dwords
    paddw       mm0, mm7

    movq        mm7, mm0
    psrlq       mm0, 32                         ; bring the upper dword down
    paddw       mm7, mm0

    movq        rax, mm7

    pop         rdi
    pop         rsi
    mov         rsp, rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 110
michael@0 111
;-----------------------------------------------------------------------------
; unsigned int vp9_sad8x16_mmx(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute byte differences between an 8-byte-wide source block and an
; 8-byte-wide reference block, walked row by row until the source pointer has
; advanced by 16 * src_stride.
;
; Register roles:
;   rsi, rdi : current source / reference row
;   rax, rdx : sign-extended source / reference stride
;   rcx      : source address at which the row loop stops
;   mm6      : all zeroes, used to widen bytes to words and words to dwords
;   mm7      : four 16-bit running partial sums
;
; Result: the total is formed with 16-bit adds (paddw) in the low word of mm7
; and then moved to rax.
;
; This routine does not execute emms.
; NOTE(review): the loop exits on rsi == rcx, so a src_stride of 0 would stop
; after a single row.
;-----------------------------------------------------------------------------
sym(vp9_sad8x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src_ptr
    movsxd      rax, dword ptr arg(1)           ; src_stride
    mov         rdi, arg(2)                     ; ref_ptr
    movsxd      rdx, dword ptr arg(3)           ; ref_stride

    ; rcx = src_ptr + 16 * src_stride, built as two steps of 8 * stride
    lea         rcx, [rsi+rax*8]
    lea         rcx, [rcx+rax*8]

    pxor        mm6, mm6                        ; zero for unpacking
    pxor        mm7, mm7                        ; clear the accumulator

.sad8x16_row:
    ; fetch 8 source bytes and 8 reference bytes
    movq        mm0, QWORD PTR [rsi]
    movq        mm1, QWORD PTR [rdi]

    ; |src - ref| = sat(src - ref) | sat(ref - src)
    movq        mm2, mm0
    psubusb     mm0, mm1
    psubusb     mm1, mm2
    por         mm0, mm1

    ; widen the eight byte differences to two groups of four words
    movq        mm2, mm0
    punpcklbw   mm0, mm6
    punpckhbw   mm2, mm6

    ; fold this row into the running word sums
    paddw       mm7, mm0
    paddw       mm7, mm2

    ; step both pointers to the next row and test for the end address
    add         rsi, rax
    add         rdi, rdx
    cmp         rsi, rcx
    jne         .sad8x16_row

    ; horizontal reduction of the four word lanes of mm7
    movq        mm0, mm7
    punpcklwd   mm0, mm6                        ; lanes 0,1 -> dwords
    punpckhwd   mm7, mm6                        ; lanes 2,3 -> dwords
    paddw       mm0, mm7

    movq        mm7, mm0
    psrlq       mm0, 32                         ; bring the upper dword down
    paddw       mm7, mm0

    movq        rax, mm7

    pop         rdi
    pop         rsi
    mov         rsp, rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 182
michael@0 183
;-----------------------------------------------------------------------------
; unsigned int vp9_sad8x8_mmx(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute byte differences between an 8-byte-wide source block and an
; 8-byte-wide reference block, walked row by row until the source pointer has
; advanced by 8 * src_stride.
;
; Register roles:
;   rsi, rdi : current source / reference row
;   rax, rdx : sign-extended source / reference stride
;   rcx      : source address at which the row loop stops
;   mm6      : all zeroes, used to widen bytes to words and words to dwords
;   mm7      : four 16-bit running partial sums
;
; Result: the total is formed with 16-bit adds (paddw) in the low word of mm7
; and then moved to rax.
;
; This routine does not execute emms.
; NOTE(review): the loop exits on rsi == rcx, so a src_stride of 0 would stop
; after a single row.
;-----------------------------------------------------------------------------
sym(vp9_sad8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src_ptr
    movsxd      rax, dword ptr arg(1)           ; src_stride
    mov         rdi, arg(2)                     ; ref_ptr
    movsxd      rdx, dword ptr arg(3)           ; ref_stride

    lea         rcx, [rsi+rax*8]                ; rcx = src_ptr + 8 * src_stride

    pxor        mm6, mm6                        ; zero for unpacking
    pxor        mm7, mm7                        ; clear the accumulator

.sad8x8_row:
    ; fetch 8 source bytes and 8 reference bytes
    movq        mm0, QWORD PTR [rsi]
    movq        mm1, QWORD PTR [rdi]

    ; |src - ref| = sat(src - ref) | sat(ref - src)
    movq        mm2, mm0
    psubusb     mm0, mm1
    psubusb     mm1, mm2
    por         mm0, mm1

    ; widen the eight byte differences to words and pair them up
    movq        mm2, mm0
    punpcklbw   mm0, mm6
    punpckhbw   mm2, mm6
    paddw       mm0, mm2

    ; fold this row into the running word sums
    paddw       mm7, mm0

    ; step both pointers to the next row and test for the end address
    add         rsi, rax
    add         rdi, rdx
    cmp         rsi, rcx
    jne         .sad8x8_row

    ; horizontal reduction of the four word lanes of mm7
    movq        mm0, mm7
    punpcklwd   mm0, mm6                        ; lanes 0,1 -> dwords
    punpckhwd   mm7, mm6                        ; lanes 2,3 -> dwords
    paddw       mm0, mm7

    movq        mm7, mm0
    psrlq       mm0, 32                         ; bring the upper dword down
    paddw       mm7, mm0

    movq        rax, mm7

    pop         rdi
    pop         rsi
    mov         rsp, rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 252
michael@0 253
;-----------------------------------------------------------------------------
; unsigned int vp9_sad4x4_mmx(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute byte differences over four rows of four bytes each. The
; code is fully unrolled: rows 0-1 and rows 2-3 are each packed into a single
; MMX register (byte-interleaved identically for source and reference), so
; one pass of the absolute-difference sequence covers two rows.
;
; Register roles:
;   rsi, rdi : source / reference pointer (advanced by two rows midway)
;   rax, rdx : sign-extended source / reference stride
;   mm3      : all zeroes once the first row pair has been differenced
;   mm0      : running 16-bit sums, then the final total
;
; Result: the total is formed with 16-bit adds (paddw) in the low word of mm0
; and then moved to rax.
;
; This routine does not execute emms.
;-----------------------------------------------------------------------------
sym(vp9_sad4x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src_ptr
    movsxd      rax, dword ptr arg(1)           ; src_stride
    mov         rdi, arg(2)                     ; ref_ptr
    movsxd      rdx, dword ptr arg(3)           ; ref_stride

    ; ---- rows 0 and 1 ----
    movd        mm0, DWORD PTR [rsi]
    movd        mm2, DWORD PTR [rsi+rax]
    punpcklbw   mm0, mm2                        ; source rows 0,1 interleaved

    movd        mm1, DWORD PTR [rdi]
    movd        mm3, DWORD PTR [rdi+rdx]
    punpcklbw   mm1, mm3                        ; reference rows 0,1 interleaved

    ; |src - ref| = sat(src - ref) | sat(ref - src)
    movq        mm2, mm0
    psubusb     mm0, mm1
    psubusb     mm1, mm2
    por         mm0, mm1

    ; mm3 is no longer needed as a row buffer; reuse it as the zero register
    pxor        mm3, mm3

    movq        mm2, mm0
    punpcklbw   mm0, mm3
    punpckhbw   mm2, mm3
    paddw       mm0, mm2                        ; word sums for rows 0,1

    ; ---- rows 2 and 3 ----
    lea         rsi, [rsi+rax*2]
    lea         rdi, [rdi+rdx*2]

    movd        mm4, DWORD PTR [rsi]
    movd        mm6, DWORD PTR [rsi+rax]
    punpcklbw   mm4, mm6                        ; source rows 2,3 interleaved

    movd        mm5, DWORD PTR [rdi]
    movd        mm7, DWORD PTR [rdi+rdx]
    punpcklbw   mm5, mm7                        ; reference rows 2,3 interleaved

    movq        mm6, mm4
    psubusb     mm4, mm5
    psubusb     mm5, mm6
    por         mm4, mm5

    movq        mm5, mm4
    punpcklbw   mm4, mm3
    punpckhbw   mm5, mm3
    paddw       mm4, mm5                        ; word sums for rows 2,3

    ; ---- combine both row pairs and reduce horizontally ----
    paddw       mm0, mm4

    movq        mm1, mm0
    punpcklwd   mm0, mm3                        ; lanes 0,1 -> dwords
    punpckhwd   mm1, mm3                        ; lanes 2,3 -> dwords
    paddw       mm0, mm1

    movq        mm1, mm0
    psrlq       mm0, 32                         ; bring the upper dword down
    paddw       mm0, mm1

    movq        rax, mm0

    pop         rdi
    pop         rsi
    mov         rsp, rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
michael@0 341
michael@0 342
;-----------------------------------------------------------------------------
; unsigned int vp9_sad16x8_mmx(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute byte differences between a 16-byte-wide source block and a
; 16-byte-wide reference block, walked row by row until the source pointer
; has advanced by 8 * src_stride.
;
; Register roles:
;   rsi, rdi : current source / reference row
;   rax, rdx : sign-extended source / reference stride
;   rcx      : source address at which the row loop stops
;   mm6      : all zeroes, used to widen bytes to words and words to dwords
;   mm7      : four 16-bit running partial sums
;
; Result: the total is formed with 16-bit adds (paddw) in the low word of mm7
; and then moved to rax.
;
; This routine does not execute emms.
; NOTE(review): the loop exits on rsi == rcx, so a src_stride of 0 would stop
; after a single row.
;-----------------------------------------------------------------------------
sym(vp9_sad16x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src_ptr
    movsxd      rax, dword ptr arg(1)           ; src_stride
    mov         rdi, arg(2)                     ; ref_ptr
    movsxd      rdx, dword ptr arg(3)           ; ref_stride

    lea         rcx, [rsi+rax*8]                ; rcx = src_ptr + 8 * src_stride

    pxor        mm6, mm6                        ; zero for unpacking
    pxor        mm7, mm7                        ; clear the accumulator

.sad16x8_row:
    ; fetch 16 source bytes and 16 reference bytes
    movq        mm0, [rsi]
    movq        mm2, [rsi+8]
    movq        mm1, [rdi]
    movq        mm3, [rdi+8]

    ; bytes 0..7: |src - ref| = sat(src - ref) | sat(ref - src)
    movq        mm4, mm0
    psubusb     mm0, mm1
    psubusb     mm1, mm4
    por         mm0, mm1

    ; bytes 8..15: same computation
    movq        mm5, mm2
    psubusb     mm2, mm3
    psubusb     mm3, mm5
    por         mm2, mm3

    ; widen each group of eight byte differences to two groups of four words
    movq        mm1, mm0
    punpcklbw   mm0, mm6
    punpckhbw   mm1, mm6

    movq        mm3, mm2
    punpcklbw   mm2, mm6
    punpckhbw   mm3, mm6

    ; collapse the row to four word sums, then fold into the accumulator
    paddw       mm0, mm2
    paddw       mm1, mm3
    paddw       mm0, mm1
    paddw       mm7, mm0

    ; step both pointers to the next row and test for the end address
    add         rsi, rax
    add         rdi, rdx
    cmp         rsi, rcx
    jne         .sad16x8_row

    ; horizontal reduction of the four word lanes of mm7
    movq        mm0, mm7
    punpcklwd   mm0, mm6                        ; lanes 0,1 -> dwords
    punpckhwd   mm7, mm6                        ; lanes 2,3 -> dwords
    paddw       mm0, mm7

    movq        mm7, mm0
    psrlq       mm0, 32                         ; bring the upper dword down
    paddw       mm7, mm0

    movq        rax, mm7

    pop         rdi
    pop         rsi
    mov         rsp, rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret

mercurial