media/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
michael@0 14 %macro PROCESS_16X2X8 1 ; Adds to xmm1 (8 words) the 16-byte-wide SADs of two rows at 8 consecutive ref offsets. In: rsi=src, rdi=ref, rax=src stride, rdx=ref stride. %1 != 0 starts a fresh sum. Uses xmm0, xmm2-xmm5 as scratch; advances rsi/rdi by two rows.
michael@0 15 %if %1 ; first invocation: row 0 result is written straight into the accumulator xmm1
michael@0 16 movdqa xmm0, XMMWORD PTR [rsi] ; 16 src bytes (aligned load: rsi must be 16-byte aligned)
michael@0 17 movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
michael@0 18 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
michael@0 19 movq xmm2, MMWORD PTR [rdi+16] ; ref bytes 16-23
michael@0 20 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
michael@0 21 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8-23
michael@0 22
michael@0 23 movdqa xmm2, xmm1 ; keep a copy: mpsadbw overwrites its destination
michael@0 24 mpsadbw xmm1, xmm0, 0x0 ; src bytes 0-3 vs the 8 ref windows starting at bytes 0..7
michael@0 25 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 vs the 8 ref windows starting at bytes 4..11
michael@0 26
michael@0 27 psrldq xmm0, 8 ; move src bytes 8-15 into the low half
michael@0 28
michael@0 29 movdqa xmm4, xmm3 ; copy of ref bytes 8-23 for the second mpsadbw
michael@0 30 mpsadbw xmm3, xmm0, 0x0 ; src bytes 8-11 vs ref windows starting at bytes 8..15
michael@0 31 mpsadbw xmm4, xmm0, 0x5 ; src bytes 12-15 vs ref windows starting at bytes 12..19
michael@0 32
michael@0 33 paddw xmm1, xmm2 ; sum the four 4-byte partial SADs ...
michael@0 34 paddw xmm1, xmm3
michael@0 35 paddw xmm1, xmm4 ; ... xmm1 = row 0 SADs for the 8 offsets
michael@0 36 %else ; later invocations: same row computation in xmm5, then added to xmm1
michael@0 37 movdqa xmm0, XMMWORD PTR [rsi] ; 16 src bytes (aligned load)
michael@0 38 movq xmm5, MMWORD PTR [rdi] ; ref bytes 0-7
michael@0 39 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
michael@0 40 movq xmm2, MMWORD PTR [rdi+16] ; ref bytes 16-23
michael@0 41 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
michael@0 42 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8-23
michael@0 43
michael@0 44 movdqa xmm2, xmm5 ; keep a copy: mpsadbw overwrites its destination
michael@0 45 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0-3 vs ref windows starting at bytes 0..7
michael@0 46 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 vs ref windows starting at bytes 4..11
michael@0 47
michael@0 48 psrldq xmm0, 8 ; move src bytes 8-15 into the low half
michael@0 49
michael@0 50 movdqa xmm4, xmm3 ; copy of ref bytes 8-23
michael@0 51 mpsadbw xmm3, xmm0, 0x0 ; src bytes 8-11 vs ref windows starting at bytes 8..15
michael@0 52 mpsadbw xmm4, xmm0, 0x5 ; src bytes 12-15 vs ref windows starting at bytes 12..19
michael@0 53
michael@0 54 paddw xmm5, xmm2 ; sum the four partial SADs of this row
michael@0 55 paddw xmm5, xmm3
michael@0 56 paddw xmm5, xmm4
michael@0 57
michael@0 58 paddw xmm1, xmm5 ; accumulate this row into the running totals
michael@0 59 %endif
michael@0 60 movdqa xmm0, XMMWORD PTR [rsi + rax] ; second row: 16 src bytes one stride down (aligned load)
michael@0 61 movq xmm5, MMWORD PTR [rdi+ rdx] ; second row ref bytes 0-7
michael@0 62 movq xmm3, MMWORD PTR [rdi+ rdx+8] ; second row ref bytes 8-15
michael@0 63 movq xmm2, MMWORD PTR [rdi+ rdx+16] ; second row ref bytes 16-23
michael@0 64 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
michael@0 65 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8-23
michael@0 66
michael@0 67 lea rsi, [rsi+rax*2] ; advance src by two rows (row data is already in registers)
michael@0 68 lea rdi, [rdi+rdx*2] ; advance ref by two rows
michael@0 69
michael@0 70 movdqa xmm2, xmm5 ; keep a copy: mpsadbw overwrites its destination
michael@0 71 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0-3 vs ref windows starting at bytes 0..7
michael@0 72 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 vs ref windows starting at bytes 4..11
michael@0 73
michael@0 74 psrldq xmm0, 8 ; move src bytes 8-15 into the low half
michael@0 75 movdqa xmm4, xmm3 ; copy of ref bytes 8-23
michael@0 76 mpsadbw xmm3, xmm0, 0x0 ; src bytes 8-11 vs ref windows starting at bytes 8..15
michael@0 77 mpsadbw xmm4, xmm0, 0x5 ; src bytes 12-15 vs ref windows starting at bytes 12..19
michael@0 78
michael@0 79 paddw xmm5, xmm2 ; sum the four partial SADs of the second row
michael@0 80 paddw xmm5, xmm3
michael@0 81 paddw xmm5, xmm4
michael@0 82
michael@0 83 paddw xmm1, xmm5 ; accumulate the second row into the running totals
michael@0 84 %endmacro
michael@0 85
michael@0 86 %macro PROCESS_8X2X8 1 ; Adds to xmm1 (8 words) the 8-byte-wide SADs of two rows at 8 consecutive ref offsets. In: rsi=src, rdi=ref, rax=src stride, rdx=ref stride. %1 != 0 starts a fresh sum. Uses xmm0, xmm2, xmm3, xmm5 as scratch; advances rsi/rdi by two rows.
michael@0 87 %if %1 ; first invocation: row 0 result is written straight into the accumulator xmm1
michael@0 88 movq xmm0, MMWORD PTR [rsi] ; 8 src bytes
michael@0 89 movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
michael@0 90 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
michael@0 91 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
michael@0 92
michael@0 93 movdqa xmm2, xmm1 ; keep a copy: mpsadbw overwrites its destination
michael@0 94 mpsadbw xmm1, xmm0, 0x0 ; src bytes 0-3 vs the 8 ref windows starting at bytes 0..7
michael@0 95 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 vs the 8 ref windows starting at bytes 4..11
michael@0 96 paddw xmm1, xmm2 ; xmm1 = row 0 SADs for the 8 offsets
michael@0 97 %else ; later invocations: same row computation in xmm5, then added to xmm1
michael@0 98 movq xmm0, MMWORD PTR [rsi] ; 8 src bytes
michael@0 99 movq xmm5, MMWORD PTR [rdi] ; ref bytes 0-7
michael@0 100 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
michael@0 101 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
michael@0 102
michael@0 103 movdqa xmm2, xmm5 ; keep a copy: mpsadbw overwrites its destination
michael@0 104 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0-3 vs ref windows starting at bytes 0..7
michael@0 105 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 vs ref windows starting at bytes 4..11
michael@0 106 paddw xmm5, xmm2 ; SADs of this row
michael@0 107
michael@0 108 paddw xmm1, xmm5 ; accumulate this row into the running totals
michael@0 109 %endif
michael@0 110 movq xmm0, MMWORD PTR [rsi + rax] ; second row: 8 src bytes one stride down
michael@0 111 movq xmm5, MMWORD PTR [rdi+ rdx] ; second row ref bytes 0-7
michael@0 112 movq xmm3, MMWORD PTR [rdi+ rdx+8] ; second row ref bytes 8-15
michael@0 113 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
michael@0 114
michael@0 115 lea rsi, [rsi+rax*2] ; advance src by two rows (row data is already in registers)
michael@0 116 lea rdi, [rdi+rdx*2] ; advance ref by two rows
michael@0 117
michael@0 118 movdqa xmm2, xmm5 ; keep a copy: mpsadbw overwrites its destination
michael@0 119 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0-3 vs ref windows starting at bytes 0..7
michael@0 120 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 vs ref windows starting at bytes 4..11
michael@0 121 paddw xmm5, xmm2 ; SADs of the second row
michael@0 122
michael@0 123 paddw xmm1, xmm5 ; accumulate the second row into the running totals
michael@0 124 %endmacro
michael@0 125
michael@0 126 %macro PROCESS_4X2X8 1 ; Adds to xmm1 (8 words) the 4-byte-wide SADs of two rows at 8 consecutive ref offsets. In: rsi=src, rdi=ref, rax=src stride, rdx=ref stride. %1 != 0 starts a fresh sum. Uses xmm0, xmm3, xmm5 as scratch; advances rsi/rdi by two rows.
michael@0 127 %if %1 ; first invocation: row 0 result is written straight into the accumulator xmm1
michael@0 128 movd xmm0, [rsi] ; 4 src bytes
michael@0 129 movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
michael@0 130 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15 (16 bytes are loaded; mpsadbw below uses bytes 0-10)
michael@0 131 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
michael@0 132
michael@0 133 mpsadbw xmm1, xmm0, 0x0 ; src bytes 0-3 vs the 8 ref windows starting at bytes 0..7
michael@0 134 %else ; later invocations: same row computation in xmm5, then added to xmm1
michael@0 135 movd xmm0, [rsi] ; 4 src bytes
michael@0 136 movq xmm5, MMWORD PTR [rdi] ; ref bytes 0-7
michael@0 137 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
michael@0 138 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
michael@0 139
michael@0 140 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0-3 vs ref windows starting at bytes 0..7
michael@0 141
michael@0 142 paddw xmm1, xmm5 ; accumulate this row into the running totals
michael@0 143 %endif
michael@0 144 movd xmm0, [rsi + rax] ; second row: 4 src bytes one stride down
michael@0 145 movq xmm5, MMWORD PTR [rdi+ rdx] ; second row ref bytes 0-7
michael@0 146 movq xmm3, MMWORD PTR [rdi+ rdx+8] ; second row ref bytes 8-15
michael@0 147 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
michael@0 148
michael@0 149 lea rsi, [rsi+rax*2] ; advance src by two rows (row data is already in registers)
michael@0 150 lea rdi, [rdi+rdx*2] ; advance ref by two rows
michael@0 151
michael@0 152 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0-3 vs ref windows starting at bytes 0..7
michael@0 153
michael@0 154 paddw xmm1, xmm5 ; accumulate the second row into the running totals
michael@0 155 %endmacro
michael@0 156
michael@0 157 %macro WRITE_AS_INTS 0 ; Zero-extends the 8 word totals in xmm1 to 32 bits and stores them (32 bytes) at the address in arg(4). Overwrites rdi, xmm0, xmm1, xmm2.
michael@0 158 mov rdi, arg(4) ;Results: destination pointer, 5th argument (arg() comes from the included ABI header)
michael@0 159 pxor xmm0, xmm0 ; xmm0 = 0, used as the high half of each widened element
michael@0 160 movdqa xmm2, xmm1 ; copy the totals so both halves can be unpacked
michael@0 161 punpcklwd xmm1, xmm0 ; words 0-3 -> four zero-extended dwords
michael@0 162 punpckhwd xmm2, xmm0 ; words 4-7 -> four zero-extended dwords
michael@0 163
michael@0 164 movdqa [rdi], xmm1 ; results 0-3 (aligned store: destination must be 16-byte aligned)
michael@0 165 movdqa [rdi + 16], xmm2 ; results 4-7
michael@0 166 %endmacro
michael@0 167
michael@0 168 ;void vp9_sad16x16x8_sse4(
michael@0 169 ; const unsigned char *src_ptr,
michael@0 170 ; int src_stride,
michael@0 171 ; const unsigned char *ref_ptr,
michael@0 172 ; int ref_stride,
michael@0 173 ; unsigned short *sad_array); NOTE(review): the code below stores eight 32-bit results (32 bytes) with aligned stores, so "unsigned short" looks stale - confirm against the C prototype
michael@0 174 global sym(vp9_sad16x16x8_sse4) PRIVATE
michael@0 175 sym(vp9_sad16x16x8_sse4): ; SAD of a 16-wide, 16-row src block against ref at 8 consecutive byte offsets (ref_ptr+0 .. ref_ptr+7)
michael@0 176 push rbp
michael@0 177 mov rbp, rsp
michael@0 178 SHADOW_ARGS_TO_STACK 5 ; macro from the included ABI header; presumably makes the 5 arguments reachable through arg(n) - confirm
michael@0 179 push rsi ; rsi/rdi are used as working pointers below and restored in the epilog
michael@0 180 push rdi
michael@0 181 ; end prolog
michael@0 182
michael@0 183 mov rsi, arg(0) ;src_ptr
michael@0 184 mov rdi, arg(2) ;ref_ptr
michael@0 185
michael@0 186 movsxd rax, dword ptr arg(1) ;src_stride, sign-extended from 32 to 64 bits
michael@0 187 movsxd rdx, dword ptr arg(3) ;ref_stride, sign-extended from 32 to 64 bits
michael@0 188
michael@0 189 PROCESS_16X2X8 1 ; rows 0-1, initializes the totals in xmm1
michael@0 190 PROCESS_16X2X8 0 ; rows 2-3
michael@0 191 PROCESS_16X2X8 0 ; rows 4-5
michael@0 192 PROCESS_16X2X8 0 ; rows 6-7
michael@0 193 PROCESS_16X2X8 0 ; rows 8-9
michael@0 194 PROCESS_16X2X8 0 ; rows 10-11
michael@0 195 PROCESS_16X2X8 0 ; rows 12-13
michael@0 196 PROCESS_16X2X8 0 ; rows 14-15
michael@0 197
michael@0 198 WRITE_AS_INTS ; widen the 8 word totals to dwords and store them at arg(4)
michael@0 199
michael@0 200 ; begin epilog
michael@0 201 pop rdi
michael@0 202 pop rsi
michael@0 203 UNSHADOW_ARGS
michael@0 204 pop rbp
michael@0 205 ret
michael@0 206
michael@0 207
michael@0 208 ;void vp9_sad16x8x8_sse4(
michael@0 209 ; const unsigned char *src_ptr,
michael@0 210 ; int src_stride,
michael@0 211 ; const unsigned char *ref_ptr,
michael@0 212 ; int ref_stride,
michael@0 213 ; unsigned short *sad_array NOTE(review): the code below stores eight 32-bit results (32 bytes) with aligned stores, so "unsigned short" looks stale - confirm against the C prototype
michael@0 214 ;);
michael@0 215 global sym(vp9_sad16x8x8_sse4) PRIVATE
michael@0 216 sym(vp9_sad16x8x8_sse4): ; SAD of a 16-wide, 8-row src block against ref at 8 consecutive byte offsets (ref_ptr+0 .. ref_ptr+7)
michael@0 217 push rbp
michael@0 218 mov rbp, rsp
michael@0 219 SHADOW_ARGS_TO_STACK 5 ; macro from the included ABI header; presumably makes the 5 arguments reachable through arg(n) - confirm
michael@0 220 push rsi ; rsi/rdi are used as working pointers below and restored in the epilog
michael@0 221 push rdi
michael@0 222 ; end prolog
michael@0 223
michael@0 224 mov rsi, arg(0) ;src_ptr
michael@0 225 mov rdi, arg(2) ;ref_ptr
michael@0 226
michael@0 227 movsxd rax, dword ptr arg(1) ;src_stride, sign-extended from 32 to 64 bits
michael@0 228 movsxd rdx, dword ptr arg(3) ;ref_stride, sign-extended from 32 to 64 bits
michael@0 229
michael@0 230 PROCESS_16X2X8 1 ; rows 0-1, initializes the totals in xmm1
michael@0 231 PROCESS_16X2X8 0 ; rows 2-3
michael@0 232 PROCESS_16X2X8 0 ; rows 4-5
michael@0 233 PROCESS_16X2X8 0 ; rows 6-7
michael@0 234
michael@0 235 WRITE_AS_INTS ; widen the 8 word totals to dwords and store them at arg(4)
michael@0 236
michael@0 237 ; begin epilog
michael@0 238 pop rdi
michael@0 239 pop rsi
michael@0 240 UNSHADOW_ARGS
michael@0 241 pop rbp
michael@0 242 ret
michael@0 243
michael@0 244
michael@0 245 ;void vp9_sad8x8x8_sse4(
michael@0 246 ; const unsigned char *src_ptr,
michael@0 247 ; int src_stride,
michael@0 248 ; const unsigned char *ref_ptr,
michael@0 249 ; int ref_stride,
michael@0 250 ; unsigned short *sad_array NOTE(review): the code below stores eight 32-bit results (32 bytes) with aligned stores, so "unsigned short" looks stale - confirm against the C prototype
michael@0 251 ;);
michael@0 252 global sym(vp9_sad8x8x8_sse4) PRIVATE
michael@0 253 sym(vp9_sad8x8x8_sse4): ; SAD of an 8-wide, 8-row src block against ref at 8 consecutive byte offsets (ref_ptr+0 .. ref_ptr+7)
michael@0 254 push rbp
michael@0 255 mov rbp, rsp
michael@0 256 SHADOW_ARGS_TO_STACK 5 ; macro from the included ABI header; presumably makes the 5 arguments reachable through arg(n) - confirm
michael@0 257 push rsi ; rsi/rdi are used as working pointers below and restored in the epilog
michael@0 258 push rdi
michael@0 259 ; end prolog
michael@0 260
michael@0 261 mov rsi, arg(0) ;src_ptr
michael@0 262 mov rdi, arg(2) ;ref_ptr
michael@0 263
michael@0 264 movsxd rax, dword ptr arg(1) ;src_stride, sign-extended from 32 to 64 bits
michael@0 265 movsxd rdx, dword ptr arg(3) ;ref_stride, sign-extended from 32 to 64 bits
michael@0 266
michael@0 267 PROCESS_8X2X8 1 ; rows 0-1, initializes the totals in xmm1
michael@0 268 PROCESS_8X2X8 0 ; rows 2-3
michael@0 269 PROCESS_8X2X8 0 ; rows 4-5
michael@0 270 PROCESS_8X2X8 0 ; rows 6-7
michael@0 271
michael@0 272 WRITE_AS_INTS ; widen the 8 word totals to dwords and store them at arg(4)
michael@0 273
michael@0 274 ; begin epilog
michael@0 275 pop rdi
michael@0 276 pop rsi
michael@0 277 UNSHADOW_ARGS
michael@0 278 pop rbp
michael@0 279 ret
michael@0 280
michael@0 281
michael@0 282 ;void vp9_sad8x16x8_sse4(
michael@0 283 ; const unsigned char *src_ptr,
michael@0 284 ; int src_stride,
michael@0 285 ; const unsigned char *ref_ptr,
michael@0 286 ; int ref_stride,
michael@0 287 ; unsigned short *sad_array NOTE(review): the code below stores eight 32-bit results (32 bytes) with aligned stores, so "unsigned short" looks stale - confirm against the C prototype
michael@0 288 ;);
michael@0 289 global sym(vp9_sad8x16x8_sse4) PRIVATE
michael@0 290 sym(vp9_sad8x16x8_sse4): ; SAD of an 8-wide, 16-row src block against ref at 8 consecutive byte offsets (ref_ptr+0 .. ref_ptr+7)
michael@0 291 push rbp
michael@0 292 mov rbp, rsp
michael@0 293 SHADOW_ARGS_TO_STACK 5 ; macro from the included ABI header; presumably makes the 5 arguments reachable through arg(n) - confirm
michael@0 294 push rsi ; rsi/rdi are used as working pointers below and restored in the epilog
michael@0 295 push rdi
michael@0 296 ; end prolog
michael@0 297
michael@0 298 mov rsi, arg(0) ;src_ptr
michael@0 299 mov rdi, arg(2) ;ref_ptr
michael@0 300
michael@0 301 movsxd rax, dword ptr arg(1) ;src_stride, sign-extended from 32 to 64 bits
michael@0 302 movsxd rdx, dword ptr arg(3) ;ref_stride, sign-extended from 32 to 64 bits
michael@0 303
michael@0 304 PROCESS_8X2X8 1 ; rows 0-1, initializes the totals in xmm1
michael@0 305 PROCESS_8X2X8 0 ; rows 2-3
michael@0 306 PROCESS_8X2X8 0 ; rows 4-5
michael@0 307 PROCESS_8X2X8 0 ; rows 6-7
michael@0 308 PROCESS_8X2X8 0 ; rows 8-9
michael@0 309 PROCESS_8X2X8 0 ; rows 10-11
michael@0 310 PROCESS_8X2X8 0 ; rows 12-13
michael@0 311 PROCESS_8X2X8 0 ; rows 14-15
michael@0 312
michael@0 313 WRITE_AS_INTS ; widen the 8 word totals to dwords and store them at arg(4)
michael@0 314
michael@0 315 ; begin epilog
michael@0 316 pop rdi
michael@0 317 pop rsi
michael@0 318 UNSHADOW_ARGS
michael@0 319 pop rbp
michael@0 320 ret
michael@0 321
michael@0 322
michael@0 323 ;void vp9_sad4x4x8_sse4(
michael@0 324 ; const unsigned char *src_ptr,
michael@0 325 ; int src_stride,
michael@0 326 ; const unsigned char *ref_ptr,
michael@0 327 ; int ref_stride,
michael@0 328 ; unsigned short *sad_array NOTE(review): the code below stores eight 32-bit results (32 bytes) with aligned stores, so "unsigned short" looks stale - confirm against the C prototype
michael@0 329 ;);
michael@0 330 global sym(vp9_sad4x4x8_sse4) PRIVATE
michael@0 331 sym(vp9_sad4x4x8_sse4): ; SAD of a 4-wide, 4-row src block against ref at 8 consecutive byte offsets (ref_ptr+0 .. ref_ptr+7)
michael@0 332 push rbp
michael@0 333 mov rbp, rsp
michael@0 334 SHADOW_ARGS_TO_STACK 5 ; macro from the included ABI header; presumably makes the 5 arguments reachable through arg(n) - confirm
michael@0 335 push rsi ; rsi/rdi are used as working pointers below and restored in the epilog
michael@0 336 push rdi
michael@0 337 ; end prolog
michael@0 338
michael@0 339 mov rsi, arg(0) ;src_ptr
michael@0 340 mov rdi, arg(2) ;ref_ptr
michael@0 341
michael@0 342 movsxd rax, dword ptr arg(1) ;src_stride, sign-extended from 32 to 64 bits
michael@0 343 movsxd rdx, dword ptr arg(3) ;ref_stride, sign-extended from 32 to 64 bits
michael@0 344
michael@0 345 PROCESS_4X2X8 1 ; rows 0-1, initializes the totals in xmm1
michael@0 346 PROCESS_4X2X8 0 ; rows 2-3
michael@0 347
michael@0 348 WRITE_AS_INTS ; widen the 8 word totals to dwords and store them at arg(4)
michael@0 349
michael@0 350 ; begin epilog
michael@0 351 pop rdi
michael@0 352 pop rsi
michael@0 353 UNSHADOW_ARGS
michael@0 354 pop rbp
michael@0 355 ret
michael@0 356
michael@0 357
michael@0 358
michael@0 359

mercurial