media/libvpx/vp8/common/x86/sad_sse4.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701, complying with the disk avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
;-----------------------------------------------------------------------
; PROCESS_16X2X8 first
;   Processes two 16-byte-wide rows. For each row it adds, into the eight
;   16-bit lanes of xmm1, the sum of absolute differences between the 16
;   bytes at [rsi] and the 16 bytes at [rdi + k], for k = 0..7 (lane k).
;   %1 != 0 : the first row's result initialises xmm1.
;   %1 == 0 : the first row's result is added to the totals already in xmm1.
;   In:       rsi / rax = row pointer and stride loaded with movdqa
;             (src_ptr / src_stride in the functions below);
;             rdi / rdx = row pointer and stride that is scanned at 8 offsets
;             (ref_ptr / ref_stride in the functions below).
;   Out:      xmm1 = running totals; rsi += 2*rax; rdi += 2*rdx.
;   Clobbers: xmm0, xmm2, xmm3, xmm4, xmm5.
;   Note:     the movdqa loads require [rsi] and [rsi + rax] to be 16-byte
;             aligned; 24 bytes are read from each [rdi] row.
;-----------------------------------------------------------------------
michael@0 14 %macro PROCESS_16X2X8 1
michael@0 15 %if %1
; First row, initialising form: the accumulator xmm1 doubles as the working
; register for bytes 0..15 of the [rdi] row; xmm3 gets bytes 8..23.
michael@0 16 movdqa xmm0, XMMWORD PTR [rsi]
michael@0 17 movq xmm1, MMWORD PTR [rdi]
michael@0 18 movq xmm3, MMWORD PTR [rdi+8]
michael@0 19 movq xmm2, MMWORD PTR [rdi+16]
michael@0 20 punpcklqdq xmm1, xmm3
michael@0 21 punpcklqdq xmm3, xmm2
michael@0 22
; mpsadbw imm 0x0: bytes 0..3 of xmm0 against the 4-byte windows of the
; destination starting at bytes 0..7. imm 0x5: bytes 4..7 of xmm0 against
; the windows starting at bytes 4..11. Added together they cover columns
; 0..7 of the row at the eight offsets.
michael@0 23 movdqa xmm2, xmm1
michael@0 24 mpsadbw xmm1, xmm0, 0x0
michael@0 25 mpsadbw xmm2, xmm0, 0x5
michael@0 26
; Bring bytes 8..15 of the [rsi] row down to bytes 0..7 for columns 8..15.
michael@0 27 psrldq xmm0, 8
michael@0 28
michael@0 29 movdqa xmm4, xmm3
michael@0 30 mpsadbw xmm3, xmm0, 0x0
michael@0 31 mpsadbw xmm4, xmm0, 0x5
michael@0 32
michael@0 33 paddw xmm1, xmm2
michael@0 34 paddw xmm1, xmm3
michael@0 35 paddw xmm1, xmm4
michael@0 36 %else
; First row, accumulating form: same computation with xmm5 as the working
; register, then added into xmm1.
michael@0 37 movdqa xmm0, XMMWORD PTR [rsi]
michael@0 38 movq xmm5, MMWORD PTR [rdi]
michael@0 39 movq xmm3, MMWORD PTR [rdi+8]
michael@0 40 movq xmm2, MMWORD PTR [rdi+16]
michael@0 41 punpcklqdq xmm5, xmm3
michael@0 42 punpcklqdq xmm3, xmm2
michael@0 43
michael@0 44 movdqa xmm2, xmm5
michael@0 45 mpsadbw xmm5, xmm0, 0x0
michael@0 46 mpsadbw xmm2, xmm0, 0x5
michael@0 47
michael@0 48 psrldq xmm0, 8
michael@0 49
michael@0 50 movdqa xmm4, xmm3
michael@0 51 mpsadbw xmm3, xmm0, 0x0
michael@0 52 mpsadbw xmm4, xmm0, 0x5
michael@0 53
michael@0 54 paddw xmm5, xmm2
michael@0 55 paddw xmm5, xmm3
michael@0 56 paddw xmm5, xmm4
michael@0 57
michael@0 58 paddw xmm1, xmm5
michael@0 59 %endif
; Second row (one stride further on), always accumulated into xmm1.
michael@0 60 movdqa xmm0, XMMWORD PTR [rsi + rax]
michael@0 61 movq xmm5, MMWORD PTR [rdi+ rdx]
michael@0 62 movq xmm3, MMWORD PTR [rdi+ rdx+8]
michael@0 63 movq xmm2, MMWORD PTR [rdi+ rdx+16]
michael@0 64 punpcklqdq xmm5, xmm3
michael@0 65 punpcklqdq xmm3, xmm2
michael@0 66
; Both rows are loaded by now, so step both pointers past them.
michael@0 67 lea rsi, [rsi+rax*2]
michael@0 68 lea rdi, [rdi+rdx*2]
michael@0 69
michael@0 70 movdqa xmm2, xmm5
michael@0 71 mpsadbw xmm5, xmm0, 0x0
michael@0 72 mpsadbw xmm2, xmm0, 0x5
michael@0 73
michael@0 74 psrldq xmm0, 8
michael@0 75 movdqa xmm4, xmm3
michael@0 76 mpsadbw xmm3, xmm0, 0x0
michael@0 77 mpsadbw xmm4, xmm0, 0x5
michael@0 78
michael@0 79 paddw xmm5, xmm2
michael@0 80 paddw xmm5, xmm3
michael@0 81 paddw xmm5, xmm4
michael@0 82
michael@0 83 paddw xmm1, xmm5
michael@0 84 %endmacro
michael@0 85
;-----------------------------------------------------------------------
; PROCESS_8X2X8 first
;   Processes two 8-byte-wide rows. For each row it adds, into the eight
;   16-bit lanes of xmm1, the sum of absolute differences between the 8
;   bytes at [rsi] and the 8 bytes at [rdi + k], for k = 0..7 (lane k).
;   %1 != 0 : the first row's result initialises xmm1.
;   %1 == 0 : the first row's result is added to the totals already in xmm1.
;   In:       rsi / rax = row pointer and stride of the 8-byte rows;
;             rdi / rdx = row pointer and stride of the rows scanned at
;             8 offsets (16 bytes are read from each of these rows).
;   Out:      xmm1 = running totals; rsi += 2*rax; rdi += 2*rdx.
;   Clobbers: xmm0, xmm2, xmm3, xmm5.
;-----------------------------------------------------------------------
michael@0 86 %macro PROCESS_8X2X8 1
michael@0 87 %if %1
; First row, initialising form: xmm1 is both working register and accumulator.
michael@0 88 movq xmm0, MMWORD PTR [rsi]
michael@0 89 movq xmm1, MMWORD PTR [rdi]
michael@0 90 movq xmm3, MMWORD PTR [rdi+8]
michael@0 91 punpcklqdq xmm1, xmm3
michael@0 92
; imm 0x0 handles bytes 0..3 of xmm0 (windows starting at bytes 0..7);
; imm 0x5 handles bytes 4..7 of xmm0 (windows starting at bytes 4..11).
michael@0 93 movdqa xmm2, xmm1
michael@0 94 mpsadbw xmm1, xmm0, 0x0
michael@0 95 mpsadbw xmm2, xmm0, 0x5
michael@0 96 paddw xmm1, xmm2
michael@0 97 %else
; First row, accumulating form: computed in xmm5, then added into xmm1.
michael@0 98 movq xmm0, MMWORD PTR [rsi]
michael@0 99 movq xmm5, MMWORD PTR [rdi]
michael@0 100 movq xmm3, MMWORD PTR [rdi+8]
michael@0 101 punpcklqdq xmm5, xmm3
michael@0 102
michael@0 103 movdqa xmm2, xmm5
michael@0 104 mpsadbw xmm5, xmm0, 0x0
michael@0 105 mpsadbw xmm2, xmm0, 0x5
michael@0 106 paddw xmm5, xmm2
michael@0 107
michael@0 108 paddw xmm1, xmm5
michael@0 109 %endif
; Second row (one stride further on), always accumulated into xmm1.
michael@0 110 movq xmm0, MMWORD PTR [rsi + rax]
michael@0 111 movq xmm5, MMWORD PTR [rdi+ rdx]
michael@0 112 movq xmm3, MMWORD PTR [rdi+ rdx+8]
michael@0 113 punpcklqdq xmm5, xmm3
michael@0 114
; Both rows are loaded by now, so step both pointers past them.
michael@0 115 lea rsi, [rsi+rax*2]
michael@0 116 lea rdi, [rdi+rdx*2]
michael@0 117
michael@0 118 movdqa xmm2, xmm5
michael@0 119 mpsadbw xmm5, xmm0, 0x0
michael@0 120 mpsadbw xmm2, xmm0, 0x5
michael@0 121 paddw xmm5, xmm2
michael@0 122
michael@0 123 paddw xmm1, xmm5
michael@0 124 %endmacro
michael@0 125
;-----------------------------------------------------------------------
; PROCESS_4X2X8 first
;   Processes two 4-byte-wide rows. For each row it adds, into the eight
;   16-bit lanes of xmm1, the sum of absolute differences between the 4
;   bytes at [rsi] and the 4 bytes at [rdi + k], for k = 0..7 (lane k).
;   %1 != 0 : the first row's result initialises xmm1.
;   %1 == 0 : the first row's result is added to the totals already in xmm1.
;   In:       rsi / rax = row pointer and stride of the 4-byte rows;
;             rdi / rdx = row pointer and stride of the rows scanned at
;             8 offsets (16 bytes are read from each of these rows).
;   Out:      xmm1 = running totals; rsi += 2*rax; rdi += 2*rdx.
;   Clobbers: xmm0, xmm3, xmm5.
;-----------------------------------------------------------------------
michael@0 126 %macro PROCESS_4X2X8 1
michael@0 127 %if %1
; First row, initialising form. A row is only 4 bytes wide, so a single
; mpsadbw (imm 0x0: bytes 0..3 of xmm0) covers it.
michael@0 128 movd xmm0, [rsi]
michael@0 129 movq xmm1, MMWORD PTR [rdi]
michael@0 130 movq xmm3, MMWORD PTR [rdi+8]
michael@0 131 punpcklqdq xmm1, xmm3
michael@0 132
michael@0 133 mpsadbw xmm1, xmm0, 0x0
michael@0 134 %else
; First row, accumulating form: computed in xmm5, then added into xmm1.
michael@0 135 movd xmm0, [rsi]
michael@0 136 movq xmm5, MMWORD PTR [rdi]
michael@0 137 movq xmm3, MMWORD PTR [rdi+8]
michael@0 138 punpcklqdq xmm5, xmm3
michael@0 139
michael@0 140 mpsadbw xmm5, xmm0, 0x0
michael@0 141
michael@0 142 paddw xmm1, xmm5
michael@0 143 %endif
; Second row (one stride further on), always accumulated into xmm1.
michael@0 144 movd xmm0, [rsi + rax]
michael@0 145 movq xmm5, MMWORD PTR [rdi+ rdx]
michael@0 146 movq xmm3, MMWORD PTR [rdi+ rdx+8]
michael@0 147 punpcklqdq xmm5, xmm3
michael@0 148
; Both rows are loaded by now, so step both pointers past them.
michael@0 149 lea rsi, [rsi+rax*2]
michael@0 150 lea rdi, [rdi+rdx*2]
michael@0 151
michael@0 152 mpsadbw xmm5, xmm0, 0x0
michael@0 153
michael@0 154 paddw xmm1, xmm5
michael@0 155 %endmacro
michael@0 156
michael@0 157
michael@0 158 ;void vp8_sad16x16x8_sse4(
michael@0 159 ; const unsigned char *src_ptr,
michael@0 160 ; int src_stride,
michael@0 161 ; const unsigned char *ref_ptr,
michael@0 162 ; int ref_stride,
michael@0 163 ; unsigned short *sad_array);
;
; Sums absolute differences over a 16-wide, 16-row block (eight
; PROCESS_16X2X8 invocations, two rows each) between src_ptr and ref_ptr + k
; for k = 0..7, and writes the eight 16-bit sums to sad_array[0..7].
; Alignment: src_ptr rows are loaded and sad_array is stored with movdqa, so
; both must be 16-byte aligned.
; Registers: rsi and rdi are saved and restored; rax, rdx and xmm0-xmm5 are
; overwritten.
; NOTE(review): sym(), PRIVATE, arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS
; are not defined in this file - presumably they come from the included
; vpx_ports/x86_abi_support.asm; confirm there.
michael@0 164 global sym(vp8_sad16x16x8_sse4) PRIVATE
michael@0 165 sym(vp8_sad16x16x8_sse4):
michael@0 166 push rbp
michael@0 167 mov rbp, rsp
michael@0 168 SHADOW_ARGS_TO_STACK 5
michael@0 169 push rsi
michael@0 170 push rdi
michael@0 171 ; end prolog
michael@0 172
michael@0 173 mov rsi, arg(0) ;src_ptr
michael@0 174 mov rdi, arg(2) ;ref_ptr
michael@0 175
; Strides are C ints: sign-extend to 64 bits before using them in addresses.
michael@0 176 movsxd rax, dword ptr arg(1) ;src_stride
michael@0 177 movsxd rdx, dword ptr arg(3) ;ref_stride
michael@0 178
; The first invocation initialises xmm1; the remaining seven accumulate.
michael@0 179 PROCESS_16X2X8 1
michael@0 180 PROCESS_16X2X8 0
michael@0 181 PROCESS_16X2X8 0
michael@0 182 PROCESS_16X2X8 0
michael@0 183 PROCESS_16X2X8 0
michael@0 184 PROCESS_16X2X8 0
michael@0 185 PROCESS_16X2X8 0
michael@0 186 PROCESS_16X2X8 0
michael@0 187
; rdi is no longer needed as the row pointer; reuse it for the output.
michael@0 188 mov rdi, arg(4) ;Results
michael@0 189 movdqa XMMWORD PTR [rdi], xmm1
michael@0 190
michael@0 191 ; begin epilog
michael@0 192 pop rdi
michael@0 193 pop rsi
michael@0 194 UNSHADOW_ARGS
michael@0 195 pop rbp
michael@0 196 ret
michael@0 197
michael@0 198
michael@0 199 ;void vp8_sad16x8x8_sse4(
michael@0 200 ; const unsigned char *src_ptr,
michael@0 201 ; int src_stride,
michael@0 202 ; const unsigned char *ref_ptr,
michael@0 203 ; int ref_stride,
michael@0 204 ; unsigned short *sad_array
michael@0 205 ;);
;
; Sums absolute differences over a 16-wide, 8-row block (four
; PROCESS_16X2X8 invocations, two rows each) between src_ptr and ref_ptr + k
; for k = 0..7, and writes the eight 16-bit sums to sad_array[0..7].
; Alignment: src_ptr rows are loaded and sad_array is stored with movdqa, so
; both must be 16-byte aligned.
; Registers: rsi and rdi are saved and restored; rax, rdx and xmm0-xmm5 are
; overwritten.
; NOTE(review): sym(), PRIVATE, arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS
; are not defined in this file - presumably they come from the included
; vpx_ports/x86_abi_support.asm; confirm there.
michael@0 206 global sym(vp8_sad16x8x8_sse4) PRIVATE
michael@0 207 sym(vp8_sad16x8x8_sse4):
michael@0 208 push rbp
michael@0 209 mov rbp, rsp
michael@0 210 SHADOW_ARGS_TO_STACK 5
michael@0 211 push rsi
michael@0 212 push rdi
michael@0 213 ; end prolog
michael@0 214
michael@0 215 mov rsi, arg(0) ;src_ptr
michael@0 216 mov rdi, arg(2) ;ref_ptr
michael@0 217
; Strides are C ints: sign-extend to 64 bits before using them in addresses.
michael@0 218 movsxd rax, dword ptr arg(1) ;src_stride
michael@0 219 movsxd rdx, dword ptr arg(3) ;ref_stride
michael@0 220
; The first invocation initialises xmm1; the remaining three accumulate.
michael@0 221 PROCESS_16X2X8 1
michael@0 222 PROCESS_16X2X8 0
michael@0 223 PROCESS_16X2X8 0
michael@0 224 PROCESS_16X2X8 0
michael@0 225
; rdi is no longer needed as the row pointer; reuse it for the output.
michael@0 226 mov rdi, arg(4) ;Results
michael@0 227 movdqa XMMWORD PTR [rdi], xmm1
michael@0 228
michael@0 229 ; begin epilog
michael@0 230 pop rdi
michael@0 231 pop rsi
michael@0 232 UNSHADOW_ARGS
michael@0 233 pop rbp
michael@0 234 ret
michael@0 235
michael@0 236
michael@0 237 ;void vp8_sad8x8x8_sse4(
michael@0 238 ; const unsigned char *src_ptr,
michael@0 239 ; int src_stride,
michael@0 240 ; const unsigned char *ref_ptr,
michael@0 241 ; int ref_stride,
michael@0 242 ; unsigned short *sad_array
michael@0 243 ;);
;
; Sums absolute differences over an 8-wide, 8-row block (four PROCESS_8X2X8
; invocations, two rows each) between src_ptr and ref_ptr + k for k = 0..7,
; and writes the eight 16-bit sums to sad_array[0..7].
; Alignment: sad_array is stored with movdqa, so it must be 16-byte aligned;
; the row loads use movq and impose no alignment requirement.
; Registers: rsi and rdi are saved and restored; rax, rdx, xmm0-xmm3 and
; xmm5 are overwritten.
; NOTE(review): sym(), PRIVATE, arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS
; are not defined in this file - presumably they come from the included
; vpx_ports/x86_abi_support.asm; confirm there.
michael@0 244 global sym(vp8_sad8x8x8_sse4) PRIVATE
michael@0 245 sym(vp8_sad8x8x8_sse4):
michael@0 246 push rbp
michael@0 247 mov rbp, rsp
michael@0 248 SHADOW_ARGS_TO_STACK 5
michael@0 249 push rsi
michael@0 250 push rdi
michael@0 251 ; end prolog
michael@0 252
michael@0 253 mov rsi, arg(0) ;src_ptr
michael@0 254 mov rdi, arg(2) ;ref_ptr
michael@0 255
; Strides are C ints: sign-extend to 64 bits before using them in addresses.
michael@0 256 movsxd rax, dword ptr arg(1) ;src_stride
michael@0 257 movsxd rdx, dword ptr arg(3) ;ref_stride
michael@0 258
; The first invocation initialises xmm1; the remaining three accumulate.
michael@0 259 PROCESS_8X2X8 1
michael@0 260 PROCESS_8X2X8 0
michael@0 261 PROCESS_8X2X8 0
michael@0 262 PROCESS_8X2X8 0
michael@0 263
; rdi is no longer needed as the row pointer; reuse it for the output.
michael@0 264 mov rdi, arg(4) ;Results
michael@0 265 movdqa XMMWORD PTR [rdi], xmm1
michael@0 266
michael@0 267 ; begin epilog
michael@0 268 pop rdi
michael@0 269 pop rsi
michael@0 270 UNSHADOW_ARGS
michael@0 271 pop rbp
michael@0 272 ret
michael@0 273
michael@0 274
michael@0 275 ;void vp8_sad8x16x8_sse4(
michael@0 276 ; const unsigned char *src_ptr,
michael@0 277 ; int src_stride,
michael@0 278 ; const unsigned char *ref_ptr,
michael@0 279 ; int ref_stride,
michael@0 280 ; unsigned short *sad_array
michael@0 281 ;);
;
; Sums absolute differences over an 8-wide, 16-row block (eight
; PROCESS_8X2X8 invocations, two rows each) between src_ptr and ref_ptr + k
; for k = 0..7, and writes the eight 16-bit sums to sad_array[0..7].
; Alignment: sad_array is stored with movdqa, so it must be 16-byte aligned;
; the row loads use movq and impose no alignment requirement.
; Registers: rsi and rdi are saved and restored; rax, rdx, xmm0-xmm3 and
; xmm5 are overwritten.
; NOTE(review): sym(), PRIVATE, arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS
; are not defined in this file - presumably they come from the included
; vpx_ports/x86_abi_support.asm; confirm there.
michael@0 282 global sym(vp8_sad8x16x8_sse4) PRIVATE
michael@0 283 sym(vp8_sad8x16x8_sse4):
michael@0 284 push rbp
michael@0 285 mov rbp, rsp
michael@0 286 SHADOW_ARGS_TO_STACK 5
michael@0 287 push rsi
michael@0 288 push rdi
michael@0 289 ; end prolog
michael@0 290
michael@0 291 mov rsi, arg(0) ;src_ptr
michael@0 292 mov rdi, arg(2) ;ref_ptr
michael@0 293
; Strides are C ints: sign-extend to 64 bits before using them in addresses.
michael@0 294 movsxd rax, dword ptr arg(1) ;src_stride
michael@0 295 movsxd rdx, dword ptr arg(3) ;ref_stride
michael@0 296
; The first invocation initialises xmm1; the remaining seven accumulate.
michael@0 297 PROCESS_8X2X8 1
michael@0 298 PROCESS_8X2X8 0
michael@0 299 PROCESS_8X2X8 0
michael@0 300 PROCESS_8X2X8 0
michael@0 301 PROCESS_8X2X8 0
michael@0 302 PROCESS_8X2X8 0
michael@0 303 PROCESS_8X2X8 0
michael@0 304 PROCESS_8X2X8 0
; rdi is no longer needed as the row pointer; reuse it for the output.
michael@0 305 mov rdi, arg(4) ;Results
michael@0 306 movdqa XMMWORD PTR [rdi], xmm1
michael@0 307
michael@0 308 ; begin epilog
michael@0 309 pop rdi
michael@0 310 pop rsi
michael@0 311 UNSHADOW_ARGS
michael@0 312 pop rbp
michael@0 313 ret
michael@0 314
michael@0 315
michael@0 316 ;void vp8_sad4x4x8_sse4(
michael@0 317 ; const unsigned char *src_ptr,
michael@0 318 ; int src_stride,
michael@0 319 ; const unsigned char *ref_ptr,
michael@0 320 ; int ref_stride,
michael@0 321 ; unsigned short *sad_array
michael@0 322 ;);
;
; Sums absolute differences over a 4-wide, 4-row block (two PROCESS_4X2X8
; invocations, two rows each) between src_ptr and ref_ptr + k for k = 0..7,
; and writes the eight 16-bit sums to sad_array[0..7].
; Alignment: sad_array is stored with movdqa, so it must be 16-byte aligned;
; the row loads use movd/movq and impose no alignment requirement.
; Registers: rsi and rdi are saved and restored; rax, rdx, xmm0, xmm1, xmm3
; and xmm5 are overwritten.
; NOTE(review): sym(), PRIVATE, arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS
; are not defined in this file - presumably they come from the included
; vpx_ports/x86_abi_support.asm; confirm there.
michael@0 323 global sym(vp8_sad4x4x8_sse4) PRIVATE
michael@0 324 sym(vp8_sad4x4x8_sse4):
michael@0 325 push rbp
michael@0 326 mov rbp, rsp
michael@0 327 SHADOW_ARGS_TO_STACK 5
michael@0 328 push rsi
michael@0 329 push rdi
michael@0 330 ; end prolog
michael@0 331
michael@0 332 mov rsi, arg(0) ;src_ptr
michael@0 333 mov rdi, arg(2) ;ref_ptr
michael@0 334
; Strides are C ints: sign-extend to 64 bits before using them in addresses.
michael@0 335 movsxd rax, dword ptr arg(1) ;src_stride
michael@0 336 movsxd rdx, dword ptr arg(3) ;ref_stride
michael@0 337
; The first invocation initialises xmm1; the second accumulates.
michael@0 338 PROCESS_4X2X8 1
michael@0 339 PROCESS_4X2X8 0
michael@0 340
; rdi is no longer needed as the row pointer; reuse it for the output.
michael@0 341 mov rdi, arg(4) ;Results
michael@0 342 movdqa XMMWORD PTR [rdi], xmm1
michael@0 343
michael@0 344 ; begin epilog
michael@0 345 pop rdi
michael@0 346 pop rsi
michael@0 347 UNSHADOW_ARGS
michael@0 348 pop rbp
michael@0 349 ret
michael@0 350
michael@0 351
michael@0 352
michael@0 353

mercurial