Thu, 15 Jan 2015 15:59:08 +0100
Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | ; |
michael@0 | 2 | ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
michael@0 | 3 | ; |
michael@0 | 4 | ; Use of this source code is governed by a BSD-style license |
michael@0 | 5 | ; that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | ; tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | ; in the file PATENTS. All contributing project authors may |
michael@0 | 8 | ; be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | ; |
michael@0 | 10 | |
michael@0 | 11 | %include "vpx_ports/x86_abi_support.asm" |
michael@0 | 12 | |
; STACK_FRAME_CREATE_X3 -- shared prologue for the 3-candidate SAD routines
; (and for vp8_sad16x16_sse3 / vp8_copy32xn_sse3 below).  Binds the symbolic
; names src_ptr / src_stride / ref_ptr / ref_stride / end_ptr / ret_var to
; registers for the current ABI and, on 32-bit, also loads the first four
; C arguments into those registers.
; NOTE: result_ptr, max_sad and height all alias the single 5th C argument;
; each function that uses this frame reads exactly one of the three names.
michael@0 | 13 | %macro STACK_FRAME_CREATE_X3 0 |
michael@0 | 14 | %if ABI_IS_32BIT |
michael@0 | 15 | %define src_ptr rsi |
michael@0 | 16 | %define src_stride rax |
michael@0 | 17 | %define ref_ptr rdi |
michael@0 | 18 | %define ref_stride rdx |
michael@0 | 19 | %define end_ptr rcx |
michael@0 | 20 | %define ret_var rbx |
michael@0 | 21 | %define result_ptr arg(4) |
michael@0 | 22 | %define max_sad arg(4) |
michael@0 | 23 | %define height dword ptr arg(4) |
; 32-bit: build a frame and save the callee-saved registers we will use.
michael@0 | 24 | push rbp |
michael@0 | 25 | mov rbp, rsp |
michael@0 | 26 | push rsi |
michael@0 | 27 | push rdi |
michael@0 | 28 | push rbx |
michael@0 | 29 | |
michael@0 | 30 | mov rsi, arg(0) ; src_ptr |
michael@0 | 31 | mov rdi, arg(2) ; ref_ptr |
michael@0 | 32 | |
; Strides arrive as 32-bit ints; sign-extend before using them in addressing.
michael@0 | 33 | movsxd rax, dword ptr arg(1) ; src_stride |
michael@0 | 34 | movsxd rdx, dword ptr arg(3) ; ref_stride |
michael@0 | 35 | %else |
michael@0 | 36 | %if LIBVPX_YASM_WIN64 |
; 64-bit Windows: args already in rcx/rdx/r8/r9; the 5th argument lives in a
; stack slot above the xmm save area reserved by SAVE_XMM.
michael@0 | 37 | SAVE_XMM 7, u |
michael@0 | 38 | %define src_ptr rcx |
michael@0 | 39 | %define src_stride rdx |
michael@0 | 40 | %define ref_ptr r8 |
michael@0 | 41 | %define ref_stride r9 |
michael@0 | 42 | %define end_ptr r10 |
michael@0 | 43 | %define ret_var r11 |
michael@0 | 44 | %define result_ptr [rsp+xmm_stack_space+8+4*8] |
michael@0 | 45 | %define max_sad [rsp+xmm_stack_space+8+4*8] |
michael@0 | 46 | %define height dword ptr [rsp+xmm_stack_space+8+4*8] |
michael@0 | 47 | %else |
; 64-bit SysV: args already in rdi/rsi/rdx/rcx; r8 carries the 5th argument.
michael@0 | 48 | %define src_ptr rdi |
michael@0 | 49 | %define src_stride rsi |
michael@0 | 50 | %define ref_ptr rdx |
michael@0 | 51 | %define ref_stride rcx |
michael@0 | 52 | %define end_ptr r9 |
michael@0 | 53 | %define ret_var r10 |
michael@0 | 54 | %define result_ptr r8 |
michael@0 | 55 | %define max_sad r8 |
michael@0 | 56 | %define height r8 |
michael@0 | 57 | %endif |
michael@0 | 58 | %endif |
michael@0 | 59 | |
michael@0 | 60 | %endmacro |
michael@0 | 61 | |
; STACK_FRAME_DESTROY_X3 -- epilogue matching STACK_FRAME_CREATE_X3.
; Each symbolic name is re-%defined to empty text so any use after this
; point expands to nothing, then the 32-bit callee-saved registers (or the
; Win64 xmm save area) are restored and control returns to the caller.
; Note the macro ends with `ret`: it terminates the function that used it.
michael@0 | 62 | %macro STACK_FRAME_DESTROY_X3 0 |
michael@0 | 63 | %define src_ptr |
michael@0 | 64 | %define src_stride |
michael@0 | 65 | %define ref_ptr |
michael@0 | 66 | %define ref_stride |
michael@0 | 67 | %define end_ptr |
michael@0 | 68 | %define ret_var |
michael@0 | 69 | %define result_ptr |
michael@0 | 70 | %define max_sad |
michael@0 | 71 | %define height |
michael@0 | 72 | |
michael@0 | 73 | %if ABI_IS_32BIT |
; Pops mirror the pushes in STACK_FRAME_CREATE_X3 (reverse order).
michael@0 | 74 | pop rbx |
michael@0 | 75 | pop rdi |
michael@0 | 76 | pop rsi |
michael@0 | 77 | pop rbp |
michael@0 | 78 | %else |
michael@0 | 79 | %if LIBVPX_YASM_WIN64 |
michael@0 | 80 | RESTORE_XMM |
michael@0 | 81 | %endif |
michael@0 | 82 | %endif |
michael@0 | 83 | ret |
michael@0 | 84 | %endmacro |
michael@0 | 85 | |
; STACK_FRAME_CREATE_X4 -- prologue for the 4-candidate (x4d) SAD routines.
; Like STACK_FRAME_CREATE_X3, but arg(2) is a base pointer to an array of
; four reference pointers, which are loaded into r0_ptr..r3_ptr via
; LOAD_X4_ADDRESSES.
michael@0 | 86 | %macro STACK_FRAME_CREATE_X4 0 |
michael@0 | 87 | %if ABI_IS_32BIT |
michael@0 | 88 | %define src_ptr rsi |
michael@0 | 89 | %define src_stride rax |
michael@0 | 90 | %define r0_ptr rcx |
michael@0 | 91 | %define r1_ptr rdx |
michael@0 | 92 | %define r2_ptr rbx |
michael@0 | 93 | %define r3_ptr rdi |
michael@0 | 94 | %define ref_stride rbp |
michael@0 | 95 | %define result_ptr arg(4) |
michael@0 | 96 | push rbp |
michael@0 | 97 | mov rbp, rsp |
michael@0 | 98 | push rsi |
michael@0 | 99 | push rdi |
michael@0 | 100 | push rbx |
michael@0 | 101 | |
; rbp is saved a second time because it is about to be reused as ref_stride;
; the x4d functions pop it again before touching any rbp-relative arg().
michael@0 | 102 | push rbp |
michael@0 | 103 | mov rdi, arg(2) ; ref_ptr_base |
michael@0 | 104 | |
; Note rdi is both the source of the pointer array and the last destination
; (r3_ptr); its original value is consumed by the final load.
michael@0 | 105 | LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi |
michael@0 | 106 | |
michael@0 | 107 | mov rsi, arg(0) ; src_ptr |
michael@0 | 108 | |
michael@0 | 109 | movsxd rbx, dword ptr arg(1) ; src_stride |
michael@0 | 110 | movsxd rbp, dword ptr arg(3) ; ref_stride |
michael@0 | 111 | |
; LOAD_X4_ADDRESSES left r2 in rax and src_stride landed in rbx; swap so the
; %defines above (src_stride=rax, r2_ptr=rbx) hold.
michael@0 | 112 | xchg rbx, rax |
michael@0 | 113 | %else |
michael@0 | 114 | %if LIBVPX_YASM_WIN64 |
michael@0 | 115 | SAVE_XMM 7, u |
michael@0 | 116 | %define src_ptr rcx |
michael@0 | 117 | %define src_stride rdx |
michael@0 | 118 | %define r0_ptr rsi |
michael@0 | 119 | %define r1_ptr r10 |
michael@0 | 120 | %define r2_ptr r11 |
michael@0 | 121 | %define r3_ptr r8 |
michael@0 | 122 | %define ref_stride r9 |
michael@0 | 123 | %define result_ptr [rsp+xmm_stack_space+16+4*8] |
; rsi is callee-saved on Win64, so save it before using it as r0_ptr.
michael@0 | 124 | push rsi |
michael@0 | 125 | |
michael@0 | 126 | LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr |
michael@0 | 127 | %else |
michael@0 | 128 | %define src_ptr rdi |
michael@0 | 129 | %define src_stride rsi |
michael@0 | 130 | %define r0_ptr r9 |
michael@0 | 131 | %define r1_ptr r10 |
michael@0 | 132 | %define r2_ptr r11 |
michael@0 | 133 | %define r3_ptr rdx |
michael@0 | 134 | %define ref_stride rcx |
michael@0 | 135 | %define result_ptr r8 |
michael@0 | 136 | |
michael@0 | 137 | LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr |
michael@0 | 138 | |
michael@0 | 139 | %endif |
michael@0 | 140 | %endif |
michael@0 | 141 | %endmacro |
michael@0 | 142 | |
; STACK_FRAME_DESTROY_X4 -- epilogue matching STACK_FRAME_CREATE_X4.
; Clears the symbolic names, restores saved registers per ABI, and returns.
; (On 32-bit the extra `push rbp` from the prologue is popped by the caller
; before it reads result_ptr, so only the four frame pops remain here.)
michael@0 | 143 | %macro STACK_FRAME_DESTROY_X4 0 |
michael@0 | 144 | %define src_ptr |
michael@0 | 145 | %define src_stride |
michael@0 | 146 | %define r0_ptr |
michael@0 | 147 | %define r1_ptr |
michael@0 | 148 | %define r2_ptr |
michael@0 | 149 | %define r3_ptr |
michael@0 | 150 | %define ref_stride |
michael@0 | 151 | %define result_ptr |
michael@0 | 152 | |
michael@0 | 153 | %if ABI_IS_32BIT |
michael@0 | 154 | pop rbx |
michael@0 | 155 | pop rdi |
michael@0 | 156 | pop rsi |
michael@0 | 157 | pop rbp |
michael@0 | 158 | %else |
michael@0 | 159 | %if LIBVPX_YASM_WIN64 |
; Undo the `push rsi` from STACK_FRAME_CREATE_X4 before restoring xmm regs.
michael@0 | 160 | pop rsi |
michael@0 | 161 | RESTORE_XMM |
michael@0 | 162 | %endif |
michael@0 | 163 | %endif |
michael@0 | 164 | ret |
michael@0 | 165 | %endmacro |
michael@0 | 166 | |
; PROCESS_16X2X3 phase, src, ref, src_stride, ref_stride
; Accumulates SADs for TWO 16-byte-wide rows against the reference block at
; byte offsets 0, +1 and +2 (three horizontal candidates).  Running sums are
; kept in xmm5/xmm6/xmm7 respectively.
;   phase 0: first row pair  -- initialises the accumulators;
;   phase 1: middle row pair -- accumulates and advances both pointers;
;   phase 2: last row pair   -- accumulates without advancing the pointers.
michael@0 | 167 | %macro PROCESS_16X2X3 5 |
michael@0 | 168 | %if %1==0 |
; lddqu performs an unaligned 16-byte load (the +1/+2 refs are never aligned).
michael@0 | 169 | movdqa xmm0, XMMWORD PTR [%2] |
michael@0 | 170 | lddqu xmm5, XMMWORD PTR [%3] |
michael@0 | 171 | lddqu xmm6, XMMWORD PTR [%3+1] |
michael@0 | 172 | lddqu xmm7, XMMWORD PTR [%3+2] |
michael@0 | 173 | |
michael@0 | 174 | psadbw xmm5, xmm0 |
michael@0 | 175 | psadbw xmm6, xmm0 |
michael@0 | 176 | psadbw xmm7, xmm0 |
michael@0 | 177 | %else |
michael@0 | 178 | movdqa xmm0, XMMWORD PTR [%2] |
michael@0 | 179 | lddqu xmm1, XMMWORD PTR [%3] |
michael@0 | 180 | lddqu xmm2, XMMWORD PTR [%3+1] |
michael@0 | 181 | lddqu xmm3, XMMWORD PTR [%3+2] |
michael@0 | 182 | |
michael@0 | 183 | psadbw xmm1, xmm0 |
michael@0 | 184 | psadbw xmm2, xmm0 |
michael@0 | 185 | psadbw xmm3, xmm0 |
michael@0 | 186 | |
michael@0 | 187 | paddw xmm5, xmm1 |
michael@0 | 188 | paddw xmm6, xmm2 |
michael@0 | 189 | paddw xmm7, xmm3 |
michael@0 | 190 | %endif |
; Second row of the pair (one stride below the first).
michael@0 | 191 | movdqa xmm0, XMMWORD PTR [%2+%4] |
michael@0 | 192 | lddqu xmm1, XMMWORD PTR [%3+%5] |
michael@0 | 193 | lddqu xmm2, XMMWORD PTR [%3+%5+1] |
michael@0 | 194 | lddqu xmm3, XMMWORD PTR [%3+%5+2] |
michael@0 | 195 | |
michael@0 | 196 | %if %1==0 || %1==1 |
michael@0 | 197 | lea %2, [%2+%4*2] |
michael@0 | 198 | lea %3, [%3+%5*2] |
michael@0 | 199 | %endif |
michael@0 | 200 | |
michael@0 | 201 | psadbw xmm1, xmm0 |
michael@0 | 202 | psadbw xmm2, xmm0 |
michael@0 | 203 | psadbw xmm3, xmm0 |
michael@0 | 204 | |
michael@0 | 205 | paddw xmm5, xmm1 |
michael@0 | 206 | paddw xmm6, xmm2 |
michael@0 | 207 | paddw xmm7, xmm3 |
michael@0 | 208 | %endmacro |
michael@0 | 209 | |
; PROCESS_8X2X3 phase, src, ref, src_stride, ref_stride
; MMX twin of PROCESS_16X2X3 for 8-byte-wide rows: accumulates SADs for two
; rows against the reference at offsets 0/+1/+2, with running sums in
; mm5/mm6/mm7.  Same phase convention (0 = init, 1 = accumulate + advance,
; 2 = accumulate only).
michael@0 | 210 | %macro PROCESS_8X2X3 5 |
michael@0 | 211 | %if %1==0 |
michael@0 | 212 | movq mm0, QWORD PTR [%2] |
michael@0 | 213 | movq mm5, QWORD PTR [%3] |
michael@0 | 214 | movq mm6, QWORD PTR [%3+1] |
michael@0 | 215 | movq mm7, QWORD PTR [%3+2] |
michael@0 | 216 | |
michael@0 | 217 | psadbw mm5, mm0 |
michael@0 | 218 | psadbw mm6, mm0 |
michael@0 | 219 | psadbw mm7, mm0 |
michael@0 | 220 | %else |
michael@0 | 221 | movq mm0, QWORD PTR [%2] |
michael@0 | 222 | movq mm1, QWORD PTR [%3] |
michael@0 | 223 | movq mm2, QWORD PTR [%3+1] |
michael@0 | 224 | movq mm3, QWORD PTR [%3+2] |
michael@0 | 225 | |
michael@0 | 226 | psadbw mm1, mm0 |
michael@0 | 227 | psadbw mm2, mm0 |
michael@0 | 228 | psadbw mm3, mm0 |
michael@0 | 229 | |
michael@0 | 230 | paddw mm5, mm1 |
michael@0 | 231 | paddw mm6, mm2 |
michael@0 | 232 | paddw mm7, mm3 |
michael@0 | 233 | %endif |
; Second row of the pair (one stride below the first).
michael@0 | 234 | movq mm0, QWORD PTR [%2+%4] |
michael@0 | 235 | movq mm1, QWORD PTR [%3+%5] |
michael@0 | 236 | movq mm2, QWORD PTR [%3+%5+1] |
michael@0 | 237 | movq mm3, QWORD PTR [%3+%5+2] |
michael@0 | 238 | |
michael@0 | 239 | %if %1==0 || %1==1 |
michael@0 | 240 | lea %2, [%2+%4*2] |
michael@0 | 241 | lea %3, [%3+%5*2] |
michael@0 | 242 | %endif |
michael@0 | 243 | |
michael@0 | 244 | psadbw mm1, mm0 |
michael@0 | 245 | psadbw mm2, mm0 |
michael@0 | 246 | psadbw mm3, mm0 |
michael@0 | 247 | |
michael@0 | 248 | paddw mm5, mm1 |
michael@0 | 249 | paddw mm6, mm2 |
michael@0 | 250 | paddw mm7, mm3 |
michael@0 | 251 | %endmacro |
michael@0 | 252 | |
; LOAD_X4_ADDRESSES base, d0, d1, d2, d3
; Loads four pointers from the array at %1 into %2..%5.  %1 may also be one
; of the destination registers (the 32-bit X4 prologue passes rdi as both
; base and last destination); it is only read before the final load.
michael@0 | 253 | %macro LOAD_X4_ADDRESSES 5 |
michael@0 | 254 | mov %2, [%1+REG_SZ_BYTES*0] |
michael@0 | 255 | mov %3, [%1+REG_SZ_BYTES*1] |
michael@0 | 256 | |
michael@0 | 257 | mov %4, [%1+REG_SZ_BYTES*2] |
michael@0 | 258 | mov %5, [%1+REG_SZ_BYTES*3] |
michael@0 | 259 | %endmacro |
michael@0 | 260 | |
; PROCESS_16X2X4 phase, src, ref0, ref1, ref2, ref3, src_stride, ref_stride
; Accumulates SADs for two 16-byte-wide rows against FOUR independent
; reference blocks, with running sums in xmm4/xmm5/xmm6/xmm7.  Same phase
; convention as PROCESS_16X2X3 (0 = init, 1 = accumulate + advance,
; 2 = accumulate only).  The fourth reference load is interleaved with the
; adds, presumably to hide load latency with only xmm0-xmm3 as scratch.
michael@0 | 261 | %macro PROCESS_16X2X4 8 |
michael@0 | 262 | %if %1==0 |
michael@0 | 263 | movdqa xmm0, XMMWORD PTR [%2] |
michael@0 | 264 | lddqu xmm4, XMMWORD PTR [%3] |
michael@0 | 265 | lddqu xmm5, XMMWORD PTR [%4] |
michael@0 | 266 | lddqu xmm6, XMMWORD PTR [%5] |
michael@0 | 267 | lddqu xmm7, XMMWORD PTR [%6] |
michael@0 | 268 | |
michael@0 | 269 | psadbw xmm4, xmm0 |
michael@0 | 270 | psadbw xmm5, xmm0 |
michael@0 | 271 | psadbw xmm6, xmm0 |
michael@0 | 272 | psadbw xmm7, xmm0 |
michael@0 | 273 | %else |
michael@0 | 274 | movdqa xmm0, XMMWORD PTR [%2] |
michael@0 | 275 | lddqu xmm1, XMMWORD PTR [%3] |
michael@0 | 276 | lddqu xmm2, XMMWORD PTR [%4] |
michael@0 | 277 | lddqu xmm3, XMMWORD PTR [%5] |
michael@0 | 278 | |
michael@0 | 279 | psadbw xmm1, xmm0 |
michael@0 | 280 | psadbw xmm2, xmm0 |
michael@0 | 281 | psadbw xmm3, xmm0 |
michael@0 | 282 | |
; xmm1 is recycled for the fourth reference once its SAD is folded in.
michael@0 | 283 | paddw xmm4, xmm1 |
michael@0 | 284 | lddqu xmm1, XMMWORD PTR [%6] |
michael@0 | 285 | paddw xmm5, xmm2 |
michael@0 | 286 | paddw xmm6, xmm3 |
michael@0 | 287 | |
michael@0 | 288 | psadbw xmm1, xmm0 |
michael@0 | 289 | paddw xmm7, xmm1 |
michael@0 | 290 | %endif |
; Second row of the pair (one stride below the first).
michael@0 | 291 | movdqa xmm0, XMMWORD PTR [%2+%7] |
michael@0 | 292 | lddqu xmm1, XMMWORD PTR [%3+%8] |
michael@0 | 293 | lddqu xmm2, XMMWORD PTR [%4+%8] |
michael@0 | 294 | lddqu xmm3, XMMWORD PTR [%5+%8] |
michael@0 | 295 | |
michael@0 | 296 | psadbw xmm1, xmm0 |
michael@0 | 297 | psadbw xmm2, xmm0 |
michael@0 | 298 | psadbw xmm3, xmm0 |
michael@0 | 299 | |
michael@0 | 300 | paddw xmm4, xmm1 |
michael@0 | 301 | lddqu xmm1, XMMWORD PTR [%6+%8] |
michael@0 | 302 | paddw xmm5, xmm2 |
michael@0 | 303 | paddw xmm6, xmm3 |
michael@0 | 304 | |
michael@0 | 305 | %if %1==0 || %1==1 |
michael@0 | 306 | lea %2, [%2+%7*2] |
michael@0 | 307 | lea %3, [%3+%8*2] |
michael@0 | 308 | |
michael@0 | 309 | lea %4, [%4+%8*2] |
michael@0 | 310 | lea %5, [%5+%8*2] |
michael@0 | 311 | |
michael@0 | 312 | lea %6, [%6+%8*2] |
michael@0 | 313 | %endif |
michael@0 | 314 | psadbw xmm1, xmm0 |
michael@0 | 315 | paddw xmm7, xmm1 |
michael@0 | 316 | |
michael@0 | 317 | %endmacro |
michael@0 | 318 | |
; PROCESS_8X2X4 phase, src, ref0, ref1, ref2, ref3, src_stride, ref_stride
; MMX twin of PROCESS_16X2X4 for 8-byte-wide rows: SADs for two rows against
; four reference blocks, running sums in mm4/mm5/mm6/mm7.  Same phase
; convention (0 = init, 1 = accumulate + advance, 2 = accumulate only).
michael@0 | 319 | %macro PROCESS_8X2X4 8 |
michael@0 | 320 | %if %1==0 |
michael@0 | 321 | movq mm0, QWORD PTR [%2] |
michael@0 | 322 | movq mm4, QWORD PTR [%3] |
michael@0 | 323 | movq mm5, QWORD PTR [%4] |
michael@0 | 324 | movq mm6, QWORD PTR [%5] |
michael@0 | 325 | movq mm7, QWORD PTR [%6] |
michael@0 | 326 | |
michael@0 | 327 | psadbw mm4, mm0 |
michael@0 | 328 | psadbw mm5, mm0 |
michael@0 | 329 | psadbw mm6, mm0 |
michael@0 | 330 | psadbw mm7, mm0 |
michael@0 | 331 | %else |
michael@0 | 332 | movq mm0, QWORD PTR [%2] |
michael@0 | 333 | movq mm1, QWORD PTR [%3] |
michael@0 | 334 | movq mm2, QWORD PTR [%4] |
michael@0 | 335 | movq mm3, QWORD PTR [%5] |
michael@0 | 336 | |
michael@0 | 337 | psadbw mm1, mm0 |
michael@0 | 338 | psadbw mm2, mm0 |
michael@0 | 339 | psadbw mm3, mm0 |
michael@0 | 340 | |
; mm1 is recycled for the fourth reference once its SAD is folded in.
michael@0 | 341 | paddw mm4, mm1 |
michael@0 | 342 | movq mm1, QWORD PTR [%6] |
michael@0 | 343 | paddw mm5, mm2 |
michael@0 | 344 | paddw mm6, mm3 |
michael@0 | 345 | |
michael@0 | 346 | psadbw mm1, mm0 |
michael@0 | 347 | paddw mm7, mm1 |
michael@0 | 348 | %endif |
; Second row of the pair (one stride below the first).
michael@0 | 349 | movq mm0, QWORD PTR [%2+%7] |
michael@0 | 350 | movq mm1, QWORD PTR [%3+%8] |
michael@0 | 351 | movq mm2, QWORD PTR [%4+%8] |
michael@0 | 352 | movq mm3, QWORD PTR [%5+%8] |
michael@0 | 353 | |
michael@0 | 354 | psadbw mm1, mm0 |
michael@0 | 355 | psadbw mm2, mm0 |
michael@0 | 356 | psadbw mm3, mm0 |
michael@0 | 357 | |
michael@0 | 358 | paddw mm4, mm1 |
michael@0 | 359 | movq mm1, QWORD PTR [%6+%8] |
michael@0 | 360 | paddw mm5, mm2 |
michael@0 | 361 | paddw mm6, mm3 |
michael@0 | 362 | |
michael@0 | 363 | %if %1==0 || %1==1 |
michael@0 | 364 | lea %2, [%2+%7*2] |
michael@0 | 365 | lea %3, [%3+%8*2] |
michael@0 | 366 | |
michael@0 | 367 | lea %4, [%4+%8*2] |
michael@0 | 368 | lea %5, [%5+%8*2] |
michael@0 | 369 | |
michael@0 | 370 | lea %6, [%6+%8*2] |
michael@0 | 371 | %endif |
michael@0 | 372 | psadbw mm1, mm0 |
michael@0 | 373 | paddw mm7, mm1 |
michael@0 | 374 | |
michael@0 | 375 | %endmacro |
michael@0 | 376 | |
michael@0 | 377 | ;void vp8_sad16x16x3_sse3( |
michael@0 | 378 | ; unsigned char *src_ptr, |
michael@0 | 379 | ; int src_stride, |
michael@0 | 380 | ; unsigned char *ref_ptr, |
michael@0 | 381 | ; int ref_stride, |
michael@0 | 382 | ; int *results) |
; Computes 16x16 SADs for three reference candidates (ref_ptr+0/+1/+2) and
; stores the three sums to results[0..2].  8 macro calls x 2 rows = 16 rows.
michael@0 | 383 | global sym(vp8_sad16x16x3_sse3) PRIVATE |
michael@0 | 384 | sym(vp8_sad16x16x3_sse3): |
michael@0 | 385 | |
michael@0 | 386 | STACK_FRAME_CREATE_X3 |
michael@0 | 387 | |
michael@0 | 388 | PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 389 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 390 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 391 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 392 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 393 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 394 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 395 | PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 396 | |
michael@0 | 397 | mov rcx, result_ptr |
michael@0 | 398 | |
; Each accumulator holds two partial sums (one per 64-bit psadbw lane);
; add the high qword onto the low and store the low dword.
michael@0 | 399 | movq xmm0, xmm5 |
michael@0 | 400 | psrldq xmm5, 8 |
michael@0 | 401 | |
michael@0 | 402 | paddw xmm0, xmm5 |
michael@0 | 403 | movd [rcx], xmm0 |
michael@0 | 404 | ;- |
michael@0 | 405 | movq xmm0, xmm6 |
michael@0 | 406 | psrldq xmm6, 8 |
michael@0 | 407 | |
michael@0 | 408 | paddw xmm0, xmm6 |
michael@0 | 409 | movd [rcx+4], xmm0 |
michael@0 | 410 | ;- |
michael@0 | 411 | movq xmm0, xmm7 |
michael@0 | 412 | psrldq xmm7, 8 |
michael@0 | 413 | |
michael@0 | 414 | paddw xmm0, xmm7 |
michael@0 | 415 | movd [rcx+8], xmm0 |
michael@0 | 416 | |
michael@0 | 417 | STACK_FRAME_DESTROY_X3 |
michael@0 | 418 | |
michael@0 | 419 | ;void vp8_sad16x8x3_sse3( |
michael@0 | 420 | ; unsigned char *src_ptr, |
michael@0 | 421 | ; int src_stride, |
michael@0 | 422 | ; unsigned char *ref_ptr, |
michael@0 | 423 | ; int ref_stride, |
michael@0 | 424 | ; int *results) |
; 16x8 variant of vp8_sad16x16x3_sse3: 4 macro calls x 2 rows = 8 rows.
michael@0 | 425 | global sym(vp8_sad16x8x3_sse3) PRIVATE |
michael@0 | 426 | sym(vp8_sad16x8x3_sse3): |
michael@0 | 427 | |
michael@0 | 428 | STACK_FRAME_CREATE_X3 |
michael@0 | 429 | |
michael@0 | 430 | PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 431 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 432 | PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 433 | PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 434 | |
michael@0 | 435 | mov rcx, result_ptr |
michael@0 | 436 | |
; Fold the two psadbw lanes of each accumulator and store results[0..2].
michael@0 | 437 | movq xmm0, xmm5 |
michael@0 | 438 | psrldq xmm5, 8 |
michael@0 | 439 | |
michael@0 | 440 | paddw xmm0, xmm5 |
michael@0 | 441 | movd [rcx], xmm0 |
michael@0 | 442 | ;- |
michael@0 | 443 | movq xmm0, xmm6 |
michael@0 | 444 | psrldq xmm6, 8 |
michael@0 | 445 | |
michael@0 | 446 | paddw xmm0, xmm6 |
michael@0 | 447 | movd [rcx+4], xmm0 |
michael@0 | 448 | ;- |
michael@0 | 449 | movq xmm0, xmm7 |
michael@0 | 450 | psrldq xmm7, 8 |
michael@0 | 451 | |
michael@0 | 452 | paddw xmm0, xmm7 |
michael@0 | 453 | movd [rcx+8], xmm0 |
michael@0 | 454 | |
michael@0 | 455 | STACK_FRAME_DESTROY_X3 |
michael@0 | 456 | |
michael@0 | 457 | ;void vp8_sad8x16x3_sse3( |
michael@0 | 458 | ; unsigned char *src_ptr, |
michael@0 | 459 | ; int src_stride, |
michael@0 | 460 | ; unsigned char *ref_ptr, |
michael@0 | 461 | ; int ref_stride, |
michael@0 | 462 | ; int *results) |
; 8x16 three-candidate SAD using the MMX macro; sums end up in mm5/mm6/mm7
; (each 8-wide psadbw sum fits entirely in the low dword of the mm register).
michael@0 | 463 | global sym(vp8_sad8x16x3_sse3) PRIVATE |
michael@0 | 464 | sym(vp8_sad8x16x3_sse3): |
michael@0 | 465 | |
michael@0 | 466 | STACK_FRAME_CREATE_X3 |
michael@0 | 467 | |
michael@0 | 468 | PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 469 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 470 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 471 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 472 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 473 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 474 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 475 | PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 476 | |
michael@0 | 477 | mov rcx, result_ptr |
michael@0 | 478 | |
; Pack results[0] and results[1] into one qword store, then results[2].
michael@0 | 479 | punpckldq mm5, mm6 |
michael@0 | 480 | |
michael@0 | 481 | movq [rcx], mm5 |
michael@0 | 482 | movd [rcx+8], mm7 |
michael@0 | 483 | |
michael@0 | 484 | STACK_FRAME_DESTROY_X3 |
michael@0 | 485 | |
michael@0 | 486 | ;void vp8_sad8x8x3_sse3( |
michael@0 | 487 | ; unsigned char *src_ptr, |
michael@0 | 488 | ; int src_stride, |
michael@0 | 489 | ; unsigned char *ref_ptr, |
michael@0 | 490 | ; int ref_stride, |
michael@0 | 491 | ; int *results) |
; 8x8 variant of vp8_sad8x16x3_sse3: 4 macro calls x 2 rows = 8 rows.
michael@0 | 492 | global sym(vp8_sad8x8x3_sse3) PRIVATE |
michael@0 | 493 | sym(vp8_sad8x8x3_sse3): |
michael@0 | 494 | |
michael@0 | 495 | STACK_FRAME_CREATE_X3 |
michael@0 | 496 | |
michael@0 | 497 | PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 498 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 499 | PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 500 | PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride |
michael@0 | 501 | |
michael@0 | 502 | mov rcx, result_ptr |
michael@0 | 503 | |
; Pack results[0] and results[1] into one qword store, then results[2].
michael@0 | 504 | punpckldq mm5, mm6 |
michael@0 | 505 | |
michael@0 | 506 | movq [rcx], mm5 |
michael@0 | 507 | movd [rcx+8], mm7 |
michael@0 | 508 | |
michael@0 | 509 | STACK_FRAME_DESTROY_X3 |
michael@0 | 510 | |
michael@0 | 511 | ;void vp8_sad4x4x3_sse3( |
michael@0 | 512 | ; unsigned char *src_ptr, |
michael@0 | 513 | ; int src_stride, |
michael@0 | 514 | ; unsigned char *ref_ptr, |
michael@0 | 515 | ; int ref_stride, |
michael@0 | 516 | ; int *results) |
; 4x4 three-candidate SAD, written inline (no macro).  Trick: two 4-byte
; rows are interleaved with punpcklbw into a single 8-byte value -- the same
; interleave is applied to src and ref, so one psadbw sums both rows at once.
; Candidate sums: mm1 (+0), mm4->mm3 (+1), mm5->mm7 (+2).
michael@0 | 517 | global sym(vp8_sad4x4x3_sse3) PRIVATE |
michael@0 | 518 | sym(vp8_sad4x4x3_sse3): |
michael@0 | 519 | |
michael@0 | 520 | STACK_FRAME_CREATE_X3 |
michael@0 | 521 | |
; Rows 0-1: src and the +0 candidate.
michael@0 | 522 | movd mm0, DWORD PTR [src_ptr] |
michael@0 | 523 | movd mm1, DWORD PTR [ref_ptr] |
michael@0 | 524 | |
michael@0 | 525 | movd mm2, DWORD PTR [src_ptr+src_stride] |
michael@0 | 526 | movd mm3, DWORD PTR [ref_ptr+ref_stride] |
michael@0 | 527 | |
michael@0 | 528 | punpcklbw mm0, mm2 |
michael@0 | 529 | punpcklbw mm1, mm3 |
michael@0 | 530 | |
; Rows 0-1 for the +1 and +2 candidates.
michael@0 | 531 | movd mm4, DWORD PTR [ref_ptr+1] |
michael@0 | 532 | movd mm5, DWORD PTR [ref_ptr+2] |
michael@0 | 533 | |
michael@0 | 534 | movd mm2, DWORD PTR [ref_ptr+ref_stride+1] |
michael@0 | 535 | movd mm3, DWORD PTR [ref_ptr+ref_stride+2] |
michael@0 | 536 | |
michael@0 | 537 | psadbw mm1, mm0 |
michael@0 | 538 | |
michael@0 | 539 | punpcklbw mm4, mm2 |
michael@0 | 540 | punpcklbw mm5, mm3 |
michael@0 | 541 | |
michael@0 | 542 | psadbw mm4, mm0 |
michael@0 | 543 | psadbw mm5, mm0 |
michael@0 | 544 | |
; Advance to rows 2-3 and repeat.
michael@0 | 545 | lea src_ptr, [src_ptr+src_stride*2] |
michael@0 | 546 | lea ref_ptr, [ref_ptr+ref_stride*2] |
michael@0 | 547 | |
michael@0 | 548 | movd mm0, DWORD PTR [src_ptr] |
michael@0 | 549 | movd mm2, DWORD PTR [ref_ptr] |
michael@0 | 550 | |
michael@0 | 551 | movd mm3, DWORD PTR [src_ptr+src_stride] |
michael@0 | 552 | movd mm6, DWORD PTR [ref_ptr+ref_stride] |
michael@0 | 553 | |
michael@0 | 554 | punpcklbw mm0, mm3 |
michael@0 | 555 | punpcklbw mm2, mm6 |
michael@0 | 556 | |
michael@0 | 557 | movd mm3, DWORD PTR [ref_ptr+1] |
michael@0 | 558 | movd mm7, DWORD PTR [ref_ptr+2] |
michael@0 | 559 | |
michael@0 | 560 | psadbw mm2, mm0 |
michael@0 | 561 | |
; mm1 now holds the complete +0 candidate SAD.
michael@0 | 562 | paddw mm1, mm2 |
michael@0 | 563 | |
michael@0 | 564 | movd mm2, DWORD PTR [ref_ptr+ref_stride+1] |
michael@0 | 565 | movd mm6, DWORD PTR [ref_ptr+ref_stride+2] |
michael@0 | 566 | |
michael@0 | 567 | punpcklbw mm3, mm2 |
michael@0 | 568 | punpcklbw mm7, mm6 |
michael@0 | 569 | |
michael@0 | 570 | psadbw mm3, mm0 |
michael@0 | 571 | psadbw mm7, mm0 |
michael@0 | 572 | |
; Combine rows 0-1 partials with rows 2-3 for the +1 / +2 candidates.
michael@0 | 573 | paddw mm3, mm4 |
michael@0 | 574 | paddw mm7, mm5 |
michael@0 | 575 | |
michael@0 | 576 | mov rcx, result_ptr |
michael@0 | 577 | |
; Pack results[0] and results[1] into one qword store, then results[2].
michael@0 | 578 | punpckldq mm1, mm3 |
michael@0 | 579 | |
michael@0 | 580 | movq [rcx], mm1 |
michael@0 | 581 | movd [rcx+8], mm7 |
michael@0 | 582 | |
michael@0 | 583 | STACK_FRAME_DESTROY_X3 |
michael@0 | 584 | |
michael@0 | 585 | ;unsigned int vp8_sad16x16_sse3( |
michael@0 | 586 | ; unsigned char *src_ptr, |
michael@0 | 587 | ; int src_stride, |
michael@0 | 588 | ; unsigned char *ref_ptr, |
michael@0 | 589 | ; int ref_stride, |
michael@0 | 590 | ; int max_sad) |
; Single 16x16 SAD, returned in rax.  NOTE(review): max_sad is never read
; in this implementation -- there is no early-out; confirm callers expect
; an exhaustive sum.  end_ptr is reused as the loop counter (4 iterations
; of 4 rows each = 16 rows); accumulator is xmm7.
michael@0 | 591 | ;%define lddqu movdqu |
michael@0 | 592 | global sym(vp8_sad16x16_sse3) PRIVATE |
michael@0 | 593 | sym(vp8_sad16x16_sse3): |
michael@0 | 594 | |
michael@0 | 595 | STACK_FRAME_CREATE_X3 |
michael@0 | 596 | |
michael@0 | 597 | mov end_ptr, 4 |
michael@0 | 598 | pxor xmm7, xmm7 |
michael@0 | 599 | |
michael@0 | 600 | .vp8_sad16x16_sse3_loop: |
; Four rows per iteration: aligned loads from src, unaligned from ref.
michael@0 | 601 | movdqa xmm0, XMMWORD PTR [src_ptr] |
michael@0 | 602 | movdqu xmm1, XMMWORD PTR [ref_ptr] |
michael@0 | 603 | movdqa xmm2, XMMWORD PTR [src_ptr+src_stride] |
michael@0 | 604 | movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride] |
michael@0 | 605 | |
michael@0 | 606 | lea src_ptr, [src_ptr+src_stride*2] |
michael@0 | 607 | lea ref_ptr, [ref_ptr+ref_stride*2] |
michael@0 | 608 | |
michael@0 | 609 | movdqa xmm4, XMMWORD PTR [src_ptr] |
michael@0 | 610 | movdqu xmm5, XMMWORD PTR [ref_ptr] |
michael@0 | 611 | movdqa xmm6, XMMWORD PTR [src_ptr+src_stride] |
michael@0 | 612 | |
michael@0 | 613 | psadbw xmm0, xmm1 |
michael@0 | 614 | |
michael@0 | 615 | movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride] |
michael@0 | 616 | |
michael@0 | 617 | psadbw xmm2, xmm3 |
michael@0 | 618 | psadbw xmm4, xmm5 |
michael@0 | 619 | psadbw xmm6, xmm1 |
michael@0 | 620 | |
michael@0 | 621 | lea src_ptr, [src_ptr+src_stride*2] |
michael@0 | 622 | lea ref_ptr, [ref_ptr+ref_stride*2] |
michael@0 | 623 | |
michael@0 | 624 | paddw xmm7, xmm0 |
michael@0 | 625 | paddw xmm7, xmm2 |
michael@0 | 626 | paddw xmm7, xmm4 |
michael@0 | 627 | paddw xmm7, xmm6 |
michael@0 | 628 | |
michael@0 | 629 | sub end_ptr, 1 |
michael@0 | 630 | jne .vp8_sad16x16_sse3_loop |
michael@0 | 631 | |
; Fold the two 64-bit psadbw lanes and return the total in rax.
michael@0 | 632 | movq xmm0, xmm7 |
michael@0 | 633 | psrldq xmm7, 8 |
michael@0 | 634 | paddw xmm0, xmm7 |
michael@0 | 635 | movq rax, xmm0 |
michael@0 | 636 | |
michael@0 | 637 | STACK_FRAME_DESTROY_X3 |
michael@0 | 638 | |
michael@0 | 639 | ;void vp8_copy32xn_sse3( |
michael@0 | 640 | ; unsigned char *src_ptr, |
michael@0 | 641 | ; int src_stride, |
michael@0 | 642 | ; unsigned char *dst_ptr, |
michael@0 | 643 | ; int dst_stride, |
michael@0 | 644 | ; int height); |
; Copies a 32-byte-wide block of `height` rows.  Reuses the X3 frame, so in
; the body "ref_ptr"/"ref_stride" are really dst_ptr/dst_stride, and end_ptr
; is scratch.  Loads are movdqu (unaligned src OK) but stores are movdqa:
; dst_ptr and dst_stride are presumably 16-byte aligned -- confirm callers.
michael@0 | 645 | global sym(vp8_copy32xn_sse3) PRIVATE |
michael@0 | 646 | sym(vp8_copy32xn_sse3): |
michael@0 | 647 | |
michael@0 | 648 | STACK_FRAME_CREATE_X3 |
michael@0 | 649 | |
; Main loop: copy 4 rows (8 x 16 bytes) per iteration while height >= 4.
michael@0 | 650 | .block_copy_sse3_loopx4: |
michael@0 | 651 | lea end_ptr, [src_ptr+src_stride*2] |
michael@0 | 652 | |
michael@0 | 653 | movdqu xmm0, XMMWORD PTR [src_ptr] |
michael@0 | 654 | movdqu xmm1, XMMWORD PTR [src_ptr + 16] |
michael@0 | 655 | movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] |
michael@0 | 656 | movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] |
michael@0 | 657 | movdqu xmm4, XMMWORD PTR [end_ptr] |
michael@0 | 658 | movdqu xmm5, XMMWORD PTR [end_ptr + 16] |
michael@0 | 659 | movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] |
michael@0 | 660 | movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] |
michael@0 | 661 | |
michael@0 | 662 | lea src_ptr, [src_ptr+src_stride*4] |
michael@0 | 663 | |
michael@0 | 664 | lea end_ptr, [ref_ptr+ref_stride*2] |
michael@0 | 665 | |
michael@0 | 666 | movdqa XMMWORD PTR [ref_ptr], xmm0 |
michael@0 | 667 | movdqa XMMWORD PTR [ref_ptr + 16], xmm1 |
michael@0 | 668 | movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 |
michael@0 | 669 | movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 |
michael@0 | 670 | movdqa XMMWORD PTR [end_ptr], xmm4 |
michael@0 | 671 | movdqa XMMWORD PTR [end_ptr + 16], xmm5 |
michael@0 | 672 | movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 |
michael@0 | 673 | movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 |
michael@0 | 674 | |
michael@0 | 675 | lea ref_ptr, [ref_ptr+ref_stride*4] |
michael@0 | 676 | |
michael@0 | 677 | sub height, 4 |
michael@0 | 678 | cmp height, 4 |
michael@0 | 679 | jge .block_copy_sse3_loopx4 |
michael@0 | 680 | |
michael@0 | 681 | ; Check whether there are more rows that need to be copied. |
michael@0 | 682 | cmp height, 0 |
michael@0 | 683 | je .copy_is_done |
michael@0 | 684 | |
; Tail loop: copy the remaining 1-3 rows one at a time.
michael@0 | 685 | .block_copy_sse3_loop: |
michael@0 | 686 | movdqu xmm0, XMMWORD PTR [src_ptr] |
michael@0 | 687 | movdqu xmm1, XMMWORD PTR [src_ptr + 16] |
michael@0 | 688 | lea src_ptr, [src_ptr+src_stride] |
michael@0 | 689 | |
michael@0 | 690 | movdqa XMMWORD PTR [ref_ptr], xmm0 |
michael@0 | 691 | movdqa XMMWORD PTR [ref_ptr + 16], xmm1 |
michael@0 | 692 | lea ref_ptr, [ref_ptr+ref_stride] |
michael@0 | 693 | |
michael@0 | 694 | sub height, 1 |
michael@0 | 695 | jne .block_copy_sse3_loop |
michael@0 | 696 | |
michael@0 | 697 | .copy_is_done: |
michael@0 | 698 | STACK_FRAME_DESTROY_X3 |
michael@0 | 699 | |
michael@0 | 700 | ;void vp8_sad16x16x4d_sse3( |
michael@0 | 701 | ; unsigned char *src_ptr, |
michael@0 | 702 | ; int src_stride, |
michael@0 | 703 | ; unsigned char *ref_ptr_base, |
michael@0 | 704 | ; int ref_stride, |
michael@0 | 705 | ; int *results) |
; 16x16 SADs against four reference blocks (pointers read from
; ref_ptr_base[0..3]); the four sums are stored to results[0..3].
michael@0 | 706 | global sym(vp8_sad16x16x4d_sse3) PRIVATE |
michael@0 | 707 | sym(vp8_sad16x16x4d_sse3): |
michael@0 | 708 | |
michael@0 | 709 | STACK_FRAME_CREATE_X4 |
michael@0 | 710 | |
michael@0 | 711 | PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 712 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 713 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 714 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 715 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 716 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 717 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 718 | PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 719 | |
michael@0 | 720 | %if ABI_IS_32BIT |
; Restore the frame pointer saved by the extra push in STACK_FRAME_CREATE_X4
; (rbp was reused as ref_stride) so the arg()-based result_ptr is valid.
michael@0 | 721 | pop rbp |
michael@0 | 722 | %endif |
michael@0 | 723 | mov rcx, result_ptr |
michael@0 | 724 | |
; Fold the two 64-bit psadbw lanes of each accumulator; store results[0..3].
michael@0 | 725 | movq xmm0, xmm4 |
michael@0 | 726 | psrldq xmm4, 8 |
michael@0 | 727 | |
michael@0 | 728 | paddw xmm0, xmm4 |
michael@0 | 729 | movd [rcx], xmm0 |
michael@0 | 730 | ;- |
michael@0 | 731 | movq xmm0, xmm5 |
michael@0 | 732 | psrldq xmm5, 8 |
michael@0 | 733 | |
michael@0 | 734 | paddw xmm0, xmm5 |
michael@0 | 735 | movd [rcx+4], xmm0 |
michael@0 | 736 | ;- |
michael@0 | 737 | movq xmm0, xmm6 |
michael@0 | 738 | psrldq xmm6, 8 |
michael@0 | 739 | |
michael@0 | 740 | paddw xmm0, xmm6 |
michael@0 | 741 | movd [rcx+8], xmm0 |
michael@0 | 742 | ;- |
michael@0 | 743 | movq xmm0, xmm7 |
michael@0 | 744 | psrldq xmm7, 8 |
michael@0 | 745 | |
michael@0 | 746 | paddw xmm0, xmm7 |
michael@0 | 747 | movd [rcx+12], xmm0 |
michael@0 | 748 | |
michael@0 | 749 | STACK_FRAME_DESTROY_X4 |
michael@0 | 750 | |
michael@0 | 751 | ;void vp8_sad16x8x4d_sse3( |
michael@0 | 752 | ; unsigned char *src_ptr, |
michael@0 | 753 | ; int src_stride, |
michael@0 | 754 | ; unsigned char *ref_ptr_base, |
michael@0 | 755 | ; int ref_stride, |
michael@0 | 756 | ; int *results) |
; 16x8 variant of vp8_sad16x16x4d_sse3: 4 macro calls x 2 rows = 8 rows.
michael@0 | 757 | global sym(vp8_sad16x8x4d_sse3) PRIVATE |
michael@0 | 758 | sym(vp8_sad16x8x4d_sse3): |
michael@0 | 759 | |
michael@0 | 760 | STACK_FRAME_CREATE_X4 |
michael@0 | 761 | |
michael@0 | 762 | PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 763 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 764 | PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 765 | PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 766 | |
michael@0 | 767 | %if ABI_IS_32BIT |
; Restore the frame pointer (rbp was reused as ref_stride) before arg().
michael@0 | 768 | pop rbp |
michael@0 | 769 | %endif |
michael@0 | 770 | mov rcx, result_ptr |
michael@0 | 771 | |
; Fold the two 64-bit psadbw lanes of each accumulator; store results[0..3].
michael@0 | 772 | movq xmm0, xmm4 |
michael@0 | 773 | psrldq xmm4, 8 |
michael@0 | 774 | |
michael@0 | 775 | paddw xmm0, xmm4 |
michael@0 | 776 | movd [rcx], xmm0 |
michael@0 | 777 | ;- |
michael@0 | 778 | movq xmm0, xmm5 |
michael@0 | 779 | psrldq xmm5, 8 |
michael@0 | 780 | |
michael@0 | 781 | paddw xmm0, xmm5 |
michael@0 | 782 | movd [rcx+4], xmm0 |
michael@0 | 783 | ;- |
michael@0 | 784 | movq xmm0, xmm6 |
michael@0 | 785 | psrldq xmm6, 8 |
michael@0 | 786 | |
michael@0 | 787 | paddw xmm0, xmm6 |
michael@0 | 788 | movd [rcx+8], xmm0 |
michael@0 | 789 | ;- |
michael@0 | 790 | movq xmm0, xmm7 |
michael@0 | 791 | psrldq xmm7, 8 |
michael@0 | 792 | |
michael@0 | 793 | paddw xmm0, xmm7 |
michael@0 | 794 | movd [rcx+12], xmm0 |
michael@0 | 795 | |
michael@0 | 796 | STACK_FRAME_DESTROY_X4 |
michael@0 | 797 | |
michael@0 | 798 | ;void vp8_sad8x16x4d_sse3( |
michael@0 | 799 | ; unsigned char *src_ptr, |
michael@0 | 800 | ; int src_stride, |
michael@0 | 801 | ; unsigned char *ref_ptr, |
michael@0 | 802 | ; int ref_stride, |
michael@0 | 803 | ; int *results) |
; 8x16 four-candidate SAD using the MMX macro; sums land in mm4..mm7.
michael@0 | 804 | global sym(vp8_sad8x16x4d_sse3) PRIVATE |
michael@0 | 805 | sym(vp8_sad8x16x4d_sse3): |
michael@0 | 806 | |
michael@0 | 807 | STACK_FRAME_CREATE_X4 |
michael@0 | 808 | |
michael@0 | 809 | PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 810 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 811 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 812 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 813 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 814 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 815 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 816 | PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 817 | |
michael@0 | 818 | %if ABI_IS_32BIT |
; Restore the frame pointer (rbp was reused as ref_stride) before arg().
michael@0 | 819 | pop rbp |
michael@0 | 820 | %endif |
michael@0 | 821 | mov rcx, result_ptr |
michael@0 | 822 | |
; Pack the four dword sums into two qword stores: results[0..1], [2..3].
michael@0 | 823 | punpckldq mm4, mm5 |
michael@0 | 824 | punpckldq mm6, mm7 |
michael@0 | 825 | |
michael@0 | 826 | movq [rcx], mm4 |
michael@0 | 827 | movq [rcx+8], mm6 |
michael@0 | 828 | |
michael@0 | 829 | STACK_FRAME_DESTROY_X4 |
michael@0 | 830 | |
;-----------------------------------------------------------------------
; vp8_sad8x8x4d_sse3 -- four-candidate 8x8 SAD.
; Same scheme as vp8_sad8x16x4d_sse3 above but over 8 rows (4 row-pairs)
; instead of 16: one 8x8 source block vs. four reference candidates,
; four sums stored to results[0..3] as 32-bit ints.
; PROCESS_8X2X4 / STACK_FRAME_CREATE_X4 / STACK_FRAME_DESTROY_X4 are
; macros defined earlier in this file (not visible in this chunk);
; the flag argument presumably selects init (0) / accumulate (1) /
; finalise (2) behaviour of the mm4..mm7 accumulators -- TODO confirm.
;-----------------------------------------------------------------------
michael@0 | 831 | ;void vp8_sad8x8x4d_sse3( |
michael@0 | 832 | ; unsigned char *src_ptr, |
michael@0 | 833 | ; int src_stride, |
michael@0 | 834 | ; unsigned char *ref_ptr, |
michael@0 | 835 | ; int ref_stride, |
michael@0 | 836 | ; int *results) |
michael@0 | 837 | global sym(vp8_sad8x8x4d_sse3) PRIVATE |
michael@0 | 838 | sym(vp8_sad8x8x4d_sse3): |
michael@0 | 839 | |
michael@0 | 840 | STACK_FRAME_CREATE_X4 |
michael@0 | 841 | |
; 8 rows as 4 pairs: first (0), two middle (1), last (2).
michael@0 | 842 | PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 843 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 844 | PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 845 | PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride |
michael@0 | 846 | |
; 32-bit: drop the frame pointer before fetching result_ptr (see note
; in vp8_sad8x16x4d_sse3 -- arg() resolution; confirm against macro).
michael@0 | 847 | %if ABI_IS_32BIT |
michael@0 | 848 | pop rbp |
michael@0 | 849 | %endif |
michael@0 | 850 | mov rcx, result_ptr |
michael@0 | 851 | |
; Interleave low dwords: mm4 = {sad0, sad1}, mm6 = {sad2, sad3},
; then store all four results with two qword writes.
michael@0 | 852 | punpckldq mm4, mm5 |
michael@0 | 853 | punpckldq mm6, mm7 |
michael@0 | 854 | |
michael@0 | 855 | movq [rcx], mm4 |
michael@0 | 856 | movq [rcx+8], mm6 |
michael@0 | 857 | |
michael@0 | 858 | STACK_FRAME_DESTROY_X4 |
michael@0 | 859 | |
;-----------------------------------------------------------------------
; vp8_sad4x4x4d_sse3 -- four-candidate 4x4 SAD, fully inline (no
; PROCESS macro).  One 4x4 source block vs. four reference candidates
; r0_ptr..r3_ptr; the four sums go to results[0..3] as 32-bit ints.
; Strategy: each 4-byte row is loaded with movd; punpcklbw fuses two
; consecutive rows into one 8-byte qword so a single psadbw covers two
; rows at once.  Rows 0-1 are handled first, pointers advance by
; 2*stride, then rows 2-3; the two half-sums are added per candidate.
; Accumulator roles after the first half:
;   mm1 = ref0 rows0-1, mm4 = ref1, mm5 = ref2, mm6 = ref3.
; NOTE(review): no emms before return -- presumably MMX state is
; cleaned up by the caller or elsewhere; confirm.
;-----------------------------------------------------------------------
michael@0 | 860 | ;void vp8_sad4x4x4d_sse3( |
michael@0 | 861 | ; unsigned char *src_ptr, |
michael@0 | 862 | ; int src_stride, |
michael@0 | 863 | ; unsigned char *ref_ptr, |
michael@0 | 864 | ; int ref_stride, |
michael@0 | 865 | ; int *results) |
michael@0 | 866 | global sym(vp8_sad4x4x4d_sse3) PRIVATE |
michael@0 | 867 | sym(vp8_sad4x4x4d_sse3): |
michael@0 | 868 | |
michael@0 | 869 | STACK_FRAME_CREATE_X4 |
michael@0 | 870 | |
; --- first half: rows 0 and 1 ---
; src rows 0,1 and ref0 rows 0,1
michael@0 | 871 | movd mm0, DWORD PTR [src_ptr] |
michael@0 | 872 | movd mm1, DWORD PTR [r0_ptr] |
michael@0 | 873 | |
michael@0 | 874 | movd mm2, DWORD PTR [src_ptr+src_stride] |
michael@0 | 875 | movd mm3, DWORD PTR [r0_ptr+ref_stride] |
michael@0 | 876 | |
; fuse each pair of rows into one 8-byte operand
; mm0 = src rows0-1 (interleaved), mm1 = ref0 rows0-1
michael@0 | 877 | punpcklbw mm0, mm2 |
michael@0 | 878 | punpcklbw mm1, mm3 |
michael@0 | 879 | |
; load row 0 of ref1..ref3 and row 1 of ref1..ref3
michael@0 | 880 | movd mm4, DWORD PTR [r1_ptr] |
michael@0 | 881 | movd mm5, DWORD PTR [r2_ptr] |
michael@0 | 882 | |
michael@0 | 883 | movd mm6, DWORD PTR [r3_ptr] |
michael@0 | 884 | movd mm2, DWORD PTR [r1_ptr+ref_stride] |
michael@0 | 885 | |
michael@0 | 886 | movd mm3, DWORD PTR [r2_ptr+ref_stride] |
michael@0 | 887 | movd mm7, DWORD PTR [r3_ptr+ref_stride] |
michael@0 | 888 | |
; mm1 = SAD(ref0, src) over rows 0-1 (psadbw sums |a-b| of 8 bytes)
michael@0 | 889 | psadbw mm1, mm0 |
michael@0 | 890 | |
; same row-pair fusion for ref1..ref3
michael@0 | 891 | punpcklbw mm4, mm2 |
michael@0 | 892 | punpcklbw mm5, mm3 |
michael@0 | 893 | |
michael@0 | 894 | punpcklbw mm6, mm7 |
michael@0 | 895 | psadbw mm4, mm0 |
michael@0 | 896 | |
michael@0 | 897 | psadbw mm5, mm0 |
michael@0 | 898 | psadbw mm6, mm0 |
michael@0 | 899 | |
michael@0 | 900 | |
michael@0 | 901 | |
; advance all five pointers by two rows for the second half
michael@0 | 902 | lea src_ptr, [src_ptr+src_stride*2] |
michael@0 | 903 | lea r0_ptr, [r0_ptr+ref_stride*2] |
michael@0 | 904 | |
michael@0 | 905 | lea r1_ptr, [r1_ptr+ref_stride*2] |
michael@0 | 906 | lea r2_ptr, [r2_ptr+ref_stride*2] |
michael@0 | 907 | |
michael@0 | 908 | lea r3_ptr, [r3_ptr+ref_stride*2] |
michael@0 | 909 | |
; --- second half: rows 2 and 3 ---
michael@0 | 910 | movd mm0, DWORD PTR [src_ptr] |
michael@0 | 911 | movd mm2, DWORD PTR [r0_ptr] |
michael@0 | 912 | |
michael@0 | 913 | movd mm3, DWORD PTR [src_ptr+src_stride] |
michael@0 | 914 | movd mm7, DWORD PTR [r0_ptr+ref_stride] |
michael@0 | 915 | |
; mm0 = src rows2-3, mm2 = ref0 rows2-3
michael@0 | 916 | punpcklbw mm0, mm3 |
michael@0 | 917 | punpcklbw mm2, mm7 |
michael@0 | 918 | |
michael@0 | 919 | movd mm3, DWORD PTR [r1_ptr] |
michael@0 | 920 | movd mm7, DWORD PTR [r2_ptr] |
michael@0 | 921 | |
michael@0 | 922 | psadbw mm2, mm0 |
; Early 32-bit epilogue, interleaved with the math: the redefine below
; shows ref_stride lived in rbp on 32-bit, so it is copied to rax
; before the frame pointer is popped (which presumably also lets
; result_ptr/arg() resolve -- confirm against STACK_FRAME_CREATE_X4).
michael@0 | 923 | %if ABI_IS_32BIT |
michael@0 | 924 | mov rax, rbp |
michael@0 | 925 | |
michael@0 | 926 | pop rbp |
michael@0 | 927 | %define ref_stride rax |
michael@0 | 928 | %endif |
michael@0 | 929 | mov rsi, result_ptr |
michael@0 | 930 | |
; results[0] = ref0 rows0-1 + rows2-3
michael@0 | 931 | paddw mm1, mm2 |
michael@0 | 932 | movd [rsi], mm1 |
michael@0 | 933 | |
; row 3 of ref1 and ref2 (mm1 is free again after the store above)
michael@0 | 934 | movd mm2, DWORD PTR [r1_ptr+ref_stride] |
michael@0 | 935 | movd mm1, DWORD PTR [r2_ptr+ref_stride] |
michael@0 | 936 | |
; mm3 = ref1 rows2-3, mm7 = ref2 rows2-3
michael@0 | 937 | punpcklbw mm3, mm2 |
michael@0 | 938 | punpcklbw mm7, mm1 |
michael@0 | 939 | |
michael@0 | 940 | psadbw mm3, mm0 |
michael@0 | 941 | psadbw mm7, mm0 |
michael@0 | 942 | |
; ref3 rows 2 and 3
michael@0 | 943 | movd mm2, DWORD PTR [r3_ptr] |
michael@0 | 944 | movd mm1, DWORD PTR [r3_ptr+ref_stride] |
michael@0 | 945 | |
; add first-half sums: mm3 = total ref1, mm7 = total ref2
michael@0 | 946 | paddw mm3, mm4 |
michael@0 | 947 | paddw mm7, mm5 |
michael@0 | 948 | |
michael@0 | 949 | movd [rsi+4], mm3 |
michael@0 | 950 | punpcklbw mm2, mm1 |
michael@0 | 951 | |
michael@0 | 952 | movd [rsi+8], mm7 |
michael@0 | 953 | psadbw mm2, mm0 |
michael@0 | 954 | |
; results[3] = ref3 rows0-1 (mm6) + rows2-3 (mm2)
michael@0 | 955 | paddw mm2, mm6 |
michael@0 | 956 | movd [rsi+12], mm2 |
michael@0 | 957 | |
michael@0 | 958 | |
michael@0 | 959 | STACK_FRAME_DESTROY_X4 |
michael@0 | 960 |