media/libvpx/vp8/common/x86/vp8_asm_stubs.c

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /*
michael@0 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11
michael@0 12 #include "vpx_config.h"
michael@0 13 #include "vp8_rtcd.h"
michael@0 14 #include "vpx_ports/mem.h"
michael@0 15 #include "filter_x86.h"
michael@0 16
/*
 * Six-tap coefficient table used by the MMX and SSE2 predictors in this
 * file, which index its first dimension with xoffset / yoffset.
 * Only declared here; the definition lives outside this file.
 */
extern const short vp8_six_tap_mmx[8][6 * 8];

/*
 * Filter primitives called by the MMX and SSE2 predictors below.
 * They are only declared here; the definitions live elsewhere (presumably
 * in the x86 assembly sources -- TODO confirm).
 */

/* MMX: first pass, unsigned char source -> unsigned short intermediate. */
extern void vp8_filter_block1d_h6_mmx(unsigned char *src_ptr,
                                      unsigned short *output_ptr,
                                      unsigned int src_pixels_per_line,
                                      unsigned int pixel_step,
                                      unsigned int output_height,
                                      unsigned int output_width,
                                      const short *vp8_filter);

/* MMX: second pass, unsigned short intermediate -> unsigned char output. */
extern void vp8_filter_block1dc_v6_mmx(unsigned short *src_ptr,
                                       unsigned char *output_ptr,
                                       int output_pitch,
                                       unsigned int pixels_per_line,
                                       unsigned int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
                                       const short *vp8_filter);

/* SSE2: first pass for 8- and 16-wide blocks. */
extern void vp8_filter_block1d8_h6_sse2(unsigned char *src_ptr,
                                        unsigned short *output_ptr,
                                        unsigned int src_pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp8_filter);

extern void vp8_filter_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp8_filter);

/* SSE2: second pass for 8- and 16-wide blocks. */
extern void vp8_filter_block1d8_v6_sse2(unsigned short *src_ptr,
                                        unsigned char *output_ptr,
                                        int dst_pitch,
                                        unsigned int pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp8_filter);

extern void vp8_filter_block1d16_v6_sse2(unsigned short *src_ptr,
                                         unsigned char *output_ptr,
                                         int dst_pitch,
                                         unsigned int pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp8_filter);

/*
 * SSE2: fills the unsigned short intermediate buffer from the source
 * without taking a filter argument; used in place of the first pass when
 * xoffset is zero.
 */
extern void vp8_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int output_height,
                                         unsigned int output_width);

/* SSE2: single-pass variants that write unsigned char output directly. */
extern void vp8_filter_block1d8_h6_only_sse2(unsigned char *src_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int dst_pitch,
                                             unsigned int output_height,
                                             const short *vp8_filter);

extern void vp8_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
                                              unsigned int src_pixels_per_line,
                                              unsigned char *output_ptr,
                                              int dst_pitch,
                                              unsigned int output_height,
                                              const short *vp8_filter);

extern void vp8_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int dst_pitch,
                                             unsigned int output_height,
                                             const short *vp8_filter);
michael@0 117
michael@0 118
michael@0 119 #if HAVE_MMX
michael@0 120 void vp8_sixtap_predict4x4_mmx
michael@0 121 (
michael@0 122 unsigned char *src_ptr,
michael@0 123 int src_pixels_per_line,
michael@0 124 int xoffset,
michael@0 125 int yoffset,
michael@0 126 unsigned char *dst_ptr,
michael@0 127 int dst_pitch
michael@0 128 )
michael@0 129 {
michael@0 130 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
michael@0 131 const short *HFilter, *VFilter;
michael@0 132 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 133 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
michael@0 134 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 135 vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter);
michael@0 136
michael@0 137 }
michael@0 138
michael@0 139
michael@0 140 void vp8_sixtap_predict16x16_mmx
michael@0 141 (
michael@0 142 unsigned char *src_ptr,
michael@0 143 int src_pixels_per_line,
michael@0 144 int xoffset,
michael@0 145 int yoffset,
michael@0 146 unsigned char *dst_ptr,
michael@0 147 int dst_pitch
michael@0 148 )
michael@0 149 {
michael@0 150
michael@0 151 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
michael@0 152
michael@0 153 const short *HFilter, *VFilter;
michael@0 154
michael@0 155
michael@0 156 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 157
michael@0 158 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
michael@0 159 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter);
michael@0 160 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter);
michael@0 161 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter);
michael@0 162
michael@0 163 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 164 vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
michael@0 165 vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter);
michael@0 166 vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
michael@0 167 vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter);
michael@0 168
michael@0 169 }
michael@0 170
michael@0 171
michael@0 172 void vp8_sixtap_predict8x8_mmx
michael@0 173 (
michael@0 174 unsigned char *src_ptr,
michael@0 175 int src_pixels_per_line,
michael@0 176 int xoffset,
michael@0 177 int yoffset,
michael@0 178 unsigned char *dst_ptr,
michael@0 179 int dst_pitch
michael@0 180 )
michael@0 181 {
michael@0 182
michael@0 183 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
michael@0 184
michael@0 185 const short *HFilter, *VFilter;
michael@0 186
michael@0 187 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 188 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
michael@0 189 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter);
michael@0 190
michael@0 191 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 192 vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter);
michael@0 193 vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter);
michael@0 194
michael@0 195 }
michael@0 196
michael@0 197
michael@0 198 void vp8_sixtap_predict8x4_mmx
michael@0 199 (
michael@0 200 unsigned char *src_ptr,
michael@0 201 int src_pixels_per_line,
michael@0 202 int xoffset,
michael@0 203 int yoffset,
michael@0 204 unsigned char *dst_ptr,
michael@0 205 int dst_pitch
michael@0 206 )
michael@0 207 {
michael@0 208
michael@0 209 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
michael@0 210
michael@0 211 const short *HFilter, *VFilter;
michael@0 212
michael@0 213 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 214 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
michael@0 215 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter);
michael@0 216
michael@0 217 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 218 vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter);
michael@0 219 vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter);
michael@0 220
michael@0 221 }
michael@0 222
michael@0 223
michael@0 224
/*
 * 16x16 bilinear prediction, MMX path.
 *
 * Built from four calls to vp8_bilinear_predict8x8_mmx, one per 8x8
 * quadrant, visited in the order top-left, top-right, bottom-left,
 * bottom-right. Every call receives the same xoffset / yoffset and the
 * same source and destination pitches.
 */
void vp8_bilinear_predict16x16_mmx(unsigned char *src_ptr, int src_pixels_per_line,
                                   int xoffset, int yoffset,
                                   unsigned char *dst_ptr, int dst_pitch)
{
    int row, col;

    for (row = 0; row < 16; row += 8)
    {
        for (col = 0; col < 16; col += 8)
        {
            vp8_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                        src_pixels_per_line, xoffset, yoffset,
                                        dst_ptr + row * dst_pitch + col,
                                        dst_pitch);
        }
    }
}
michael@0 240 #endif
michael@0 241
michael@0 242
michael@0 243 #if HAVE_SSE2
michael@0 244 void vp8_sixtap_predict16x16_sse2
michael@0 245 (
michael@0 246 unsigned char *src_ptr,
michael@0 247 int src_pixels_per_line,
michael@0 248 int xoffset,
michael@0 249 int yoffset,
michael@0 250 unsigned char *dst_ptr,
michael@0 251 int dst_pitch
michael@0 252
michael@0 253 )
michael@0 254 {
michael@0 255 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
michael@0 256
michael@0 257 const short *HFilter, *VFilter;
michael@0 258
michael@0 259 if (xoffset)
michael@0 260 {
michael@0 261 if (yoffset)
michael@0 262 {
michael@0 263 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 264 vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
michael@0 265 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 266 vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
michael@0 267 }
michael@0 268 else
michael@0 269 {
michael@0 270 /* First-pass only */
michael@0 271 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 272 vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
michael@0 273 }
michael@0 274 }
michael@0 275 else
michael@0 276 {
michael@0 277 /* Second-pass only */
michael@0 278 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 279 vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
michael@0 280 vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
michael@0 281 }
michael@0 282 }
michael@0 283
michael@0 284
michael@0 285 void vp8_sixtap_predict8x8_sse2
michael@0 286 (
michael@0 287 unsigned char *src_ptr,
michael@0 288 int src_pixels_per_line,
michael@0 289 int xoffset,
michael@0 290 int yoffset,
michael@0 291 unsigned char *dst_ptr,
michael@0 292 int dst_pitch
michael@0 293 )
michael@0 294 {
michael@0 295 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
michael@0 296 const short *HFilter, *VFilter;
michael@0 297
michael@0 298 if (xoffset)
michael@0 299 {
michael@0 300 if (yoffset)
michael@0 301 {
michael@0 302 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 303 vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
michael@0 304 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 305 vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
michael@0 306 }
michael@0 307 else
michael@0 308 {
michael@0 309 /* First-pass only */
michael@0 310 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 311 vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
michael@0 312 }
michael@0 313 }
michael@0 314 else
michael@0 315 {
michael@0 316 /* Second-pass only */
michael@0 317 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 318 vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
michael@0 319 }
michael@0 320 }
michael@0 321
michael@0 322
michael@0 323 void vp8_sixtap_predict8x4_sse2
michael@0 324 (
michael@0 325 unsigned char *src_ptr,
michael@0 326 int src_pixels_per_line,
michael@0 327 int xoffset,
michael@0 328 int yoffset,
michael@0 329 unsigned char *dst_ptr,
michael@0 330 int dst_pitch
michael@0 331 )
michael@0 332 {
michael@0 333 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
michael@0 334 const short *HFilter, *VFilter;
michael@0 335
michael@0 336 if (xoffset)
michael@0 337 {
michael@0 338 if (yoffset)
michael@0 339 {
michael@0 340 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 341 vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
michael@0 342 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 343 vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
michael@0 344 }
michael@0 345 else
michael@0 346 {
michael@0 347 /* First-pass only */
michael@0 348 HFilter = vp8_six_tap_mmx[xoffset];
michael@0 349 vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
michael@0 350 }
michael@0 351 }
michael@0 352 else
michael@0 353 {
michael@0 354 /* Second-pass only */
michael@0 355 VFilter = vp8_six_tap_mmx[yoffset];
michael@0 356 vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
michael@0 357 }
michael@0 358 }
michael@0 359
michael@0 360 #endif
michael@0 361
michael@0 362 #if HAVE_SSSE3
michael@0 363
/*
 * SSSE3 filter primitives called by the predictors below. They are only
 * declared here; the definitions live elsewhere (presumably in the x86
 * assembly sources -- TODO confirm).
 *
 * Unlike the MMX/SSE2 routines, these take a filter index instead of a
 * coefficient pointer, and both source and output are unsigned char.
 */

/* First-pass (h6) routines for 8-, 16- and 4-wide blocks. */
extern void vp8_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned char *output_ptr,
                                         unsigned int output_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
                                          unsigned int src_pixels_per_line,
                                          unsigned char *output_ptr,
                                          unsigned int output_pitch,
                                          unsigned int output_height,
                                          unsigned int vp8_filter_index);

/* Second-pass (v6) routines for 16- and 8-wide blocks. */
extern void vp8_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
                                          unsigned int src_pitch,
                                          unsigned char *output_ptr,
                                          unsigned int out_pitch,
                                          unsigned int output_height,
                                          unsigned int vp8_filter_index);

extern void vp8_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int out_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

/* 4-wide first- and second-pass routines. */
extern void vp8_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned char *output_ptr,
                                         unsigned int output_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int out_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);
michael@0 423
michael@0 424 void vp8_sixtap_predict16x16_ssse3
michael@0 425 (
michael@0 426 unsigned char *src_ptr,
michael@0 427 int src_pixels_per_line,
michael@0 428 int xoffset,
michael@0 429 int yoffset,
michael@0 430 unsigned char *dst_ptr,
michael@0 431 int dst_pitch
michael@0 432
michael@0 433 )
michael@0 434 {
michael@0 435 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
michael@0 436
michael@0 437 if (xoffset)
michael@0 438 {
michael@0 439 if (yoffset)
michael@0 440 {
michael@0 441 vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 442 src_pixels_per_line, FData2,
michael@0 443 16, 21, xoffset);
michael@0 444 vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
michael@0 445 16, yoffset);
michael@0 446 }
michael@0 447 else
michael@0 448 {
michael@0 449 /* First-pass only */
michael@0 450 vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
michael@0 451 dst_ptr, dst_pitch, 16, xoffset);
michael@0 452 }
michael@0 453 }
michael@0 454 else
michael@0 455 {
michael@0 456 if (yoffset)
michael@0 457 {
michael@0 458 /* Second-pass only */
michael@0 459 vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 460 src_pixels_per_line,
michael@0 461 dst_ptr, dst_pitch, 16, yoffset);
michael@0 462 }
michael@0 463 else
michael@0 464 {
michael@0 465 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
michael@0 466 * yoffset==0) case correctly. Add copy function here to guarantee
michael@0 467 * six-tap function handles all possible offsets. */
michael@0 468 vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
michael@0 469 }
michael@0 470 }
michael@0 471 }
michael@0 472
michael@0 473 void vp8_sixtap_predict8x8_ssse3
michael@0 474 (
michael@0 475 unsigned char *src_ptr,
michael@0 476 int src_pixels_per_line,
michael@0 477 int xoffset,
michael@0 478 int yoffset,
michael@0 479 unsigned char *dst_ptr,
michael@0 480 int dst_pitch
michael@0 481 )
michael@0 482 {
michael@0 483 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
michael@0 484
michael@0 485 if (xoffset)
michael@0 486 {
michael@0 487 if (yoffset)
michael@0 488 {
michael@0 489 vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 490 src_pixels_per_line, FData2,
michael@0 491 8, 13, xoffset);
michael@0 492 vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
michael@0 493 8, yoffset);
michael@0 494 }
michael@0 495 else
michael@0 496 {
michael@0 497 vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
michael@0 498 dst_ptr, dst_pitch, 8, xoffset);
michael@0 499 }
michael@0 500 }
michael@0 501 else
michael@0 502 {
michael@0 503 if (yoffset)
michael@0 504 {
michael@0 505 /* Second-pass only */
michael@0 506 vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 507 src_pixels_per_line,
michael@0 508 dst_ptr, dst_pitch, 8, yoffset);
michael@0 509 }
michael@0 510 else
michael@0 511 {
michael@0 512 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
michael@0 513 * yoffset==0) case correctly. Add copy function here to guarantee
michael@0 514 * six-tap function handles all possible offsets. */
michael@0 515 vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
michael@0 516 }
michael@0 517 }
michael@0 518 }
michael@0 519
michael@0 520
michael@0 521 void vp8_sixtap_predict8x4_ssse3
michael@0 522 (
michael@0 523 unsigned char *src_ptr,
michael@0 524 int src_pixels_per_line,
michael@0 525 int xoffset,
michael@0 526 int yoffset,
michael@0 527 unsigned char *dst_ptr,
michael@0 528 int dst_pitch
michael@0 529 )
michael@0 530 {
michael@0 531 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
michael@0 532
michael@0 533 if (xoffset)
michael@0 534 {
michael@0 535 if (yoffset)
michael@0 536 {
michael@0 537 vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 538 src_pixels_per_line, FData2,
michael@0 539 8, 9, xoffset);
michael@0 540 vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
michael@0 541 4, yoffset);
michael@0 542 }
michael@0 543 else
michael@0 544 {
michael@0 545 /* First-pass only */
michael@0 546 vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
michael@0 547 dst_ptr, dst_pitch, 4, xoffset);
michael@0 548 }
michael@0 549 }
michael@0 550 else
michael@0 551 {
michael@0 552 if (yoffset)
michael@0 553 {
michael@0 554 /* Second-pass only */
michael@0 555 vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 556 src_pixels_per_line,
michael@0 557 dst_ptr, dst_pitch, 4, yoffset);
michael@0 558 }
michael@0 559 else
michael@0 560 {
michael@0 561 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
michael@0 562 * yoffset==0) case correctly. Add copy function here to guarantee
michael@0 563 * six-tap function handles all possible offsets. */
michael@0 564 vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
michael@0 565 }
michael@0 566 }
michael@0 567 }
michael@0 568
michael@0 569 void vp8_sixtap_predict4x4_ssse3
michael@0 570 (
michael@0 571 unsigned char *src_ptr,
michael@0 572 int src_pixels_per_line,
michael@0 573 int xoffset,
michael@0 574 int yoffset,
michael@0 575 unsigned char *dst_ptr,
michael@0 576 int dst_pitch
michael@0 577 )
michael@0 578 {
michael@0 579 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
michael@0 580
michael@0 581 if (xoffset)
michael@0 582 {
michael@0 583 if (yoffset)
michael@0 584 {
michael@0 585 vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 586 src_pixels_per_line,
michael@0 587 FData2, 4, 9, xoffset);
michael@0 588 vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
michael@0 589 4, yoffset);
michael@0 590 }
michael@0 591 else
michael@0 592 {
michael@0 593 vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
michael@0 594 dst_ptr, dst_pitch, 4, xoffset);
michael@0 595 }
michael@0 596 }
michael@0 597 else
michael@0 598 {
michael@0 599 if (yoffset)
michael@0 600 {
michael@0 601 vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
michael@0 602 src_pixels_per_line,
michael@0 603 dst_ptr, dst_pitch, 4, yoffset);
michael@0 604 }
michael@0 605 else
michael@0 606 {
michael@0 607 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
michael@0 608 * yoffset==0) case correctly. Add copy function here to guarantee
michael@0 609 * six-tap function handles all possible offsets. */
michael@0 610 int r;
michael@0 611
michael@0 612 for (r = 0; r < 4; r++)
michael@0 613 {
michael@0 614 dst_ptr[0] = src_ptr[0];
michael@0 615 dst_ptr[1] = src_ptr[1];
michael@0 616 dst_ptr[2] = src_ptr[2];
michael@0 617 dst_ptr[3] = src_ptr[3];
michael@0 618 dst_ptr += dst_pitch;
michael@0 619 src_ptr += src_pixels_per_line;
michael@0 620 }
michael@0 621 }
michael@0 622 }
michael@0 623 }
michael@0 624
michael@0 625 #endif

mercurial