media/libvpx/vp9/common/x86/vp9_asm_stubs.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include <assert.h>
michael@0 12
michael@0 13 #include "./vpx_config.h"
michael@0 14 #include "./vp9_rtcd.h"
michael@0 15 #include "vpx_ports/mem.h"
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass                                                           //
///////////////////////////////////////////////////////////////////////////
// 16-byte aligned table of 16 rows of 8 shorts. In row i the first four
// entries are all (128 - 8 * i) and the last four are all (8 * i), so the
// two weights of every row sum to 128.
DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = {
  { 128, 128, 128, 128, 0, 0, 0, 0 },
  { 120, 120, 120, 120, 8, 8, 8, 8 },
  { 112, 112, 112, 112, 16, 16, 16, 16 },
  { 104, 104, 104, 104, 24, 24, 24, 24 },
  { 96, 96, 96, 96, 32, 32, 32, 32 },
  { 88, 88, 88, 88, 40, 40, 40, 40 },
  { 80, 80, 80, 80, 48, 48, 48, 48 },
  { 72, 72, 72, 72, 56, 56, 56, 56 },
  { 64, 64, 64, 64, 64, 64, 64, 64 },
  { 56, 56, 56, 56, 72, 72, 72, 72 },
  { 48, 48, 48, 48, 80, 80, 80, 80 },
  { 40, 40, 40, 40, 88, 88, 88, 88 },
  { 32, 32, 32, 32, 96, 96, 96, 96 },
  { 24, 24, 24, 24, 104, 104, 104, 104 },
  { 16, 16, 16, 16, 112, 112, 112, 112 },
  { 8, 8, 8, 8, 120, 120, 120, 120 }
};
michael@0 38
/*
 * Common signature of the 1-D filter kernels declared in this file.
 * The callers below pass, in order: the source pointer, the source stride,
 * the destination pointer, the destination stride, the number of rows to
 * produce, and the filter taps (filter_x for the horizontal kernels,
 * filter_y for the vertical ones).
 */
typedef void filter8_1dfunction (
  const unsigned char *src_ptr,
  const unsigned int src_pitch,
  unsigned char *output_ptr,
  unsigned int out_pitch,
  unsigned int output_height,
  const short *filter
);
michael@0 47
michael@0 48 #if HAVE_SSSE3
/*
 * SSSE3 1-D filter kernels. Only prototypes appear here; the definitions
 * live outside this file (presumably in assembly - TODO confirm).
 * The callers below advance by 16, 8 or 4 columns after the block1d16,
 * block1d8 and block1d4 kernels respectively; the h8 kernels are used by
 * the horizontal wrappers, the v8 kernels by the vertical ones, and the
 * _avg kernels by the vp9_convolve8_avg_* wrappers.
 */
filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
michael@0 61
/*
 * Horizontal convolution, SSSE3 dispatch.
 *
 * When x_step_q4 is 16 and tap 3 of filter_x is not 128, the block is
 * processed in column strips of 16, then 8, then 4 pixels by the SSSE3
 * kernels. Whatever width is left over (or the whole block when the fast
 * path does not apply) is handed to vp9_convolve8_horiz_c.
 */
void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  /* Original note: ensure the filter can be compressed to int16_t.
   * filter_x[3] is only inspected when the step is 16. */
  const int use_kernels = (x_step_q4 == 16) && (filter_x[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_h8_ssse3(src, src_stride, dst, dst_stride,
                                    h, filter_x);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_h8_ssse3(src, src_stride, dst, dst_stride,
                                   h, filter_x);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_h8_ssse3(src, src_stride, dst, dst_stride,
                                   h, filter_x);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
  }
}
michael@0 100
/*
 * Vertical convolution, SSSE3 dispatch.
 *
 * When y_step_q4 is 16 and tap 3 of filter_y is not 128, the block is
 * processed in column strips of 16, then 8, then 4 pixels by the SSSE3
 * kernels, each of which is given a source pointer three rows above the
 * current position. Leftover width (or the whole block when the fast path
 * does not apply) is handed to vp9_convolve8_vert_c.
 */
void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  /* filter_y[3] is only inspected when the step is 16. */
  const int use_kernels = (y_step_q4 == 16) && (filter_y[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
                                    dst, dst_stride, h, filter_y);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
                                   dst, dst_stride, h, filter_y);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
                                   dst, dst_stride, h, filter_y);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
  }
}
michael@0 138
/*
 * Horizontal convolution (avg variant), SSSE3 dispatch.
 *
 * Same strip-wise dispatch as the non-avg horizontal wrapper, but using
 * the *_h8_avg_ssse3 kernels and vp9_convolve8_avg_horiz_c as fallback.
 * The fast path requires x_step_q4 == 16 and filter_x[3] != 128.
 */
void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
  /* filter_x[3] is only inspected when the step is 16. */
  const int use_kernels = (x_step_q4 == 16) && (filter_x[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_h8_avg_ssse3(src, src_stride, dst, dst_stride,
                                        h, filter_x);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_h8_avg_ssse3(src, src_stride, dst, dst_stride,
                                       h, filter_x);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_h8_avg_ssse3(src, src_stride, dst, dst_stride,
                                       h, filter_x);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
  }
}
michael@0 176
/*
 * Vertical convolution (avg variant), SSSE3 dispatch.
 *
 * Same strip-wise dispatch as the non-avg vertical wrapper, but using the
 * *_v8_avg_ssse3 kernels and vp9_convolve8_avg_vert_c as fallback.
 * The fast path requires y_step_q4 == 16 and filter_y[3] != 128; each
 * kernel receives a source pointer three rows above the current position.
 */
void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
                                  int w, int h) {
  /* filter_y[3] is only inspected when the step is 16. */
  const int use_kernels = (y_step_q4 == 16) && (filter_y[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
                                        dst, dst_stride, h, filter_y);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
                                       dst, dst_stride, h, filter_y);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
                                       dst, dst_stride, h, filter_y);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, h);
  }
}
michael@0 214
michael@0 215 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
michael@0 216 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 217 const int16_t *filter_x, int x_step_q4,
michael@0 218 const int16_t *filter_y, int y_step_q4,
michael@0 219 int w, int h) {
michael@0 220 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
michael@0 221
michael@0 222 assert(w <= 64);
michael@0 223 assert(h <= 64);
michael@0 224 if (x_step_q4 == 16 && y_step_q4 == 16) {
michael@0 225 vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
michael@0 226 filter_x, x_step_q4, filter_y, y_step_q4,
michael@0 227 w, h + 7);
michael@0 228 vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
michael@0 229 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 230 } else {
michael@0 231 vp9_convolve8_c(src, src_stride, dst, dst_stride,
michael@0 232 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 233 }
michael@0 234 }
michael@0 235
michael@0 236 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
michael@0 237 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 238 const int16_t *filter_x, int x_step_q4,
michael@0 239 const int16_t *filter_y, int y_step_q4,
michael@0 240 int w, int h) {
michael@0 241 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
michael@0 242
michael@0 243 assert(w <= 64);
michael@0 244 assert(h <= 64);
michael@0 245 if (x_step_q4 == 16 && y_step_q4 == 16) {
michael@0 246 vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
michael@0 247 filter_x, x_step_q4, filter_y, y_step_q4,
michael@0 248 w, h + 7);
michael@0 249 vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
michael@0 250 filter_x, x_step_q4, filter_y, y_step_q4,
michael@0 251 w, h);
michael@0 252 } else {
michael@0 253 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
michael@0 254 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 255 }
michael@0 256 }
michael@0 257 #endif
michael@0 258
michael@0 259 #if HAVE_SSE2
/*
 * SSE2 1-D filter kernels. Only prototypes appear here; the definitions
 * live outside this file (presumably in assembly - TODO confirm).
 * The callers below advance by 16, 8 or 4 columns after the block1d16,
 * block1d8 and block1d4 kernels respectively; the h8 kernels are used by
 * the horizontal wrappers, the v8 kernels by the vertical ones, and the
 * _avg kernels by the vp9_convolve8_avg_* wrappers.
 */
filter8_1dfunction vp9_filter_block1d16_v8_sse2;
filter8_1dfunction vp9_filter_block1d16_h8_sse2;
filter8_1dfunction vp9_filter_block1d8_v8_sse2;
filter8_1dfunction vp9_filter_block1d8_h8_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_sse2;
filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
michael@0 272
/*
 * Horizontal convolution, SSE2 dispatch.
 *
 * When x_step_q4 is 16 and tap 3 of filter_x is not 128, the block is
 * processed in column strips of 16, then 8, then 4 pixels by the SSE2
 * kernels. Whatever width is left over (or the whole block when the fast
 * path does not apply) is handed to vp9_convolve8_horiz_c.
 */
void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  /* Original note: ensure the filter can be compressed to int16_t.
   * filter_x[3] is only inspected when the step is 16. */
  const int use_kernels = (x_step_q4 == 16) && (filter_x[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_h8_sse2(src, src_stride, dst, dst_stride,
                                   h, filter_x);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_h8_sse2(src, src_stride, dst, dst_stride,
                                  h, filter_x);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_h8_sse2(src, src_stride, dst, dst_stride,
                                  h, filter_x);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
  }
}
michael@0 311
/*
 * Vertical convolution, SSE2 dispatch.
 *
 * When y_step_q4 is 16 and tap 3 of filter_y is not 128, the block is
 * processed in column strips of 16, then 8, then 4 pixels by the SSE2
 * kernels, each of which is given a source pointer three rows above the
 * current position. Leftover width (or the whole block when the fast path
 * does not apply) is handed to vp9_convolve8_vert_c.
 */
void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
                             int w, int h) {
  /* filter_y[3] is only inspected when the step is 16. */
  const int use_kernels = (y_step_q4 == 16) && (filter_y[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_v8_sse2(src - src_stride * 3, src_stride,
                                   dst, dst_stride, h, filter_y);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_v8_sse2(src - src_stride * 3, src_stride,
                                  dst, dst_stride, h, filter_y);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_v8_sse2(src - src_stride * 3, src_stride,
                                  dst, dst_stride, h, filter_y);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
  }
}
michael@0 349
/*
 * Horizontal convolution (avg variant), SSE2 dispatch.
 *
 * Same strip-wise dispatch as the non-avg horizontal wrapper, but using
 * the *_h8_avg_sse2 kernels and vp9_convolve8_avg_horiz_c as fallback.
 * The fast path requires x_step_q4 == 16 and filter_x[3] != 128.
 */
void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
                                  int w, int h) {
  /* filter_x[3] is only inspected when the step is 16. */
  const int use_kernels = (x_step_q4 == 16) && (filter_x[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_h8_avg_sse2(src, src_stride, dst, dst_stride,
                                       h, filter_x);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_h8_avg_sse2(src, src_stride, dst, dst_stride,
                                      h, filter_x);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_h8_avg_sse2(src, src_stride, dst, dst_stride,
                                      h, filter_x);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
  }
}
michael@0 387
/*
 * Vertical convolution (avg variant), SSE2 dispatch.
 *
 * Same strip-wise dispatch as the non-avg vertical wrapper, but using the
 * *_v8_avg_sse2 kernels and vp9_convolve8_avg_vert_c as fallback.
 * The fast path requires y_step_q4 == 16 and filter_y[3] != 128; each
 * kernel receives a source pointer three rows above the current position.
 */
void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
                                 int w, int h) {
  /* filter_y[3] is only inspected when the step is 16. */
  const int use_kernels = (y_step_q4 == 16) && (filter_y[3] != 128);

  if (use_kernels) {
    for (; w >= 16; w -= 16) {
      vp9_filter_block1d16_v8_avg_sse2(src - src_stride * 3, src_stride,
                                       dst, dst_stride, h, filter_y);
      src += 16;
      dst += 16;
    }
    for (; w >= 8; w -= 8) {
      vp9_filter_block1d8_v8_avg_sse2(src - src_stride * 3, src_stride,
                                      dst, dst_stride, h, filter_y);
      src += 8;
      dst += 8;
    }
    for (; w >= 4; w -= 4) {
      vp9_filter_block1d4_v8_avg_sse2(src - src_stride * 3, src_stride,
                                      dst, dst_stride, h, filter_y);
      src += 4;
      dst += 4;
    }
  }

  /* Remaining columns go through the C implementation. */
  if (w != 0) {
    vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, h);
  }
}
michael@0 425
michael@0 426 void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
michael@0 427 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 428 const int16_t *filter_x, int x_step_q4,
michael@0 429 const int16_t *filter_y, int y_step_q4,
michael@0 430 int w, int h) {
michael@0 431 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
michael@0 432
michael@0 433 assert(w <= 64);
michael@0 434 assert(h <= 64);
michael@0 435 if (x_step_q4 == 16 && y_step_q4 == 16) {
michael@0 436 vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
michael@0 437 filter_x, x_step_q4, filter_y, y_step_q4,
michael@0 438 w, h + 7);
michael@0 439 vp9_convolve8_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
michael@0 440 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 441 } else {
michael@0 442 vp9_convolve8_c(src, src_stride, dst, dst_stride,
michael@0 443 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 444 }
michael@0 445 }
michael@0 446
michael@0 447 void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
michael@0 448 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 449 const int16_t *filter_x, int x_step_q4,
michael@0 450 const int16_t *filter_y, int y_step_q4,
michael@0 451 int w, int h) {
michael@0 452 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
michael@0 453
michael@0 454 assert(w <= 64);
michael@0 455 assert(h <= 64);
michael@0 456 if (x_step_q4 == 16 && y_step_q4 == 16) {
michael@0 457 vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
michael@0 458 filter_x, x_step_q4, filter_y, y_step_q4,
michael@0 459 w, h + 7);
michael@0 460 vp9_convolve8_avg_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
michael@0 461 filter_x, x_step_q4, filter_y, y_step_q4,
michael@0 462 w, h);
michael@0 463 } else {
michael@0 464 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
michael@0 465 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 466 }
michael@0 467 }
michael@0 468 #endif

mercurial