media/libvpx/vp9/encoder/vp9_variance_c.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include "./vp9_rtcd.h"
michael@0 12
michael@0 13 #include "vpx_ports/mem.h"
michael@0 14 #include "vpx/vpx_integer.h"
michael@0 15
michael@0 16 #include "vp9/common/vp9_common.h"
michael@0 17 #include "vp9/common/vp9_filter.h"
michael@0 18
michael@0 19 #include "vp9/encoder/vp9_variance.h"
michael@0 20
/* Accumulates the sum of differences and the sum of squared differences
 * between two w x h blocks of 8-bit pixels.
 *
 *   src_ptr / source_stride : first block and the distance between its rows
 *   ref_ptr / recon_stride  : second block and the distance between its rows
 *   w, h                    : block width and height in pixels
 *   sse                     : receives sum((src - ref)^2)
 *   sum                     : receives sum(src - ref)
 *
 * Both outputs are 0 when w or h is not positive.
 *
 * The totals are kept in locals and stored once at the end: uint8_t pointers
 * may alias any object, so writing through sse/sum inside the loop would
 * force a store and reload on every pixel.
 */
void variance(const uint8_t *src_ptr,
              int source_stride,
              const uint8_t *ref_ptr,
              int recon_stride,
              int w,
              int h,
              unsigned int *sse,
              int *sum) {
  int row, col;
  int diff_total = 0;
  unsigned int sq_total = 0;

  for (row = 0; row < h; row++) {
    for (col = 0; col < w; col++) {
      const int diff = src_ptr[col] - ref_ptr[col];
      diff_total += diff;
      sq_total += diff * diff;
    }

    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
michael@0 46
michael@0 47 /****************************************************************************
michael@0 48 *
michael@0 49 * ROUTINE : filter_block2d_bil_first_pass
michael@0 50 *
michael@0 51 * INPUTS : uint8_t *src_ptr : Pointer to source block.
michael@0 52 * uint32_t src_pixels_per_line : Stride of input block.
michael@0 53 * uint32_t pixel_step : Offset between filter input
michael@0 54 * samples (see notes).
michael@0 55 * uint32_t output_height : Input block height.
michael@0 56 * uint32_t output_width : Input block width.
michael@0 57 * int32_t *vp9_filter : Array of 2 bi-linear filter
michael@0 58 * taps.
michael@0 59 *
michael@0 60 * OUTPUTS : int32_t *output_ptr : Pointer to filtered block.
michael@0 61 *
michael@0 62 * RETURNS : void
michael@0 63 *
michael@0 64 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
michael@0 65 * either horizontal or vertical direction to produce the
michael@0 66 * filtered output block. Used to implement first-pass
michael@0 67 * of 2-D separable filter.
michael@0 68 *
michael@0 69 * SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
michael@0 70 * Two filter taps should sum to VP9_FILTER_WEIGHT.
michael@0 71 * pixel_step defines whether the filter is applied
michael@0 72 * horizontally (pixel_step=1) or vertically (pixel_step=
michael@0 73 * stride).
michael@0 74 * It defines the offset required to move from one input
michael@0 75 * to the next.
michael@0 76 *
michael@0 77 ****************************************************************************/
michael@0 78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
michael@0 79 uint16_t *output_ptr,
michael@0 80 unsigned int src_pixels_per_line,
michael@0 81 int pixel_step,
michael@0 82 unsigned int output_height,
michael@0 83 unsigned int output_width,
michael@0 84 const int16_t *vp9_filter) {
michael@0 85 unsigned int i, j;
michael@0 86
michael@0 87 for (i = 0; i < output_height; i++) {
michael@0 88 for (j = 0; j < output_width; j++) {
michael@0 89 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
michael@0 90 (int)src_ptr[pixel_step] * vp9_filter[1],
michael@0 91 FILTER_BITS);
michael@0 92
michael@0 93 src_ptr++;
michael@0 94 }
michael@0 95
michael@0 96 // Next row...
michael@0 97 src_ptr += src_pixels_per_line - output_width;
michael@0 98 output_ptr += output_width;
michael@0 99 }
michael@0 100 }
michael@0 101
michael@0 102 /****************************************************************************
michael@0 103 *
michael@0 104 * ROUTINE : filter_block2d_bil_second_pass
michael@0 105 *
michael@0 106 * INPUTS : int32_t *src_ptr : Pointer to source block.
michael@0 107 * uint32_t src_pixels_per_line : Stride of input block.
michael@0 108 * uint32_t pixel_step : Offset between filter input
michael@0 109 * samples (see notes).
michael@0 110 * uint32_t output_height : Input block height.
michael@0 111 * uint32_t output_width : Input block width.
michael@0 112 * int32_t *vp9_filter : Array of 2 bi-linear filter
michael@0 113 * taps.
michael@0 114 *
michael@0 115 * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
michael@0 116 *
michael@0 117 * RETURNS : void
michael@0 118 *
michael@0 119 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
michael@0 120 * either horizontal or vertical direction to produce the
michael@0 121 * filtered output block. Used to implement second-pass
michael@0 122 * of 2-D separable filter.
michael@0 123 *
michael@0 124 * SPECIAL NOTES : Requires 32-bit input as produced by
michael@0 125 * filter_block2d_bil_first_pass.
michael@0 126 * Two filter taps should sum to VP9_FILTER_WEIGHT.
michael@0 127 * pixel_step defines whether the filter is applied
michael@0 128 * horizontally (pixel_step=1) or vertically (pixel_step=
michael@0 129 * stride).
michael@0 130 * It defines the offset required to move from one input
michael@0 131 * to the next.
michael@0 132 *
michael@0 133 ****************************************************************************/
michael@0 134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
michael@0 135 uint8_t *output_ptr,
michael@0 136 unsigned int src_pixels_per_line,
michael@0 137 unsigned int pixel_step,
michael@0 138 unsigned int output_height,
michael@0 139 unsigned int output_width,
michael@0 140 const int16_t *vp9_filter) {
michael@0 141 unsigned int i, j;
michael@0 142
michael@0 143 for (i = 0; i < output_height; i++) {
michael@0 144 for (j = 0; j < output_width; j++) {
michael@0 145 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
michael@0 146 (int)src_ptr[pixel_step] * vp9_filter[1],
michael@0 147 FILTER_BITS);
michael@0 148 src_ptr++;
michael@0 149 }
michael@0 150
michael@0 151 src_ptr += src_pixels_per_line - output_width;
michael@0 152 output_ptr += output_width;
michael@0 153 }
michael@0 154 }
michael@0 155
/* Returns the sum of the squares of the 256 int16_t values at src_ptr.
 * The total is accumulated in an unsigned int. */
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int acc = 0;

  while (src_ptr != end) {
    const int v = *src_ptr++;
    acc += v * v;
  }

  return acc;
}
michael@0 165
/* Variance of a 64x32 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 11), where 2^11 is the pixel count. */
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_total, &diff_total);
  /* 64-bit product: diff_total squared can exceed 32 bits at this size. */
  mean_term = ((int64_t)diff_total * diff_total) >> 11;
  *sse = sq_total;
  return (unsigned int)(sq_total - mean_term);
}
michael@0 178
/* Sub-pixel variance for a 64x32 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance64x32, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[65 * 64];    /* output of the horizontal pass */
  uint8_t filtered[68 * 64];  /* output of the vertical pass */

  /* 33 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 33, 64, x_taps);
  /* A pixel_step of 64 moves one row down in the 64-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 64, 64, 32, 64, y_taps);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 199
michael@0 200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
michael@0 201 int src_pixels_per_line,
michael@0 202 int xoffset,
michael@0 203 int yoffset,
michael@0 204 const uint8_t *dst_ptr,
michael@0 205 int dst_pixels_per_line,
michael@0 206 unsigned int *sse,
michael@0 207 const uint8_t *second_pred) {
michael@0 208 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
michael@0 209 uint8_t temp2[68 * 64];
michael@0 210 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
michael@0 211 const int16_t *hfilter, *vfilter;
michael@0 212
michael@0 213 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 214 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 215
michael@0 216 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 217 1, 33, 64, hfilter);
michael@0 218 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
michael@0 219 comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
michael@0 220 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
michael@0 221 }
michael@0 222
/* Variance of a 32x64 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 11), where 2^11 is the pixel count. */
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_total, &diff_total);
  /* 64-bit product: diff_total squared can exceed 32 bits at this size. */
  mean_term = ((int64_t)diff_total * diff_total) >> 11;
  *sse = sq_total;
  return (unsigned int)(sq_total - mean_term);
}
michael@0 235
/* Sub-pixel variance for a 32x64 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance32x64, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[65 * 64];    /* output of the horizontal pass */
  uint8_t filtered[68 * 64];  /* output of the vertical pass */

  /* 65 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 65, 32, x_taps);
  /* A pixel_step of 32 moves one row down in the 32-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 64, 32, y_taps);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 256
michael@0 257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
michael@0 258 int src_pixels_per_line,
michael@0 259 int xoffset,
michael@0 260 int yoffset,
michael@0 261 const uint8_t *dst_ptr,
michael@0 262 int dst_pixels_per_line,
michael@0 263 unsigned int *sse,
michael@0 264 const uint8_t *second_pred) {
michael@0 265 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
michael@0 266 uint8_t temp2[68 * 64];
michael@0 267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer
michael@0 268 const int16_t *hfilter, *vfilter;
michael@0 269
michael@0 270 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 271 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 272
michael@0 273 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 274 1, 65, 32, hfilter);
michael@0 275 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
michael@0 276 comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
michael@0 277 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
michael@0 278 }
michael@0 279
/* Variance of a 32x16 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 9), where 2^9 is the pixel count. */
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_total, &diff_total);
  /* 64-bit product: diff_total squared can exceed 32 bits at this size. */
  mean_term = ((int64_t)diff_total * diff_total) >> 9;
  *sse = sq_total;
  return (unsigned int)(sq_total - mean_term);
}
michael@0 292
/* Sub-pixel variance for a 32x16 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance32x16, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[33 * 32];    /* output of the horizontal pass */
  uint8_t filtered[36 * 32];  /* output of the vertical pass */

  /* 17 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 17, 32, x_taps);
  /* A pixel_step of 32 moves one row down in the 32-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 16, 32, y_taps);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 313
michael@0 314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
michael@0 315 int src_pixels_per_line,
michael@0 316 int xoffset,
michael@0 317 int yoffset,
michael@0 318 const uint8_t *dst_ptr,
michael@0 319 int dst_pixels_per_line,
michael@0 320 unsigned int *sse,
michael@0 321 const uint8_t *second_pred) {
michael@0 322 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
michael@0 323 uint8_t temp2[36 * 32];
michael@0 324 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer
michael@0 325 const int16_t *hfilter, *vfilter;
michael@0 326
michael@0 327 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 328 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 329
michael@0 330 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 331 1, 17, 32, hfilter);
michael@0 332 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
michael@0 333 comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
michael@0 334 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
michael@0 335 }
michael@0 336
/* Variance of a 16x32 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 9), where 2^9 is the pixel count. */
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_total, &diff_total);
  /* 64-bit product: diff_total squared can exceed 32 bits at this size. */
  mean_term = ((int64_t)diff_total * diff_total) >> 9;
  *sse = sq_total;
  return (unsigned int)(sq_total - mean_term);
}
michael@0 349
/* Sub-pixel variance for a 16x32 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance16x32, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[33 * 32];    /* output of the horizontal pass */
  uint8_t filtered[36 * 32];  /* output of the vertical pass */

  /* 33 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 33, 16, x_taps);
  /* A pixel_step of 16 moves one row down in the 16-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 16, 16, 32, 16, y_taps);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 370
michael@0 371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
michael@0 372 int src_pixels_per_line,
michael@0 373 int xoffset,
michael@0 374 int yoffset,
michael@0 375 const uint8_t *dst_ptr,
michael@0 376 int dst_pixels_per_line,
michael@0 377 unsigned int *sse,
michael@0 378 const uint8_t *second_pred) {
michael@0 379 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
michael@0 380 uint8_t temp2[36 * 32];
michael@0 381 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer
michael@0 382 const int16_t *hfilter, *vfilter;
michael@0 383
michael@0 384 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 385 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 386
michael@0 387 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 388 1, 33, 16, hfilter);
michael@0 389 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
michael@0 390 comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
michael@0 391 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
michael@0 392 }
michael@0 393
/* Variance of a 64x64 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 12), where 2^12 is the pixel count. */
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_total, &diff_total);
  /* 64-bit product: diff_total squared can exceed 32 bits at this size. */
  mean_term = ((int64_t)diff_total * diff_total) >> 12;
  *sse = sq_total;
  return (unsigned int)(sq_total - mean_term);
}
michael@0 406
/* Variance of a 32x32 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 10), where 2^10 is the pixel count. */
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_total, &diff_total);
  /* 64-bit product: diff_total squared can exceed 32 bits at this size. */
  mean_term = ((int64_t)diff_total * diff_total) >> 10;
  *sse = sq_total;
  return (unsigned int)(sq_total - mean_term);
}
michael@0 419
/* Variance of a 16x16 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 8), where 2^8 is the pixel count. */
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic, as in the other small sizes. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 8);
}
michael@0 432
/* Variance of an 8x16 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 7), where 2^7 is the pixel count. */
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 7);
}
michael@0 445
/* Variance of a 16x8 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 7), where 2^7 is the pixel count. */
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 7);
}
michael@0 458
/* Fills *sse and *sum for an 8x8 block by forwarding to variance() with
 * width and height fixed at 8. */
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int ref_stride,
                           unsigned int *sse,
                           int *sum) {
  variance(src_ptr, source_stride,
           ref_ptr, ref_stride,
           8, 8,
           sse, sum);
}
michael@0 464
/* Variance of an 8x8 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 6), where 2^6 is the pixel count. */
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 6);
}
michael@0 477
/* Variance of an 8x4 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 5), where 2^5 is the pixel count. */
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 5);
}
michael@0 490
/* Variance of a 4x8 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 5), where 2^5 is the pixel count. */
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 5);
}
michael@0 503
/* Variance of a 4x4 block: stores the sum of squared differences in *sse
 * and returns sse - (sum * sum >> 4), where 2^4 is the pixel count. */
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_total;
  unsigned int diff_total_u;
  int diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_total, &diff_total);
  /* The square is taken in unsigned arithmetic. */
  diff_total_u = (unsigned int)diff_total;
  *sse = sq_total;
  return sq_total - ((diff_total_u * diff_total_u) >> 4);
}
michael@0 516
michael@0 517
/* Sum of squared differences over a 16x16 block. The value is both stored in
 * *sse and returned; the sum of differences computed alongside is discarded. */
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride,
                            const uint8_t *ref_ptr, int recon_stride,
                            unsigned int *sse) {
  unsigned int sq_total;
  int unused_diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_total, &unused_diff_total);
  return (*sse = sq_total);
}
michael@0 530
/* Sum of squared differences over a 16x8 block. The value is both stored in
 * *sse and returned; the sum of differences computed alongside is discarded. */
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_total;
  int unused_diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_total, &unused_diff_total);
  return (*sse = sq_total);
}
michael@0 543
/* Sum of squared differences over an 8x16 block. The value is both stored in
 * *sse and returned; the sum of differences computed alongside is discarded. */
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_total;
  int unused_diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_total, &unused_diff_total);
  return (*sse = sq_total);
}
michael@0 556
/* Sum of squared differences over an 8x8 block. The value is both stored in
 * *sse and returned; the sum of differences computed alongside is discarded. */
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride,
                          const uint8_t *ref_ptr, int recon_stride,
                          unsigned int *sse) {
  unsigned int sq_total;
  int unused_diff_total;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_total, &unused_diff_total);
  return (*sse = sq_total);
}
michael@0 569
michael@0 570
/* Sub-pixel variance for a 4x4 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance4x4, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[5 * 4];      /* output of the horizontal pass */
  uint8_t filtered[20 * 16];  /* output of the vertical pass */

  /* Horizontal pass; 5 rows so the vertical pass has a row below each one. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 5, 4, x_taps);

  /* Vertical pass; a pixel_step of 4 moves one row down. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 4, 4, 4, 4, y_taps);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 594
michael@0 595 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
michael@0 596 int src_pixels_per_line,
michael@0 597 int xoffset,
michael@0 598 int yoffset,
michael@0 599 const uint8_t *dst_ptr,
michael@0 600 int dst_pixels_per_line,
michael@0 601 unsigned int *sse,
michael@0 602 const uint8_t *second_pred) {
michael@0 603 uint8_t temp2[20 * 16];
michael@0 604 const int16_t *hfilter, *vfilter;
michael@0 605 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer
michael@0 606 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
michael@0 607
michael@0 608 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 609 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 610
michael@0 611 // First filter 1d Horizontal
michael@0 612 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 613 1, 5, 4, hfilter);
michael@0 614
michael@0 615 // Now filter Verticaly
michael@0 616 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
michael@0 617 comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
michael@0 618 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
michael@0 619 }
michael@0 620
/* Sub-pixel variance for an 8x8 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance8x8, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[9 * 8];      /* output of the horizontal pass */
  uint8_t filtered[20 * 16];  /* output of the vertical pass */

  /* 9 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 9, 8, x_taps);
  /* A pixel_step of 8 moves one row down in the 8-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 8, 8, 8, 8, y_taps);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 641
michael@0 642 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
michael@0 643 int src_pixels_per_line,
michael@0 644 int xoffset,
michael@0 645 int yoffset,
michael@0 646 const uint8_t *dst_ptr,
michael@0 647 int dst_pixels_per_line,
michael@0 648 unsigned int *sse,
michael@0 649 const uint8_t *second_pred) {
michael@0 650 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
michael@0 651 uint8_t temp2[20 * 16];
michael@0 652 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer
michael@0 653 const int16_t *hfilter, *vfilter;
michael@0 654
michael@0 655 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 656 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 657
michael@0 658 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 659 1, 9, 8, hfilter);
michael@0 660 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
michael@0 661 comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
michael@0 662 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
michael@0 663 }
michael@0 664
/* Sub-pixel variance for a 16x16 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance16x16, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[17 * 16];    /* output of the horizontal pass */
  uint8_t filtered[20 * 16];  /* output of the vertical pass */

  /* 17 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 17, 16, x_taps);
  /* A pixel_step of 16 moves one row down in the 16-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 16, 16, 16, 16, y_taps);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 685
michael@0 686 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
michael@0 687 int src_pixels_per_line,
michael@0 688 int xoffset,
michael@0 689 int yoffset,
michael@0 690 const uint8_t *dst_ptr,
michael@0 691 int dst_pixels_per_line,
michael@0 692 unsigned int *sse,
michael@0 693 const uint8_t *second_pred) {
michael@0 694 uint16_t fdata3[17 * 16];
michael@0 695 uint8_t temp2[20 * 16];
michael@0 696 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer
michael@0 697 const int16_t *hfilter, *vfilter;
michael@0 698
michael@0 699 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 700 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 701
michael@0 702 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 703 1, 17, 16, hfilter);
michael@0 704 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
michael@0 705
michael@0 706 comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
michael@0 707 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
michael@0 708 }
michael@0 709
/* Sub-pixel variance for a 64x64 block.
 *
 * src_ptr is bilinearly filtered with the tap pairs BILINEAR_FILTERS_2TAP
 * selects for xoffset (horizontal pass) and yoffset (vertical pass). The
 * filtered block and dst_ptr are then passed to vp9_variance64x64, whose
 * result is returned; that call also receives sse. */
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz[65 * 64];    /* output of the horizontal pass */
  uint8_t filtered[68 * 64];  /* output of the vertical pass */

  /* 65 rows: the vertical pass reads one row below each output row. */
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 65, 64, x_taps);
  /* A pixel_step of 64 moves one row down in the 64-wide intermediate. */
  var_filter_block2d_bil_second_pass(horiz, filtered, 64, 64, 64, 64, y_taps);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 730
michael@0 731 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
michael@0 732 int src_pixels_per_line,
michael@0 733 int xoffset,
michael@0 734 int yoffset,
michael@0 735 const uint8_t *dst_ptr,
michael@0 736 int dst_pixels_per_line,
michael@0 737 unsigned int *sse,
michael@0 738 const uint8_t *second_pred) {
michael@0 739 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
michael@0 740 uint8_t temp2[68 * 64];
michael@0 741 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
michael@0 742 const int16_t *hfilter, *vfilter;
michael@0 743
michael@0 744 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 745 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 746
michael@0 747 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 748 1, 65, 64, hfilter);
michael@0 749 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
michael@0 750 comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
michael@0 751 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
michael@0 752 }
michael@0 753
// Sub-pixel variance, 32x32 block.
// Runs the two bilinear filter passes over the source (kernels picked by
// xoffset and yoffset) and returns vp9_variance32x32 of the filtered
// block against dst_ptr. sse is passed straight through to that call.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const kernel_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const kernel_y = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[33 * 32];  // scratch output of the first pass
  uint8_t filtered[36 * 32];     // scratch output of the second pass

  // 33 x 32 values are requested from the first pass: one line more than
  // the block itself.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 33, 32, kernel_x);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 32, 32, 32, 32,
                                     kernel_y);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 774
michael@0 775 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
michael@0 776 int src_pixels_per_line,
michael@0 777 int xoffset,
michael@0 778 int yoffset,
michael@0 779 const uint8_t *dst_ptr,
michael@0 780 int dst_pixels_per_line,
michael@0 781 unsigned int *sse,
michael@0 782 const uint8_t *second_pred) {
michael@0 783 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
michael@0 784 uint8_t temp2[36 * 32];
michael@0 785 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer
michael@0 786 const int16_t *hfilter, *vfilter;
michael@0 787
michael@0 788 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 789 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 790
michael@0 791 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 792 1, 33, 32, hfilter);
michael@0 793 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
michael@0 794 comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
michael@0 795 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
michael@0 796 }
michael@0 797
// Horizontal half-pixel variance, 16x16 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance16x16_c with
// xoffset = 8 and yoffset = 0 (8 is presumably the half-pel entry of the
// bilinear filter table -- confirm against vp9_filter.h).
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 806
// Horizontal half-pixel variance, 32x32 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance32x32_c with
// xoffset = 8 and yoffset = 0.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 815
// Horizontal half-pixel variance, 64x64 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance64x64_c with
// xoffset = 8 and yoffset = 0.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 824
// Vertical half-pixel variance, 16x16 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance16x16_c with
// xoffset = 0 and yoffset = 8.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 833
// Vertical half-pixel variance, 32x32 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance32x32_c with
// xoffset = 0 and yoffset = 8.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 842
// Vertical half-pixel variance, 64x64 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance64x64_c with
// xoffset = 0 and yoffset = 8.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 851
// Diagonal (horizontal + vertical) half-pixel variance, 16x16 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance16x16_c with both
// xoffset and yoffset set to 8.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 860
// Diagonal (horizontal + vertical) half-pixel variance, 32x32 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance32x32_c with both
// xoffset and yoffset set to 8.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 869
// Diagonal (horizontal + vertical) half-pixel variance, 64x64 block.
// Thin wrapper: forwards to vp9_sub_pixel_variance64x64_c with both
// xoffset and yoffset set to 8.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
michael@0 878
// Sub-pixel MSE, 16x16 block.
// Calls vp9_sub_pixel_variance16x16_c with the same arguments and returns
// whatever that call leaves in *sse rather than its own return value.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The variance result is deliberately discarded.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
michael@0 891
// Sub-pixel MSE, 32x32 block.
// Calls vp9_sub_pixel_variance32x32_c with the same arguments and returns
// whatever that call leaves in *sse rather than its own return value.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The variance result is deliberately discarded.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
michael@0 904
// Sub-pixel MSE, 64x64 block.
// Calls vp9_sub_pixel_variance64x64_c with the same arguments and returns
// whatever that call leaves in *sse rather than its own return value.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The variance result is deliberately discarded.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
michael@0 917
// Sub-pixel variance, 16x8 block.
// Runs the two bilinear filter passes over the source (kernels picked by
// xoffset and yoffset) and returns vp9_variance16x8 of the filtered block
// against dst_ptr. sse is passed straight through to that call.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const kernel_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const kernel_y = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[16 * 9];  // scratch output of the first pass
  uint8_t filtered[20 * 16];    // scratch output of the second pass

  // 9 x 16 values are requested from the first pass: one line more than
  // the 8 the block needs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 16, kernel_x);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 8, 16,
                                     kernel_y);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 938
michael@0 939 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
michael@0 940 int src_pixels_per_line,
michael@0 941 int xoffset,
michael@0 942 int yoffset,
michael@0 943 const uint8_t *dst_ptr,
michael@0 944 int dst_pixels_per_line,
michael@0 945 unsigned int *sse,
michael@0 946 const uint8_t *second_pred) {
michael@0 947 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
michael@0 948 uint8_t temp2[20 * 16];
michael@0 949 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer
michael@0 950 const int16_t *hfilter, *vfilter;
michael@0 951
michael@0 952 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 953 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 954
michael@0 955 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 956 1, 9, 16, hfilter);
michael@0 957 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
michael@0 958 comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
michael@0 959 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
michael@0 960 }
michael@0 961
// Sub-pixel variance, 8x16 block.
// Runs the two bilinear filter passes over the source (kernels picked by
// xoffset and yoffset) and returns vp9_variance8x16 of the filtered block
// against dst_ptr. sse is passed straight through to that call.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const kernel_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const kernel_y = BILINEAR_FILTERS_2TAP(yoffset);
  // Sized 9 * 16 (144 entries); the first pass below is asked for
  // 17 x 8 (136) values, which fits.
  uint16_t first_pass[9 * 16];
  uint8_t filtered[20 * 16];  // scratch output of the second pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 17, 8, kernel_x);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 16, 8,
                                     kernel_y);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 982
michael@0 983 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
michael@0 984 int src_pixels_per_line,
michael@0 985 int xoffset,
michael@0 986 int yoffset,
michael@0 987 const uint8_t *dst_ptr,
michael@0 988 int dst_pixels_per_line,
michael@0 989 unsigned int *sse,
michael@0 990 const uint8_t *second_pred) {
michael@0 991 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
michael@0 992 uint8_t temp2[20 * 16];
michael@0 993 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer
michael@0 994 const int16_t *hfilter, *vfilter;
michael@0 995
michael@0 996 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 997 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 998
michael@0 999 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 1000 1, 17, 8, hfilter);
michael@0 1001 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
michael@0 1002 comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
michael@0 1003 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
michael@0 1004 }
michael@0 1005
// Sub-pixel variance, 8x4 block.
// Runs the two bilinear filter passes over the source (kernels picked by
// xoffset and yoffset) and returns vp9_variance8x4 of the filtered block
// against dst_ptr. sse is passed straight through to that call.
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const kernel_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const kernel_y = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[8 * 5];  // scratch output of the first pass
  uint8_t filtered[20 * 16];   // scratch output of the second pass

  // 5 x 8 values are requested from the first pass: one line more than
  // the 4 the block needs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 5, 8, kernel_x);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 4, 8,
                                     kernel_y);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 1026
michael@0 1027 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
michael@0 1028 int src_pixels_per_line,
michael@0 1029 int xoffset,
michael@0 1030 int yoffset,
michael@0 1031 const uint8_t *dst_ptr,
michael@0 1032 int dst_pixels_per_line,
michael@0 1033 unsigned int *sse,
michael@0 1034 const uint8_t *second_pred) {
michael@0 1035 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
michael@0 1036 uint8_t temp2[20 * 16];
michael@0 1037 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
michael@0 1038 const int16_t *hfilter, *vfilter;
michael@0 1039
michael@0 1040 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 1041 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 1042
michael@0 1043 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 1044 1, 5, 8, hfilter);
michael@0 1045 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
michael@0 1046 comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
michael@0 1047 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
michael@0 1048 }
michael@0 1049
// Sub-pixel variance, 4x8 block.
// Runs the two bilinear filter passes over the source (kernels picked by
// xoffset and yoffset) and returns vp9_variance4x8 of the filtered block
// against dst_ptr. sse is passed straight through to that call.
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const kernel_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const kernel_y = BILINEAR_FILTERS_2TAP(yoffset);
  // Sized 5 * 8 (40 entries); the first pass below is asked for
  // 9 x 4 (36) values, which fits.
  uint16_t first_pass[5 * 8];
  // FIXME(jingning,rbultje): this second-pass buffer probably does not
  // need to be this big. The same applies to every other block size.
  uint8_t filtered[20 * 16];

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 4, kernel_x);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 4, 4, 8, 4,
                                     kernel_y);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
michael@0 1072
michael@0 1073 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
michael@0 1074 int src_pixels_per_line,
michael@0 1075 int xoffset,
michael@0 1076 int yoffset,
michael@0 1077 const uint8_t *dst_ptr,
michael@0 1078 int dst_pixels_per_line,
michael@0 1079 unsigned int *sse,
michael@0 1080 const uint8_t *second_pred) {
michael@0 1081 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
michael@0 1082 uint8_t temp2[20 * 16];
michael@0 1083 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
michael@0 1084 const int16_t *hfilter, *vfilter;
michael@0 1085
michael@0 1086 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
michael@0 1087 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
michael@0 1088
michael@0 1089 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
michael@0 1090 1, 9, 4, hfilter);
michael@0 1091 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
michael@0 1092 comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
michael@0 1093 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
michael@0 1094 }

mercurial