1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp9/encoder/vp9_variance_c.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1094 @@ 1.4 +/* 1.5 + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "./vp9_rtcd.h" 1.15 + 1.16 +#include "vpx_ports/mem.h" 1.17 +#include "vpx/vpx_integer.h" 1.18 + 1.19 +#include "vp9/common/vp9_common.h" 1.20 +#include "vp9/common/vp9_filter.h" 1.21 + 1.22 +#include "vp9/encoder/vp9_variance.h" 1.23 + 1.24 +void variance(const uint8_t *src_ptr, 1.25 + int source_stride, 1.26 + const uint8_t *ref_ptr, 1.27 + int recon_stride, 1.28 + int w, 1.29 + int h, 1.30 + unsigned int *sse, 1.31 + int *sum) { 1.32 + int i, j; 1.33 + int diff; 1.34 + 1.35 + *sum = 0; 1.36 + *sse = 0; 1.37 + 1.38 + for (i = 0; i < h; i++) { 1.39 + for (j = 0; j < w; j++) { 1.40 + diff = src_ptr[j] - ref_ptr[j]; 1.41 + *sum += diff; 1.42 + *sse += diff * diff; 1.43 + } 1.44 + 1.45 + src_ptr += source_stride; 1.46 + ref_ptr += recon_stride; 1.47 + } 1.48 +} 1.49 + 1.50 +/**************************************************************************** 1.51 + * 1.52 + * ROUTINE : filter_block2d_bil_first_pass 1.53 + * 1.54 + * INPUTS : uint8_t *src_ptr : Pointer to source block. 1.55 + * uint32_t src_pixels_per_line : Stride of input block. 1.56 + * uint32_t pixel_step : Offset between filter input 1.57 + * samples (see notes). 1.58 + * uint32_t output_height : Input block height. 1.59 + * uint32_t output_width : Input block width. 1.60 + * int32_t *vp9_filter : Array of 2 bi-linear filter 1.61 + * taps. 1.62 + * 1.63 + * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. 1.64 + * 1.65 + * RETURNS : void 1.66 + * 1.67 + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 1.68 + * either horizontal or vertical direction to produce the 1.69 + * filtered output block. Used to implement first-pass 1.70 + * of 2-D separable filter. 1.71 + * 1.72 + * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. 1.73 + * Two filter taps should sum to VP9_FILTER_WEIGHT. 1.74 + * pixel_step defines whether the filter is applied 1.75 + * horizontally (pixel_step=1) or vertically (pixel_step= 1.76 + * stride). 1.77 + * It defines the offset required to move from one input 1.78 + * to the next. 1.79 + * 1.80 + ****************************************************************************/ 1.81 +static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, 1.82 + uint16_t *output_ptr, 1.83 + unsigned int src_pixels_per_line, 1.84 + int pixel_step, 1.85 + unsigned int output_height, 1.86 + unsigned int output_width, 1.87 + const int16_t *vp9_filter) { 1.88 + unsigned int i, j; 1.89 + 1.90 + for (i = 0; i < output_height; i++) { 1.91 + for (j = 0; j < output_width; j++) { 1.92 + output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 1.93 + (int)src_ptr[pixel_step] * vp9_filter[1], 1.94 + FILTER_BITS); 1.95 + 1.96 + src_ptr++; 1.97 + } 1.98 + 1.99 + // Next row... 1.100 + src_ptr += src_pixels_per_line - output_width; 1.101 + output_ptr += output_width; 1.102 + } 1.103 +} 1.104 + 1.105 +/**************************************************************************** 1.106 + * 1.107 + * ROUTINE : filter_block2d_bil_second_pass 1.108 + * 1.109 + * INPUTS : int32_t *src_ptr : Pointer to source block. 1.110 + * uint32_t src_pixels_per_line : Stride of input block. 1.111 + * uint32_t pixel_step : Offset between filter input 1.112 + * samples (see notes). 1.113 + * uint32_t output_height : Input block height. 1.114 + * uint32_t output_width : Input block width. 1.115 + * int32_t *vp9_filter : Array of 2 bi-linear filter 1.116 + * taps. 1.117 + * 1.118 + * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. 1.119 + * 1.120 + * RETURNS : void 1.121 + * 1.122 + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 1.123 + * either horizontal or vertical direction to produce the 1.124 + * filtered output block. Used to implement second-pass 1.125 + * of 2-D separable filter. 1.126 + * 1.127 + * SPECIAL NOTES : Requires 32-bit input as produced by 1.128 + * filter_block2d_bil_first_pass. 1.129 + * Two filter taps should sum to VP9_FILTER_WEIGHT. 1.130 + * pixel_step defines whether the filter is applied 1.131 + * horizontally (pixel_step=1) or vertically (pixel_step= 1.132 + * stride). 1.133 + * It defines the offset required to move from one input 1.134 + * to the next. 1.135 + * 1.136 + ****************************************************************************/ 1.137 +static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, 1.138 + uint8_t *output_ptr, 1.139 + unsigned int src_pixels_per_line, 1.140 + unsigned int pixel_step, 1.141 + unsigned int output_height, 1.142 + unsigned int output_width, 1.143 + const int16_t *vp9_filter) { 1.144 + unsigned int i, j; 1.145 + 1.146 + for (i = 0; i < output_height; i++) { 1.147 + for (j = 0; j < output_width; j++) { 1.148 + output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 1.149 + (int)src_ptr[pixel_step] * vp9_filter[1], 1.150 + FILTER_BITS); 1.151 + src_ptr++; 1.152 + } 1.153 + 1.154 + src_ptr += src_pixels_per_line - output_width; 1.155 + output_ptr += output_width; 1.156 + } 1.157 +} 1.158 + 1.159 +unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { 1.160 + unsigned int i, sum = 0; 1.161 + 1.162 + for (i = 0; i < 256; i++) { 1.163 + sum += (src_ptr[i] * src_ptr[i]); 1.164 + } 1.165 + 1.166 + return sum; 1.167 +} 1.168 + 1.169 +unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, 1.170 + int source_stride, 1.171 + const uint8_t *ref_ptr, 1.172 + int recon_stride, 1.173 + unsigned int *sse) { 1.174 + unsigned int var; 1.175 + int avg; 1.176 + 1.177 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg); 1.178 + *sse = var; 1.179 + return (var - (((int64_t)avg * avg) >> 11)); 1.180 +} 1.181 + 1.182 +unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, 1.183 + int src_pixels_per_line, 1.184 + int xoffset, 1.185 + int yoffset, 1.186 + const uint8_t *dst_ptr, 1.187 + int dst_pixels_per_line, 1.188 + unsigned int *sse) { 1.189 + uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 1.190 + uint8_t temp2[68 * 64]; 1.191 + const int16_t *hfilter, *vfilter; 1.192 + 1.193 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.194 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.195 + 1.196 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.197 + 1, 33, 64, hfilter); 1.198 + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 1.199 + 1.200 + return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 1.201 +} 1.202 + 1.203 +unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, 1.204 + int src_pixels_per_line, 1.205 + int xoffset, 1.206 + int yoffset, 1.207 + const uint8_t *dst_ptr, 1.208 + int dst_pixels_per_line, 1.209 + unsigned int *sse, 1.210 + const uint8_t *second_pred) { 1.211 + uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 1.212 + uint8_t temp2[68 * 64]; 1.213 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 1.214 + const int16_t *hfilter, *vfilter; 1.215 + 1.216 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.217 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.218 + 1.219 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.220 + 1, 33, 64, hfilter); 1.221 + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 1.222 + comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); 1.223 + return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 1.224 +} 1.225 + 1.226 +unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, 1.227 + int source_stride, 1.228 + const uint8_t *ref_ptr, 1.229 + int recon_stride, 1.230 + unsigned int *sse) { 1.231 + unsigned int var; 1.232 + int avg; 1.233 + 1.234 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg); 1.235 + *sse = var; 1.236 + return (var - (((int64_t)avg * avg) >> 11)); 1.237 +} 1.238 + 1.239 +unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, 1.240 + int src_pixels_per_line, 1.241 + int xoffset, 1.242 + int yoffset, 1.243 + const uint8_t *dst_ptr, 1.244 + int dst_pixels_per_line, 1.245 + unsigned int *sse) { 1.246 + uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 1.247 + uint8_t temp2[68 * 64]; 1.248 + const int16_t *hfilter, *vfilter; 1.249 + 1.250 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.251 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.252 + 1.253 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.254 + 1, 65, 32, hfilter); 1.255 + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 1.256 + 1.257 + return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 1.258 +} 1.259 + 1.260 +unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, 1.261 + int src_pixels_per_line, 1.262 + int xoffset, 1.263 + int yoffset, 1.264 + const uint8_t *dst_ptr, 1.265 + int dst_pixels_per_line, 1.266 + unsigned int *sse, 1.267 + const uint8_t *second_pred) { 1.268 + uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 1.269 + uint8_t temp2[68 * 64]; 1.270 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer 1.271 + const int16_t *hfilter, *vfilter; 1.272 + 1.273 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.274 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.275 + 1.276 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.277 + 1, 65, 32, hfilter); 1.278 + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 1.279 + comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); 1.280 + return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 1.281 +} 1.282 + 1.283 +unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, 1.284 + int source_stride, 1.285 + const uint8_t *ref_ptr, 1.286 + int recon_stride, 1.287 + unsigned int *sse) { 1.288 + unsigned int var; 1.289 + int avg; 1.290 + 1.291 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); 1.292 + *sse = var; 1.293 + return (var - (((int64_t)avg * avg) >> 9)); 1.294 +} 1.295 + 1.296 +unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, 1.297 + int src_pixels_per_line, 1.298 + int xoffset, 1.299 + int yoffset, 1.300 + const uint8_t *dst_ptr, 1.301 + int dst_pixels_per_line, 1.302 + unsigned int *sse) { 1.303 + uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 1.304 + uint8_t temp2[36 * 32]; 1.305 + const int16_t *hfilter, *vfilter; 1.306 + 1.307 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.308 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.309 + 1.310 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.311 + 1, 17, 32, hfilter); 1.312 + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 1.313 + 1.314 + return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 1.315 +} 1.316 + 1.317 +unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, 1.318 + int src_pixels_per_line, 1.319 + int xoffset, 1.320 + int yoffset, 1.321 + const uint8_t *dst_ptr, 1.322 + int dst_pixels_per_line, 1.323 + unsigned int *sse, 1.324 + const uint8_t *second_pred) { 1.325 + uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 1.326 + uint8_t temp2[36 * 32]; 1.327 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer 1.328 + const int16_t *hfilter, *vfilter; 1.329 + 1.330 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.331 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.332 + 1.333 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.334 + 1, 17, 32, hfilter); 1.335 + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 1.336 + comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); 1.337 + return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 1.338 +} 1.339 + 1.340 +unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, 1.341 + int source_stride, 1.342 + const uint8_t *ref_ptr, 1.343 + int recon_stride, 1.344 + unsigned int *sse) { 1.345 + unsigned int var; 1.346 + int avg; 1.347 + 1.348 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); 1.349 + *sse = var; 1.350 + return (var - (((int64_t)avg * avg) >> 9)); 1.351 +} 1.352 + 1.353 +unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, 1.354 + int src_pixels_per_line, 1.355 + int xoffset, 1.356 + int yoffset, 1.357 + const uint8_t *dst_ptr, 1.358 + int dst_pixels_per_line, 1.359 + unsigned int *sse) { 1.360 + uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 1.361 + uint8_t temp2[36 * 32]; 1.362 + const int16_t *hfilter, *vfilter; 1.363 + 1.364 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.365 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.366 + 1.367 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.368 + 1, 33, 16, hfilter); 1.369 + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 1.370 + 1.371 + return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 1.372 +} 1.373 + 1.374 +unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, 1.375 + int src_pixels_per_line, 1.376 + int xoffset, 1.377 + int yoffset, 1.378 + const uint8_t *dst_ptr, 1.379 + int dst_pixels_per_line, 1.380 + unsigned int *sse, 1.381 + const uint8_t *second_pred) { 1.382 + uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 1.383 + uint8_t temp2[36 * 32]; 1.384 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer 1.385 + const int16_t *hfilter, *vfilter; 1.386 + 1.387 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.388 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.389 + 1.390 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.391 + 1, 33, 16, hfilter); 1.392 + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 1.393 + comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); 1.394 + return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 1.395 +} 1.396 + 1.397 +unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, 1.398 + int source_stride, 1.399 + const uint8_t *ref_ptr, 1.400 + int recon_stride, 1.401 + unsigned int *sse) { 1.402 + unsigned int var; 1.403 + int avg; 1.404 + 1.405 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg); 1.406 + *sse = var; 1.407 + return (var - (((int64_t)avg * avg) >> 12)); 1.408 +} 1.409 + 1.410 +unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, 1.411 + int source_stride, 1.412 + const uint8_t *ref_ptr, 1.413 + int recon_stride, 1.414 + unsigned int *sse) { 1.415 + unsigned int var; 1.416 + int avg; 1.417 + 1.418 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg); 1.419 + *sse = var; 1.420 + return (var - (((int64_t)avg * avg) >> 10)); 1.421 +} 1.422 + 1.423 +unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, 1.424 + int source_stride, 1.425 + const uint8_t *ref_ptr, 1.426 + int recon_stride, 1.427 + unsigned int *sse) { 1.428 + unsigned int var; 1.429 + int avg; 1.430 + 1.431 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 1.432 + *sse = var; 1.433 + return (var - (((unsigned int)avg * avg) >> 8)); 1.434 +} 1.435 + 1.436 +unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, 1.437 + int source_stride, 1.438 + const uint8_t *ref_ptr, 1.439 + int recon_stride, 1.440 + unsigned int *sse) { 1.441 + unsigned int var; 1.442 + int avg; 1.443 + 1.444 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 1.445 + *sse = var; 1.446 + return (var - (((unsigned int)avg * avg) >> 7)); 1.447 +} 1.448 + 1.449 +unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, 1.450 + int source_stride, 1.451 + const uint8_t *ref_ptr, 1.452 + int recon_stride, 1.453 + unsigned int *sse) { 1.454 + unsigned int var; 1.455 + int avg; 1.456 + 1.457 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 1.458 + *sse = var; 1.459 + return (var - (((unsigned int)avg * avg) >> 7)); 1.460 +} 1.461 + 1.462 +void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, 1.463 + const uint8_t *ref_ptr, int ref_stride, 1.464 + unsigned int *sse, int *sum) { 1.465 + variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); 1.466 +} 1.467 + 1.468 +unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, 1.469 + int source_stride, 1.470 + const uint8_t *ref_ptr, 1.471 + int recon_stride, 1.472 + unsigned int *sse) { 1.473 + unsigned int var; 1.474 + int avg; 1.475 + 1.476 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 1.477 + *sse = var; 1.478 + return (var - (((unsigned int)avg * avg) >> 6)); 1.479 +} 1.480 + 1.481 +unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, 1.482 + int source_stride, 1.483 + const uint8_t *ref_ptr, 1.484 + int recon_stride, 1.485 + unsigned int *sse) { 1.486 + unsigned int var; 1.487 + int avg; 1.488 + 1.489 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); 1.490 + *sse = var; 1.491 + return (var - (((unsigned int)avg * avg) >> 5)); 1.492 +} 1.493 + 1.494 +unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, 1.495 + int source_stride, 1.496 + const uint8_t *ref_ptr, 1.497 + int recon_stride, 1.498 + unsigned int *sse) { 1.499 + unsigned int var; 1.500 + int avg; 1.501 + 1.502 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); 1.503 + *sse = var; 1.504 + return (var - (((unsigned int)avg * avg) >> 5)); 1.505 +} 1.506 + 1.507 +unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, 1.508 + int source_stride, 1.509 + const uint8_t *ref_ptr, 1.510 + int recon_stride, 1.511 + unsigned int *sse) { 1.512 + unsigned int var; 1.513 + int avg; 1.514 + 1.515 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 1.516 + *sse = var; 1.517 + return (var - (((unsigned int)avg * avg) >> 4)); 1.518 +} 1.519 + 1.520 + 1.521 +unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, 1.522 + int source_stride, 1.523 + const uint8_t *ref_ptr, 1.524 + int recon_stride, 1.525 + unsigned int *sse) { 1.526 + unsigned int var; 1.527 + int avg; 1.528 + 1.529 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 1.530 + *sse = var; 1.531 + return var; 1.532 +} 1.533 + 1.534 +unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, 1.535 + int source_stride, 1.536 + const uint8_t *ref_ptr, 1.537 + int recon_stride, 1.538 + unsigned int *sse) { 1.539 + unsigned int var; 1.540 + int avg; 1.541 + 1.542 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 1.543 + *sse = var; 1.544 + return var; 1.545 +} 1.546 + 1.547 +unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, 1.548 + int source_stride, 1.549 + const uint8_t *ref_ptr, 1.550 + int recon_stride, 1.551 + unsigned int *sse) { 1.552 + unsigned int var; 1.553 + int avg; 1.554 + 1.555 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 1.556 + *sse = var; 1.557 + return var; 1.558 +} 1.559 + 1.560 +unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, 1.561 + int source_stride, 1.562 + const uint8_t *ref_ptr, 1.563 + int recon_stride, 1.564 + unsigned int *sse) { 1.565 + unsigned int var; 1.566 + int avg; 1.567 + 1.568 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 1.569 + *sse = var; 1.570 + return var; 1.571 +} 1.572 + 1.573 + 1.574 +unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, 1.575 + int src_pixels_per_line, 1.576 + int xoffset, 1.577 + int yoffset, 1.578 + const uint8_t *dst_ptr, 1.579 + int dst_pixels_per_line, 1.580 + unsigned int *sse) { 1.581 + uint8_t temp2[20 * 16]; 1.582 + const int16_t *hfilter, *vfilter; 1.583 + uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 1.584 + 1.585 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.586 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.587 + 1.588 + // First filter 1d Horizontal 1.589 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.590 + 1, 5, 4, hfilter); 1.591 + 1.592 + // Now filter Verticaly 1.593 + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 1.594 + 1.595 + return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 1.596 +} 1.597 + 1.598 +unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, 1.599 + int src_pixels_per_line, 1.600 + int xoffset, 1.601 + int yoffset, 1.602 + const uint8_t *dst_ptr, 1.603 + int dst_pixels_per_line, 1.604 + unsigned int *sse, 1.605 + const uint8_t *second_pred) { 1.606 + uint8_t temp2[20 * 16]; 1.607 + const int16_t *hfilter, *vfilter; 1.608 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer 1.609 + uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 1.610 + 1.611 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.612 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.613 + 1.614 + // First filter 1d Horizontal 1.615 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.616 + 1, 5, 4, hfilter); 1.617 + 1.618 + // Now filter Verticaly 1.619 + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 1.620 + comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); 1.621 + return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 1.622 +} 1.623 + 1.624 +unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, 1.625 + int src_pixels_per_line, 1.626 + int xoffset, 1.627 + int yoffset, 1.628 + const uint8_t *dst_ptr, 1.629 + int dst_pixels_per_line, 1.630 + unsigned int *sse) { 1.631 + uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 1.632 + uint8_t temp2[20 * 16]; 1.633 + const int16_t *hfilter, *vfilter; 1.634 + 1.635 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.636 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.637 + 1.638 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.639 + 1, 9, 8, hfilter); 1.640 + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 1.641 + 1.642 + return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1.643 +} 1.644 + 1.645 +unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, 1.646 + int src_pixels_per_line, 1.647 + int xoffset, 1.648 + int yoffset, 1.649 + const uint8_t *dst_ptr, 1.650 + int dst_pixels_per_line, 1.651 + unsigned int *sse, 1.652 + const uint8_t *second_pred) { 1.653 + uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 1.654 + uint8_t temp2[20 * 16]; 1.655 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer 1.656 + const int16_t *hfilter, *vfilter; 1.657 + 1.658 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.659 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.660 + 1.661 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.662 + 1, 9, 8, hfilter); 1.663 + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 1.664 + comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); 1.665 + return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1.666 +} 1.667 + 1.668 +unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, 1.669 + int src_pixels_per_line, 1.670 + int xoffset, 1.671 + int yoffset, 1.672 + const uint8_t *dst_ptr, 1.673 + int dst_pixels_per_line, 1.674 + unsigned int *sse) { 1.675 + uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering 1.676 + uint8_t temp2[20 * 16]; 1.677 + const int16_t *hfilter, *vfilter; 1.678 + 1.679 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.680 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.681 + 1.682 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.683 + 1, 17, 16, hfilter); 1.684 + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 1.685 + 1.686 + return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 1.687 +} 1.688 + 1.689 +unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 1.690 + int src_pixels_per_line, 1.691 + int xoffset, 1.692 + int yoffset, 1.693 + const uint8_t *dst_ptr, 1.694 + int dst_pixels_per_line, 1.695 + unsigned int *sse, 1.696 + const uint8_t *second_pred) { 1.697 + uint16_t fdata3[17 * 16]; 1.698 + uint8_t temp2[20 * 16]; 1.699 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer 1.700 + const int16_t *hfilter, *vfilter; 1.701 + 1.702 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.703 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.704 + 1.705 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.706 + 1, 17, 16, hfilter); 1.707 + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 1.708 + 1.709 + comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); 1.710 + return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 1.711 +} 1.712 + 1.713 +unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, 1.714 + int src_pixels_per_line, 1.715 + int xoffset, 1.716 + int yoffset, 1.717 + const uint8_t *dst_ptr, 1.718 + int dst_pixels_per_line, 1.719 + unsigned int *sse) { 1.720 + uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 1.721 + uint8_t temp2[68 * 64]; 1.722 + const int16_t *hfilter, *vfilter; 1.723 + 1.724 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.725 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.726 + 1.727 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.728 + 1, 65, 64, hfilter); 1.729 + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 1.730 + 1.731 + return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 1.732 +} 1.733 + 1.734 +unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, 1.735 + int src_pixels_per_line, 1.736 + int xoffset, 1.737 + int yoffset, 1.738 + const uint8_t *dst_ptr, 1.739 + int dst_pixels_per_line, 1.740 + unsigned int *sse, 1.741 + const uint8_t *second_pred) { 1.742 + uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 1.743 + uint8_t temp2[68 * 64]; 1.744 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 1.745 + const int16_t *hfilter, *vfilter; 1.746 + 1.747 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.748 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.749 + 1.750 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.751 + 1, 65, 64, hfilter); 1.752 + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 1.753 + comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); 1.754 + return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 1.755 +} 1.756 + 1.757 +unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, 1.758 + int src_pixels_per_line, 1.759 + int xoffset, 1.760 + int yoffset, 1.761 + const uint8_t *dst_ptr, 1.762 + int dst_pixels_per_line, 1.763 + unsigned int *sse) { 1.764 + uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 1.765 + uint8_t temp2[36 * 32]; 1.766 + const int16_t *hfilter, *vfilter; 1.767 + 1.768 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.769 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.770 + 1.771 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.772 + 1, 33, 32, hfilter); 1.773 + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 1.774 + 1.775 + return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 1.776 +} 1.777 + 1.778 +unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, 1.779 + int src_pixels_per_line, 1.780 + int xoffset, 1.781 + int yoffset, 1.782 + const uint8_t *dst_ptr, 1.783 + int dst_pixels_per_line, 1.784 + unsigned int *sse, 1.785 + const uint8_t *second_pred) { 1.786 + uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 1.787 + uint8_t temp2[36 * 32]; 1.788 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer 1.789 + const int16_t *hfilter, *vfilter; 1.790 + 1.791 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.792 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.793 + 1.794 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.795 + 1, 33, 32, hfilter); 1.796 + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 1.797 + comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); 1.798 + return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 1.799 +} 1.800 + 1.801 +unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, 1.802 + int source_stride, 1.803 + const uint8_t *ref_ptr, 1.804 + int recon_stride, 1.805 + unsigned int *sse) { 1.806 + return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, 1.807 + ref_ptr, recon_stride, sse); 1.808 +} 1.809 + 1.810 +unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, 1.811 + int source_stride, 1.812 + const uint8_t *ref_ptr, 1.813 + int recon_stride, 1.814 + unsigned int *sse) { 1.815 + return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, 1.816 + ref_ptr, recon_stride, sse); 1.817 +} 1.818 + 1.819 +unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, 1.820 + int source_stride, 1.821 + const uint8_t *ref_ptr, 1.822 + int recon_stride, 1.823 + unsigned int *sse) { 1.824 + return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, 1.825 + ref_ptr, recon_stride, sse); 1.826 +} 1.827 + 1.828 +unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, 1.829 + int source_stride, 1.830 + const uint8_t *ref_ptr, 1.831 + int recon_stride, 1.832 + unsigned int *sse) { 1.833 + return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, 1.834 + ref_ptr, recon_stride, sse); 1.835 +} 1.836 + 1.837 +unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, 1.838 + int source_stride, 1.839 + const uint8_t *ref_ptr, 1.840 + int recon_stride, 1.841 + unsigned int *sse) { 1.842 + return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, 1.843 + ref_ptr, recon_stride, sse); 1.844 +} 1.845 + 1.846 +unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, 1.847 + int source_stride, 1.848 + const uint8_t *ref_ptr, 1.849 + int recon_stride, 1.850 + unsigned int *sse) { 1.851 + return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, 1.852 + ref_ptr, recon_stride, sse); 1.853 +} 1.854 + 1.855 +unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, 1.856 + int source_stride, 1.857 + const uint8_t *ref_ptr, 1.858 + int recon_stride, 1.859 + unsigned int *sse) { 1.860 + return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, 1.861 + ref_ptr, recon_stride, sse); 1.862 +} 1.863 + 1.864 +unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, 1.865 + int source_stride, 1.866 + const uint8_t *ref_ptr, 1.867 + int recon_stride, 1.868 + unsigned int *sse) { 1.869 + return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, 1.870 + ref_ptr, recon_stride, sse); 1.871 +} 1.872 + 1.873 +unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, 1.874 + int source_stride, 1.875 + const uint8_t *ref_ptr, 1.876 + int recon_stride, 1.877 + unsigned int *sse) { 1.878 + return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, 1.879 + ref_ptr, recon_stride, sse); 1.880 +} 1.881 + 1.882 +unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, 1.883 + int src_pixels_per_line, 1.884 + int xoffset, 1.885 + int yoffset, 1.886 + const uint8_t *dst_ptr, 1.887 + int dst_pixels_per_line, 1.888 + unsigned int *sse) { 1.889 + vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, 1.890 + xoffset, yoffset, dst_ptr, 1.891 + dst_pixels_per_line, sse); 1.892 + return *sse; 1.893 +} 1.894 + 1.895 +unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, 1.896 + int src_pixels_per_line, 1.897 + int xoffset, 1.898 + int yoffset, 1.899 + const uint8_t *dst_ptr, 1.900 + int dst_pixels_per_line, 1.901 + unsigned int *sse) { 1.902 + vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, 1.903 + xoffset, yoffset, dst_ptr, 1.904 + dst_pixels_per_line, sse); 1.905 + return *sse; 1.906 +} 1.907 + 1.908 +unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, 1.909 + int src_pixels_per_line, 1.910 + int xoffset, 1.911 + int yoffset, 1.912 + const uint8_t *dst_ptr, 1.913 + int dst_pixels_per_line, 1.914 + unsigned int *sse) { 1.915 + vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, 1.916 + xoffset, yoffset, dst_ptr, 1.917 + dst_pixels_per_line, sse); 1.918 + return *sse; 1.919 +} 1.920 + 1.921 +unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, 1.922 + int src_pixels_per_line, 1.923 + int xoffset, 1.924 + int yoffset, 1.925 + const uint8_t *dst_ptr, 1.926 + int dst_pixels_per_line, 1.927 + unsigned int *sse) { 1.928 + uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 1.929 + uint8_t temp2[20 * 16]; 1.930 + const int16_t *hfilter, *vfilter; 1.931 + 1.932 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.933 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.934 + 1.935 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.936 + 1, 9, 16, hfilter); 1.937 + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 1.938 + 1.939 + return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 1.940 +} 1.941 + 1.942 +unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, 1.943 + int src_pixels_per_line, 1.944 + int xoffset, 1.945 + int yoffset, 1.946 + const uint8_t *dst_ptr, 1.947 + int dst_pixels_per_line, 1.948 + unsigned int *sse, 1.949 + const uint8_t *second_pred) { 1.950 + uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 1.951 + uint8_t temp2[20 * 16]; 1.952 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer 1.953 + const int16_t *hfilter, *vfilter; 1.954 + 1.955 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.956 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.957 + 1.958 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.959 + 1, 9, 16, hfilter); 1.960 + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 1.961 + comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); 1.962 + return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 1.963 +} 1.964 + 1.965 +unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, 1.966 + int src_pixels_per_line, 1.967 + int xoffset, 1.968 + int yoffset, 1.969 + const uint8_t *dst_ptr, 1.970 + int dst_pixels_per_line, 1.971 + unsigned int *sse) { 1.972 + uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 1.973 + uint8_t temp2[20 * 16]; 1.974 + const int16_t *hfilter, *vfilter; 1.975 + 1.976 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.977 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.978 + 1.979 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.980 + 1, 17, 8, hfilter); 1.981 + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 1.982 + 1.983 + return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1.984 +} 1.985 + 1.986 +unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 1.987 + int src_pixels_per_line, 1.988 + int xoffset, 1.989 + int yoffset, 1.990 + const uint8_t *dst_ptr, 1.991 + int dst_pixels_per_line, 1.992 + unsigned int *sse, 1.993 + const uint8_t *second_pred) { 1.994 + uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 1.995 + uint8_t temp2[20 * 16]; 1.996 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer 1.997 + const int16_t *hfilter, *vfilter; 1.998 + 1.999 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.1000 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.1001 + 1.1002 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.1003 + 1, 17, 8, hfilter); 1.1004 + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 1.1005 + comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); 1.1006 + return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1.1007 +} 1.1008 + 1.1009 +unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, 1.1010 + int src_pixels_per_line, 1.1011 + int xoffset, 1.1012 + int yoffset, 1.1013 + const uint8_t *dst_ptr, 1.1014 + int dst_pixels_per_line, 1.1015 + unsigned int *sse) { 1.1016 + uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1.1017 + uint8_t temp2[20 * 16]; 1.1018 + const int16_t *hfilter, *vfilter; 1.1019 + 1.1020 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.1021 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.1022 + 1.1023 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.1024 + 1, 5, 8, hfilter); 1.1025 + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1.1026 + 1.1027 + return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1.1028 +} 1.1029 + 1.1030 +unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, 1.1031 + int src_pixels_per_line, 1.1032 + int xoffset, 1.1033 + int yoffset, 1.1034 + const uint8_t *dst_ptr, 1.1035 + int dst_pixels_per_line, 1.1036 + unsigned int *sse, 1.1037 + const uint8_t *second_pred) { 1.1038 + uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1.1039 + uint8_t temp2[20 * 16]; 1.1040 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer 1.1041 + const int16_t *hfilter, *vfilter; 1.1042 + 1.1043 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.1044 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.1045 + 1.1046 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.1047 + 1, 5, 8, hfilter); 1.1048 + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1.1049 + comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); 1.1050 + return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1.1051 +} 1.1052 + 1.1053 +unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, 1.1054 + int src_pixels_per_line, 1.1055 + int xoffset, 1.1056 + int yoffset, 1.1057 + const uint8_t *dst_ptr, 1.1058 + int dst_pixels_per_line, 1.1059 + unsigned int *sse) { 1.1060 + uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1.1061 + // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be 1.1062 + // of this big? same issue appears in all other block size settings. 1.1063 + uint8_t temp2[20 * 16]; 1.1064 + const int16_t *hfilter, *vfilter; 1.1065 + 1.1066 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.1067 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.1068 + 1.1069 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.1070 + 1, 9, 4, hfilter); 1.1071 + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1.1072 + 1.1073 + return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 1.1074 +} 1.1075 + 1.1076 +unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, 1.1077 + int src_pixels_per_line, 1.1078 + int xoffset, 1.1079 + int yoffset, 1.1080 + const uint8_t *dst_ptr, 1.1081 + int dst_pixels_per_line, 1.1082 + unsigned int *sse, 1.1083 + const uint8_t *second_pred) { 1.1084 + uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1.1085 + uint8_t temp2[20 * 16]; 1.1086 + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer 1.1087 + const int16_t *hfilter, *vfilter; 1.1088 + 1.1089 + hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1.1090 + vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1.1091 + 1.1092 + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1.1093 + 1, 9, 4, hfilter); 1.1094 + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1.1095 + comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); 1.1096 + return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 1.1097 +}