1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp8/common/variance_c.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,458 @@ 1.4 +/* 1.5 + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 + 1.15 +#include "variance.h" 1.16 +#include "filter.h" 1.17 + 1.18 + 1.19 +unsigned int vp8_get_mb_ss_c 1.20 +( 1.21 + const short *src_ptr 1.22 +) 1.23 +{ 1.24 + unsigned int i = 0, sum = 0; 1.25 + 1.26 + do 1.27 + { 1.28 + sum += (src_ptr[i] * src_ptr[i]); 1.29 + i++; 1.30 + } 1.31 + while (i < 256); 1.32 + 1.33 + return sum; 1.34 +} 1.35 + 1.36 + 1.37 +static void variance( 1.38 + const unsigned char *src_ptr, 1.39 + int source_stride, 1.40 + const unsigned char *ref_ptr, 1.41 + int recon_stride, 1.42 + int w, 1.43 + int h, 1.44 + unsigned int *sse, 1.45 + int *sum) 1.46 +{ 1.47 + int i, j; 1.48 + int diff; 1.49 + 1.50 + *sum = 0; 1.51 + *sse = 0; 1.52 + 1.53 + for (i = 0; i < h; i++) 1.54 + { 1.55 + for (j = 0; j < w; j++) 1.56 + { 1.57 + diff = src_ptr[j] - ref_ptr[j]; 1.58 + *sum += diff; 1.59 + *sse += diff * diff; 1.60 + } 1.61 + 1.62 + src_ptr += source_stride; 1.63 + ref_ptr += recon_stride; 1.64 + } 1.65 +} 1.66 + 1.67 + 1.68 +unsigned int vp8_variance16x16_c( 1.69 + const unsigned char *src_ptr, 1.70 + int source_stride, 1.71 + const unsigned char *ref_ptr, 1.72 + int recon_stride, 1.73 + unsigned int *sse) 1.74 +{ 1.75 + unsigned int var; 1.76 + int avg; 1.77 + 1.78 + 1.79 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 1.80 + *sse = var; 1.81 + return (var - (((unsigned int)avg * avg) >> 8)); 1.82 +} 1.83 + 1.84 +unsigned int vp8_variance8x16_c( 1.85 + const unsigned char *src_ptr, 1.86 + int source_stride, 1.87 + const unsigned char *ref_ptr, 1.88 + int recon_stride, 1.89 + unsigned int *sse) 1.90 +{ 1.91 + unsigned int var; 1.92 + int avg; 1.93 + 1.94 + 1.95 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 1.96 + *sse = var; 1.97 + return (var - (((unsigned int)avg * avg) >> 7)); 1.98 +} 1.99 + 1.100 +unsigned int vp8_variance16x8_c( 1.101 + const unsigned char *src_ptr, 1.102 + int source_stride, 1.103 + const unsigned char *ref_ptr, 1.104 + int recon_stride, 1.105 + unsigned int *sse) 1.106 +{ 1.107 + unsigned int var; 1.108 + int avg; 1.109 + 1.110 + 1.111 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 1.112 + *sse = var; 1.113 + return (var - (((unsigned int)avg * avg) >> 7)); 1.114 +} 1.115 + 1.116 + 1.117 +unsigned int vp8_variance8x8_c( 1.118 + const unsigned char *src_ptr, 1.119 + int source_stride, 1.120 + const unsigned char *ref_ptr, 1.121 + int recon_stride, 1.122 + unsigned int *sse) 1.123 +{ 1.124 + unsigned int var; 1.125 + int avg; 1.126 + 1.127 + 1.128 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 1.129 + *sse = var; 1.130 + return (var - (((unsigned int)avg * avg) >> 6)); 1.131 +} 1.132 + 1.133 +unsigned int vp8_variance4x4_c( 1.134 + const unsigned char *src_ptr, 1.135 + int source_stride, 1.136 + const unsigned char *ref_ptr, 1.137 + int recon_stride, 1.138 + unsigned int *sse) 1.139 +{ 1.140 + unsigned int var; 1.141 + int avg; 1.142 + 1.143 + 1.144 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 1.145 + *sse = var; 1.146 + return (var - (((unsigned int)avg * avg) >> 4)); 1.147 +} 1.148 + 1.149 + 1.150 +unsigned int vp8_mse16x16_c( 1.151 + const unsigned char *src_ptr, 1.152 + int source_stride, 1.153 + const unsigned char *ref_ptr, 1.154 + int recon_stride, 1.155 + unsigned int *sse) 1.156 +{ 1.157 + unsigned int var; 1.158 + int avg; 1.159 + 1.160 + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 1.161 + *sse = var; 1.162 + return var; 1.163 +} 1.164 + 1.165 + 1.166 +/**************************************************************************** 1.167 + * 1.168 + * ROUTINE : filter_block2d_bil_first_pass 1.169 + * 1.170 + * INPUTS : UINT8 *src_ptr : Pointer to source block. 1.171 + * UINT32 src_pixels_per_line : Stride of input block. 1.172 + * UINT32 pixel_step : Offset between filter input samples (see notes). 1.173 + * UINT32 output_height : Input block height. 1.174 + * UINT32 output_width : Input block width. 1.175 + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 1.176 + * 1.177 + * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. 1.178 + * 1.179 + * RETURNS : void 1.180 + * 1.181 + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 1.182 + * either horizontal or vertical direction to produce the 1.183 + * filtered output block. Used to implement first-pass 1.184 + * of 2-D separable filter. 1.185 + * 1.186 + * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 1.187 + * Two filter taps should sum to VP8_FILTER_WEIGHT. 1.188 + * pixel_step defines whether the filter is applied 1.189 + * horizontally (pixel_step=1) or vertically (pixel_step=stride). 1.190 + * It defines the offset required to move from one input 1.191 + * to the next. 1.192 + * 1.193 + ****************************************************************************/ 1.194 +static void var_filter_block2d_bil_first_pass 1.195 +( 1.196 + const unsigned char *src_ptr, 1.197 + unsigned short *output_ptr, 1.198 + unsigned int src_pixels_per_line, 1.199 + int pixel_step, 1.200 + unsigned int output_height, 1.201 + unsigned int output_width, 1.202 + const short *vp8_filter 1.203 +) 1.204 +{ 1.205 + unsigned int i, j; 1.206 + 1.207 + for (i = 0; i < output_height; i++) 1.208 + { 1.209 + for (j = 0; j < output_width; j++) 1.210 + { 1.211 + /* Apply bilinear filter */ 1.212 + output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + 1.213 + ((int)src_ptr[pixel_step] * vp8_filter[1]) + 1.214 + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; 1.215 + src_ptr++; 1.216 + } 1.217 + 1.218 + /* Next row... */ 1.219 + src_ptr += src_pixels_per_line - output_width; 1.220 + output_ptr += output_width; 1.221 + } 1.222 +} 1.223 + 1.224 +/**************************************************************************** 1.225 + * 1.226 + * ROUTINE : filter_block2d_bil_second_pass 1.227 + * 1.228 + * INPUTS : INT32 *src_ptr : Pointer to source block. 1.229 + * UINT32 src_pixels_per_line : Stride of input block. 1.230 + * UINT32 pixel_step : Offset between filter input samples (see notes). 1.231 + * UINT32 output_height : Input block height. 1.232 + * UINT32 output_width : Input block width. 1.233 + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 1.234 + * 1.235 + * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 1.236 + * 1.237 + * RETURNS : void 1.238 + * 1.239 + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 1.240 + * either horizontal or vertical direction to produce the 1.241 + * filtered output block. Used to implement second-pass 1.242 + * of 2-D separable filter. 1.243 + * 1.244 + * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. 1.245 + * Two filter taps should sum to VP8_FILTER_WEIGHT. 1.246 + * pixel_step defines whether the filter is applied 1.247 + * horizontally (pixel_step=1) or vertically (pixel_step=stride). 1.248 + * It defines the offset required to move from one input 1.249 + * to the next. 1.250 + * 1.251 + ****************************************************************************/ 1.252 +static void var_filter_block2d_bil_second_pass 1.253 +( 1.254 + const unsigned short *src_ptr, 1.255 + unsigned char *output_ptr, 1.256 + unsigned int src_pixels_per_line, 1.257 + unsigned int pixel_step, 1.258 + unsigned int output_height, 1.259 + unsigned int output_width, 1.260 + const short *vp8_filter 1.261 +) 1.262 +{ 1.263 + unsigned int i, j; 1.264 + int Temp; 1.265 + 1.266 + for (i = 0; i < output_height; i++) 1.267 + { 1.268 + for (j = 0; j < output_width; j++) 1.269 + { 1.270 + /* Apply filter */ 1.271 + Temp = ((int)src_ptr[0] * vp8_filter[0]) + 1.272 + ((int)src_ptr[pixel_step] * vp8_filter[1]) + 1.273 + (VP8_FILTER_WEIGHT / 2); 1.274 + output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 1.275 + src_ptr++; 1.276 + } 1.277 + 1.278 + /* Next row... */ 1.279 + src_ptr += src_pixels_per_line - output_width; 1.280 + output_ptr += output_width; 1.281 + } 1.282 +} 1.283 + 1.284 + 1.285 +unsigned int vp8_sub_pixel_variance4x4_c 1.286 +( 1.287 + const unsigned char *src_ptr, 1.288 + int src_pixels_per_line, 1.289 + int xoffset, 1.290 + int yoffset, 1.291 + const unsigned char *dst_ptr, 1.292 + int dst_pixels_per_line, 1.293 + unsigned int *sse 1.294 +) 1.295 +{ 1.296 + unsigned char temp2[20*16]; 1.297 + const short *HFilter, *VFilter; 1.298 + unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ 1.299 + 1.300 + HFilter = vp8_bilinear_filters[xoffset]; 1.301 + VFilter = vp8_bilinear_filters[yoffset]; 1.302 + 1.303 + /* First filter 1d Horizontal */ 1.304 + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); 1.305 + 1.306 + /* Now filter Verticaly */ 1.307 + var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); 1.308 + 1.309 + return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 1.310 +} 1.311 + 1.312 + 1.313 +unsigned int vp8_sub_pixel_variance8x8_c 1.314 +( 1.315 + const unsigned char *src_ptr, 1.316 + int src_pixels_per_line, 1.317 + int xoffset, 1.318 + int yoffset, 1.319 + const unsigned char *dst_ptr, 1.320 + int dst_pixels_per_line, 1.321 + unsigned int *sse 1.322 +) 1.323 +{ 1.324 + unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ 1.325 + unsigned char temp2[20*16]; 1.326 + const short *HFilter, *VFilter; 1.327 + 1.328 + HFilter = vp8_bilinear_filters[xoffset]; 1.329 + VFilter = vp8_bilinear_filters[yoffset]; 1.330 + 1.331 + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); 1.332 + var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); 1.333 + 1.334 + return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1.335 +} 1.336 + 1.337 +unsigned int vp8_sub_pixel_variance16x16_c 1.338 +( 1.339 + const unsigned char *src_ptr, 1.340 + int src_pixels_per_line, 1.341 + int xoffset, 1.342 + int yoffset, 1.343 + const unsigned char *dst_ptr, 1.344 + int dst_pixels_per_line, 1.345 + unsigned int *sse 1.346 +) 1.347 +{ 1.348 + unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ 1.349 + unsigned char temp2[20*16]; 1.350 + const short *HFilter, *VFilter; 1.351 + 1.352 + HFilter = vp8_bilinear_filters[xoffset]; 1.353 + VFilter = vp8_bilinear_filters[yoffset]; 1.354 + 1.355 + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); 1.356 + var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); 1.357 + 1.358 + return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 1.359 +} 1.360 + 1.361 + 1.362 +unsigned int vp8_variance_halfpixvar16x16_h_c( 1.363 + const unsigned char *src_ptr, 1.364 + int source_stride, 1.365 + const unsigned char *ref_ptr, 1.366 + int recon_stride, 1.367 + unsigned int *sse) 1.368 +{ 1.369 + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0, 1.370 + ref_ptr, recon_stride, sse); 1.371 +} 1.372 + 1.373 + 1.374 +unsigned int vp8_variance_halfpixvar16x16_v_c( 1.375 + const unsigned char *src_ptr, 1.376 + int source_stride, 1.377 + const unsigned char *ref_ptr, 1.378 + int recon_stride, 1.379 + unsigned int *sse) 1.380 +{ 1.381 + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4, 1.382 + ref_ptr, recon_stride, sse); 1.383 +} 1.384 + 1.385 + 1.386 +unsigned int vp8_variance_halfpixvar16x16_hv_c( 1.387 + const unsigned char *src_ptr, 1.388 + int source_stride, 1.389 + const unsigned char *ref_ptr, 1.390 + int recon_stride, 1.391 + unsigned int *sse) 1.392 +{ 1.393 + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4, 1.394 + ref_ptr, recon_stride, sse); 1.395 +} 1.396 + 1.397 + 1.398 +unsigned int vp8_sub_pixel_mse16x16_c 1.399 +( 1.400 + const unsigned char *src_ptr, 1.401 + int src_pixels_per_line, 1.402 + int xoffset, 1.403 + int yoffset, 1.404 + const unsigned char *dst_ptr, 1.405 + int dst_pixels_per_line, 1.406 + unsigned int *sse 1.407 +) 1.408 +{ 1.409 + vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 1.410 + return *sse; 1.411 +} 1.412 + 1.413 +unsigned int vp8_sub_pixel_variance16x8_c 1.414 +( 1.415 + const unsigned char *src_ptr, 1.416 + int src_pixels_per_line, 1.417 + int xoffset, 1.418 + int yoffset, 1.419 + const unsigned char *dst_ptr, 1.420 + int dst_pixels_per_line, 1.421 + unsigned int *sse 1.422 +) 1.423 +{ 1.424 + unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ 1.425 + unsigned char temp2[20*16]; 1.426 + const short *HFilter, *VFilter; 1.427 + 1.428 + HFilter = vp8_bilinear_filters[xoffset]; 1.429 + VFilter = vp8_bilinear_filters[yoffset]; 1.430 + 1.431 + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); 1.432 + var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); 1.433 + 1.434 + return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 1.435 +} 1.436 + 1.437 +unsigned int vp8_sub_pixel_variance8x16_c 1.438 +( 1.439 + const unsigned char *src_ptr, 1.440 + int src_pixels_per_line, 1.441 + int xoffset, 1.442 + int yoffset, 1.443 + const unsigned char *dst_ptr, 1.444 + int dst_pixels_per_line, 1.445 + unsigned int *sse 1.446 +) 1.447 +{ 1.448 + unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ 1.449 + unsigned char temp2[20*16]; 1.450 + const short *HFilter, *VFilter; 1.451 + 1.452 + 1.453 + HFilter = vp8_bilinear_filters[xoffset]; 1.454 + VFilter = vp8_bilinear_filters[yoffset]; 1.455 + 1.456 + 1.457 + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter); 1.458 + var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); 1.459 + 1.460 + return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1.461 +}