1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,555 @@ 1.4 +/* 1.5 + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "./vpx_config.h" 1.15 + 1.16 +#include "vp9/encoder/vp9_variance.h" 1.17 +#include "vp9/common/vp9_pragmas.h" 1.18 +#include "vpx_ports/mem.h" 1.19 + 1.20 +extern unsigned int vp9_get4x4var_mmx 1.21 +( 1.22 + const unsigned char *src_ptr, 1.23 + int source_stride, 1.24 + const unsigned char *ref_ptr, 1.25 + int recon_stride, 1.26 + unsigned int *SSE, 1.27 + int *Sum 1.28 +); 1.29 + 1.30 +unsigned int vp9_get_mb_ss_sse2 1.31 +( 1.32 + const int16_t *src_ptr 1.33 +); 1.34 +unsigned int vp9_get16x16var_sse2 1.35 +( 1.36 + const unsigned char *src_ptr, 1.37 + int source_stride, 1.38 + const unsigned char *ref_ptr, 1.39 + int recon_stride, 1.40 + unsigned int *SSE, 1.41 + int *Sum 1.42 +); 1.43 +unsigned int vp9_get8x8var_sse2 1.44 +( 1.45 + const unsigned char *src_ptr, 1.46 + int source_stride, 1.47 + const unsigned char *ref_ptr, 1.48 + int recon_stride, 1.49 + unsigned int *SSE, 1.50 + int *Sum 1.51 +); 1.52 +void vp9_half_horiz_vert_variance8x_h_sse2 1.53 +( 1.54 + const unsigned char *ref_ptr, 1.55 + int ref_pixels_per_line, 1.56 + const unsigned char *src_ptr, 1.57 + int src_pixels_per_line, 1.58 + unsigned int Height, 1.59 + int *sum, 1.60 + unsigned int *sumsquared 1.61 +); 1.62 +void vp9_half_horiz_vert_variance16x_h_sse2 1.63 +( 1.64 + const unsigned char *ref_ptr, 1.65 + int ref_pixels_per_line, 1.66 + const unsigned char *src_ptr, 1.67 + int src_pixels_per_line, 1.68 + unsigned int Height, 1.69 + int *sum, 1.70 + unsigned int *sumsquared 1.71 +); 1.72 +void vp9_half_horiz_variance8x_h_sse2 1.73 +( 1.74 + const unsigned char *ref_ptr, 1.75 + int ref_pixels_per_line, 1.76 + const unsigned char *src_ptr, 1.77 + int src_pixels_per_line, 1.78 + unsigned int Height, 1.79 + int *sum, 1.80 + unsigned int *sumsquared 1.81 +); 1.82 +void vp9_half_horiz_variance16x_h_sse2 1.83 +( 1.84 + const unsigned char *ref_ptr, 1.85 + int ref_pixels_per_line, 1.86 + const unsigned char *src_ptr, 1.87 + int src_pixels_per_line, 1.88 + unsigned int Height, 1.89 + int *sum, 1.90 + unsigned int *sumsquared 1.91 +); 1.92 +void vp9_half_vert_variance8x_h_sse2 1.93 +( 1.94 + const unsigned char *ref_ptr, 1.95 + int ref_pixels_per_line, 1.96 + const unsigned char *src_ptr, 1.97 + int src_pixels_per_line, 1.98 + unsigned int Height, 1.99 + int *sum, 1.100 + unsigned int *sumsquared 1.101 +); 1.102 +void vp9_half_vert_variance16x_h_sse2 1.103 +( 1.104 + const unsigned char *ref_ptr, 1.105 + int ref_pixels_per_line, 1.106 + const unsigned char *src_ptr, 1.107 + int src_pixels_per_line, 1.108 + unsigned int Height, 1.109 + int *sum, 1.110 + unsigned int *sumsquared 1.111 +); 1.112 + 1.113 +typedef unsigned int (*get_var_sse2) ( 1.114 + const unsigned char *src_ptr, 1.115 + int source_stride, 1.116 + const unsigned char *ref_ptr, 1.117 + int recon_stride, 1.118 + unsigned int *SSE, 1.119 + int *Sum 1.120 +); 1.121 + 1.122 +static void variance_sse2(const unsigned char *src_ptr, int source_stride, 1.123 + const unsigned char *ref_ptr, int recon_stride, 1.124 + int w, int h, unsigned int *sse, int *sum, 1.125 + get_var_sse2 var_fn, int block_size) { 1.126 + unsigned int sse0; 1.127 + int sum0; 1.128 + int i, j; 1.129 + 1.130 + *sse = 0; 1.131 + *sum = 0; 1.132 + 1.133 + for (i = 0; i < h; i += block_size) { 1.134 + for (j = 0; j < w; j += block_size) { 1.135 + var_fn(src_ptr + source_stride * i + j, source_stride, 1.136 + ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); 1.137 + *sse += sse0; 1.138 + *sum += sum0; 1.139 + } 1.140 + } 1.141 +} 1.142 + 1.143 +unsigned int vp9_variance4x4_sse2( 1.144 + const unsigned char *src_ptr, 1.145 + int source_stride, 1.146 + const unsigned char *ref_ptr, 1.147 + int recon_stride, 1.148 + unsigned int *sse) { 1.149 + unsigned int var; 1.150 + int avg; 1.151 + 1.152 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, 1.153 + &var, &avg, vp9_get4x4var_mmx, 4); 1.154 + *sse = var; 1.155 + return (var - (((unsigned int)avg * avg) >> 4)); 1.156 +} 1.157 + 1.158 +unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, 1.159 + int source_stride, 1.160 + const uint8_t *ref_ptr, 1.161 + int recon_stride, 1.162 + unsigned int *sse) { 1.163 + unsigned int var; 1.164 + int avg; 1.165 + 1.166 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, 1.167 + &var, &avg, vp9_get4x4var_mmx, 4); 1.168 + *sse = var; 1.169 + return (var - (((unsigned int)avg * avg) >> 5)); 1.170 +} 1.171 + 1.172 +unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, 1.173 + int source_stride, 1.174 + const uint8_t *ref_ptr, 1.175 + int recon_stride, 1.176 + unsigned int *sse) { 1.177 + unsigned int var; 1.178 + int avg; 1.179 + 1.180 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, 1.181 + &var, &avg, vp9_get4x4var_mmx, 4); 1.182 + *sse = var; 1.183 + return (var - (((unsigned int)avg * avg) >> 5)); 1.184 +} 1.185 + 1.186 +unsigned int vp9_variance8x8_sse2 1.187 +( 1.188 + const unsigned char *src_ptr, 1.189 + int source_stride, 1.190 + const unsigned char *ref_ptr, 1.191 + int recon_stride, 1.192 + unsigned int *sse) { 1.193 + unsigned int var; 1.194 + int avg; 1.195 + 1.196 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, 1.197 + &var, &avg, vp9_get8x8var_sse2, 8); 1.198 + *sse = var; 1.199 + return (var - (((unsigned int)avg * avg) >> 6)); 1.200 +} 1.201 + 1.202 +unsigned int vp9_variance16x8_sse2 1.203 +( 1.204 + const unsigned char *src_ptr, 1.205 + int source_stride, 1.206 + const unsigned char *ref_ptr, 1.207 + int recon_stride, 1.208 + unsigned int *sse) { 1.209 + unsigned int var; 1.210 + int avg; 1.211 + 1.212 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, 1.213 + &var, &avg, vp9_get8x8var_sse2, 8); 1.214 + *sse = var; 1.215 + return (var - (((unsigned int)avg * avg) >> 7)); 1.216 +} 1.217 + 1.218 +unsigned int vp9_variance8x16_sse2 1.219 +( 1.220 + const unsigned char *src_ptr, 1.221 + int source_stride, 1.222 + const unsigned char *ref_ptr, 1.223 + int recon_stride, 1.224 + unsigned int *sse) { 1.225 + unsigned int var; 1.226 + int avg; 1.227 + 1.228 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, 1.229 + &var, &avg, vp9_get8x8var_sse2, 8); 1.230 + *sse = var; 1.231 + return (var - (((unsigned int)avg * avg) >> 7)); 1.232 +} 1.233 + 1.234 +unsigned int vp9_variance16x16_sse2 1.235 +( 1.236 + const unsigned char *src_ptr, 1.237 + int source_stride, 1.238 + const unsigned char *ref_ptr, 1.239 + int recon_stride, 1.240 + unsigned int *sse) { 1.241 + unsigned int var; 1.242 + int avg; 1.243 + 1.244 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, 1.245 + &var, &avg, vp9_get16x16var_sse2, 16); 1.246 + *sse = var; 1.247 + return (var - (((unsigned int)avg * avg) >> 8)); 1.248 +} 1.249 + 1.250 +unsigned int vp9_mse16x16_sse2( 1.251 + const unsigned char *src_ptr, 1.252 + int source_stride, 1.253 + const unsigned char *ref_ptr, 1.254 + int recon_stride, 1.255 + unsigned int *sse) { 1.256 + unsigned int sse0; 1.257 + int sum0; 1.258 + vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 1.259 + &sum0); 1.260 + *sse = sse0; 1.261 + return sse0; 1.262 +} 1.263 + 1.264 +unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, 1.265 + int source_stride, 1.266 + const uint8_t *ref_ptr, 1.267 + int recon_stride, 1.268 + unsigned int *sse) { 1.269 + unsigned int var; 1.270 + int avg; 1.271 + 1.272 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, 1.273 + &var, &avg, vp9_get16x16var_sse2, 16); 1.274 + *sse = var; 1.275 + return (var - (((int64_t)avg * avg) >> 10)); 1.276 +} 1.277 + 1.278 +unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, 1.279 + int source_stride, 1.280 + const uint8_t *ref_ptr, 1.281 + int recon_stride, 1.282 + unsigned int *sse) { 1.283 + unsigned int var; 1.284 + int avg; 1.285 + 1.286 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, 1.287 + &var, &avg, vp9_get16x16var_sse2, 16); 1.288 + *sse = var; 1.289 + return (var - (((int64_t)avg * avg) >> 9)); 1.290 +} 1.291 + 1.292 +unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, 1.293 + int source_stride, 1.294 + const uint8_t *ref_ptr, 1.295 + int recon_stride, 1.296 + unsigned int *sse) { 1.297 + unsigned int var; 1.298 + int avg; 1.299 + 1.300 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, 1.301 + &var, &avg, vp9_get16x16var_sse2, 16); 1.302 + *sse = var; 1.303 + return (var - (((int64_t)avg * avg) >> 9)); 1.304 +} 1.305 + 1.306 +unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, 1.307 + int source_stride, 1.308 + const uint8_t *ref_ptr, 1.309 + int recon_stride, 1.310 + unsigned int *sse) { 1.311 + unsigned int var; 1.312 + int avg; 1.313 + 1.314 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, 1.315 + &var, &avg, vp9_get16x16var_sse2, 16); 1.316 + *sse = var; 1.317 + return (var - (((int64_t)avg * avg) >> 12)); 1.318 +} 1.319 + 1.320 +unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, 1.321 + int source_stride, 1.322 + const uint8_t *ref_ptr, 1.323 + int recon_stride, 1.324 + unsigned int *sse) { 1.325 + unsigned int var; 1.326 + int avg; 1.327 + 1.328 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, 1.329 + &var, &avg, vp9_get16x16var_sse2, 16); 1.330 + *sse = var; 1.331 + return (var - (((int64_t)avg * avg) >> 11)); 1.332 +} 1.333 + 1.334 +unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, 1.335 + int source_stride, 1.336 + const uint8_t *ref_ptr, 1.337 + int recon_stride, 1.338 + unsigned int *sse) { 1.339 + unsigned int var; 1.340 + int avg; 1.341 + 1.342 + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, 1.343 + &var, &avg, vp9_get16x16var_sse2, 16); 1.344 + *sse = var; 1.345 + return (var - (((int64_t)avg * avg) >> 11)); 1.346 +} 1.347 + 1.348 +#define DECL(w, opt) \ 1.349 +int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ 1.350 + ptrdiff_t src_stride, \ 1.351 + int x_offset, int y_offset, \ 1.352 + const uint8_t *dst, \ 1.353 + ptrdiff_t dst_stride, \ 1.354 + int height, unsigned int *sse) 1.355 +#define DECLS(opt1, opt2) \ 1.356 +DECL(4, opt2); \ 1.357 +DECL(8, opt1); \ 1.358 +DECL(16, opt1) 1.359 + 1.360 +DECLS(sse2, sse); 1.361 +DECLS(ssse3, ssse3); 1.362 +#undef DECLS 1.363 +#undef DECL 1.364 + 1.365 +#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 1.366 +unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ 1.367 + int src_stride, \ 1.368 + int x_offset, \ 1.369 + int y_offset, \ 1.370 + const uint8_t *dst, \ 1.371 + int dst_stride, \ 1.372 + unsigned int *sse_ptr) { \ 1.373 + unsigned int sse; \ 1.374 + int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ 1.375 + y_offset, dst, dst_stride, \ 1.376 + h, &sse); \ 1.377 + if (w > wf) { \ 1.378 + unsigned int sse2; \ 1.379 + int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ 1.380 + x_offset, y_offset, \ 1.381 + dst + 16, dst_stride, \ 1.382 + h, &sse2); \ 1.383 + se += se2; \ 1.384 + sse += sse2; \ 1.385 + if (w > wf * 2) { \ 1.386 + se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ 1.387 + x_offset, y_offset, \ 1.388 + dst + 32, dst_stride, \ 1.389 + h, &sse2); \ 1.390 + se += se2; \ 1.391 + sse += sse2; \ 1.392 + se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ 1.393 + x_offset, y_offset, \ 1.394 + dst + 48, dst_stride, \ 1.395 + h, &sse2); \ 1.396 + se += se2; \ 1.397 + sse += sse2; \ 1.398 + } \ 1.399 + } \ 1.400 + *sse_ptr = sse; \ 1.401 + return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 1.402 +} 1.403 + 1.404 +#define FNS(opt1, opt2) \ 1.405 +FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 1.406 +FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 1.407 +FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 1.408 +FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 1.409 +FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 1.410 +FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 1.411 +FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 1.412 +FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 1.413 +FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 1.414 +FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 1.415 +FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 1.416 +FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 1.417 +FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 1.418 + 1.419 +FNS(sse2, sse); 1.420 +FNS(ssse3, ssse3); 1.421 + 1.422 +#undef FNS 1.423 +#undef FN 1.424 + 1.425 +#define DECL(w, opt) \ 1.426 +int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ 1.427 + ptrdiff_t src_stride, \ 1.428 + int x_offset, int y_offset, \ 1.429 + const uint8_t *dst, \ 1.430 + ptrdiff_t dst_stride, \ 1.431 + const uint8_t *sec, \ 1.432 + ptrdiff_t sec_stride, \ 1.433 + int height, unsigned int *sse) 1.434 +#define DECLS(opt1, opt2) \ 1.435 +DECL(4, opt2); \ 1.436 +DECL(8, opt1); \ 1.437 +DECL(16, opt1) 1.438 + 1.439 +DECLS(sse2, sse); 1.440 +DECLS(ssse3, ssse3); 1.441 +#undef DECL 1.442 +#undef DECLS 1.443 + 1.444 +#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 1.445 +unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ 1.446 + int src_stride, \ 1.447 + int x_offset, \ 1.448 + int y_offset, \ 1.449 + const uint8_t *dst, \ 1.450 + int dst_stride, \ 1.451 + unsigned int *sseptr, \ 1.452 + const uint8_t *sec) { \ 1.453 + unsigned int sse; \ 1.454 + int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ 1.455 + y_offset, dst, dst_stride, \ 1.456 + sec, w, h, &sse); \ 1.457 + if (w > wf) { \ 1.458 + unsigned int sse2; \ 1.459 + int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ 1.460 + x_offset, y_offset, \ 1.461 + dst + 16, dst_stride, \ 1.462 + sec + 16, w, h, &sse2); \ 1.463 + se += se2; \ 1.464 + sse += sse2; \ 1.465 + if (w > wf * 2) { \ 1.466 + se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ 1.467 + x_offset, y_offset, \ 1.468 + dst + 32, dst_stride, \ 1.469 + sec + 32, w, h, &sse2); \ 1.470 + se += se2; \ 1.471 + sse += sse2; \ 1.472 + se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ 1.473 + x_offset, y_offset, \ 1.474 + dst + 48, dst_stride, \ 1.475 + sec + 48, w, h, &sse2); \ 1.476 + se += se2; \ 1.477 + sse += sse2; \ 1.478 + } \ 1.479 + } \ 1.480 + *sseptr = sse; \ 1.481 + return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 1.482 +} 1.483 + 1.484 +#define FNS(opt1, opt2) \ 1.485 +FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 1.486 +FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 1.487 +FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 1.488 +FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 1.489 +FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 1.490 +FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 1.491 +FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 1.492 +FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 1.493 +FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 1.494 +FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 1.495 +FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 1.496 +FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 1.497 +FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 1.498 + 1.499 +FNS(sse2, sse); 1.500 +FNS(ssse3, ssse3); 1.501 + 1.502 +#undef FNS 1.503 +#undef FN 1.504 + 1.505 +unsigned int vp9_variance_halfpixvar16x16_h_sse2( 1.506 + const unsigned char *src_ptr, 1.507 + int src_pixels_per_line, 1.508 + const unsigned char *dst_ptr, 1.509 + int dst_pixels_per_line, 1.510 + unsigned int *sse) { 1.511 + int xsum0; 1.512 + unsigned int xxsum0; 1.513 + 1.514 + vp9_half_horiz_variance16x_h_sse2( 1.515 + src_ptr, src_pixels_per_line, 1.516 + dst_ptr, dst_pixels_per_line, 16, 1.517 + &xsum0, &xxsum0); 1.518 + 1.519 + *sse = xxsum0; 1.520 + return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 1.521 +} 1.522 + 1.523 + 1.524 +unsigned int vp9_variance_halfpixvar16x16_v_sse2( 1.525 + const unsigned char *src_ptr, 1.526 + int src_pixels_per_line, 1.527 + const unsigned char *dst_ptr, 1.528 + int dst_pixels_per_line, 1.529 + unsigned int *sse) { 1.530 + int xsum0; 1.531 + unsigned int xxsum0; 1.532 + vp9_half_vert_variance16x_h_sse2( 1.533 + src_ptr, src_pixels_per_line, 1.534 + dst_ptr, dst_pixels_per_line, 16, 1.535 + &xsum0, &xxsum0); 1.536 + 1.537 + *sse = xxsum0; 1.538 + return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 1.539 +} 1.540 + 1.541 + 1.542 +unsigned int vp9_variance_halfpixvar16x16_hv_sse2( 1.543 + const unsigned char *src_ptr, 1.544 + int src_pixels_per_line, 1.545 + const unsigned char *dst_ptr, 1.546 + int dst_pixels_per_line, 1.547 + unsigned int *sse) { 1.548 + int xsum0; 1.549 + unsigned int xxsum0; 1.550 + 1.551 + vp9_half_horiz_vert_variance16x_h_sse2( 1.552 + src_ptr, src_pixels_per_line, 1.553 + dst_ptr, dst_pixels_per_line, 16, 1.554 + &xsum0, &xxsum0); 1.555 + 1.556 + *sse = xxsum0; 1.557 + return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 1.558 +}