1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp8/encoder/mcomp.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2029 @@ 1.4 +/* 1.5 + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 + 1.15 +#include "onyx_int.h" 1.16 +#include "mcomp.h" 1.17 +#include "vpx_mem/vpx_mem.h" 1.18 +#include "vpx_config.h" 1.19 +#include <stdio.h> 1.20 +#include <limits.h> 1.21 +#include <math.h> 1.22 +#include "vp8/common/findnearmv.h" 1.23 + 1.24 +#ifdef VP8_ENTROPY_STATS 1.25 +static int mv_ref_ct [31] [4] [2]; 1.26 +static int mv_mode_cts [4] [2]; 1.27 +#endif 1.28 + 1.29 +int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) 1.30 +{ 1.31 + /* MV costing is based on the distribution of vectors in the previous 1.32 + * frame and as such will tend to over state the cost of vectors. In 1.33 + * addition coding a new vector can have a knock on effect on the cost 1.34 + * of subsequent vectors and the quality of prediction from NEAR and 1.35 + * NEAREST for subsequent blocks. The "Weight" parameter allows, to a 1.36 + * limited extent, for some account to be taken of these factors. 1.37 + */ 1.38 + return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7; 1.39 +} 1.40 + 1.41 +static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) 1.42 +{ 1.43 + /* Ignore mv costing if mvcost is NULL */ 1.44 + if (mvcost) 1.45 + return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + 1.46 + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) 1.47 + * error_per_bit + 128) >> 8; 1.48 + return 0; 1.49 +} 1.50 + 1.51 +static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit) 1.52 +{ 1.53 + /* Calculate sad error cost on full pixel basis. */ 1.54 + /* Ignore mv costing if mvsadcost is NULL */ 1.55 + if (mvsadcost) 1.56 + return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + 1.57 + mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) 1.58 + * error_per_bit + 128) >> 8; 1.59 + return 0; 1.60 +} 1.61 + 1.62 +void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) 1.63 +{ 1.64 + int Len; 1.65 + int search_site_count = 0; 1.66 + 1.67 + 1.68 + /* Generate offsets for 4 search sites per step. */ 1.69 + Len = MAX_FIRST_STEP; 1.70 + x->ss[search_site_count].mv.col = 0; 1.71 + x->ss[search_site_count].mv.row = 0; 1.72 + x->ss[search_site_count].offset = 0; 1.73 + search_site_count++; 1.74 + 1.75 + while (Len > 0) 1.76 + { 1.77 + 1.78 + /* Compute offsets for search sites. */ 1.79 + x->ss[search_site_count].mv.col = 0; 1.80 + x->ss[search_site_count].mv.row = -Len; 1.81 + x->ss[search_site_count].offset = -Len * stride; 1.82 + search_site_count++; 1.83 + 1.84 + /* Compute offsets for search sites. */ 1.85 + x->ss[search_site_count].mv.col = 0; 1.86 + x->ss[search_site_count].mv.row = Len; 1.87 + x->ss[search_site_count].offset = Len * stride; 1.88 + search_site_count++; 1.89 + 1.90 + /* Compute offsets for search sites. */ 1.91 + x->ss[search_site_count].mv.col = -Len; 1.92 + x->ss[search_site_count].mv.row = 0; 1.93 + x->ss[search_site_count].offset = -Len; 1.94 + search_site_count++; 1.95 + 1.96 + /* Compute offsets for search sites. */ 1.97 + x->ss[search_site_count].mv.col = Len; 1.98 + x->ss[search_site_count].mv.row = 0; 1.99 + x->ss[search_site_count].offset = Len; 1.100 + search_site_count++; 1.101 + 1.102 + /* Contract. */ 1.103 + Len /= 2; 1.104 + } 1.105 + 1.106 + x->ss_count = search_site_count; 1.107 + x->searches_per_step = 4; 1.108 +} 1.109 + 1.110 +void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) 1.111 +{ 1.112 + int Len; 1.113 + int search_site_count = 0; 1.114 + 1.115 + /* Generate offsets for 8 search sites per step. */ 1.116 + Len = MAX_FIRST_STEP; 1.117 + x->ss[search_site_count].mv.col = 0; 1.118 + x->ss[search_site_count].mv.row = 0; 1.119 + x->ss[search_site_count].offset = 0; 1.120 + search_site_count++; 1.121 + 1.122 + while (Len > 0) 1.123 + { 1.124 + 1.125 + /* Compute offsets for search sites. */ 1.126 + x->ss[search_site_count].mv.col = 0; 1.127 + x->ss[search_site_count].mv.row = -Len; 1.128 + x->ss[search_site_count].offset = -Len * stride; 1.129 + search_site_count++; 1.130 + 1.131 + /* Compute offsets for search sites. */ 1.132 + x->ss[search_site_count].mv.col = 0; 1.133 + x->ss[search_site_count].mv.row = Len; 1.134 + x->ss[search_site_count].offset = Len * stride; 1.135 + search_site_count++; 1.136 + 1.137 + /* Compute offsets for search sites. */ 1.138 + x->ss[search_site_count].mv.col = -Len; 1.139 + x->ss[search_site_count].mv.row = 0; 1.140 + x->ss[search_site_count].offset = -Len; 1.141 + search_site_count++; 1.142 + 1.143 + /* Compute offsets for search sites. */ 1.144 + x->ss[search_site_count].mv.col = Len; 1.145 + x->ss[search_site_count].mv.row = 0; 1.146 + x->ss[search_site_count].offset = Len; 1.147 + search_site_count++; 1.148 + 1.149 + /* Compute offsets for search sites. */ 1.150 + x->ss[search_site_count].mv.col = -Len; 1.151 + x->ss[search_site_count].mv.row = -Len; 1.152 + x->ss[search_site_count].offset = -Len * stride - Len; 1.153 + search_site_count++; 1.154 + 1.155 + /* Compute offsets for search sites. */ 1.156 + x->ss[search_site_count].mv.col = Len; 1.157 + x->ss[search_site_count].mv.row = -Len; 1.158 + x->ss[search_site_count].offset = -Len * stride + Len; 1.159 + search_site_count++; 1.160 + 1.161 + /* Compute offsets for search sites. */ 1.162 + x->ss[search_site_count].mv.col = -Len; 1.163 + x->ss[search_site_count].mv.row = Len; 1.164 + x->ss[search_site_count].offset = Len * stride - Len; 1.165 + search_site_count++; 1.166 + 1.167 + /* Compute offsets for search sites. */ 1.168 + x->ss[search_site_count].mv.col = Len; 1.169 + x->ss[search_site_count].mv.row = Len; 1.170 + x->ss[search_site_count].offset = Len * stride + Len; 1.171 + search_site_count++; 1.172 + 1.173 + 1.174 + /* Contract. */ 1.175 + Len /= 2; 1.176 + } 1.177 + 1.178 + x->ss_count = search_site_count; 1.179 + x->searches_per_step = 8; 1.180 +} 1.181 + 1.182 +/* 1.183 + * To avoid the penalty for crossing cache-line read, preload the reference 1.184 + * area in a small buffer, which is aligned to make sure there won't be crossing 1.185 + * cache-line read while reading from this buffer. This reduced the cpu 1.186 + * cycles spent on reading ref data in sub-pixel filter functions. 1.187 + * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 1.188 + * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 1.189 + * could reduce the area. 1.190 + */ 1.191 + 1.192 +/* estimated cost of a motion vector (r,c) */ 1.193 +#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0) 1.194 +/* pointer to predictor base of a motionvector */ 1.195 +#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) 1.196 +/* convert motion vector component to offset for svf calc */ 1.197 +#define SP(x) (((x)&3)<<1) 1.198 +/* returns subpixel variance error function. */ 1.199 +#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) 1.200 +#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; 1.201 +/* returns distortion + motion vector cost */ 1.202 +#define ERR(r,c) (MVC(r,c)+DIST(r,c)) 1.203 +/* checks if (r,c) has better score than previous best */ 1.204 +#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;) 1.205 + 1.206 +int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 1.207 + int_mv *bestmv, int_mv *ref_mv, 1.208 + int error_per_bit, 1.209 + const vp8_variance_fn_ptr_t *vfp, 1.210 + int *mvcost[2], int *distortion, 1.211 + unsigned int *sse1) 1.212 +{ 1.213 + unsigned char *z = (*(b->base_src) + b->src); 1.214 + 1.215 + int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; 1.216 + int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; 1.217 + int tr = br, tc = bc; 1.218 + unsigned int besterr; 1.219 + unsigned int left, right, up, down, diag; 1.220 + unsigned int sse; 1.221 + unsigned int whichdir; 1.222 + unsigned int halfiters = 4; 1.223 + unsigned int quarteriters = 4; 1.224 + int thismse; 1.225 + 1.226 + int minc = MAX(x->mv_col_min * 4, 1.227 + (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); 1.228 + int maxc = MIN(x->mv_col_max * 4, 1.229 + (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); 1.230 + int minr = MAX(x->mv_row_min * 4, 1.231 + (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); 1.232 + int maxr = MIN(x->mv_row_max * 4, 1.233 + (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); 1.234 + 1.235 + int y_stride; 1.236 + int offset; 1.237 + int pre_stride = x->e_mbd.pre.y_stride; 1.238 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.239 + 1.240 + 1.241 +#if ARCH_X86 || ARCH_X86_64 1.242 + MACROBLOCKD *xd = &x->e_mbd; 1.243 + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 1.244 + unsigned char *y; 1.245 + int buf_r1, buf_r2, buf_c1; 1.246 + 1.247 + /* Clamping to avoid out-of-range data access */ 1.248 + buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; 1.249 + buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; 1.250 + buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; 1.251 + y_stride = 32; 1.252 + 1.253 + /* Copy to intermediate buffer before searching. */ 1.254 + vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2); 1.255 + y = xd->y_buf + y_stride*buf_r1 +buf_c1; 1.256 +#else 1.257 + unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 1.258 + y_stride = pre_stride; 1.259 +#endif 1.260 + 1.261 + offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; 1.262 + 1.263 + /* central mv */ 1.264 + bestmv->as_mv.row *= 8; 1.265 + bestmv->as_mv.col *= 8; 1.266 + 1.267 + /* calculate central point error */ 1.268 + besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); 1.269 + *distortion = besterr; 1.270 + besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); 1.271 + 1.272 + /* TODO: Each subsequent iteration checks at least one point in common 1.273 + * with the last iteration could be 2 ( if diag selected) 1.274 + */ 1.275 + while (--halfiters) 1.276 + { 1.277 + /* 1/2 pel */ 1.278 + CHECK_BETTER(left, tr, tc - 2); 1.279 + CHECK_BETTER(right, tr, tc + 2); 1.280 + CHECK_BETTER(up, tr - 2, tc); 1.281 + CHECK_BETTER(down, tr + 2, tc); 1.282 + 1.283 + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 1.284 + 1.285 + switch (whichdir) 1.286 + { 1.287 + case 0: 1.288 + CHECK_BETTER(diag, tr - 2, tc - 2); 1.289 + break; 1.290 + case 1: 1.291 + CHECK_BETTER(diag, tr - 2, tc + 2); 1.292 + break; 1.293 + case 2: 1.294 + CHECK_BETTER(diag, tr + 2, tc - 2); 1.295 + break; 1.296 + case 3: 1.297 + CHECK_BETTER(diag, tr + 2, tc + 2); 1.298 + break; 1.299 + } 1.300 + 1.301 + /* no reason to check the same one again. */ 1.302 + if (tr == br && tc == bc) 1.303 + break; 1.304 + 1.305 + tr = br; 1.306 + tc = bc; 1.307 + } 1.308 + 1.309 + /* TODO: Each subsequent iteration checks at least one point in common 1.310 + * with the last iteration could be 2 ( if diag selected) 1.311 + */ 1.312 + 1.313 + /* 1/4 pel */ 1.314 + while (--quarteriters) 1.315 + { 1.316 + CHECK_BETTER(left, tr, tc - 1); 1.317 + CHECK_BETTER(right, tr, tc + 1); 1.318 + CHECK_BETTER(up, tr - 1, tc); 1.319 + CHECK_BETTER(down, tr + 1, tc); 1.320 + 1.321 + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 1.322 + 1.323 + switch (whichdir) 1.324 + { 1.325 + case 0: 1.326 + CHECK_BETTER(diag, tr - 1, tc - 1); 1.327 + break; 1.328 + case 1: 1.329 + CHECK_BETTER(diag, tr - 1, tc + 1); 1.330 + break; 1.331 + case 2: 1.332 + CHECK_BETTER(diag, tr + 1, tc - 1); 1.333 + break; 1.334 + case 3: 1.335 + CHECK_BETTER(diag, tr + 1, tc + 1); 1.336 + break; 1.337 + } 1.338 + 1.339 + /* no reason to check the same one again. */ 1.340 + if (tr == br && tc == bc) 1.341 + break; 1.342 + 1.343 + tr = br; 1.344 + tc = bc; 1.345 + } 1.346 + 1.347 + bestmv->as_mv.row = br * 2; 1.348 + bestmv->as_mv.col = bc * 2; 1.349 + 1.350 + if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) || 1.351 + (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3))) 1.352 + return INT_MAX; 1.353 + 1.354 + return besterr; 1.355 +} 1.356 +#undef MVC 1.357 +#undef PRE 1.358 +#undef SP 1.359 +#undef DIST 1.360 +#undef IFMVCV 1.361 +#undef ERR 1.362 +#undef CHECK_BETTER 1.363 + 1.364 +int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 1.365 + int_mv *bestmv, int_mv *ref_mv, 1.366 + int error_per_bit, 1.367 + const vp8_variance_fn_ptr_t *vfp, 1.368 + int *mvcost[2], int *distortion, 1.369 + unsigned int *sse1) 1.370 +{ 1.371 + int bestmse = INT_MAX; 1.372 + int_mv startmv; 1.373 + int_mv this_mv; 1.374 + unsigned char *z = (*(b->base_src) + b->src); 1.375 + int left, right, up, down, diag; 1.376 + unsigned int sse; 1.377 + int whichdir ; 1.378 + int thismse; 1.379 + int y_stride; 1.380 + int pre_stride = x->e_mbd.pre.y_stride; 1.381 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.382 + 1.383 +#if ARCH_X86 || ARCH_X86_64 1.384 + MACROBLOCKD *xd = &x->e_mbd; 1.385 + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 1.386 + unsigned char *y; 1.387 + 1.388 + y_stride = 32; 1.389 + /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ 1.390 + vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); 1.391 + y = xd->y_buf + y_stride + 1; 1.392 +#else 1.393 + unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 1.394 + y_stride = pre_stride; 1.395 +#endif 1.396 + 1.397 + /* central mv */ 1.398 + bestmv->as_mv.row <<= 3; 1.399 + bestmv->as_mv.col <<= 3; 1.400 + startmv = *bestmv; 1.401 + 1.402 + /* calculate central point error */ 1.403 + bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); 1.404 + *distortion = bestmse; 1.405 + bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); 1.406 + 1.407 + /* go left then right and check error */ 1.408 + this_mv.as_mv.row = startmv.as_mv.row; 1.409 + this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); 1.410 + thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); 1.411 + left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.412 + 1.413 + if (left < bestmse) 1.414 + { 1.415 + *bestmv = this_mv; 1.416 + bestmse = left; 1.417 + *distortion = thismse; 1.418 + *sse1 = sse; 1.419 + } 1.420 + 1.421 + this_mv.as_mv.col += 8; 1.422 + thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); 1.423 + right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.424 + 1.425 + if (right < bestmse) 1.426 + { 1.427 + *bestmv = this_mv; 1.428 + bestmse = right; 1.429 + *distortion = thismse; 1.430 + *sse1 = sse; 1.431 + } 1.432 + 1.433 + /* go up then down and check error */ 1.434 + this_mv.as_mv.col = startmv.as_mv.col; 1.435 + this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); 1.436 + thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); 1.437 + up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.438 + 1.439 + if (up < bestmse) 1.440 + { 1.441 + *bestmv = this_mv; 1.442 + bestmse = up; 1.443 + *distortion = thismse; 1.444 + *sse1 = sse; 1.445 + } 1.446 + 1.447 + this_mv.as_mv.row += 8; 1.448 + thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); 1.449 + down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.450 + 1.451 + if (down < bestmse) 1.452 + { 1.453 + *bestmv = this_mv; 1.454 + bestmse = down; 1.455 + *distortion = thismse; 1.456 + *sse1 = sse; 1.457 + } 1.458 + 1.459 + 1.460 + /* now check 1 more diagonal */ 1.461 + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 1.462 + this_mv = startmv; 1.463 + 1.464 + switch (whichdir) 1.465 + { 1.466 + case 0: 1.467 + this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 1.468 + this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 1.469 + thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); 1.470 + break; 1.471 + case 1: 1.472 + this_mv.as_mv.col += 4; 1.473 + this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 1.474 + thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); 1.475 + break; 1.476 + case 2: 1.477 + this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 1.478 + this_mv.as_mv.row += 4; 1.479 + thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); 1.480 + break; 1.481 + case 3: 1.482 + default: 1.483 + this_mv.as_mv.col += 4; 1.484 + this_mv.as_mv.row += 4; 1.485 + thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); 1.486 + break; 1.487 + } 1.488 + 1.489 + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.490 + 1.491 + if (diag < bestmse) 1.492 + { 1.493 + *bestmv = this_mv; 1.494 + bestmse = diag; 1.495 + *distortion = thismse; 1.496 + *sse1 = sse; 1.497 + } 1.498 + 1.499 + 1.500 + /* time to check quarter pels. */ 1.501 + if (bestmv->as_mv.row < startmv.as_mv.row) 1.502 + y -= y_stride; 1.503 + 1.504 + if (bestmv->as_mv.col < startmv.as_mv.col) 1.505 + y--; 1.506 + 1.507 + startmv = *bestmv; 1.508 + 1.509 + 1.510 + 1.511 + /* go left then right and check error */ 1.512 + this_mv.as_mv.row = startmv.as_mv.row; 1.513 + 1.514 + if (startmv.as_mv.col & 7) 1.515 + { 1.516 + this_mv.as_mv.col = startmv.as_mv.col - 2; 1.517 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.518 + } 1.519 + else 1.520 + { 1.521 + this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 1.522 + thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.523 + } 1.524 + 1.525 + left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.526 + 1.527 + if (left < bestmse) 1.528 + { 1.529 + *bestmv = this_mv; 1.530 + bestmse = left; 1.531 + *distortion = thismse; 1.532 + *sse1 = sse; 1.533 + } 1.534 + 1.535 + this_mv.as_mv.col += 4; 1.536 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.537 + right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.538 + 1.539 + if (right < bestmse) 1.540 + { 1.541 + *bestmv = this_mv; 1.542 + bestmse = right; 1.543 + *distortion = thismse; 1.544 + *sse1 = sse; 1.545 + } 1.546 + 1.547 + /* go up then down and check error */ 1.548 + this_mv.as_mv.col = startmv.as_mv.col; 1.549 + 1.550 + if (startmv.as_mv.row & 7) 1.551 + { 1.552 + this_mv.as_mv.row = startmv.as_mv.row - 2; 1.553 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.554 + } 1.555 + else 1.556 + { 1.557 + this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; 1.558 + thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); 1.559 + } 1.560 + 1.561 + up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.562 + 1.563 + if (up < bestmse) 1.564 + { 1.565 + *bestmv = this_mv; 1.566 + bestmse = up; 1.567 + *distortion = thismse; 1.568 + *sse1 = sse; 1.569 + } 1.570 + 1.571 + this_mv.as_mv.row += 4; 1.572 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.573 + down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.574 + 1.575 + if (down < bestmse) 1.576 + { 1.577 + *bestmv = this_mv; 1.578 + bestmse = down; 1.579 + *distortion = thismse; 1.580 + *sse1 = sse; 1.581 + } 1.582 + 1.583 + 1.584 + /* now check 1 more diagonal */ 1.585 + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 1.586 + 1.587 + this_mv = startmv; 1.588 + 1.589 + switch (whichdir) 1.590 + { 1.591 + case 0: 1.592 + 1.593 + if (startmv.as_mv.row & 7) 1.594 + { 1.595 + this_mv.as_mv.row -= 2; 1.596 + 1.597 + if (startmv.as_mv.col & 7) 1.598 + { 1.599 + this_mv.as_mv.col -= 2; 1.600 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.601 + } 1.602 + else 1.603 + { 1.604 + this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 1.605 + thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);; 1.606 + } 1.607 + } 1.608 + else 1.609 + { 1.610 + this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; 1.611 + 1.612 + if (startmv.as_mv.col & 7) 1.613 + { 1.614 + this_mv.as_mv.col -= 2; 1.615 + thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); 1.616 + } 1.617 + else 1.618 + { 1.619 + this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 1.620 + thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse); 1.621 + } 1.622 + } 1.623 + 1.624 + break; 1.625 + case 1: 1.626 + this_mv.as_mv.col += 2; 1.627 + 1.628 + if (startmv.as_mv.row & 7) 1.629 + { 1.630 + this_mv.as_mv.row -= 2; 1.631 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.632 + } 1.633 + else 1.634 + { 1.635 + this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; 1.636 + thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); 1.637 + } 1.638 + 1.639 + break; 1.640 + case 2: 1.641 + this_mv.as_mv.row += 2; 1.642 + 1.643 + if (startmv.as_mv.col & 7) 1.644 + { 1.645 + this_mv.as_mv.col -= 2; 1.646 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.647 + } 1.648 + else 1.649 + { 1.650 + this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 1.651 + thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.652 + } 1.653 + 1.654 + break; 1.655 + case 3: 1.656 + this_mv.as_mv.col += 2; 1.657 + this_mv.as_mv.row += 2; 1.658 + thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 1.659 + break; 1.660 + } 1.661 + 1.662 + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.663 + 1.664 + if (diag < bestmse) 1.665 + { 1.666 + *bestmv = this_mv; 1.667 + bestmse = diag; 1.668 + *distortion = thismse; 1.669 + *sse1 = sse; 1.670 + } 1.671 + 1.672 + return bestmse; 1.673 +} 1.674 + 1.675 +int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 1.676 + int_mv *bestmv, int_mv *ref_mv, 1.677 + int error_per_bit, 1.678 + const vp8_variance_fn_ptr_t *vfp, 1.679 + int *mvcost[2], int *distortion, 1.680 + unsigned int *sse1) 1.681 +{ 1.682 + int bestmse = INT_MAX; 1.683 + int_mv startmv; 1.684 + int_mv this_mv; 1.685 + unsigned char *z = (*(b->base_src) + b->src); 1.686 + int left, right, up, down, diag; 1.687 + unsigned int sse; 1.688 + int whichdir ; 1.689 + int thismse; 1.690 + int y_stride; 1.691 + int pre_stride = x->e_mbd.pre.y_stride; 1.692 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.693 + 1.694 +#if ARCH_X86 || ARCH_X86_64 1.695 + MACROBLOCKD *xd = &x->e_mbd; 1.696 + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 1.697 + unsigned char *y; 1.698 + 1.699 + y_stride = 32; 1.700 + /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ 1.701 + vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); 1.702 + y = xd->y_buf + y_stride + 1; 1.703 +#else 1.704 + unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 1.705 + y_stride = pre_stride; 1.706 +#endif 1.707 + 1.708 + /* central mv */ 1.709 + bestmv->as_mv.row *= 8; 1.710 + bestmv->as_mv.col *= 8; 1.711 + startmv = *bestmv; 1.712 + 1.713 + /* calculate central point error */ 1.714 + bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); 1.715 + *distortion = bestmse; 1.716 + bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); 1.717 + 1.718 + /* go left then right and check error */ 1.719 + this_mv.as_mv.row = startmv.as_mv.row; 1.720 + this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); 1.721 + thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); 1.722 + left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.723 + 1.724 + if (left < bestmse) 1.725 + { 1.726 + *bestmv = this_mv; 1.727 + bestmse = left; 1.728 + *distortion = thismse; 1.729 + *sse1 = sse; 1.730 + } 1.731 + 1.732 + this_mv.as_mv.col += 8; 1.733 + thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); 1.734 + right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.735 + 1.736 + if (right < bestmse) 1.737 + { 1.738 + *bestmv = this_mv; 1.739 + bestmse = right; 1.740 + *distortion = thismse; 1.741 + *sse1 = sse; 1.742 + } 1.743 + 1.744 + /* go up then down and check error */ 1.745 + this_mv.as_mv.col = startmv.as_mv.col; 1.746 + this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); 1.747 + thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); 1.748 + up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.749 + 1.750 + if (up < bestmse) 1.751 + { 1.752 + *bestmv = this_mv; 1.753 + bestmse = up; 1.754 + *distortion = thismse; 1.755 + *sse1 = sse; 1.756 + } 1.757 + 1.758 + this_mv.as_mv.row += 8; 1.759 + thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); 1.760 + down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.761 + 1.762 + if (down < bestmse) 1.763 + { 1.764 + *bestmv = this_mv; 1.765 + bestmse = down; 1.766 + *distortion = thismse; 1.767 + *sse1 = sse; 1.768 + } 1.769 + 1.770 + /* now check 1 more diagonal - */ 1.771 + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 1.772 + this_mv = startmv; 1.773 + 1.774 + switch (whichdir) 1.775 + { 1.776 + case 0: 1.777 + this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 1.778 + this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 1.779 + thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); 1.780 + break; 1.781 + case 1: 1.782 + this_mv.as_mv.col += 4; 1.783 + this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 1.784 + thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); 1.785 + break; 1.786 + case 2: 1.787 + this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 1.788 + this_mv.as_mv.row += 4; 1.789 + thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); 1.790 + break; 1.791 + case 3: 1.792 + default: 1.793 + this_mv.as_mv.col += 4; 1.794 + this_mv.as_mv.row += 4; 1.795 + thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); 1.796 + break; 1.797 + } 1.798 + 1.799 + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 1.800 + 1.801 + if (diag < bestmse) 1.802 + { 1.803 + *bestmv = this_mv; 1.804 + bestmse = diag; 1.805 + *distortion = thismse; 1.806 + *sse1 = sse; 1.807 + } 1.808 + 1.809 + return bestmse; 1.810 +} 1.811 + 1.812 +#define CHECK_BOUNDS(range) \ 1.813 +{\ 1.814 + all_in = 1;\ 1.815 + all_in &= ((br-range) >= x->mv_row_min);\ 1.816 + all_in &= ((br+range) <= x->mv_row_max);\ 1.817 + all_in &= ((bc-range) >= x->mv_col_min);\ 1.818 + all_in &= ((bc+range) <= x->mv_col_max);\ 1.819 +} 1.820 + 1.821 +#define CHECK_POINT \ 1.822 +{\ 1.823 + if (this_mv.as_mv.col < x->mv_col_min) continue;\ 1.824 + if (this_mv.as_mv.col > x->mv_col_max) continue;\ 1.825 + if (this_mv.as_mv.row < x->mv_row_min) continue;\ 1.826 + if (this_mv.as_mv.row > x->mv_row_max) continue;\ 1.827 +} 1.828 + 1.829 +#define CHECK_BETTER \ 1.830 +{\ 1.831 + if (thissad < bestsad)\ 1.832 + {\ 1.833 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\ 1.834 + if (thissad < bestsad)\ 1.835 + {\ 1.836 + bestsad = thissad;\ 1.837 + best_site = i;\ 1.838 + }\ 1.839 + }\ 1.840 +} 1.841 + 1.842 +static const MV next_chkpts[6][3] = 1.843 +{ 1.844 + {{ -2, 0}, { -1, -2}, {1, -2}}, 1.845 + {{ -1, -2}, {1, -2}, {2, 0}}, 1.846 + {{1, -2}, {2, 0}, {1, 2}}, 1.847 + {{2, 0}, {1, 2}, { -1, 2}}, 1.848 + {{1, 2}, { -1, 2}, { -2, 0}}, 1.849 + {{ -1, 2}, { -2, 0}, { -1, -2}} 1.850 +}; 1.851 + 1.852 +int vp8_hex_search 1.853 +( 1.854 + MACROBLOCK *x, 1.855 + BLOCK *b, 1.856 + BLOCKD *d, 1.857 + int_mv *ref_mv, 1.858 + int_mv *best_mv, 1.859 + int search_param, 1.860 + int sad_per_bit, 1.861 + const vp8_variance_fn_ptr_t *vfp, 1.862 + int *mvsadcost[2], 1.863 + int *mvcost[2], 1.864 + int_mv *center_mv 1.865 +) 1.866 +{ 1.867 + MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; 1.868 + MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ; 1.869 + int i, j; 1.870 + 1.871 + unsigned char *what = (*(b->base_src) + b->src); 1.872 + int what_stride = b->src_stride; 1.873 + int pre_stride = x->e_mbd.pre.y_stride; 1.874 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.875 + 1.876 + int in_what_stride = pre_stride; 1.877 + int br, bc; 1.878 + int_mv this_mv; 1.879 + unsigned int bestsad; 1.880 + unsigned int thissad; 1.881 + unsigned char *base_offset; 1.882 + unsigned char *this_offset; 1.883 + int k = -1; 1.884 + int all_in; 1.885 + int best_site = -1; 1.886 + int hex_range = 127; 1.887 + int dia_range = 8; 1.888 + 1.889 + int_mv fcenter_mv; 1.890 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.891 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.892 + 1.893 + /* adjust ref_mv to make sure it is within MV range */ 1.894 + vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1.895 + br = ref_mv->as_mv.row; 1.896 + bc = ref_mv->as_mv.col; 1.897 + 1.898 + /* Work out the start point for the search */ 1.899 + base_offset = (unsigned char *)(base_pre + d->offset); 1.900 + this_offset = base_offset + (br * (pre_stride)) + bc; 1.901 + this_mv.as_mv.row = br; 1.902 + this_mv.as_mv.col = bc; 1.903 + bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) 1.904 + + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1.905 + 1.906 +#if CONFIG_MULTI_RES_ENCODING 1.907 + /* Lower search range based on prediction info */ 1.908 + if (search_param >= 6) goto cal_neighbors; 1.909 + else if (search_param >= 5) hex_range = 4; 1.910 + else if (search_param >= 4) hex_range = 6; 1.911 + else if (search_param >= 3) hex_range = 15; 1.912 + else if (search_param >= 2) hex_range = 31; 1.913 + else if (search_param >= 1) hex_range = 63; 1.914 + 1.915 + dia_range = 8; 1.916 +#endif 1.917 + 1.918 + /* hex search */ 1.919 + CHECK_BOUNDS(2) 1.920 + 1.921 + if(all_in) 1.922 + { 1.923 + for (i = 0; i < 6; i++) 1.924 + { 1.925 + this_mv.as_mv.row = br + hex[i].row; 1.926 + this_mv.as_mv.col = bc + hex[i].col; 1.927 + this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; 1.928 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1.929 + CHECK_BETTER 1.930 + } 1.931 + }else 1.932 + { 1.933 + for (i = 0; i < 6; i++) 1.934 + { 1.935 + this_mv.as_mv.row = br + hex[i].row; 1.936 + this_mv.as_mv.col = bc + hex[i].col; 1.937 + CHECK_POINT 1.938 + this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; 1.939 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1.940 + CHECK_BETTER 1.941 + } 1.942 + } 1.943 + 1.944 + if (best_site == -1) 1.945 + goto cal_neighbors; 1.946 + else 1.947 + { 1.948 + br += hex[best_site].row; 1.949 + bc += hex[best_site].col; 1.950 + k = best_site; 1.951 + } 1.952 + 1.953 + for (j = 1; j < hex_range; j++) 1.954 + { 1.955 + best_site = -1; 1.956 + CHECK_BOUNDS(2) 1.957 + 1.958 + if(all_in) 1.959 + { 1.960 + for (i = 0; i < 3; i++) 1.961 + { 1.962 + this_mv.as_mv.row = br + next_chkpts[k][i].row; 1.963 + this_mv.as_mv.col = bc + next_chkpts[k][i].col; 1.964 + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 1.965 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1.966 + CHECK_BETTER 1.967 + } 1.968 + }else 1.969 + { 1.970 + for (i = 0; i < 3; i++) 1.971 + { 1.972 + this_mv.as_mv.row = br + next_chkpts[k][i].row; 1.973 + this_mv.as_mv.col = bc + next_chkpts[k][i].col; 1.974 + CHECK_POINT 1.975 + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 1.976 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1.977 + CHECK_BETTER 1.978 + } 1.979 + } 1.980 + 1.981 + if (best_site == -1) 1.982 + break; 1.983 + else 1.984 + { 1.985 + br += next_chkpts[k][best_site].row; 1.986 + bc += next_chkpts[k][best_site].col; 1.987 + k += 5 + best_site; 1.988 + if (k >= 12) k -= 12; 1.989 + else if (k >= 6) k -= 6; 1.990 + } 1.991 + } 1.992 + 1.993 + /* check 4 1-away neighbors */ 1.994 +cal_neighbors: 1.995 + for (j = 0; j < dia_range; j++) 1.996 + { 1.997 + best_site = -1; 1.998 + CHECK_BOUNDS(1) 1.999 + 1.1000 + if(all_in) 1.1001 + { 1.1002 + for (i = 0; i < 4; i++) 1.1003 + { 1.1004 + this_mv.as_mv.row = br + neighbors[i].row; 1.1005 + this_mv.as_mv.col = bc + neighbors[i].col; 1.1006 + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 1.1007 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1.1008 + CHECK_BETTER 1.1009 + } 1.1010 + }else 1.1011 + { 1.1012 + for (i = 0; i < 4; i++) 1.1013 + { 1.1014 + this_mv.as_mv.row = br + neighbors[i].row; 1.1015 + this_mv.as_mv.col = bc + neighbors[i].col; 1.1016 + CHECK_POINT 1.1017 + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 1.1018 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1.1019 + CHECK_BETTER 1.1020 + } 1.1021 + } 1.1022 + 1.1023 + if (best_site == -1) 1.1024 + break; 1.1025 + else 1.1026 + { 1.1027 + br += neighbors[best_site].row; 1.1028 + bc += neighbors[best_site].col; 1.1029 + } 1.1030 + } 1.1031 + 1.1032 + best_mv->as_mv.row = br; 1.1033 + best_mv->as_mv.col = bc; 1.1034 + 1.1035 + return bestsad; 1.1036 +} 1.1037 +#undef CHECK_BOUNDS 1.1038 +#undef CHECK_POINT 1.1039 +#undef CHECK_BETTER 1.1040 + 1.1041 +int vp8_diamond_search_sad_c 1.1042 +( 1.1043 + MACROBLOCK *x, 1.1044 + BLOCK *b, 1.1045 + BLOCKD *d, 1.1046 + int_mv *ref_mv, 1.1047 + int_mv *best_mv, 1.1048 + int search_param, 1.1049 + int sad_per_bit, 1.1050 + int *num00, 1.1051 + vp8_variance_fn_ptr_t *fn_ptr, 1.1052 + int *mvcost[2], 1.1053 + int_mv *center_mv 1.1054 +) 1.1055 +{ 1.1056 + int i, j, step; 1.1057 + 1.1058 + unsigned char *what = (*(b->base_src) + b->src); 1.1059 + int what_stride = b->src_stride; 1.1060 + unsigned char *in_what; 1.1061 + int pre_stride = x->e_mbd.pre.y_stride; 1.1062 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1063 + int in_what_stride = pre_stride; 1.1064 + unsigned char *best_address; 1.1065 + 1.1066 + int tot_steps; 1.1067 + int_mv this_mv; 1.1068 + 1.1069 + unsigned int bestsad; 1.1070 + unsigned int thissad; 1.1071 + int best_site = 0; 1.1072 + int last_site = 0; 1.1073 + 1.1074 + int ref_row; 1.1075 + int ref_col; 1.1076 + int this_row_offset; 1.1077 + int this_col_offset; 1.1078 + search_site *ss; 1.1079 + 1.1080 + unsigned char *check_here; 1.1081 + 1.1082 + int *mvsadcost[2]; 1.1083 + int_mv fcenter_mv; 1.1084 + 1.1085 + mvsadcost[0] = x->mvsadcost[0]; 1.1086 + mvsadcost[1] = x->mvsadcost[1]; 1.1087 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1088 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1089 + 1.1090 + vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1.1091 + ref_row = ref_mv->as_mv.row; 1.1092 + ref_col = ref_mv->as_mv.col; 1.1093 + *num00 = 0; 1.1094 + best_mv->as_mv.row = ref_row; 1.1095 + best_mv->as_mv.col = ref_col; 1.1096 + 1.1097 + /* Work out the start point for the search */ 1.1098 + in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); 1.1099 + best_address = in_what; 1.1100 + 1.1101 + /* Check the starting position */ 1.1102 + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) 1.1103 + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1.1104 + 1.1105 + /* search_param determines the length of the initial step and hence 1.1106 + * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1.1107 + * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1.1108 + */ 1.1109 + ss = &x->ss[search_param * x->searches_per_step]; 1.1110 + tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1.1111 + 1.1112 + i = 1; 1.1113 + 1.1114 + for (step = 0; step < tot_steps ; step++) 1.1115 + { 1.1116 + for (j = 0 ; j < x->searches_per_step ; j++) 1.1117 + { 1.1118 + /* Trap illegal vectors */ 1.1119 + this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1.1120 + this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1.1121 + 1.1122 + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1.1123 + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1.1124 + 1.1125 + { 1.1126 + check_here = ss[i].offset + best_address; 1.1127 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1.1128 + 1.1129 + if (thissad < bestsad) 1.1130 + { 1.1131 + this_mv.as_mv.row = this_row_offset; 1.1132 + this_mv.as_mv.col = this_col_offset; 1.1133 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1134 + mvsadcost, sad_per_bit); 1.1135 + 1.1136 + if (thissad < bestsad) 1.1137 + { 1.1138 + bestsad = thissad; 1.1139 + best_site = i; 1.1140 + } 1.1141 + } 1.1142 + } 1.1143 + 1.1144 + i++; 1.1145 + } 1.1146 + 1.1147 + if (best_site != last_site) 1.1148 + { 1.1149 + best_mv->as_mv.row += ss[best_site].mv.row; 1.1150 + best_mv->as_mv.col += ss[best_site].mv.col; 1.1151 + best_address += ss[best_site].offset; 1.1152 + last_site = best_site; 1.1153 + } 1.1154 + else if (best_address == in_what) 1.1155 + (*num00)++; 1.1156 + } 1.1157 + 1.1158 + this_mv.as_mv.row = best_mv->as_mv.row << 3; 1.1159 + this_mv.as_mv.col = best_mv->as_mv.col << 3; 1.1160 + 1.1161 + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1.1162 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1163 +} 1.1164 + 1.1165 +int vp8_diamond_search_sadx4 1.1166 +( 1.1167 + MACROBLOCK *x, 1.1168 + BLOCK *b, 1.1169 + BLOCKD *d, 1.1170 + int_mv *ref_mv, 1.1171 + int_mv *best_mv, 1.1172 + int search_param, 1.1173 + int sad_per_bit, 1.1174 + int *num00, 1.1175 + vp8_variance_fn_ptr_t *fn_ptr, 1.1176 + int *mvcost[2], 1.1177 + int_mv *center_mv 1.1178 +) 1.1179 +{ 1.1180 + int i, j, step; 1.1181 + 1.1182 + unsigned char *what = (*(b->base_src) + b->src); 1.1183 + int what_stride = b->src_stride; 1.1184 + unsigned char *in_what; 1.1185 + int pre_stride = x->e_mbd.pre.y_stride; 1.1186 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1187 + int in_what_stride = pre_stride; 1.1188 + unsigned char *best_address; 1.1189 + 1.1190 + int tot_steps; 1.1191 + int_mv this_mv; 1.1192 + 1.1193 + unsigned int bestsad; 1.1194 + unsigned int thissad; 1.1195 + int best_site = 0; 1.1196 + int last_site = 0; 1.1197 + 1.1198 + int ref_row; 1.1199 + int ref_col; 1.1200 + int this_row_offset; 1.1201 + int this_col_offset; 1.1202 + search_site *ss; 1.1203 + 1.1204 + unsigned char *check_here; 1.1205 + 1.1206 + int *mvsadcost[2]; 1.1207 + int_mv fcenter_mv; 1.1208 + 1.1209 + mvsadcost[0] = x->mvsadcost[0]; 1.1210 + mvsadcost[1] = x->mvsadcost[1]; 1.1211 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1212 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1213 + 1.1214 + vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1.1215 + ref_row = ref_mv->as_mv.row; 1.1216 + ref_col = ref_mv->as_mv.col; 1.1217 + *num00 = 0; 1.1218 + best_mv->as_mv.row = ref_row; 1.1219 + best_mv->as_mv.col = ref_col; 1.1220 + 1.1221 + /* Work out the start point for the search */ 1.1222 + in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); 1.1223 + best_address = in_what; 1.1224 + 1.1225 + /* Check the starting position */ 1.1226 + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) 1.1227 + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1.1228 + 1.1229 + /* search_param determines the length of the initial step and hence the 1.1230 + * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = 1.1231 + * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1.1232 + */ 1.1233 + ss = &x->ss[search_param * x->searches_per_step]; 1.1234 + tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1.1235 + 1.1236 + i = 1; 1.1237 + 1.1238 + for (step = 0; step < tot_steps ; step++) 1.1239 + { 1.1240 + int all_in = 1, t; 1.1241 + 1.1242 + /* To know if all neighbor points are within the bounds, 4 bounds 1.1243 + * checking are enough instead of checking 4 bounds for each 1.1244 + * points. 1.1245 + */ 1.1246 + all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min); 1.1247 + all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max); 1.1248 + all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min); 1.1249 + all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max); 1.1250 + 1.1251 + if (all_in) 1.1252 + { 1.1253 + unsigned int sad_array[4]; 1.1254 + 1.1255 + for (j = 0 ; j < x->searches_per_step ; j += 4) 1.1256 + { 1.1257 + const unsigned char *block_offset[4]; 1.1258 + 1.1259 + for (t = 0; t < 4; t++) 1.1260 + block_offset[t] = ss[i+t].offset + best_address; 1.1261 + 1.1262 + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); 1.1263 + 1.1264 + for (t = 0; t < 4; t++, i++) 1.1265 + { 1.1266 + if (sad_array[t] < bestsad) 1.1267 + { 1.1268 + this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; 1.1269 + this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; 1.1270 + sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1271 + mvsadcost, sad_per_bit); 1.1272 + 1.1273 + if (sad_array[t] < bestsad) 1.1274 + { 1.1275 + bestsad = sad_array[t]; 1.1276 + best_site = i; 1.1277 + } 1.1278 + } 1.1279 + } 1.1280 + } 1.1281 + } 1.1282 + else 1.1283 + { 1.1284 + for (j = 0 ; j < x->searches_per_step ; j++) 1.1285 + { 1.1286 + /* Trap illegal vectors */ 1.1287 + this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1.1288 + this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1.1289 + 1.1290 + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1.1291 + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1.1292 + { 1.1293 + check_here = ss[i].offset + best_address; 1.1294 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1.1295 + 1.1296 + if (thissad < bestsad) 1.1297 + { 1.1298 + this_mv.as_mv.row = this_row_offset; 1.1299 + this_mv.as_mv.col = this_col_offset; 1.1300 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1301 + mvsadcost, sad_per_bit); 1.1302 + 1.1303 + if (thissad < bestsad) 1.1304 + { 1.1305 + bestsad = thissad; 1.1306 + best_site = i; 1.1307 + } 1.1308 + } 1.1309 + } 1.1310 + i++; 1.1311 + } 1.1312 + } 1.1313 + 1.1314 + if (best_site != last_site) 1.1315 + { 1.1316 + best_mv->as_mv.row += ss[best_site].mv.row; 1.1317 + best_mv->as_mv.col += ss[best_site].mv.col; 1.1318 + best_address += ss[best_site].offset; 1.1319 + last_site = best_site; 1.1320 + } 1.1321 + else if (best_address == in_what) 1.1322 + (*num00)++; 1.1323 + } 1.1324 + 1.1325 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1326 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1327 + 1.1328 + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1.1329 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1330 +} 1.1331 + 1.1332 +int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1.1333 + int sad_per_bit, int distance, 1.1334 + vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1.1335 + int_mv *center_mv) 1.1336 +{ 1.1337 + unsigned char *what = (*(b->base_src) + b->src); 1.1338 + int what_stride = b->src_stride; 1.1339 + unsigned char *in_what; 1.1340 + int pre_stride = x->e_mbd.pre.y_stride; 1.1341 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1342 + int in_what_stride = pre_stride; 1.1343 + int mv_stride = pre_stride; 1.1344 + unsigned char *bestaddress; 1.1345 + int_mv *best_mv = &d->bmi.mv; 1.1346 + int_mv this_mv; 1.1347 + unsigned int bestsad; 1.1348 + unsigned int thissad; 1.1349 + int r, c; 1.1350 + 1.1351 + unsigned char *check_here; 1.1352 + 1.1353 + int ref_row = ref_mv->as_mv.row; 1.1354 + int ref_col = ref_mv->as_mv.col; 1.1355 + 1.1356 + int row_min = ref_row - distance; 1.1357 + int row_max = ref_row + distance; 1.1358 + int col_min = ref_col - distance; 1.1359 + int col_max = ref_col + distance; 1.1360 + 1.1361 + int *mvsadcost[2]; 1.1362 + int_mv fcenter_mv; 1.1363 + 1.1364 + mvsadcost[0] = x->mvsadcost[0]; 1.1365 + mvsadcost[1] = x->mvsadcost[1]; 1.1366 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1367 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1368 + 1.1369 + /* Work out the mid point for the search */ 1.1370 + in_what = base_pre + d->offset; 1.1371 + bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1.1372 + 1.1373 + best_mv->as_mv.row = ref_row; 1.1374 + best_mv->as_mv.col = ref_col; 1.1375 + 1.1376 + /* Baseline value at the centre */ 1.1377 + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1.1378 + in_what_stride, UINT_MAX) 1.1379 + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1.1380 + 1.1381 + /* Apply further limits to prevent us looking using vectors that 1.1382 + * stretch beyiond the UMV border 1.1383 + */ 1.1384 + if (col_min < x->mv_col_min) 1.1385 + col_min = x->mv_col_min; 1.1386 + 1.1387 + if (col_max > x->mv_col_max) 1.1388 + col_max = x->mv_col_max; 1.1389 + 1.1390 + if (row_min < x->mv_row_min) 1.1391 + row_min = x->mv_row_min; 1.1392 + 1.1393 + if (row_max > x->mv_row_max) 1.1394 + row_max = x->mv_row_max; 1.1395 + 1.1396 + for (r = row_min; r < row_max ; r++) 1.1397 + { 1.1398 + this_mv.as_mv.row = r; 1.1399 + check_here = r * mv_stride + in_what + col_min; 1.1400 + 1.1401 + for (c = col_min; c < col_max; c++) 1.1402 + { 1.1403 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1.1404 + 1.1405 + this_mv.as_mv.col = c; 1.1406 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1407 + mvsadcost, sad_per_bit); 1.1408 + 1.1409 + if (thissad < bestsad) 1.1410 + { 1.1411 + bestsad = thissad; 1.1412 + best_mv->as_mv.row = r; 1.1413 + best_mv->as_mv.col = c; 1.1414 + bestaddress = check_here; 1.1415 + } 1.1416 + 1.1417 + check_here++; 1.1418 + } 1.1419 + } 1.1420 + 1.1421 + this_mv.as_mv.row = best_mv->as_mv.row << 3; 1.1422 + this_mv.as_mv.col = best_mv->as_mv.col << 3; 1.1423 + 1.1424 + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) 1.1425 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1426 +} 1.1427 + 1.1428 +int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1.1429 + int sad_per_bit, int distance, 1.1430 + vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1.1431 + int_mv *center_mv) 1.1432 +{ 1.1433 + unsigned char *what = (*(b->base_src) + b->src); 1.1434 + int what_stride = b->src_stride; 1.1435 + unsigned char *in_what; 1.1436 + int pre_stride = x->e_mbd.pre.y_stride; 1.1437 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1438 + int in_what_stride = pre_stride; 1.1439 + int mv_stride = pre_stride; 1.1440 + unsigned char *bestaddress; 1.1441 + int_mv *best_mv = &d->bmi.mv; 1.1442 + int_mv this_mv; 1.1443 + unsigned int bestsad; 1.1444 + unsigned int thissad; 1.1445 + int r, c; 1.1446 + 1.1447 + unsigned char *check_here; 1.1448 + 1.1449 + int ref_row = ref_mv->as_mv.row; 1.1450 + int ref_col = ref_mv->as_mv.col; 1.1451 + 1.1452 + int row_min = ref_row - distance; 1.1453 + int row_max = ref_row + distance; 1.1454 + int col_min = ref_col - distance; 1.1455 + int col_max = ref_col + distance; 1.1456 + 1.1457 + unsigned int sad_array[3]; 1.1458 + 1.1459 + int *mvsadcost[2]; 1.1460 + int_mv fcenter_mv; 1.1461 + 1.1462 + mvsadcost[0] = x->mvsadcost[0]; 1.1463 + mvsadcost[1] = x->mvsadcost[1]; 1.1464 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1465 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1466 + 1.1467 + /* Work out the mid point for the search */ 1.1468 + in_what = base_pre + d->offset; 1.1469 + bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1.1470 + 1.1471 + best_mv->as_mv.row = ref_row; 1.1472 + best_mv->as_mv.col = ref_col; 1.1473 + 1.1474 + /* Baseline value at the centre */ 1.1475 + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1.1476 + in_what_stride, UINT_MAX) 1.1477 + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1.1478 + 1.1479 + /* Apply further limits to prevent us looking using vectors that stretch 1.1480 + * beyond the UMV border 1.1481 + */ 1.1482 + if (col_min < x->mv_col_min) 1.1483 + col_min = x->mv_col_min; 1.1484 + 1.1485 + if (col_max > x->mv_col_max) 1.1486 + col_max = x->mv_col_max; 1.1487 + 1.1488 + if (row_min < x->mv_row_min) 1.1489 + row_min = x->mv_row_min; 1.1490 + 1.1491 + if (row_max > x->mv_row_max) 1.1492 + row_max = x->mv_row_max; 1.1493 + 1.1494 + for (r = row_min; r < row_max ; r++) 1.1495 + { 1.1496 + this_mv.as_mv.row = r; 1.1497 + check_here = r * mv_stride + in_what + col_min; 1.1498 + c = col_min; 1.1499 + 1.1500 + while ((c + 2) < col_max) 1.1501 + { 1.1502 + int i; 1.1503 + 1.1504 + fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1.1505 + 1.1506 + for (i = 0; i < 3; i++) 1.1507 + { 1.1508 + thissad = sad_array[i]; 1.1509 + 1.1510 + if (thissad < bestsad) 1.1511 + { 1.1512 + this_mv.as_mv.col = c; 1.1513 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1514 + mvsadcost, sad_per_bit); 1.1515 + 1.1516 + if (thissad < bestsad) 1.1517 + { 1.1518 + bestsad = thissad; 1.1519 + best_mv->as_mv.row = r; 1.1520 + best_mv->as_mv.col = c; 1.1521 + bestaddress = check_here; 1.1522 + } 1.1523 + } 1.1524 + 1.1525 + check_here++; 1.1526 + c++; 1.1527 + } 1.1528 + } 1.1529 + 1.1530 + while (c < col_max) 1.1531 + { 1.1532 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1.1533 + 1.1534 + if (thissad < bestsad) 1.1535 + { 1.1536 + this_mv.as_mv.col = c; 1.1537 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1538 + mvsadcost, sad_per_bit); 1.1539 + 1.1540 + if (thissad < bestsad) 1.1541 + { 1.1542 + bestsad = thissad; 1.1543 + best_mv->as_mv.row = r; 1.1544 + best_mv->as_mv.col = c; 1.1545 + bestaddress = check_here; 1.1546 + } 1.1547 + } 1.1548 + 1.1549 + check_here ++; 1.1550 + c ++; 1.1551 + } 1.1552 + 1.1553 + } 1.1554 + 1.1555 + this_mv.as_mv.row = best_mv->as_mv.row << 3; 1.1556 + this_mv.as_mv.col = best_mv->as_mv.col << 3; 1.1557 + 1.1558 + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) 1.1559 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1560 +} 1.1561 + 1.1562 +int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1.1563 + int sad_per_bit, int distance, 1.1564 + vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1.1565 + int_mv *center_mv) 1.1566 +{ 1.1567 + unsigned char *what = (*(b->base_src) + b->src); 1.1568 + int what_stride = b->src_stride; 1.1569 + int pre_stride = x->e_mbd.pre.y_stride; 1.1570 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1571 + unsigned char *in_what; 1.1572 + int in_what_stride = pre_stride; 1.1573 + int mv_stride = pre_stride; 1.1574 + unsigned char *bestaddress; 1.1575 + int_mv *best_mv = &d->bmi.mv; 1.1576 + int_mv this_mv; 1.1577 + unsigned int bestsad; 1.1578 + unsigned int thissad; 1.1579 + int r, c; 1.1580 + 1.1581 + unsigned char *check_here; 1.1582 + 1.1583 + int ref_row = ref_mv->as_mv.row; 1.1584 + int ref_col = ref_mv->as_mv.col; 1.1585 + 1.1586 + int row_min = ref_row - distance; 1.1587 + int row_max = ref_row + distance; 1.1588 + int col_min = ref_col - distance; 1.1589 + int col_max = ref_col + distance; 1.1590 + 1.1591 + DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); 1.1592 + unsigned int sad_array[3]; 1.1593 + 1.1594 + int *mvsadcost[2]; 1.1595 + int_mv fcenter_mv; 1.1596 + 1.1597 + mvsadcost[0] = x->mvsadcost[0]; 1.1598 + mvsadcost[1] = x->mvsadcost[1]; 1.1599 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1600 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1601 + 1.1602 + /* Work out the mid point for the search */ 1.1603 + in_what = base_pre + d->offset; 1.1604 + bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1.1605 + 1.1606 + best_mv->as_mv.row = ref_row; 1.1607 + best_mv->as_mv.col = ref_col; 1.1608 + 1.1609 + /* Baseline value at the centre */ 1.1610 + bestsad = fn_ptr->sdf(what, what_stride, 1.1611 + bestaddress, in_what_stride, UINT_MAX) 1.1612 + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1.1613 + 1.1614 + /* Apply further limits to prevent us looking using vectors that stretch 1.1615 + * beyond the UMV border 1.1616 + */ 1.1617 + if (col_min < x->mv_col_min) 1.1618 + col_min = x->mv_col_min; 1.1619 + 1.1620 + if (col_max > x->mv_col_max) 1.1621 + col_max = x->mv_col_max; 1.1622 + 1.1623 + if (row_min < x->mv_row_min) 1.1624 + row_min = x->mv_row_min; 1.1625 + 1.1626 + if (row_max > x->mv_row_max) 1.1627 + row_max = x->mv_row_max; 1.1628 + 1.1629 + for (r = row_min; r < row_max ; r++) 1.1630 + { 1.1631 + this_mv.as_mv.row = r; 1.1632 + check_here = r * mv_stride + in_what + col_min; 1.1633 + c = col_min; 1.1634 + 1.1635 + while ((c + 7) < col_max) 1.1636 + { 1.1637 + int i; 1.1638 + 1.1639 + fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1.1640 + 1.1641 + for (i = 0; i < 8; i++) 1.1642 + { 1.1643 + thissad = sad_array8[i]; 1.1644 + 1.1645 + if (thissad < bestsad) 1.1646 + { 1.1647 + this_mv.as_mv.col = c; 1.1648 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1649 + mvsadcost, sad_per_bit); 1.1650 + 1.1651 + if (thissad < bestsad) 1.1652 + { 1.1653 + bestsad = thissad; 1.1654 + best_mv->as_mv.row = r; 1.1655 + best_mv->as_mv.col = c; 1.1656 + bestaddress = check_here; 1.1657 + } 1.1658 + } 1.1659 + 1.1660 + check_here++; 1.1661 + c++; 1.1662 + } 1.1663 + } 1.1664 + 1.1665 + while ((c + 2) < col_max) 1.1666 + { 1.1667 + int i; 1.1668 + 1.1669 + fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); 1.1670 + 1.1671 + for (i = 0; i < 3; i++) 1.1672 + { 1.1673 + thissad = sad_array[i]; 1.1674 + 1.1675 + if (thissad < bestsad) 1.1676 + { 1.1677 + this_mv.as_mv.col = c; 1.1678 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1679 + mvsadcost, sad_per_bit); 1.1680 + 1.1681 + if (thissad < bestsad) 1.1682 + { 1.1683 + bestsad = thissad; 1.1684 + best_mv->as_mv.row = r; 1.1685 + best_mv->as_mv.col = c; 1.1686 + bestaddress = check_here; 1.1687 + } 1.1688 + } 1.1689 + 1.1690 + check_here++; 1.1691 + c++; 1.1692 + } 1.1693 + } 1.1694 + 1.1695 + while (c < col_max) 1.1696 + { 1.1697 + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); 1.1698 + 1.1699 + if (thissad < bestsad) 1.1700 + { 1.1701 + this_mv.as_mv.col = c; 1.1702 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1.1703 + mvsadcost, sad_per_bit); 1.1704 + 1.1705 + if (thissad < bestsad) 1.1706 + { 1.1707 + bestsad = thissad; 1.1708 + best_mv->as_mv.row = r; 1.1709 + best_mv->as_mv.col = c; 1.1710 + bestaddress = check_here; 1.1711 + } 1.1712 + } 1.1713 + 1.1714 + check_here ++; 1.1715 + c ++; 1.1716 + } 1.1717 + } 1.1718 + 1.1719 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1720 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1721 + 1.1722 + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) 1.1723 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1724 +} 1.1725 + 1.1726 +int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1.1727 + int error_per_bit, int search_range, 1.1728 + vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1.1729 + int_mv *center_mv) 1.1730 +{ 1.1731 + MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1.1732 + int i, j; 1.1733 + short this_row_offset, this_col_offset; 1.1734 + 1.1735 + int what_stride = b->src_stride; 1.1736 + int pre_stride = x->e_mbd.pre.y_stride; 1.1737 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1738 + int in_what_stride = pre_stride; 1.1739 + unsigned char *what = (*(b->base_src) + b->src); 1.1740 + unsigned char *best_address = (unsigned char *)(base_pre + d->offset + 1.1741 + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); 1.1742 + unsigned char *check_here; 1.1743 + int_mv this_mv; 1.1744 + unsigned int bestsad; 1.1745 + unsigned int thissad; 1.1746 + 1.1747 + int *mvsadcost[2]; 1.1748 + int_mv fcenter_mv; 1.1749 + 1.1750 + mvsadcost[0] = x->mvsadcost[0]; 1.1751 + mvsadcost[1] = x->mvsadcost[1]; 1.1752 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1753 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1754 + 1.1755 + bestsad = fn_ptr->sdf(what, what_stride, best_address, 1.1756 + in_what_stride, UINT_MAX) 1.1757 + + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); 1.1758 + 1.1759 + for (i=0; i<search_range; i++) 1.1760 + { 1.1761 + int best_site = -1; 1.1762 + 1.1763 + for (j = 0 ; j < 4 ; j++) 1.1764 + { 1.1765 + this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1.1766 + this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1.1767 + 1.1768 + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1.1769 + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1.1770 + { 1.1771 + check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; 1.1772 + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); 1.1773 + 1.1774 + if (thissad < bestsad) 1.1775 + { 1.1776 + this_mv.as_mv.row = this_row_offset; 1.1777 + this_mv.as_mv.col = this_col_offset; 1.1778 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1.1779 + 1.1780 + if (thissad < bestsad) 1.1781 + { 1.1782 + bestsad = thissad; 1.1783 + best_site = j; 1.1784 + } 1.1785 + } 1.1786 + } 1.1787 + } 1.1788 + 1.1789 + if (best_site == -1) 1.1790 + break; 1.1791 + else 1.1792 + { 1.1793 + ref_mv->as_mv.row += neighbors[best_site].row; 1.1794 + ref_mv->as_mv.col += neighbors[best_site].col; 1.1795 + best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col; 1.1796 + } 1.1797 + } 1.1798 + 1.1799 + this_mv.as_mv.row = ref_mv->as_mv.row << 3; 1.1800 + this_mv.as_mv.col = ref_mv->as_mv.col << 3; 1.1801 + 1.1802 + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1.1803 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1804 +} 1.1805 + 1.1806 +int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 1.1807 + int_mv *ref_mv, int error_per_bit, 1.1808 + int search_range, vp8_variance_fn_ptr_t *fn_ptr, 1.1809 + int *mvcost[2], int_mv *center_mv) 1.1810 +{ 1.1811 + MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1.1812 + int i, j; 1.1813 + short this_row_offset, this_col_offset; 1.1814 + 1.1815 + int what_stride = b->src_stride; 1.1816 + int pre_stride = x->e_mbd.pre.y_stride; 1.1817 + unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1.1818 + int in_what_stride = pre_stride; 1.1819 + unsigned char *what = (*(b->base_src) + b->src); 1.1820 + unsigned char *best_address = (unsigned char *)(base_pre + d->offset + 1.1821 + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); 1.1822 + unsigned char *check_here; 1.1823 + int_mv this_mv; 1.1824 + unsigned int bestsad; 1.1825 + unsigned int thissad; 1.1826 + 1.1827 + int *mvsadcost[2]; 1.1828 + int_mv fcenter_mv; 1.1829 + 1.1830 + mvsadcost[0] = x->mvsadcost[0]; 1.1831 + mvsadcost[1] = x->mvsadcost[1]; 1.1832 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1833 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1834 + 1.1835 + bestsad = fn_ptr->sdf(what, what_stride, best_address, 1.1836 + in_what_stride, UINT_MAX) 1.1837 + + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); 1.1838 + 1.1839 + for (i=0; i<search_range; i++) 1.1840 + { 1.1841 + int best_site = -1; 1.1842 + int all_in = 1; 1.1843 + 1.1844 + all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); 1.1845 + all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); 1.1846 + all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); 1.1847 + all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); 1.1848 + 1.1849 + if(all_in) 1.1850 + { 1.1851 + unsigned int sad_array[4]; 1.1852 + const unsigned char *block_offset[4]; 1.1853 + block_offset[0] = best_address - in_what_stride; 1.1854 + block_offset[1] = best_address - 1; 1.1855 + block_offset[2] = best_address + 1; 1.1856 + block_offset[3] = best_address + in_what_stride; 1.1857 + 1.1858 + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); 1.1859 + 1.1860 + for (j = 0; j < 4; j++) 1.1861 + { 1.1862 + if (sad_array[j] < bestsad) 1.1863 + { 1.1864 + this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; 1.1865 + this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; 1.1866 + sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1.1867 + 1.1868 + if (sad_array[j] < bestsad) 1.1869 + { 1.1870 + bestsad = sad_array[j]; 1.1871 + best_site = j; 1.1872 + } 1.1873 + } 1.1874 + } 1.1875 + } 1.1876 + else 1.1877 + { 1.1878 + for (j = 0 ; j < 4 ; j++) 1.1879 + { 1.1880 + this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1.1881 + this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1.1882 + 1.1883 + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1.1884 + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1.1885 + { 1.1886 + check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; 1.1887 + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); 1.1888 + 1.1889 + if (thissad < bestsad) 1.1890 + { 1.1891 + this_mv.as_mv.row = this_row_offset; 1.1892 + this_mv.as_mv.col = this_col_offset; 1.1893 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1.1894 + 1.1895 + if (thissad < bestsad) 1.1896 + { 1.1897 + bestsad = thissad; 1.1898 + best_site = j; 1.1899 + } 1.1900 + } 1.1901 + } 1.1902 + } 1.1903 + } 1.1904 + 1.1905 + if (best_site == -1) 1.1906 + break; 1.1907 + else 1.1908 + { 1.1909 + ref_mv->as_mv.row += neighbors[best_site].row; 1.1910 + ref_mv->as_mv.col += neighbors[best_site].col; 1.1911 + best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col; 1.1912 + } 1.1913 + } 1.1914 + 1.1915 + this_mv.as_mv.row = ref_mv->as_mv.row * 8; 1.1916 + this_mv.as_mv.col = ref_mv->as_mv.col * 8; 1.1917 + 1.1918 + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1.1919 + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1.1920 +} 1.1921 + 1.1922 +#ifdef VP8_ENTROPY_STATS 1.1923 +void print_mode_context(void) 1.1924 +{ 1.1925 + FILE *f = fopen("modecont.c", "w"); 1.1926 + int i, j; 1.1927 + 1.1928 + fprintf(f, "#include \"entropy.h\"\n"); 1.1929 + fprintf(f, "const int vp8_mode_contexts[6][4] =\n"); 1.1930 + fprintf(f, "{\n"); 1.1931 + 1.1932 + for (j = 0; j < 6; j++) 1.1933 + { 1.1934 + fprintf(f, " { /* %d */\n", j); 1.1935 + fprintf(f, " "); 1.1936 + 1.1937 + for (i = 0; i < 4; i++) 1.1938 + { 1.1939 + int overal_prob; 1.1940 + int this_prob; 1.1941 + int count; 1.1942 + 1.1943 + /* Overall probs */ 1.1944 + count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; 1.1945 + 1.1946 + if (count) 1.1947 + overal_prob = 256 * mv_mode_cts[i][0] / count; 1.1948 + else 1.1949 + overal_prob = 128; 1.1950 + 1.1951 + if (overal_prob == 0) 1.1952 + overal_prob = 1; 1.1953 + 1.1954 + /* context probs */ 1.1955 + count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; 1.1956 + 1.1957 + if (count) 1.1958 + this_prob = 256 * mv_ref_ct[j][i][0] / count; 1.1959 + else 1.1960 + this_prob = 128; 1.1961 + 1.1962 + if (this_prob == 0) 1.1963 + this_prob = 1; 1.1964 + 1.1965 + fprintf(f, "%5d, ", this_prob); 1.1966 + } 1.1967 + 1.1968 + fprintf(f, " },\n"); 1.1969 + } 1.1970 + 1.1971 + fprintf(f, "};\n"); 1.1972 + fclose(f); 1.1973 +} 1.1974 + 1.1975 +/* MV ref count VP8_ENTROPY_STATS stats code */ 1.1976 +#ifdef VP8_ENTROPY_STATS 1.1977 +void init_mv_ref_counts() 1.1978 +{ 1.1979 + vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); 1.1980 + vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); 1.1981 +} 1.1982 + 1.1983 +void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) 1.1984 +{ 1.1985 + if (m == ZEROMV) 1.1986 + { 1.1987 + ++mv_ref_ct [ct[0]] [0] [0]; 1.1988 + ++mv_mode_cts[0][0]; 1.1989 + } 1.1990 + else 1.1991 + { 1.1992 + ++mv_ref_ct [ct[0]] [0] [1]; 1.1993 + ++mv_mode_cts[0][1]; 1.1994 + 1.1995 + if (m == NEARESTMV) 1.1996 + { 1.1997 + ++mv_ref_ct [ct[1]] [1] [0]; 1.1998 + ++mv_mode_cts[1][0]; 1.1999 + } 1.2000 + else 1.2001 + { 1.2002 + ++mv_ref_ct [ct[1]] [1] [1]; 1.2003 + ++mv_mode_cts[1][1]; 1.2004 + 1.2005 + if (m == NEARMV) 1.2006 + { 1.2007 + ++mv_ref_ct [ct[2]] [2] [0]; 1.2008 + ++mv_mode_cts[2][0]; 1.2009 + } 1.2010 + else 1.2011 + { 1.2012 + ++mv_ref_ct [ct[2]] [2] [1]; 1.2013 + ++mv_mode_cts[2][1]; 1.2014 + 1.2015 + if (m == NEWMV) 1.2016 + { 1.2017 + ++mv_ref_ct [ct[3]] [3] [0]; 1.2018 + ++mv_mode_cts[3][0]; 1.2019 + } 1.2020 + else 1.2021 + { 1.2022 + ++mv_ref_ct [ct[3]] [3] [1]; 1.2023 + ++mv_mode_cts[3][1]; 1.2024 + } 1.2025 + } 1.2026 + } 1.2027 + } 1.2028 +} 1.2029 + 1.2030 +#endif/* END MV ref count VP8_ENTROPY_STATS stats code */ 1.2031 + 1.2032 +#endif