1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp9/encoder/vp9_mcomp.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2110 @@ 1.4 +/* 1.5 + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include <limits.h> 1.15 +#include <math.h> 1.16 +#include <stdio.h> 1.17 + 1.18 +#include "./vpx_config.h" 1.19 + 1.20 +#include "vpx_mem/vpx_mem.h" 1.21 + 1.22 +#include "vp9/common/vp9_findnearmv.h" 1.23 +#include "vp9/common/vp9_common.h" 1.24 + 1.25 +#include "vp9/encoder/vp9_onyx_int.h" 1.26 +#include "vp9/encoder/vp9_mcomp.h" 1.27 + 1.28 +// #define NEW_DIAMOND_SEARCH 1.29 + 1.30 +void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) { 1.31 + const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 1.32 + const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 1.33 + const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 1.34 + const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 1.35 + 1.36 + // Get intersection of UMV window and valid MV window to reduce # of checks 1.37 + // in diamond search. 1.38 + if (x->mv_col_min < col_min) 1.39 + x->mv_col_min = col_min; 1.40 + if (x->mv_col_max > col_max) 1.41 + x->mv_col_max = col_max; 1.42 + if (x->mv_row_min < row_min) 1.43 + x->mv_row_min = row_min; 1.44 + if (x->mv_row_max > row_max) 1.45 + x->mv_row_max = row_max; 1.46 +} 1.47 + 1.48 +int vp9_init_search_range(VP9_COMP *cpi, int size) { 1.49 + int sr = 0; 1.50 + 1.51 + // Minimum search size no matter what the passed in value. 
1.52 + size = MAX(16, size); 1.53 + 1.54 + while ((size << sr) < MAX_FULL_PEL_VAL) 1.55 + sr++; 1.56 + 1.57 + if (sr) 1.58 + sr--; 1.59 + 1.60 + sr += cpi->sf.reduce_first_step_size; 1.61 + sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); 1.62 + return sr; 1.63 +} 1.64 + 1.65 +static INLINE int mv_cost(const MV *mv, 1.66 + const int *joint_cost, int *comp_cost[2]) { 1.67 + return joint_cost[vp9_get_mv_joint(mv)] + 1.68 + comp_cost[0][mv->row] + comp_cost[1][mv->col]; 1.69 +} 1.70 + 1.71 +int vp9_mv_bit_cost(const MV *mv, const MV *ref, 1.72 + const int *mvjcost, int *mvcost[2], int weight) { 1.73 + const MV diff = { mv->row - ref->row, 1.74 + mv->col - ref->col }; 1.75 + return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 1.76 +} 1.77 + 1.78 +static int mv_err_cost(const MV *mv, const MV *ref, 1.79 + const int *mvjcost, int *mvcost[2], 1.80 + int error_per_bit) { 1.81 + if (mvcost) { 1.82 + const MV diff = { mv->row - ref->row, 1.83 + mv->col - ref->col }; 1.84 + return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * 1.85 + error_per_bit, 13); 1.86 + } 1.87 + return 0; 1.88 +} 1.89 + 1.90 +static int mvsad_err_cost(const MV *mv, const MV *ref, 1.91 + const int *mvjsadcost, int *mvsadcost[2], 1.92 + int error_per_bit) { 1.93 + if (mvsadcost) { 1.94 + const MV diff = { mv->row - ref->row, 1.95 + mv->col - ref->col }; 1.96 + return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * 1.97 + error_per_bit, 8); 1.98 + } 1.99 + return 0; 1.100 +} 1.101 + 1.102 +void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { 1.103 + int len; 1.104 + int search_site_count = 0; 1.105 + 1.106 + // Generate offsets for 4 search sites per step. 1.107 + x->ss[search_site_count].mv.col = 0; 1.108 + x->ss[search_site_count].mv.row = 0; 1.109 + x->ss[search_site_count].offset = 0; 1.110 + search_site_count++; 1.111 + 1.112 + for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 1.113 + // Compute offsets for search sites. 
1.114 + x->ss[search_site_count].mv.col = 0; 1.115 + x->ss[search_site_count].mv.row = -len; 1.116 + x->ss[search_site_count].offset = -len * stride; 1.117 + search_site_count++; 1.118 + 1.119 + // Compute offsets for search sites. 1.120 + x->ss[search_site_count].mv.col = 0; 1.121 + x->ss[search_site_count].mv.row = len; 1.122 + x->ss[search_site_count].offset = len * stride; 1.123 + search_site_count++; 1.124 + 1.125 + // Compute offsets for search sites. 1.126 + x->ss[search_site_count].mv.col = -len; 1.127 + x->ss[search_site_count].mv.row = 0; 1.128 + x->ss[search_site_count].offset = -len; 1.129 + search_site_count++; 1.130 + 1.131 + // Compute offsets for search sites. 1.132 + x->ss[search_site_count].mv.col = len; 1.133 + x->ss[search_site_count].mv.row = 0; 1.134 + x->ss[search_site_count].offset = len; 1.135 + search_site_count++; 1.136 + } 1.137 + 1.138 + x->ss_count = search_site_count; 1.139 + x->searches_per_step = 4; 1.140 +} 1.141 + 1.142 +void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { 1.143 + int len, ss_count = 1; 1.144 + 1.145 + x->ss[0].mv.col = x->ss[0].mv.row = 0; 1.146 + x->ss[0].offset = 0; 1.147 + 1.148 + for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 1.149 + // Generate offsets for 8 search sites per step. 1.150 + const MV ss_mvs[8] = { 1.151 + {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, 1.152 + {-len, -len}, {-len, len}, {len, -len}, {len, len} 1.153 + }; 1.154 + int i; 1.155 + for (i = 0; i < 8; ++i) { 1.156 + search_site *const ss = &x->ss[ss_count++]; 1.157 + ss->mv = ss_mvs[i]; 1.158 + ss->offset = ss->mv.row * stride + ss->mv.col; 1.159 + } 1.160 + } 1.161 + 1.162 + x->ss_count = ss_count; 1.163 + x->searches_per_step = 8; 1.164 +} 1.165 + 1.166 +/* 1.167 + * To avoid the penalty for crossing cache-line read, preload the reference 1.168 + * area in a small buffer, which is aligned to make sure there won't be crossing 1.169 + * cache-line read while reading from this buffer. 
This reduced the cpu 1.170 + * cycles spent on reading ref data in sub-pixel filter functions. 1.171 + * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 1.172 + * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 1.173 + * could reduce the area. 1.174 + */ 1.175 + 1.176 +/* estimated cost of a motion vector (r,c) */ 1.177 +#define MVC(r, c) \ 1.178 + (mvcost ? \ 1.179 + ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 1.180 + mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 1.181 + error_per_bit + 4096) >> 13 : 0) 1.182 + 1.183 + 1.184 +#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset 1.185 + // for svf calc 1.186 + 1.187 +#define IFMVCV(r, c, s, e) \ 1.188 + if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ 1.189 + s \ 1.190 + else \ 1.191 + e; 1.192 + 1.193 +/* pointer to predictor base of a motionvector */ 1.194 +#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) 1.195 + 1.196 +/* returns subpixel variance error function */ 1.197 +#define DIST(r, c) \ 1.198 + vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse) 1.199 + 1.200 +/* checks if (r, c) has better score than previous best */ 1.201 +#define CHECK_BETTER(v, r, c) \ 1.202 + IFMVCV(r, c, { \ 1.203 + thismse = (DIST(r, c)); \ 1.204 + if ((v = MVC(r, c) + thismse) < besterr) { \ 1.205 + besterr = v; \ 1.206 + br = r; \ 1.207 + bc = c; \ 1.208 + *distortion = thismse; \ 1.209 + *sse1 = sse; \ 1.210 + } \ 1.211 + }, \ 1.212 + v = INT_MAX;) 1.213 + 1.214 +#define FIRST_LEVEL_CHECKS \ 1.215 + { \ 1.216 + unsigned int left, right, up, down, diag; \ 1.217 + CHECK_BETTER(left, tr, tc - hstep); \ 1.218 + CHECK_BETTER(right, tr, tc + hstep); \ 1.219 + CHECK_BETTER(up, tr - hstep, tc); \ 1.220 + CHECK_BETTER(down, tr + hstep, tc); \ 1.221 + whichdir = (left < right ? 0 : 1) + \ 1.222 + (up < down ? 
0 : 2); \ 1.223 + switch (whichdir) { \ 1.224 + case 0: \ 1.225 + CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 1.226 + break; \ 1.227 + case 1: \ 1.228 + CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 1.229 + break; \ 1.230 + case 2: \ 1.231 + CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 1.232 + break; \ 1.233 + case 3: \ 1.234 + CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 1.235 + break; \ 1.236 + } \ 1.237 + } 1.238 + 1.239 +#define SECOND_LEVEL_CHECKS \ 1.240 + { \ 1.241 + int kr, kc; \ 1.242 + unsigned int second; \ 1.243 + if (tr != br && tc != bc) { \ 1.244 + kr = br - tr; \ 1.245 + kc = bc - tc; \ 1.246 + CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 1.247 + CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 1.248 + } else if (tr == br && tc != bc) { \ 1.249 + kc = bc - tc; \ 1.250 + CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 1.251 + CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 1.252 + switch (whichdir) { \ 1.253 + case 0: \ 1.254 + case 1: \ 1.255 + CHECK_BETTER(second, tr + hstep, tc + kc); \ 1.256 + break; \ 1.257 + case 2: \ 1.258 + case 3: \ 1.259 + CHECK_BETTER(second, tr - hstep, tc + kc); \ 1.260 + break; \ 1.261 + } \ 1.262 + } else if (tr != br && tc == bc) { \ 1.263 + kr = br - tr; \ 1.264 + CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 1.265 + CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 1.266 + switch (whichdir) { \ 1.267 + case 0: \ 1.268 + case 2: \ 1.269 + CHECK_BETTER(second, tr + kr, tc + hstep); \ 1.270 + break; \ 1.271 + case 1: \ 1.272 + case 3: \ 1.273 + CHECK_BETTER(second, tr + kr, tc - hstep); \ 1.274 + break; \ 1.275 + } \ 1.276 + } \ 1.277 + } 1.278 + 1.279 +int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, 1.280 + MV *bestmv, const MV *ref_mv, 1.281 + int allow_hp, 1.282 + int error_per_bit, 1.283 + const vp9_variance_fn_ptr_t *vfp, 1.284 + int forced_stop, 1.285 + int iters_per_step, 1.286 + int *mvjcost, int *mvcost[2], 1.287 + int *distortion, 1.288 + unsigned int *sse1) { 1.289 + uint8_t *z = 
x->plane[0].src.buf; 1.290 + int src_stride = x->plane[0].src.stride; 1.291 + MACROBLOCKD *xd = &x->e_mbd; 1.292 + 1.293 + unsigned int besterr = INT_MAX; 1.294 + unsigned int sse; 1.295 + unsigned int whichdir; 1.296 + unsigned int halfiters = iters_per_step; 1.297 + unsigned int quarteriters = iters_per_step; 1.298 + unsigned int eighthiters = iters_per_step; 1.299 + int thismse; 1.300 + 1.301 + const int y_stride = xd->plane[0].pre[0].stride; 1.302 + const int offset = bestmv->row * y_stride + bestmv->col; 1.303 + uint8_t *y = xd->plane[0].pre[0].buf + offset; 1.304 + 1.305 + int rr = ref_mv->row; 1.306 + int rc = ref_mv->col; 1.307 + int br = bestmv->row * 8; 1.308 + int bc = bestmv->col * 8; 1.309 + int hstep = 4; 1.310 + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 1.311 + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 1.312 + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 1.313 + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 1.314 + 1.315 + int tr = br; 1.316 + int tc = bc; 1.317 + 1.318 + // central mv 1.319 + bestmv->row <<= 3; 1.320 + bestmv->col <<= 3; 1.321 + 1.322 + // calculate central point error 1.323 + besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 1.324 + *distortion = besterr; 1.325 + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 1.326 + 1.327 + // TODO(jbb): Each subsequent iteration checks at least one point in 1.328 + // common with the last iteration could be 2 if diagonal is selected. 1.329 + while (halfiters--) { 1.330 + // 1/2 pel 1.331 + FIRST_LEVEL_CHECKS; 1.332 + // no reason to check the same one again. 1.333 + if (tr == br && tc == bc) 1.334 + break; 1.335 + tr = br; 1.336 + tc = bc; 1.337 + } 1.338 + 1.339 + // TODO(yaowu): Each subsequent iteration checks at least one point in common 1.340 + // with the last iteration could be 2 if diagonal is selected. 
1.341 + 1.342 + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 1.343 + if (forced_stop != 2) { 1.344 + hstep >>= 1; 1.345 + while (quarteriters--) { 1.346 + FIRST_LEVEL_CHECKS; 1.347 + // no reason to check the same one again. 1.348 + if (tr == br && tc == bc) 1.349 + break; 1.350 + tr = br; 1.351 + tc = bc; 1.352 + } 1.353 + } 1.354 + 1.355 + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 1.356 + hstep >>= 1; 1.357 + while (eighthiters--) { 1.358 + FIRST_LEVEL_CHECKS; 1.359 + // no reason to check the same one again. 1.360 + if (tr == br && tc == bc) 1.361 + break; 1.362 + tr = br; 1.363 + tc = bc; 1.364 + } 1.365 + } 1.366 + 1.367 + bestmv->row = br; 1.368 + bestmv->col = bc; 1.369 + 1.370 + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 1.371 + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 1.372 + return INT_MAX; 1.373 + 1.374 + return besterr; 1.375 +} 1.376 + 1.377 +int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, 1.378 + MV *bestmv, const MV *ref_mv, 1.379 + int allow_hp, 1.380 + int error_per_bit, 1.381 + const vp9_variance_fn_ptr_t *vfp, 1.382 + int forced_stop, 1.383 + int iters_per_step, 1.384 + int *mvjcost, int *mvcost[2], 1.385 + int *distortion, 1.386 + unsigned int *sse1) { 1.387 + uint8_t *z = x->plane[0].src.buf; 1.388 + const int src_stride = x->plane[0].src.stride; 1.389 + MACROBLOCKD *xd = &x->e_mbd; 1.390 + unsigned int besterr = INT_MAX; 1.391 + unsigned int sse; 1.392 + unsigned int whichdir; 1.393 + int thismse; 1.394 + unsigned int halfiters = iters_per_step; 1.395 + unsigned int quarteriters = iters_per_step; 1.396 + unsigned int eighthiters = iters_per_step; 1.397 + 1.398 + const int y_stride = xd->plane[0].pre[0].stride; 1.399 + const int offset = bestmv->row * y_stride + bestmv->col; 1.400 + uint8_t *y = xd->plane[0].pre[0].buf + offset; 1.401 + 1.402 + int rr = ref_mv->row; 1.403 + int rc = ref_mv->col; 1.404 + int br = bestmv->row * 8; 1.405 + int bc = bestmv->col * 8; 1.406 + 
int hstep = 4; 1.407 + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 1.408 + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 1.409 + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 1.410 + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 1.411 + 1.412 + int tr = br; 1.413 + int tc = bc; 1.414 + 1.415 + // central mv 1.416 + bestmv->row *= 8; 1.417 + bestmv->col *= 8; 1.418 + 1.419 + // calculate central point error 1.420 + besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 1.421 + *distortion = besterr; 1.422 + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 1.423 + 1.424 + // 1/2 pel 1.425 + FIRST_LEVEL_CHECKS; 1.426 + if (halfiters > 1) { 1.427 + SECOND_LEVEL_CHECKS; 1.428 + } 1.429 + tr = br; 1.430 + tc = bc; 1.431 + 1.432 + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 1.433 + if (forced_stop != 2) { 1.434 + hstep >>= 1; 1.435 + FIRST_LEVEL_CHECKS; 1.436 + if (quarteriters > 1) { 1.437 + SECOND_LEVEL_CHECKS; 1.438 + } 1.439 + tr = br; 1.440 + tc = bc; 1.441 + } 1.442 + 1.443 + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 1.444 + hstep >>= 1; 1.445 + FIRST_LEVEL_CHECKS; 1.446 + if (eighthiters > 1) { 1.447 + SECOND_LEVEL_CHECKS; 1.448 + } 1.449 + tr = br; 1.450 + tc = bc; 1.451 + } 1.452 + 1.453 + bestmv->row = br; 1.454 + bestmv->col = bc; 1.455 + 1.456 + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 1.457 + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 1.458 + return INT_MAX; 1.459 + 1.460 + return besterr; 1.461 +} 1.462 + 1.463 +#undef DIST 1.464 +/* returns subpixel variance error function */ 1.465 +#define DIST(r, c) \ 1.466 + vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ 1.467 + z, src_stride, &sse, second_pred) 1.468 + 1.469 +int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, 1.470 + MV *bestmv, const MV *ref_mv, 1.471 + int allow_hp, 1.472 + int error_per_bit, 1.473 + const vp9_variance_fn_ptr_t 
*vfp, 1.474 + int forced_stop, 1.475 + int iters_per_step, 1.476 + int *mvjcost, int *mvcost[2], 1.477 + int *distortion, 1.478 + unsigned int *sse1, 1.479 + const uint8_t *second_pred, 1.480 + int w, int h) { 1.481 + uint8_t *const z = x->plane[0].src.buf; 1.482 + const int src_stride = x->plane[0].src.stride; 1.483 + MACROBLOCKD *const xd = &x->e_mbd; 1.484 + 1.485 + unsigned int besterr = INT_MAX; 1.486 + unsigned int sse; 1.487 + unsigned int whichdir; 1.488 + unsigned int halfiters = iters_per_step; 1.489 + unsigned int quarteriters = iters_per_step; 1.490 + unsigned int eighthiters = iters_per_step; 1.491 + int thismse; 1.492 + 1.493 + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 1.494 + const int y_stride = xd->plane[0].pre[0].stride; 1.495 + const int offset = bestmv->row * y_stride + bestmv->col; 1.496 + uint8_t *const y = xd->plane[0].pre[0].buf + offset; 1.497 + 1.498 + int rr = ref_mv->row; 1.499 + int rc = ref_mv->col; 1.500 + int br = bestmv->row * 8; 1.501 + int bc = bestmv->col * 8; 1.502 + int hstep = 4; 1.503 + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 1.504 + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 1.505 + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 1.506 + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 1.507 + 1.508 + int tr = br; 1.509 + int tc = bc; 1.510 + 1.511 + // central mv 1.512 + bestmv->row *= 8; 1.513 + bestmv->col *= 8; 1.514 + 1.515 + // calculate central point error 1.516 + // TODO(yunqingwang): central pointer error was already calculated in full- 1.517 + // pixel search, and can be passed in this function. 
1.518 + comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 1.519 + besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 1.520 + *distortion = besterr; 1.521 + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 1.522 + 1.523 + // Each subsequent iteration checks at least one point in 1.524 + // common with the last iteration could be 2 ( if diag selected) 1.525 + while (halfiters--) { 1.526 + // 1/2 pel 1.527 + FIRST_LEVEL_CHECKS; 1.528 + // no reason to check the same one again. 1.529 + if (tr == br && tc == bc) 1.530 + break; 1.531 + tr = br; 1.532 + tc = bc; 1.533 + } 1.534 + 1.535 + // Each subsequent iteration checks at least one point in common with 1.536 + // the last iteration could be 2 ( if diag selected) 1/4 pel 1.537 + 1.538 + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 1.539 + if (forced_stop != 2) { 1.540 + hstep >>= 1; 1.541 + while (quarteriters--) { 1.542 + FIRST_LEVEL_CHECKS; 1.543 + // no reason to check the same one again. 1.544 + if (tr == br && tc == bc) 1.545 + break; 1.546 + tr = br; 1.547 + tc = bc; 1.548 + } 1.549 + } 1.550 + 1.551 + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 1.552 + hstep >>= 1; 1.553 + while (eighthiters--) { 1.554 + FIRST_LEVEL_CHECKS; 1.555 + // no reason to check the same one again. 
1.556 + if (tr == br && tc == bc) 1.557 + break; 1.558 + tr = br; 1.559 + tc = bc; 1.560 + } 1.561 + } 1.562 + bestmv->row = br; 1.563 + bestmv->col = bc; 1.564 + 1.565 + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 1.566 + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 1.567 + return INT_MAX; 1.568 + 1.569 + return besterr; 1.570 +} 1.571 + 1.572 +int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, 1.573 + MV *bestmv, const MV *ref_mv, 1.574 + int allow_hp, 1.575 + int error_per_bit, 1.576 + const vp9_variance_fn_ptr_t *vfp, 1.577 + int forced_stop, 1.578 + int iters_per_step, 1.579 + int *mvjcost, int *mvcost[2], 1.580 + int *distortion, 1.581 + unsigned int *sse1, 1.582 + const uint8_t *second_pred, 1.583 + int w, int h) { 1.584 + uint8_t *z = x->plane[0].src.buf; 1.585 + const int src_stride = x->plane[0].src.stride; 1.586 + MACROBLOCKD *xd = &x->e_mbd; 1.587 + unsigned int besterr = INT_MAX; 1.588 + unsigned int sse; 1.589 + unsigned int whichdir; 1.590 + int thismse; 1.591 + unsigned int halfiters = iters_per_step; 1.592 + unsigned int quarteriters = iters_per_step; 1.593 + unsigned int eighthiters = iters_per_step; 1.594 + 1.595 + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 1.596 + const int y_stride = xd->plane[0].pre[0].stride; 1.597 + const int offset = bestmv->row * y_stride + bestmv->col; 1.598 + uint8_t *y = xd->plane[0].pre[0].buf + offset; 1.599 + 1.600 + int rr = ref_mv->row; 1.601 + int rc = ref_mv->col; 1.602 + int br = bestmv->row * 8; 1.603 + int bc = bestmv->col * 8; 1.604 + int hstep = 4; 1.605 + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 1.606 + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 1.607 + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 1.608 + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 1.609 + 1.610 + int tr = br; 1.611 + int tc = bc; 1.612 + 1.613 + // central mv 1.614 + bestmv->row *= 8; 1.615 + bestmv->col *= 8; 
1.616 + 1.617 + // calculate central point error 1.618 + // TODO(yunqingwang): central pointer error was already calculated in full- 1.619 + // pixel search, and can be passed in this function. 1.620 + comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 1.621 + besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 1.622 + *distortion = besterr; 1.623 + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 1.624 + 1.625 + // Each subsequent iteration checks at least one point in 1.626 + // common with the last iteration could be 2 ( if diag selected) 1.627 + // 1/2 pel 1.628 + FIRST_LEVEL_CHECKS; 1.629 + if (halfiters > 1) { 1.630 + SECOND_LEVEL_CHECKS; 1.631 + } 1.632 + tr = br; 1.633 + tc = bc; 1.634 + 1.635 + // Each subsequent iteration checks at least one point in common with 1.636 + // the last iteration could be 2 ( if diag selected) 1/4 pel 1.637 + 1.638 + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 1.639 + if (forced_stop != 2) { 1.640 + hstep >>= 1; 1.641 + FIRST_LEVEL_CHECKS; 1.642 + if (quarteriters > 1) { 1.643 + SECOND_LEVEL_CHECKS; 1.644 + } 1.645 + tr = br; 1.646 + tc = bc; 1.647 + } 1.648 + 1.649 + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 1.650 + hstep >>= 1; 1.651 + FIRST_LEVEL_CHECKS; 1.652 + if (eighthiters > 1) { 1.653 + SECOND_LEVEL_CHECKS; 1.654 + } 1.655 + tr = br; 1.656 + tc = bc; 1.657 + } 1.658 + bestmv->row = br; 1.659 + bestmv->col = bc; 1.660 + 1.661 + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 1.662 + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 1.663 + return INT_MAX; 1.664 + 1.665 + return besterr; 1.666 +} 1.667 + 1.668 +#undef MVC 1.669 +#undef PRE 1.670 +#undef DIST 1.671 +#undef IFMVCV 1.672 +#undef CHECK_BETTER 1.673 +#undef SP 1.674 + 1.675 +#define CHECK_BOUNDS(range) \ 1.676 + {\ 1.677 + all_in = 1;\ 1.678 + all_in &= ((br-range) >= x->mv_row_min);\ 1.679 + all_in &= ((br+range) <= x->mv_row_max);\ 1.680 + all_in &= ((bc-range) >= 
x->mv_col_min);\ 1.681 + all_in &= ((bc+range) <= x->mv_col_max);\ 1.682 + } 1.683 + 1.684 +#define CHECK_POINT \ 1.685 + {\ 1.686 + if (this_mv.col < x->mv_col_min) continue;\ 1.687 + if (this_mv.col > x->mv_col_max) continue;\ 1.688 + if (this_mv.row < x->mv_row_min) continue;\ 1.689 + if (this_mv.row > x->mv_row_max) continue;\ 1.690 + } 1.691 + 1.692 +#define CHECK_BETTER \ 1.693 + {\ 1.694 + if (thissad < bestsad)\ 1.695 + {\ 1.696 + if (use_mvcost) \ 1.697 + thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \ 1.698 + mvjsadcost, mvsadcost, \ 1.699 + sad_per_bit);\ 1.700 + if (thissad < bestsad)\ 1.701 + {\ 1.702 + bestsad = thissad;\ 1.703 + best_site = i;\ 1.704 + }\ 1.705 + }\ 1.706 + } 1.707 + 1.708 +#define get_next_chkpts(list, i, n) \ 1.709 + list[0] = ((i) == 0 ? (n) - 1 : (i) - 1); \ 1.710 + list[1] = (i); \ 1.711 + list[2] = ((i) == (n) - 1 ? 0 : (i) + 1); 1.712 + 1.713 +#define MAX_PATTERN_SCALES 11 1.714 +#define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 1.715 +#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 1.716 + 1.717 +// Generic pattern search function that searches over multiple scales. 
1.718 +// Each scale can have a different number of candidates and shape of 1.719 +// candidates as indicated in the num_candidates and candidates arrays 1.720 +// passed into this function 1.721 +static int vp9_pattern_search(MACROBLOCK *x, 1.722 + MV *ref_mv, 1.723 + int search_param, 1.724 + int sad_per_bit, 1.725 + int do_init_search, 1.726 + int do_refine, 1.727 + const vp9_variance_fn_ptr_t *vfp, 1.728 + int use_mvcost, 1.729 + const MV *center_mv, MV *best_mv, 1.730 + const int num_candidates[MAX_PATTERN_SCALES], 1.731 + const MV candidates[MAX_PATTERN_SCALES] 1.732 + [MAX_PATTERN_CANDIDATES]) { 1.733 + const MACROBLOCKD* const xd = &x->e_mbd; 1.734 + static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 1.735 + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1.736 + }; 1.737 + int i, j, s, t; 1.738 + uint8_t *what = x->plane[0].src.buf; 1.739 + int what_stride = x->plane[0].src.stride; 1.740 + int in_what_stride = xd->plane[0].pre[0].stride; 1.741 + int br, bc; 1.742 + MV this_mv; 1.743 + int bestsad = INT_MAX; 1.744 + int thissad; 1.745 + uint8_t *base_offset; 1.746 + uint8_t *this_offset; 1.747 + int k = -1; 1.748 + int all_in; 1.749 + int best_site = -1; 1.750 + int_mv fcenter_mv; 1.751 + int best_init_s = search_param_to_steps[search_param]; 1.752 + int *mvjsadcost = x->nmvjointsadcost; 1.753 + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1.754 + 1.755 + fcenter_mv.as_mv.row = center_mv->row >> 3; 1.756 + fcenter_mv.as_mv.col = center_mv->col >> 3; 1.757 + 1.758 + // adjust ref_mv to make sure it is within MV range 1.759 + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1.760 + br = ref_mv->row; 1.761 + bc = ref_mv->col; 1.762 + 1.763 + // Work out the start point for the search 1.764 + base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); 1.765 + this_offset = base_offset + (br * in_what_stride) + bc; 1.766 + this_mv.row = br; 1.767 + this_mv.col = bc; 1.768 + bestsad = vfp->sdf(what, what_stride, this_offset, 
in_what_stride, 0x7fffffff) 1.769 + + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, 1.770 + mvjsadcost, mvsadcost, sad_per_bit); 1.771 + 1.772 + // Search all possible scales upto the search param around the center point 1.773 + // pick the scale of the point that is best as the starting scale of 1.774 + // further steps around it. 1.775 + if (do_init_search) { 1.776 + s = best_init_s; 1.777 + best_init_s = -1; 1.778 + for (t = 0; t <= s; ++t) { 1.779 + best_site = -1; 1.780 + CHECK_BOUNDS((1 << t)) 1.781 + if (all_in) { 1.782 + for (i = 0; i < num_candidates[t]; i++) { 1.783 + this_mv.row = br + candidates[t][i].row; 1.784 + this_mv.col = bc + candidates[t][i].col; 1.785 + this_offset = base_offset + (this_mv.row * in_what_stride) + 1.786 + this_mv.col; 1.787 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.788 + bestsad); 1.789 + CHECK_BETTER 1.790 + } 1.791 + } else { 1.792 + for (i = 0; i < num_candidates[t]; i++) { 1.793 + this_mv.row = br + candidates[t][i].row; 1.794 + this_mv.col = bc + candidates[t][i].col; 1.795 + CHECK_POINT 1.796 + this_offset = base_offset + (this_mv.row * in_what_stride) + 1.797 + this_mv.col; 1.798 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.799 + bestsad); 1.800 + CHECK_BETTER 1.801 + } 1.802 + } 1.803 + if (best_site == -1) { 1.804 + continue; 1.805 + } else { 1.806 + best_init_s = t; 1.807 + k = best_site; 1.808 + } 1.809 + } 1.810 + if (best_init_s != -1) { 1.811 + br += candidates[best_init_s][k].row; 1.812 + bc += candidates[best_init_s][k].col; 1.813 + } 1.814 + } 1.815 + 1.816 + // If the center point is still the best, just skip this and move to 1.817 + // the refinement step. 
1.818 + if (best_init_s != -1) { 1.819 + s = best_init_s; 1.820 + best_site = -1; 1.821 + do { 1.822 + // No need to search all 6 points the 1st time if initial search was used 1.823 + if (!do_init_search || s != best_init_s) { 1.824 + CHECK_BOUNDS((1 << s)) 1.825 + if (all_in) { 1.826 + for (i = 0; i < num_candidates[s]; i++) { 1.827 + this_mv.row = br + candidates[s][i].row; 1.828 + this_mv.col = bc + candidates[s][i].col; 1.829 + this_offset = base_offset + (this_mv.row * in_what_stride) + 1.830 + this_mv.col; 1.831 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.832 + bestsad); 1.833 + CHECK_BETTER 1.834 + } 1.835 + } else { 1.836 + for (i = 0; i < num_candidates[s]; i++) { 1.837 + this_mv.row = br + candidates[s][i].row; 1.838 + this_mv.col = bc + candidates[s][i].col; 1.839 + CHECK_POINT 1.840 + this_offset = base_offset + (this_mv.row * in_what_stride) + 1.841 + this_mv.col; 1.842 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.843 + bestsad); 1.844 + CHECK_BETTER 1.845 + } 1.846 + } 1.847 + 1.848 + if (best_site == -1) { 1.849 + continue; 1.850 + } else { 1.851 + br += candidates[s][best_site].row; 1.852 + bc += candidates[s][best_site].col; 1.853 + k = best_site; 1.854 + } 1.855 + } 1.856 + 1.857 + do { 1.858 + int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1.859 + best_site = -1; 1.860 + CHECK_BOUNDS((1 << s)) 1.861 + 1.862 + get_next_chkpts(next_chkpts_indices, k, num_candidates[s]); 1.863 + if (all_in) { 1.864 + for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1.865 + this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; 1.866 + this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; 1.867 + this_offset = base_offset + (this_mv.row * (in_what_stride)) + 1.868 + this_mv.col; 1.869 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.870 + bestsad); 1.871 + CHECK_BETTER 1.872 + } 1.873 + } else { 1.874 + for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1.875 + this_mv.row = br + 
candidates[s][next_chkpts_indices[i]].row; 1.876 + this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; 1.877 + CHECK_POINT 1.878 + this_offset = base_offset + (this_mv.row * (in_what_stride)) + 1.879 + this_mv.col; 1.880 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.881 + bestsad); 1.882 + CHECK_BETTER 1.883 + } 1.884 + } 1.885 + 1.886 + if (best_site != -1) { 1.887 + k = next_chkpts_indices[best_site]; 1.888 + br += candidates[s][k].row; 1.889 + bc += candidates[s][k].col; 1.890 + } 1.891 + } while (best_site != -1); 1.892 + } while (s--); 1.893 + } 1.894 + 1.895 + // Check 4 1-away neighbors if do_refine is true. 1.896 + // For most well-designed schemes do_refine will not be necessary. 1.897 + if (do_refine) { 1.898 + static const MV neighbors[4] = { 1.899 + {0, -1}, { -1, 0}, {1, 0}, {0, 1}, 1.900 + }; 1.901 + for (j = 0; j < 16; j++) { 1.902 + best_site = -1; 1.903 + CHECK_BOUNDS(1) 1.904 + if (all_in) { 1.905 + for (i = 0; i < 4; i++) { 1.906 + this_mv.row = br + neighbors[i].row; 1.907 + this_mv.col = bc + neighbors[i].col; 1.908 + this_offset = base_offset + (this_mv.row * (in_what_stride)) + 1.909 + this_mv.col; 1.910 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.911 + bestsad); 1.912 + CHECK_BETTER 1.913 + } 1.914 + } else { 1.915 + for (i = 0; i < 4; i++) { 1.916 + this_mv.row = br + neighbors[i].row; 1.917 + this_mv.col = bc + neighbors[i].col; 1.918 + CHECK_POINT 1.919 + this_offset = base_offset + (this_mv.row * (in_what_stride)) + 1.920 + this_mv.col; 1.921 + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 1.922 + bestsad); 1.923 + CHECK_BETTER 1.924 + } 1.925 + } 1.926 + 1.927 + if (best_site == -1) { 1.928 + break; 1.929 + } else { 1.930 + br += neighbors[best_site].row; 1.931 + bc += neighbors[best_site].col; 1.932 + } 1.933 + } 1.934 + } 1.935 + 1.936 + best_mv->row = br; 1.937 + best_mv->col = bc; 1.938 + 1.939 + this_offset = base_offset + (best_mv->row * 
in_what_stride) + 1.940 + best_mv->col; 1.941 + this_mv.row = best_mv->row * 8; 1.942 + this_mv.col = best_mv->col * 8; 1.943 + if (bestsad == INT_MAX) 1.944 + return INT_MAX; 1.945 + 1.946 + return vfp->vf(what, what_stride, this_offset, in_what_stride, 1.947 + (unsigned int *)&bestsad) + 1.948 + use_mvcost ? mv_err_cost(&this_mv, center_mv, 1.949 + x->nmvjointcost, x->mvcost, x->errorperbit) 1.950 + : 0; 1.951 +} 1.952 + 1.953 + 1.954 +int vp9_hex_search(MACROBLOCK *x, 1.955 + MV *ref_mv, 1.956 + int search_param, 1.957 + int sad_per_bit, 1.958 + int do_init_search, 1.959 + const vp9_variance_fn_ptr_t *vfp, 1.960 + int use_mvcost, 1.961 + const MV *center_mv, MV *best_mv) { 1.962 + // First scale has 8-closest points, the rest have 6 points in hex shape 1.963 + // at increasing scales 1.964 + static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 1.965 + 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 1.966 + }; 1.967 + // Note that the largest candidate step at each scale is 2^scale 1.968 + static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1.969 + {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, 1.970 + {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, 1.971 + {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, 1.972 + {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, 1.973 + {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, 1.974 + {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, 1.975 + {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, 1.976 + {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, 1.977 + {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, 1.978 + {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, 1.979 + {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, 1.980 + { -1024, 0}}, 1.981 + }; 1.982 + return 1.983 + vp9_pattern_search(x, ref_mv, 
search_param, sad_per_bit, 1.984 + do_init_search, 0, vfp, use_mvcost, 1.985 + center_mv, best_mv, 1.986 + hex_num_candidates, hex_candidates); 1.987 +} 1.988 + 1.989 +int vp9_bigdia_search(MACROBLOCK *x, 1.990 + MV *ref_mv, 1.991 + int search_param, 1.992 + int sad_per_bit, 1.993 + int do_init_search, 1.994 + const vp9_variance_fn_ptr_t *vfp, 1.995 + int use_mvcost, 1.996 + const MV *center_mv, 1.997 + MV *best_mv) { 1.998 + // First scale has 4-closest points, the rest have 8 points in diamond 1.999 + // shape at increasing scales 1.1000 + static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 1.1001 + 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1.1002 + }; 1.1003 + // Note that the largest candidate step at each scale is 2^scale 1.1004 + static const MV bigdia_candidates[MAX_PATTERN_SCALES] 1.1005 + [MAX_PATTERN_CANDIDATES] = { 1.1006 + {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, 1.1007 + {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, 1.1008 + {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, 1.1009 + {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, 1.1010 + {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, 1.1011 + {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, 1.1012 + {-16, 16}, {-32, 0}}, 1.1013 + {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, 1.1014 + {-32, 32}, {-64, 0}}, 1.1015 + {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, 1.1016 + {-64, 64}, {-128, 0}}, 1.1017 + {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, 1.1018 + {-128, 128}, {-256, 0}}, 1.1019 + {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, 1.1020 + {-256, 256}, {-512, 0}}, 1.1021 + {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, 1.1022 + {-512, 512}, {-1024, 0}}, 1.1023 + }; 1.1024 + return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1.1025 + do_init_search, 0, vfp, use_mvcost, 
1.1026 + center_mv, best_mv, 1.1027 + bigdia_num_candidates, bigdia_candidates); 1.1028 +} 1.1029 + 1.1030 +int vp9_square_search(MACROBLOCK *x, 1.1031 + MV *ref_mv, 1.1032 + int search_param, 1.1033 + int sad_per_bit, 1.1034 + int do_init_search, 1.1035 + const vp9_variance_fn_ptr_t *vfp, 1.1036 + int use_mvcost, 1.1037 + const MV *center_mv, 1.1038 + MV *best_mv) { 1.1039 + // All scales have 8 closest points in square shape 1.1040 + static const int square_num_candidates[MAX_PATTERN_SCALES] = { 1.1041 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1.1042 + }; 1.1043 + // Note that the largest candidate step at each scale is 2^scale 1.1044 + static const MV square_candidates[MAX_PATTERN_SCALES] 1.1045 + [MAX_PATTERN_CANDIDATES] = { 1.1046 + {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, 1.1047 + {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, 1.1048 + {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, 1.1049 + {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, 1.1050 + {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, 1.1051 + {-16, 16}, {-16, 0}}, 1.1052 + {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, 1.1053 + {-32, 32}, {-32, 0}}, 1.1054 + {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, 1.1055 + {-64, 64}, {-64, 0}}, 1.1056 + {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, 1.1057 + {-128, 128}, {-128, 0}}, 1.1058 + {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, 1.1059 + {-256, 256}, {-256, 0}}, 1.1060 + {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, 1.1061 + {-512, 512}, {-512, 0}}, 1.1062 + {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, 1.1063 + {0, 1024}, {-1024, 1024}, {-1024, 0}}, 1.1064 + }; 1.1065 + return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1.1066 + do_init_search, 0, vfp, use_mvcost, 1.1067 + center_mv, best_mv, 1.1068 + 
square_num_candidates, square_candidates); 1.1069 +}; 1.1070 + 1.1071 +#undef CHECK_BOUNDS 1.1072 +#undef CHECK_POINT 1.1073 +#undef CHECK_BETTER 1.1074 + 1.1075 +int vp9_diamond_search_sad_c(MACROBLOCK *x, 1.1076 + int_mv *ref_mv, int_mv *best_mv, 1.1077 + int search_param, int sad_per_bit, int *num00, 1.1078 + vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1.1079 + int *mvcost[2], int_mv *center_mv) { 1.1080 + int i, j, step; 1.1081 + 1.1082 + const MACROBLOCKD* const xd = &x->e_mbd; 1.1083 + uint8_t *what = x->plane[0].src.buf; 1.1084 + int what_stride = x->plane[0].src.stride; 1.1085 + uint8_t *in_what; 1.1086 + int in_what_stride = xd->plane[0].pre[0].stride; 1.1087 + uint8_t *best_address; 1.1088 + 1.1089 + int tot_steps; 1.1090 + int_mv this_mv; 1.1091 + 1.1092 + int bestsad = INT_MAX; 1.1093 + int best_site = 0; 1.1094 + int last_site = 0; 1.1095 + 1.1096 + int ref_row, ref_col; 1.1097 + int this_row_offset, this_col_offset; 1.1098 + search_site *ss; 1.1099 + 1.1100 + uint8_t *check_here; 1.1101 + int thissad; 1.1102 + int_mv fcenter_mv; 1.1103 + 1.1104 + int *mvjsadcost = x->nmvjointsadcost; 1.1105 + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1.1106 + 1.1107 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1108 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1109 + 1.1110 + clamp_mv(&ref_mv->as_mv, 1.1111 + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1.1112 + ref_row = ref_mv->as_mv.row; 1.1113 + ref_col = ref_mv->as_mv.col; 1.1114 + *num00 = 0; 1.1115 + best_mv->as_mv.row = ref_row; 1.1116 + best_mv->as_mv.col = ref_col; 1.1117 + 1.1118 + // Work out the start point for the search 1.1119 + in_what = (uint8_t *)(xd->plane[0].pre[0].buf + 1.1120 + (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); 1.1121 + best_address = in_what; 1.1122 + 1.1123 + // Check the starting position 1.1124 + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1.1125 + + mvsad_err_cost(&best_mv->as_mv, 
&fcenter_mv.as_mv, 1.1126 + mvjsadcost, mvsadcost, sad_per_bit); 1.1127 + 1.1128 + // search_param determines the length of the initial step and hence the number 1.1129 + // of iterations 1.1130 + // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = 1.1131 + // (MAX_FIRST_STEP/4) pel... etc. 1.1132 + ss = &x->ss[search_param * x->searches_per_step]; 1.1133 + tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1.1134 + 1.1135 + i = 1; 1.1136 + 1.1137 + for (step = 0; step < tot_steps; step++) { 1.1138 + for (j = 0; j < x->searches_per_step; j++) { 1.1139 + // Trap illegal vectors 1.1140 + this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1.1141 + this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1.1142 + 1.1143 + if ((this_col_offset > x->mv_col_min) && 1.1144 + (this_col_offset < x->mv_col_max) && 1.1145 + (this_row_offset > x->mv_row_min) && 1.1146 + (this_row_offset < x->mv_row_max)) { 1.1147 + check_here = ss[i].offset + best_address; 1.1148 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1.1149 + bestsad); 1.1150 + 1.1151 + if (thissad < bestsad) { 1.1152 + this_mv.as_mv.row = this_row_offset; 1.1153 + this_mv.as_mv.col = this_col_offset; 1.1154 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1155 + mvjsadcost, mvsadcost, sad_per_bit); 1.1156 + 1.1157 + if (thissad < bestsad) { 1.1158 + bestsad = thissad; 1.1159 + best_site = i; 1.1160 + } 1.1161 + } 1.1162 + } 1.1163 + 1.1164 + i++; 1.1165 + } 1.1166 + 1.1167 + if (best_site != last_site) { 1.1168 + best_mv->as_mv.row += ss[best_site].mv.row; 1.1169 + best_mv->as_mv.col += ss[best_site].mv.col; 1.1170 + best_address += ss[best_site].offset; 1.1171 + last_site = best_site; 1.1172 +#if defined(NEW_DIAMOND_SEARCH) 1.1173 + while (1) { 1.1174 + this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; 1.1175 + this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; 1.1176 + if ((this_col_offset > x->mv_col_min) && 1.1177 + 
(this_col_offset < x->mv_col_max) && 1.1178 + (this_row_offset > x->mv_row_min) && 1.1179 + (this_row_offset < x->mv_row_max)) { 1.1180 + check_here = ss[best_site].offset + best_address; 1.1181 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1.1182 + bestsad); 1.1183 + if (thissad < bestsad) { 1.1184 + this_mv.as_mv.row = this_row_offset; 1.1185 + this_mv.as_mv.col = this_col_offset; 1.1186 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1187 + mvjsadcost, mvsadcost, sad_per_bit); 1.1188 + if (thissad < bestsad) { 1.1189 + bestsad = thissad; 1.1190 + best_mv->as_mv.row += ss[best_site].mv.row; 1.1191 + best_mv->as_mv.col += ss[best_site].mv.col; 1.1192 + best_address += ss[best_site].offset; 1.1193 + continue; 1.1194 + } 1.1195 + } 1.1196 + } 1.1197 + break; 1.1198 + }; 1.1199 +#endif 1.1200 + } else if (best_address == in_what) { 1.1201 + (*num00)++; 1.1202 + } 1.1203 + } 1.1204 + 1.1205 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1206 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1207 + 1.1208 + if (bestsad == INT_MAX) 1.1209 + return INT_MAX; 1.1210 + 1.1211 + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1.1212 + (unsigned int *)(&thissad)) + 1.1213 + mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1.1214 + mvjcost, mvcost, x->errorperbit); 1.1215 +} 1.1216 + 1.1217 +int vp9_diamond_search_sadx4(MACROBLOCK *x, 1.1218 + int_mv *ref_mv, int_mv *best_mv, int search_param, 1.1219 + int sad_per_bit, int *num00, 1.1220 + vp9_variance_fn_ptr_t *fn_ptr, 1.1221 + int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1.1222 + int i, j, step; 1.1223 + 1.1224 + const MACROBLOCKD* const xd = &x->e_mbd; 1.1225 + uint8_t *what = x->plane[0].src.buf; 1.1226 + int what_stride = x->plane[0].src.stride; 1.1227 + uint8_t *in_what; 1.1228 + int in_what_stride = xd->plane[0].pre[0].stride; 1.1229 + uint8_t *best_address; 1.1230 + 1.1231 + int tot_steps; 1.1232 + int_mv this_mv; 1.1233 + 1.1234 + unsigned int bestsad = 
INT_MAX; 1.1235 + int best_site = 0; 1.1236 + int last_site = 0; 1.1237 + 1.1238 + int ref_row; 1.1239 + int ref_col; 1.1240 + int this_row_offset; 1.1241 + int this_col_offset; 1.1242 + search_site *ss; 1.1243 + 1.1244 + uint8_t *check_here; 1.1245 + unsigned int thissad; 1.1246 + int_mv fcenter_mv; 1.1247 + 1.1248 + int *mvjsadcost = x->nmvjointsadcost; 1.1249 + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1.1250 + 1.1251 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1252 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1253 + 1.1254 + clamp_mv(&ref_mv->as_mv, 1.1255 + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1.1256 + ref_row = ref_mv->as_mv.row; 1.1257 + ref_col = ref_mv->as_mv.col; 1.1258 + *num00 = 0; 1.1259 + best_mv->as_mv.row = ref_row; 1.1260 + best_mv->as_mv.col = ref_col; 1.1261 + 1.1262 + // Work out the start point for the search 1.1263 + in_what = (uint8_t *)(xd->plane[0].pre[0].buf + 1.1264 + (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); 1.1265 + best_address = in_what; 1.1266 + 1.1267 + // Check the starting position 1.1268 + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1.1269 + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1.1270 + mvjsadcost, mvsadcost, sad_per_bit); 1.1271 + 1.1272 + // search_param determines the length of the initial step and hence the number 1.1273 + // of iterations. 1.1274 + // 0 = initial step (MAX_FIRST_STEP) pel 1.1275 + // 1 = (MAX_FIRST_STEP/2) pel, 1.1276 + // 2 = (MAX_FIRST_STEP/4) pel... 1.1277 + ss = &x->ss[search_param * x->searches_per_step]; 1.1278 + tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1.1279 + 1.1280 + i = 1; 1.1281 + 1.1282 + for (step = 0; step < tot_steps; step++) { 1.1283 + int all_in = 1, t; 1.1284 + 1.1285 + // All_in is true if every one of the points we are checking are within 1.1286 + // the bounds of the image. 
1.1287 + all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); 1.1288 + all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); 1.1289 + all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); 1.1290 + all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); 1.1291 + 1.1292 + // If all the pixels are within the bounds we don't check whether the 1.1293 + // search point is valid in this loop, otherwise we check each point 1.1294 + // for validity.. 1.1295 + if (all_in) { 1.1296 + unsigned int sad_array[4]; 1.1297 + 1.1298 + for (j = 0; j < x->searches_per_step; j += 4) { 1.1299 + unsigned char const *block_offset[4]; 1.1300 + 1.1301 + for (t = 0; t < 4; t++) 1.1302 + block_offset[t] = ss[i + t].offset + best_address; 1.1303 + 1.1304 + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1.1305 + sad_array); 1.1306 + 1.1307 + for (t = 0; t < 4; t++, i++) { 1.1308 + if (sad_array[t] < bestsad) { 1.1309 + this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; 1.1310 + this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; 1.1311 + sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1312 + mvjsadcost, mvsadcost, sad_per_bit); 1.1313 + 1.1314 + if (sad_array[t] < bestsad) { 1.1315 + bestsad = sad_array[t]; 1.1316 + best_site = i; 1.1317 + } 1.1318 + } 1.1319 + } 1.1320 + } 1.1321 + } else { 1.1322 + for (j = 0; j < x->searches_per_step; j++) { 1.1323 + // Trap illegal vectors 1.1324 + this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1.1325 + this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1.1326 + 1.1327 + if ((this_col_offset > x->mv_col_min) && 1.1328 + (this_col_offset < x->mv_col_max) && 1.1329 + (this_row_offset > x->mv_row_min) && 1.1330 + (this_row_offset < x->mv_row_max)) { 1.1331 + check_here = ss[i].offset + best_address; 1.1332 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1.1333 + bestsad); 1.1334 + 1.1335 + if (thissad < bestsad) { 1.1336 + 
this_mv.as_mv.row = this_row_offset; 1.1337 + this_mv.as_mv.col = this_col_offset; 1.1338 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1339 + mvjsadcost, mvsadcost, sad_per_bit); 1.1340 + 1.1341 + if (thissad < bestsad) { 1.1342 + bestsad = thissad; 1.1343 + best_site = i; 1.1344 + } 1.1345 + } 1.1346 + } 1.1347 + i++; 1.1348 + } 1.1349 + } 1.1350 + if (best_site != last_site) { 1.1351 + best_mv->as_mv.row += ss[best_site].mv.row; 1.1352 + best_mv->as_mv.col += ss[best_site].mv.col; 1.1353 + best_address += ss[best_site].offset; 1.1354 + last_site = best_site; 1.1355 +#if defined(NEW_DIAMOND_SEARCH) 1.1356 + while (1) { 1.1357 + this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; 1.1358 + this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; 1.1359 + if ((this_col_offset > x->mv_col_min) && 1.1360 + (this_col_offset < x->mv_col_max) && 1.1361 + (this_row_offset > x->mv_row_min) && 1.1362 + (this_row_offset < x->mv_row_max)) { 1.1363 + check_here = ss[best_site].offset + best_address; 1.1364 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1.1365 + bestsad); 1.1366 + if (thissad < bestsad) { 1.1367 + this_mv.as_mv.row = this_row_offset; 1.1368 + this_mv.as_mv.col = this_col_offset; 1.1369 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1370 + mvjsadcost, mvsadcost, sad_per_bit); 1.1371 + if (thissad < bestsad) { 1.1372 + bestsad = thissad; 1.1373 + best_mv->as_mv.row += ss[best_site].mv.row; 1.1374 + best_mv->as_mv.col += ss[best_site].mv.col; 1.1375 + best_address += ss[best_site].offset; 1.1376 + continue; 1.1377 + } 1.1378 + } 1.1379 + } 1.1380 + break; 1.1381 + }; 1.1382 +#endif 1.1383 + } else if (best_address == in_what) { 1.1384 + (*num00)++; 1.1385 + } 1.1386 + } 1.1387 + 1.1388 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1389 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1390 + 1.1391 + if (bestsad == INT_MAX) 1.1392 + return INT_MAX; 1.1393 + 1.1394 + return fn_ptr->vf(what, 
what_stride, best_address, in_what_stride, 1.1395 + (unsigned int *)(&thissad)) + 1.1396 + mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1.1397 + mvjcost, mvcost, x->errorperbit); 1.1398 +} 1.1399 + 1.1400 +/* do_refine: If last step (1-away) of n-step search doesn't pick the center 1.1401 + point as the best match, we will do a final 1-away diamond 1.1402 + refining search */ 1.1403 + 1.1404 +int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, 1.1405 + int_mv *mvp_full, int step_param, 1.1406 + int sadpb, int further_steps, 1.1407 + int do_refine, vp9_variance_fn_ptr_t *fn_ptr, 1.1408 + int_mv *ref_mv, int_mv *dst_mv) { 1.1409 + int_mv temp_mv; 1.1410 + int thissme, n, num00; 1.1411 + int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1.1412 + step_param, sadpb, &num00, 1.1413 + fn_ptr, x->nmvjointcost, 1.1414 + x->mvcost, ref_mv); 1.1415 + dst_mv->as_int = temp_mv.as_int; 1.1416 + 1.1417 + n = num00; 1.1418 + num00 = 0; 1.1419 + 1.1420 + /* If there won't be more n-step search, check to see if refining search is 1.1421 + * needed. */ 1.1422 + if (n > further_steps) 1.1423 + do_refine = 0; 1.1424 + 1.1425 + while (n < further_steps) { 1.1426 + n++; 1.1427 + 1.1428 + if (num00) { 1.1429 + num00--; 1.1430 + } else { 1.1431 + thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1.1432 + step_param + n, sadpb, &num00, 1.1433 + fn_ptr, x->nmvjointcost, x->mvcost, 1.1434 + ref_mv); 1.1435 + 1.1436 + /* check to see if refining search is needed. 
*/ 1.1437 + if (num00 > (further_steps - n)) 1.1438 + do_refine = 0; 1.1439 + 1.1440 + if (thissme < bestsme) { 1.1441 + bestsme = thissme; 1.1442 + dst_mv->as_int = temp_mv.as_int; 1.1443 + } 1.1444 + } 1.1445 + } 1.1446 + 1.1447 + /* final 1-away diamond refining search */ 1.1448 + if (do_refine == 1) { 1.1449 + int search_range = 8; 1.1450 + int_mv best_mv; 1.1451 + best_mv.as_int = dst_mv->as_int; 1.1452 + thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, 1.1453 + fn_ptr, x->nmvjointcost, x->mvcost, 1.1454 + ref_mv); 1.1455 + 1.1456 + if (thissme < bestsme) { 1.1457 + bestsme = thissme; 1.1458 + dst_mv->as_int = best_mv.as_int; 1.1459 + } 1.1460 + } 1.1461 + return bestsme; 1.1462 +} 1.1463 + 1.1464 +int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, 1.1465 + int sad_per_bit, int distance, 1.1466 + vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1.1467 + int *mvcost[2], 1.1468 + int_mv *center_mv, int n) { 1.1469 + const MACROBLOCKD* const xd = &x->e_mbd; 1.1470 + uint8_t *what = x->plane[0].src.buf; 1.1471 + int what_stride = x->plane[0].src.stride; 1.1472 + uint8_t *in_what; 1.1473 + int in_what_stride = xd->plane[0].pre[0].stride; 1.1474 + int mv_stride = xd->plane[0].pre[0].stride; 1.1475 + uint8_t *bestaddress; 1.1476 + int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1.1477 + int_mv this_mv; 1.1478 + int bestsad = INT_MAX; 1.1479 + int r, c; 1.1480 + 1.1481 + uint8_t *check_here; 1.1482 + int thissad; 1.1483 + 1.1484 + int ref_row = ref_mv->as_mv.row; 1.1485 + int ref_col = ref_mv->as_mv.col; 1.1486 + 1.1487 + int row_min = ref_row - distance; 1.1488 + int row_max = ref_row + distance; 1.1489 + int col_min = ref_col - distance; 1.1490 + int col_max = ref_col + distance; 1.1491 + int_mv fcenter_mv; 1.1492 + 1.1493 + int *mvjsadcost = x->nmvjointsadcost; 1.1494 + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1.1495 + 1.1496 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1497 + fcenter_mv.as_mv.col = 
center_mv->as_mv.col >> 3; 1.1498 + 1.1499 + // Work out the mid point for the search 1.1500 + in_what = xd->plane[0].pre[0].buf; 1.1501 + bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1.1502 + 1.1503 + best_mv->as_mv.row = ref_row; 1.1504 + best_mv->as_mv.col = ref_col; 1.1505 + 1.1506 + // Baseline value at the centre 1.1507 + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1.1508 + in_what_stride, 0x7fffffff) 1.1509 + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1.1510 + mvjsadcost, mvsadcost, sad_per_bit); 1.1511 + 1.1512 + // Apply further limits to prevent us looking using vectors that stretch 1.1513 + // beyond the UMV border 1.1514 + col_min = MAX(col_min, x->mv_col_min); 1.1515 + col_max = MIN(col_max, x->mv_col_max); 1.1516 + row_min = MAX(row_min, x->mv_row_min); 1.1517 + row_max = MIN(row_max, x->mv_row_max); 1.1518 + 1.1519 + for (r = row_min; r < row_max; r++) { 1.1520 + this_mv.as_mv.row = r; 1.1521 + check_here = r * mv_stride + in_what + col_min; 1.1522 + 1.1523 + for (c = col_min; c < col_max; c++) { 1.1524 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1.1525 + bestsad); 1.1526 + 1.1527 + this_mv.as_mv.col = c; 1.1528 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1529 + mvjsadcost, mvsadcost, sad_per_bit); 1.1530 + 1.1531 + if (thissad < bestsad) { 1.1532 + bestsad = thissad; 1.1533 + best_mv->as_mv.row = r; 1.1534 + best_mv->as_mv.col = c; 1.1535 + bestaddress = check_here; 1.1536 + } 1.1537 + 1.1538 + check_here++; 1.1539 + } 1.1540 + } 1.1541 + 1.1542 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1543 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1544 + 1.1545 + if (bestsad < INT_MAX) 1.1546 + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1.1547 + (unsigned int *)(&thissad)) + 1.1548 + mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1.1549 + mvjcost, mvcost, x->errorperbit); 1.1550 + else 1.1551 + return INT_MAX; 1.1552 +} 1.1553 + 1.1554 
+int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, 1.1555 + int sad_per_bit, int distance, 1.1556 + vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1.1557 + int *mvcost[2], int_mv *center_mv, int n) { 1.1558 + const MACROBLOCKD* const xd = &x->e_mbd; 1.1559 + uint8_t *what = x->plane[0].src.buf; 1.1560 + int what_stride = x->plane[0].src.stride; 1.1561 + uint8_t *in_what; 1.1562 + int in_what_stride = xd->plane[0].pre[0].stride; 1.1563 + int mv_stride = xd->plane[0].pre[0].stride; 1.1564 + uint8_t *bestaddress; 1.1565 + int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1.1566 + int_mv this_mv; 1.1567 + unsigned int bestsad = INT_MAX; 1.1568 + int r, c; 1.1569 + 1.1570 + uint8_t *check_here; 1.1571 + unsigned int thissad; 1.1572 + 1.1573 + int ref_row = ref_mv->as_mv.row; 1.1574 + int ref_col = ref_mv->as_mv.col; 1.1575 + 1.1576 + int row_min = ref_row - distance; 1.1577 + int row_max = ref_row + distance; 1.1578 + int col_min = ref_col - distance; 1.1579 + int col_max = ref_col + distance; 1.1580 + 1.1581 + unsigned int sad_array[3]; 1.1582 + int_mv fcenter_mv; 1.1583 + 1.1584 + int *mvjsadcost = x->nmvjointsadcost; 1.1585 + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1.1586 + 1.1587 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1588 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1589 + 1.1590 + // Work out the mid point for the search 1.1591 + in_what = xd->plane[0].pre[0].buf; 1.1592 + bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1.1593 + 1.1594 + best_mv->as_mv.row = ref_row; 1.1595 + best_mv->as_mv.col = ref_col; 1.1596 + 1.1597 + // Baseline value at the centre 1.1598 + bestsad = fn_ptr->sdf(what, what_stride, 1.1599 + bestaddress, in_what_stride, 0x7fffffff) 1.1600 + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1.1601 + mvjsadcost, mvsadcost, sad_per_bit); 1.1602 + 1.1603 + // Apply further limits to prevent us looking using vectors that stretch 1.1604 + // beyond the UMV border 
1.1605 + col_min = MAX(col_min, x->mv_col_min); 1.1606 + col_max = MIN(col_max, x->mv_col_max); 1.1607 + row_min = MAX(row_min, x->mv_row_min); 1.1608 + row_max = MIN(row_max, x->mv_row_max); 1.1609 + 1.1610 + for (r = row_min; r < row_max; r++) { 1.1611 + this_mv.as_mv.row = r; 1.1612 + check_here = r * mv_stride + in_what + col_min; 1.1613 + c = col_min; 1.1614 + 1.1615 + while ((c + 2) < col_max) { 1.1616 + int i; 1.1617 + 1.1618 + fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1.1619 + 1.1620 + for (i = 0; i < 3; i++) { 1.1621 + thissad = sad_array[i]; 1.1622 + 1.1623 + if (thissad < bestsad) { 1.1624 + this_mv.as_mv.col = c; 1.1625 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1626 + mvjsadcost, mvsadcost, sad_per_bit); 1.1627 + 1.1628 + if (thissad < bestsad) { 1.1629 + bestsad = thissad; 1.1630 + best_mv->as_mv.row = r; 1.1631 + best_mv->as_mv.col = c; 1.1632 + bestaddress = check_here; 1.1633 + } 1.1634 + } 1.1635 + 1.1636 + check_here++; 1.1637 + c++; 1.1638 + } 1.1639 + } 1.1640 + 1.1641 + while (c < col_max) { 1.1642 + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1.1643 + bestsad); 1.1644 + 1.1645 + if (thissad < bestsad) { 1.1646 + this_mv.as_mv.col = c; 1.1647 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1648 + mvjsadcost, mvsadcost, sad_per_bit); 1.1649 + 1.1650 + if (thissad < bestsad) { 1.1651 + bestsad = thissad; 1.1652 + best_mv->as_mv.row = r; 1.1653 + best_mv->as_mv.col = c; 1.1654 + bestaddress = check_here; 1.1655 + } 1.1656 + } 1.1657 + 1.1658 + check_here++; 1.1659 + c++; 1.1660 + } 1.1661 + } 1.1662 + 1.1663 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1664 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1665 + 1.1666 + if (bestsad < INT_MAX) 1.1667 + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1.1668 + (unsigned int *)(&thissad)) + 1.1669 + mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1.1670 + mvjcost, mvcost, x->errorperbit); 
1.1671 + else 1.1672 + return INT_MAX; 1.1673 +} 1.1674 + 1.1675 +int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, 1.1676 + int sad_per_bit, int distance, 1.1677 + vp9_variance_fn_ptr_t *fn_ptr, 1.1678 + int *mvjcost, int *mvcost[2], 1.1679 + int_mv *center_mv, int n) { 1.1680 + const MACROBLOCKD* const xd = &x->e_mbd; 1.1681 + uint8_t *what = x->plane[0].src.buf; 1.1682 + int what_stride = x->plane[0].src.stride; 1.1683 + uint8_t *in_what; 1.1684 + int in_what_stride = xd->plane[0].pre[0].stride; 1.1685 + int mv_stride = xd->plane[0].pre[0].stride; 1.1686 + uint8_t *bestaddress; 1.1687 + int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1.1688 + int_mv this_mv; 1.1689 + unsigned int bestsad = INT_MAX; 1.1690 + int r, c; 1.1691 + 1.1692 + uint8_t *check_here; 1.1693 + unsigned int thissad; 1.1694 + 1.1695 + int ref_row = ref_mv->as_mv.row; 1.1696 + int ref_col = ref_mv->as_mv.col; 1.1697 + 1.1698 + int row_min = ref_row - distance; 1.1699 + int row_max = ref_row + distance; 1.1700 + int col_min = ref_col - distance; 1.1701 + int col_max = ref_col + distance; 1.1702 + 1.1703 + DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); 1.1704 + unsigned int sad_array[3]; 1.1705 + int_mv fcenter_mv; 1.1706 + 1.1707 + int *mvjsadcost = x->nmvjointsadcost; 1.1708 + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1.1709 + 1.1710 + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1.1711 + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1.1712 + 1.1713 + // Work out the mid point for the search 1.1714 + in_what = xd->plane[0].pre[0].buf; 1.1715 + bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1.1716 + 1.1717 + best_mv->as_mv.row = ref_row; 1.1718 + best_mv->as_mv.col = ref_col; 1.1719 + 1.1720 + // Baseline value at the centre 1.1721 + bestsad = fn_ptr->sdf(what, what_stride, 1.1722 + bestaddress, in_what_stride, 0x7fffffff) 1.1723 + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1.1724 + mvjsadcost, mvsadcost, 
sad_per_bit); 1.1725 + 1.1726 + // Apply further limits to prevent us looking using vectors that stretch 1.1727 + // beyond the UMV border 1.1728 + col_min = MAX(col_min, x->mv_col_min); 1.1729 + col_max = MIN(col_max, x->mv_col_max); 1.1730 + row_min = MAX(row_min, x->mv_row_min); 1.1731 + row_max = MIN(row_max, x->mv_row_max); 1.1732 + 1.1733 + for (r = row_min; r < row_max; r++) { 1.1734 + this_mv.as_mv.row = r; 1.1735 + check_here = r * mv_stride + in_what + col_min; 1.1736 + c = col_min; 1.1737 + 1.1738 + while ((c + 7) < col_max) { 1.1739 + int i; 1.1740 + 1.1741 + fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1.1742 + 1.1743 + for (i = 0; i < 8; i++) { 1.1744 + thissad = (unsigned int)sad_array8[i]; 1.1745 + 1.1746 + if (thissad < bestsad) { 1.1747 + this_mv.as_mv.col = c; 1.1748 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1749 + mvjsadcost, mvsadcost, sad_per_bit); 1.1750 + 1.1751 + if (thissad < bestsad) { 1.1752 + bestsad = thissad; 1.1753 + best_mv->as_mv.row = r; 1.1754 + best_mv->as_mv.col = c; 1.1755 + bestaddress = check_here; 1.1756 + } 1.1757 + } 1.1758 + 1.1759 + check_here++; 1.1760 + c++; 1.1761 + } 1.1762 + } 1.1763 + 1.1764 + while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1.1765 + int i; 1.1766 + 1.1767 + fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1.1768 + 1.1769 + for (i = 0; i < 3; i++) { 1.1770 + thissad = sad_array[i]; 1.1771 + 1.1772 + if (thissad < bestsad) { 1.1773 + this_mv.as_mv.col = c; 1.1774 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1775 + mvjsadcost, mvsadcost, sad_per_bit); 1.1776 + 1.1777 + if (thissad < bestsad) { 1.1778 + bestsad = thissad; 1.1779 + best_mv->as_mv.row = r; 1.1780 + best_mv->as_mv.col = c; 1.1781 + bestaddress = check_here; 1.1782 + } 1.1783 + } 1.1784 + 1.1785 + check_here++; 1.1786 + c++; 1.1787 + } 1.1788 + } 1.1789 + 1.1790 + while (c < col_max) { 1.1791 + thissad = fn_ptr->sdf(what, what_stride, 
check_here, in_what_stride, 1.1792 + bestsad); 1.1793 + 1.1794 + if (thissad < bestsad) { 1.1795 + this_mv.as_mv.col = c; 1.1796 + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1.1797 + mvjsadcost, mvsadcost, sad_per_bit); 1.1798 + 1.1799 + if (thissad < bestsad) { 1.1800 + bestsad = thissad; 1.1801 + best_mv->as_mv.row = r; 1.1802 + best_mv->as_mv.col = c; 1.1803 + bestaddress = check_here; 1.1804 + } 1.1805 + } 1.1806 + 1.1807 + check_here++; 1.1808 + c++; 1.1809 + } 1.1810 + } 1.1811 + 1.1812 + this_mv.as_mv.row = best_mv->as_mv.row * 8; 1.1813 + this_mv.as_mv.col = best_mv->as_mv.col * 8; 1.1814 + 1.1815 + if (bestsad < INT_MAX) 1.1816 + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1.1817 + (unsigned int *)(&thissad)) + 1.1818 + mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1.1819 + mvjcost, mvcost, x->errorperbit); 1.1820 + else 1.1821 + return INT_MAX; 1.1822 +}

/* Refines a motion vector by repeatedly stepping to the best of its four
 * immediate neighbours (row - 1, col - 1, col + 1, row + 1).
 *
 * Starting from *ref_mv, every iteration evaluates fn_ptr->sdf() plus
 * mvsad_err_cost() for each neighbour that lies strictly inside the
 * x->mv_{row,col}_{min,max} window and moves *ref_mv to the cheapest one.
 * The loop ends after search_range iterations, or earlier as soon as no
 * neighbour beats the current best.
 *
 *   x              Supplies the source plane (x->plane[0].src), the reference
 *                  plane (x->e_mbd.plane[0].pre[0]), the MV window, the SAD
 *                  cost tables and errorperbit.
 *   ref_mv         In/out. Start position; overwritten with the refined
 *                  position. It is used directly as a row/column offset into
 *                  the reference buffer.
 *   error_per_bit  Weight handed to mvsad_err_cost() during the search.
 *   search_range   Maximum number of refinement iterations.
 *   fn_ptr         Provides sdf (SAD) and vf (variance).
 *   mvjcost,
 *   mvcost         Cost tables handed to mv_err_cost() for the return value.
 *   center_mv      MV that costs are measured against. It is shifted right by
 *                  3 for the SAD-cost comparisons and used unshifted for the
 *                  final mv_err_cost().
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * MV (multiplied by 8) weighted by x->errorperbit, or INT_MAX when bestsad is
 * not below INT_MAX.
 */
int vp9_refining_search_sad_c(MACROBLOCK *x,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
  const MACROBLOCKD* const xd = &x->e_mbd;
  // Offsets are { row, col }.
  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
  int i, j;
  int this_row_offset, this_col_offset;

  int what_stride = x->plane[0].src.stride;
  int in_what_stride = xd->plane[0].pre[0].stride;
  uint8_t *what = x->plane[0].src.buf;
  uint8_t *best_address = xd->plane[0].pre[0].buf +
                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
                          ref_mv->as_mv.col;
  uint8_t *check_here;
  unsigned int thissad;
  int_mv this_mv;
  unsigned int bestsad = INT_MAX;
  int_mv fcenter_mv;

  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

  // The SAD-cost comparisons use center_mv with its low 3 bits dropped.
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  // Cost of the starting position.
  bestsad = fn_ptr->sdf(what, what_stride, best_address,
                        in_what_stride, 0x7fffffff) +
            mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
                           mvjsadcost, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; i++) {
    int best_site = -1;

    for (j = 0; j < 4; j++) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      // Only candidates strictly inside the MV window are evaluated.
      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        // NOTE(review): bestsad is passed as the last sdf argument,
        // presumably as an early-out bound - confirm against the sdf
        // implementations.
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
                              bestsad);

        // Add the MV cost only when the raw SAD alone is already better.
        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
                                    mvjsadcost, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      // No neighbour improved on the current position.
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  if (bestsad < INT_MAX)
    // thissad only receives the second output of vf here; it is not read
    // afterwards.
    return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
                      &thissad) +
           mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
                       mvjcost, mvcost, x->errorperbit);
  else
    return INT_MAX;
}

/* Same refinement as vp9_refining_search_sad_c(), but when all four
 * neighbours of the current position lie strictly inside the MV window their
 * SADs are obtained with a single fn_ptr->sdx4df() call. Otherwise each
 * in-window neighbour is evaluated individually with fn_ptr->sdf().
 *
 *   x              Supplies the source plane (x->plane[0].src), the reference
 *                  plane (x->e_mbd.plane[0].pre[0]), the MV window, the SAD
 *                  cost tables and errorperbit.
 *   ref_mv         In/out. Start position; overwritten with the refined
 *                  position.
 *   error_per_bit  Weight handed to mvsad_err_cost() during the search.
 *   search_range   Maximum number of refinement iterations.
 *   fn_ptr         Provides sdf, sdx4df (four SADs at once) and vf.
 *   mvjcost,
 *   mvcost         Cost tables handed to mv_err_cost() for the return value.
 *   center_mv      MV that costs are measured against (shifted right by 3 for
 *                  the SAD-cost comparisons).
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * MV (multiplied by 8) weighted by x->errorperbit, or INT_MAX when bestsad is
 * not below INT_MAX.
 */
int vp9_refining_search_sadx4(MACROBLOCK *x,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
  const MACROBLOCKD* const xd = &x->e_mbd;
  // Offsets are { row, col }; the order matches block_offset[] below.
  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
  int i, j;
  int this_row_offset, this_col_offset;

  int what_stride = x->plane[0].src.stride;
  int in_what_stride = xd->plane[0].pre[0].stride;
  uint8_t *what = x->plane[0].src.buf;
  uint8_t *best_address = xd->plane[0].pre[0].buf +
                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
                          ref_mv->as_mv.col;
  uint8_t *check_here;
  unsigned int thissad;
  int_mv this_mv;
  unsigned int bestsad = INT_MAX;
  int_mv fcenter_mv;

  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

  // The SAD-cost comparisons use center_mv with its low 3 bits dropped.
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  // Cost of the starting position.
  bestsad = fn_ptr->sdf(what, what_stride, best_address,
                        in_what_stride, 0x7fffffff) +
            mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
                           mvjsadcost, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; i++) {
    int best_site = -1;
    // Non-zero only when every one of the four neighbours is strictly inside
    // the MV window.
    int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
                 ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
                 ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
                 ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      unsigned char const *block_offset[4];
      block_offset[0] = best_address - in_what_stride;
      block_offset[1] = best_address - 1;
      block_offset[2] = best_address + 1;
      block_offset[3] = best_address + in_what_stride;

      // One call fills sad_array[] for all four candidate addresses.
      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                     sad_array);

      for (j = 0; j < 4; j++) {
        // Add the MV cost only when the raw SAD alone is already better.
        if (sad_array[j] < bestsad) {
          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
          sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
                                         mvjsadcost, mvsadcost, error_per_bit);

          if (sad_array[j] < bestsad) {
            bestsad = sad_array[j];
            best_site = j;
          }
        }
      }
    } else {
      // At least one neighbour is outside the window: test them one by one.
      for (j = 0; j < 4; j++) {
        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                       best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
                                bestsad);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
                                      mvjsadcost, mvsadcost, error_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = j;
            }
          }
        }
      }
    }

    if (best_site == -1) {
      // No neighbour improved on the current position.
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  if (bestsad < INT_MAX)
    return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
                      &thissad) +
           mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
                       mvjcost, mvcost, x->errorperbit);
  else
    return INT_MAX;
}

/* This function is called when we do joint motion search in comp_inter_inter
 * mode.
 *
 * Refines *ref_mv by repeatedly stepping to the best of its eight immediate
 * neighbours (the four edge-adjacent ones followed by the four diagonals).
 * Candidates are scored with fn_ptr->sdaf(), which additionally receives
 * second_pred, plus mvsad_err_cost(). Only candidates strictly inside the
 * x->mv_{row,col}_{min,max} window are evaluated. The loop ends after
 * search_range iterations, or earlier when no neighbour improves.
 *
 *   x              Supplies the source plane (x->plane[0].src), the reference
 *                  plane (x->e_mbd.plane[0].pre[0]), the MV window, the SAD
 *                  cost tables and errorperbit.
 *   ref_mv         In/out. Start position; overwritten with the refined
 *                  position.
 *   error_per_bit  Weight handed to mvsad_err_cost() during the search.
 *   search_range   Maximum number of refinement iterations.
 *   fn_ptr         Provides sdaf and svaf.
 *   mvjcost,
 *   mvcost         Cost tables handed to mv_err_cost() for the return value.
 *   center_mv      MV that costs are measured against (shifted right by 3 for
 *                  the SAD-cost comparisons).
 *   second_pred    Second prediction block, forwarded to sdaf and svaf.
 *   w, h           Not referenced in this function body.
 *
 * Returns fn_ptr->svaf() (called with both offsets 0) at the final position
 * plus mv_err_cost() of the final MV (multiplied by 8) weighted by
 * x->errorperbit, or INT_MAX when bestsad is not below INT_MAX.
 */
int vp9_refining_search_8p_c(MACROBLOCK *x,
                             int_mv *ref_mv, int error_per_bit,
                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
                             int *mvjcost, int *mvcost[2], int_mv *center_mv,
                             const uint8_t *second_pred, int w, int h) {
  const MACROBLOCKD* const xd = &x->e_mbd;
  // Offsets are { row, col }.
  const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
                           {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
  int i, j;
  int this_row_offset, this_col_offset;

  int what_stride = x->plane[0].src.stride;
  int in_what_stride = xd->plane[0].pre[0].stride;
  uint8_t *what = x->plane[0].src.buf;
  uint8_t *best_address = xd->plane[0].pre[0].buf +
                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
                          ref_mv->as_mv.col;
  uint8_t *check_here;
  unsigned int thissad;
  int_mv this_mv;
  unsigned int bestsad = INT_MAX;
  int_mv fcenter_mv;

  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

  // The SAD-cost comparisons use center_mv with its low 3 bits dropped.
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Get compound pred by averaging two pred blocks. */
  bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
                         second_pred, 0x7fffffff) +
            mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
                           mvjsadcost, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; i++) {
    int best_site = -1;

    for (j = 0; j < 8; j++) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      // Only candidates strictly inside the MV window are evaluated.
      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;

        /* Get compound block and use it to calculate SAD. */
        thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
                               second_pred, bestsad);

        // Add the MV cost only when the raw SAD alone is already better.
        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
                                    mvjsadcost, mvsadcost, error_per_bit);
          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      // No neighbour improved on the current position.
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  if (bestsad < INT_MAX) {
    // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
    // so we don't have to use the subpixel with xoff=0,yoff=0 here.
    return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride,
                        &thissad, second_pred) +
           mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
                       mvjcost, mvcost, x->errorperbit);
  } else {
    return INT_MAX;
  }
}