media/libvpx/vp9/encoder/vp9_mcomp.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp9/encoder/vp9_mcomp.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2110 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS.  All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +#include <limits.h>
    1.15 +#include <math.h>
    1.16 +#include <stdio.h>
    1.17 +
    1.18 +#include "./vpx_config.h"
    1.19 +
    1.20 +#include "vpx_mem/vpx_mem.h"
    1.21 +
    1.22 +#include "vp9/common/vp9_findnearmv.h"
    1.23 +#include "vp9/common/vp9_common.h"
    1.24 +
    1.25 +#include "vp9/encoder/vp9_onyx_int.h"
    1.26 +#include "vp9/encoder/vp9_mcomp.h"
    1.27 +
    1.28 +// #define NEW_DIAMOND_SEARCH
    1.29 +
    1.30 +// Shrinks the mv limits stored in x to their intersection with the window of
    1.31 +// +/- MAX_FULL_PEL_VAL around mv, which leaves the diamond search with fewer
    1.32 +// bounds checks. The low three bits of each mv component are its fraction.
    1.33 +void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) {
    1.34 +  const int full_col = mv->col >> 3;
    1.35 +  const int full_row = mv->row >> 3;
    1.36 +  // The lower bounds move up by one when mv has a fractional part.
    1.37 +  const int lo_col = full_col - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
    1.38 +  const int lo_row = full_row - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
    1.39 +  const int hi_col = full_col + MAX_FULL_PEL_VAL;
    1.40 +  const int hi_row = full_row + MAX_FULL_PEL_VAL;
    1.41 +
    1.42 +  if (lo_col > x->mv_col_min) x->mv_col_min = lo_col;
    1.43 +  if (hi_col < x->mv_col_max) x->mv_col_max = hi_col;
    1.44 +  if (lo_row > x->mv_row_min) x->mv_row_min = lo_row;
    1.45 +  if (hi_row < x->mv_row_max) x->mv_row_max = hi_row;
    1.46 +}
    1.47 +
    1.48 +// Picks the starting step index for a search of the given size (treated as
    1.49 +// at least 16), then applies the speed-feature adjustments from cpi->sf.
    1.50 +int vp9_init_search_range(VP9_COMP *cpi, int size) {
    1.51 +  const int span = MAX(16, size);  // minimum search size, whatever was passed
    1.52 +  int step = 0;
    1.53 +
    1.54 +  // Count the doublings span needs to reach MAX_FULL_PEL_VAL ...
    1.55 +  for (; (span << step) < MAX_FULL_PEL_VAL; ++step) {
    1.56 +  }
    1.57 +  // ... then back off by one, without going below zero.
    1.58 +  if (step != 0)
    1.59 +    --step;
    1.60 +
    1.61 +  step += cpi->sf.reduce_first_step_size;
    1.62 +  return MIN(step, (cpi->sf.max_step_search_steps - 2));
    1.63 +}
    1.64 +
    1.65 +static INLINE int mv_cost(const MV *mv, const int *joint_cost,
    1.66 +                          int *comp_cost[2]) {  // joint + row + column cost
    1.67 +  const int joint = joint_cost[vp9_get_mv_joint(mv)];  // cost of mv's joint
    1.68 +  return joint + comp_cost[0][mv->row] + comp_cost[1][mv->col];
    1.69 +}
    1.70 +
    1.71 +// mv_cost of (mv - ref) times weight, through ROUND_POWER_OF_TWO(..., 7).
    1.72 +int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
    1.73 +                    int *mvcost[2], int weight) {
    1.74 +  const MV delta = { mv->row - ref->row, mv->col - ref->col };
    1.75 +  return ROUND_POWER_OF_TWO(mv_cost(&delta, mvjcost, mvcost) * weight, 7);
    1.76 +}
    1.77 +
    1.78 +// Cost of mv relative to ref weighted by error_per_bit; 0 if mvcost is NULL.
    1.79 +static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
    1.80 +                       int *mvcost[2], int error_per_bit) {
    1.81 +  if (!mvcost) {
    1.82 +    return 0;
    1.83 +  } else {
    1.84 +    const MV delta = { mv->row - ref->row, mv->col - ref->col };
    1.85 +    const int bits = mv_cost(&delta, mvjcost, mvcost);
    1.86 +    return ROUND_POWER_OF_TWO(bits * error_per_bit, 13);
    1.87 +  }
    1.88 +}
    1.89 +
    1.90 +// SAD-domain cost of mv relative to ref; 0 if mvsadcost is NULL.
    1.91 +static int mvsad_err_cost(const MV *mv, const MV *ref, const int *mvjsadcost,
    1.92 +                          int *mvsadcost[2], int error_per_bit) {
    1.93 +  if (!mvsadcost) {
    1.94 +    return 0;
    1.95 +  } else {
    1.96 +    const MV delta = { mv->row - ref->row, mv->col - ref->col };
    1.97 +    const int bits = mv_cost(&delta, mvjsadcost, mvsadcost);
    1.98 +    return ROUND_POWER_OF_TWO(bits * error_per_bit, 8);
    1.99 +  }
   1.100 +}
   1.101 +
   1.102 +// Builds the 4-point search-site table in x->ss: the centre site followed,
   1.103 +// for each step length from MAX_FIRST_STEP halving down to 1, by sites at
   1.104 +// -len rows, +len rows, -len columns and +len columns. Every offset equals
   1.105 +// row * stride + col for its site.
   1.106 +void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
   1.107 +  // Unit displacements in the order the sites are stored for each step.
   1.108 +  static const int row_dir[4] = { -1, 1, 0, 0 };
   1.109 +  static const int col_dir[4] = { 0, 0, -1, 1 };
   1.110 +  int count = 0;
   1.111 +  int len;
   1.112 +
   1.113 +  // Site 0 is the search centre.
   1.114 +  x->ss[count].mv.row = 0;
   1.115 +  x->ss[count].mv.col = 0;
   1.116 +  x->ss[count].offset = 0;
   1.117 +  ++count;
   1.118 +
   1.119 +  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
   1.120 +    int dir;
   1.121 +    for (dir = 0; dir < 4; ++dir) {
   1.122 +      search_site *const site = &x->ss[count++];
   1.123 +      const int d_row = row_dir[dir] * len;
   1.124 +      const int d_col = col_dir[dir] * len;
   1.125 +      site->mv.row = d_row;
   1.126 +      site->mv.col = d_col;
   1.127 +      site->offset = d_row * stride + d_col;
   1.128 +    }
   1.129 +  }
   1.130 +
   1.131 +  x->ss_count = count;
   1.132 +  x->searches_per_step = 4;
   1.133 +}
   1.141 +
   1.142 +// Builds the 8-point table in x->ss: the centre, then per step length (from
   1.143 +// MAX_FIRST_STEP halving to 1) four axis-aligned and four diagonal sites.
   1.144 +void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
   1.145 +  int n_sites = 1;  // slot 0 is the centre, filled in just below
   1.146 +  int len;
   1.147 +
   1.148 +  x->ss[0].mv.row = 0;
   1.149 +  x->ss[0].mv.col = 0;
   1.150 +  x->ss[0].offset = 0;
   1.151 +  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
   1.152 +    const MV step_mvs[8] = {
   1.153 +      {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
   1.154 +      {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
   1.155 +    };
   1.156 +    int k;
   1.157 +    for (k = 0; k < 8; ++k, ++n_sites) {
   1.158 +      x->ss[n_sites].mv = step_mvs[k];
   1.159 +      x->ss[n_sites].offset = step_mvs[k].row * stride + step_mvs[k].col;
   1.160 +    }
   1.161 +  }
   1.162 +  x->ss_count = n_sites;
   1.163 +  x->searches_per_step = 8;
   1.164 +}
   1.165 +
   1.166 +/*
   1.167 + * To avoid the penalty for crossing cache-line read, preload the reference
   1.168 + * area in a small buffer, which is aligned to make sure there won't be crossing
   1.169 + * cache-line read while reading from this buffer. This reduced the cpu
   1.170 + * cycles spent on reading ref data in sub-pixel filter functions.
   1.171 + * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
   1.172 + * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
   1.173 + * could reduce the area.
   1.174 + */
   1.175 +
   1.176 +/* Estimated rate cost of motion vector (r, c) relative to (rr, rc); evaluates to 0 when mvcost is NULL. */
   1.177 +#define MVC(r, c)  /* needs rr, rc, mvjcost, mvcost, error_per_bit */ \
   1.178 +    (mvcost ?                                           \
   1.179 +     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
   1.180 +       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
   1.181 +      error_per_bit + 4096) >> 13 : 0)
   1.182 +
   1.183 +
   1.184 +#define SP(x) (((x) & 7) << 1)  // low three bits of a motion vector component,
   1.185 +                                // doubled; DIST passes this to the svf/svaf call
   1.186 +
   1.187 +#define IFMVCV(r, c, s, e)  /* run s when (r, c) is within the limits */ \
   1.188 +    if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
   1.189 +      s                                                   \
   1.190 +    else  /* otherwise run e */                           \
   1.191 +      e;
   1.192 +
   1.193 +/* Pointer into the reference buffer for (r, c) >> 3; y already includes offset, so offset is subtracted back out. */
   1.194 +#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
   1.195 +
   1.196 +/* Value returned by vfp->svf for the predictor at (r, c) against source z; the call is also given &sse. */
   1.197 +#define DIST(r, c) \
   1.198 +    vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)
   1.199 +
   1.200 +/* Scores (r, c) into v and, if it beats besterr, records it as the new best; out-of-range points get v = INT_MAX. */
   1.201 +#define CHECK_BETTER(v, r, c) \
   1.202 +    IFMVCV(r, c, {                                                       \
   1.203 +      thismse = (DIST(r, c));                                            \
   1.204 +      if ((v = MVC(r, c) + thismse) < besterr) {  /* mv cost + DIST */   \
   1.205 +        besterr = v;                                                     \
   1.206 +        br = r;                                                          \
   1.207 +        bc = c;                                                          \
   1.208 +        *distortion = thismse;                                           \
   1.209 +        *sse1 = sse;                                                     \
   1.210 +      }                                                                  \
   1.211 +    },                                                                   \
   1.212 +    v = INT_MAX;)  /* not evaluated: v compares as worse than any score */
   1.213 +
   1.214 +#define FIRST_LEVEL_CHECKS  /* probe around (tr, tc) at distance hstep */ \
   1.215 +  {                                                     \
   1.216 +    unsigned int left, right, up, down, diag;           \
   1.217 +    CHECK_BETTER(left, tr, tc - hstep);                 \
   1.218 +    CHECK_BETTER(right, tr, tc + hstep);                \
   1.219 +    CHECK_BETTER(up, tr - hstep, tc);                   \
   1.220 +    CHECK_BETTER(down, tr + hstep, tc);                 \
   1.221 +    whichdir = (left < right ? 0 : 1) +  /* bit 0 set: right not worse */ \
   1.222 +               (up < down ? 0 : 2);      /* bit 1 set: down not worse */  \
   1.223 +    switch (whichdir) {  /* probe the single diagonal on those two sides */ \
   1.224 +      case 0:                                           \
   1.225 +        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
   1.226 +        break;                                          \
   1.227 +      case 1:                                           \
   1.228 +        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
   1.229 +        break;                                          \
   1.230 +      case 2:                                           \
   1.231 +        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
   1.232 +        break;                                          \
   1.233 +      case 3:                                           \
   1.234 +        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
   1.235 +        break;                                          \
   1.236 +    }                                                   \
   1.237 +  }
   1.238 +
   1.239 +#define SECOND_LEVEL_CHECKS  /* extra probes chosen by how (br, bc) moved from (tr, tc) */ \
   1.240 +  {                                                     \
   1.241 +    int kr, kc;                                         \
   1.242 +    unsigned int second;                                \
   1.243 +    if (tr != br && tc != bc) {  /* best moved in both row and column */ \
   1.244 +      kr = br - tr;                                     \
   1.245 +      kc = bc - tc;                                     \
   1.246 +      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
   1.247 +      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
   1.248 +    } else if (tr == br && tc != bc) {  /* best moved in column only */ \
   1.249 +      kc = bc - tc;                                     \
   1.250 +      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
   1.251 +      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
   1.252 +      switch (whichdir) {  /* whichdir as left by FIRST_LEVEL_CHECKS */ \
   1.253 +        case 0:                                         \
   1.254 +        case 1:                                         \
   1.255 +          CHECK_BETTER(second, tr + hstep, tc + kc);    \
   1.256 +          break;                                        \
   1.257 +        case 2:                                         \
   1.258 +        case 3:                                         \
   1.259 +          CHECK_BETTER(second, tr - hstep, tc + kc);    \
   1.260 +          break;                                        \
   1.261 +      }                                                 \
   1.262 +    } else if (tr != br && tc == bc) {  /* best moved in row only */ \
   1.263 +      kr = br - tr;                                     \
   1.264 +      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
   1.265 +      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
   1.266 +      switch (whichdir) {                               \
   1.267 +        case 0:                                         \
   1.268 +        case 2:                                         \
   1.269 +          CHECK_BETTER(second, tr + kr, tc + hstep);    \
   1.270 +          break;                                        \
   1.271 +        case 1:                                         \
   1.272 +        case 3:                                         \
   1.273 +          CHECK_BETTER(second, tr + kr, tc - hstep);    \
   1.274 +          break;                                        \
   1.275 +      }                                                 \
   1.276 +    }  /* best still at (tr, tc): nothing further is probed */ \
   1.277 +  }
   1.278 +
   1.279 +int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,  // refines a full-pel mv to sub-pel by repeated probing
   1.280 +                                      MV *bestmv, const MV *ref_mv,  // bestmv: in full-pel, out scaled by 8; ref_mv: cost reference
   1.281 +                                      int allow_hp,  // permits the third (hstep == 1) stage
   1.282 +                                      int error_per_bit,  // weight of the mv cost term
   1.283 +                                      const vp9_variance_fn_ptr_t *vfp,  // provides vf and svf
   1.284 +                                      int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
   1.285 +                                      int iters_per_step,  // max probing passes per stage
   1.286 +                                      int *mvjcost, int *mvcost[2],  // cost tables; mvcost may be NULL
   1.287 +                                      int *distortion,  // out: vf/svf value of the best point
   1.288 +                                      unsigned int *sse1) {  // out: sse of the best point
   1.289 +  uint8_t *z = x->plane[0].src.buf;
   1.290 +  int src_stride = x->plane[0].src.stride;
   1.291 +  MACROBLOCKD *xd = &x->e_mbd;
   1.292 +
   1.293 +  unsigned int besterr = INT_MAX;
   1.294 +  unsigned int sse;
   1.295 +  unsigned int whichdir;
   1.296 +  unsigned int halfiters = iters_per_step;
   1.297 +  unsigned int quarteriters = iters_per_step;
   1.298 +  unsigned int eighthiters = iters_per_step;
   1.299 +  int thismse;
   1.300 +
   1.301 +  const int y_stride = xd->plane[0].pre[0].stride;
   1.302 +  const int offset = bestmv->row * y_stride + bestmv->col;  // bestmv is still full-pel here
   1.303 +  uint8_t *y = xd->plane[0].pre[0].buf + offset;
   1.304 +
   1.305 +  int rr = ref_mv->row;
   1.306 +  int rc = ref_mv->col;
   1.307 +  int br = bestmv->row * 8;
   1.308 +  int bc = bestmv->col * 8;
   1.309 +  int hstep = 4;  // half-pel step in the scaled-by-8 units
   1.310 +  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
   1.311 +  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
   1.312 +  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
   1.313 +  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
   1.314 +
   1.315 +  int tr = br;
   1.316 +  int tc = bc;
   1.317 +
   1.318 +  // central mv
   1.319 +  bestmv->row *= 8;  // multiply rather than <<: left-shifting a negative value is undefined
   1.320 +  bestmv->col *= 8;  // same form as br/bc above and the other sub-pixel searches
   1.321 +
   1.322 +  // calculate central point error
   1.323 +  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
   1.324 +  *distortion = besterr;
   1.325 +  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   1.326 +
   1.327 +  // TODO(jbb): Each subsequent iteration checks at least one point in
   1.328 +  // common with the last iteration (could be 2 if diagonal is selected).
   1.329 +  while (halfiters--) {
   1.330 +    // 1/2 pel
   1.331 +    FIRST_LEVEL_CHECKS;
   1.332 +    // no reason to check the same one again.
   1.333 +    if (tr == br && tc == bc)
   1.334 +      break;
   1.335 +    tr = br;
   1.336 +    tc = bc;
   1.337 +  }
   1.338 +
   1.339 +  // TODO(yaowu): Each subsequent iteration checks at least one point in common
   1.340 +  // with the last iteration (could be 2 if diagonal is selected).
   1.341 +
   1.342 +  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
   1.343 +  if (forced_stop != 2) {
   1.344 +    hstep >>= 1;  // 1/4 pel
   1.345 +    while (quarteriters--) {
   1.346 +      FIRST_LEVEL_CHECKS;
   1.347 +      // no reason to check the same one again.
   1.348 +      if (tr == br && tc == bc)
   1.349 +        break;
   1.350 +      tr = br;
   1.351 +      tc = bc;
   1.352 +    }
   1.353 +  }
   1.354 +
   1.355 +  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
   1.356 +    hstep >>= 1;  // 1/8 pel
   1.357 +    while (eighthiters--) {
   1.358 +      FIRST_LEVEL_CHECKS;
   1.359 +      // no reason to check the same one again.
   1.360 +      if (tr == br && tc == bc)
   1.361 +        break;
   1.362 +      tr = br;
   1.363 +      tc = bc;
   1.364 +    }
   1.365 +  }
   1.366 +
   1.367 +  bestmv->row = br;
   1.368 +  bestmv->col = bc;
   1.369 +
   1.370 +  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||  // result too far from ref_mv:
   1.371 +      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
   1.372 +    return INT_MAX;  // report it as unusable
   1.373 +
   1.374 +  return besterr;  // best vf/svf value plus its mv cost
   1.375 +}
   1.376 +
   1.377 +int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,  // refines a full-pel mv to sub-pel, one pass per stage
   1.378 +                                 MV *bestmv, const MV *ref_mv,  // bestmv: in full-pel, out scaled by 8; ref_mv: cost reference
   1.379 +                                 int allow_hp,  // permits the third (hstep == 1) stage
   1.380 +                                 int error_per_bit,  // weight of the mv cost term
   1.381 +                                 const vp9_variance_fn_ptr_t *vfp,  // provides vf and svf
   1.382 +                                 int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
   1.383 +                                 int iters_per_step,  // values above 1 enable SECOND_LEVEL_CHECKS
   1.384 +                                 int *mvjcost, int *mvcost[2],  // cost tables; mvcost may be NULL
   1.385 +                                 int *distortion,  // out: vf/svf value of the best point
   1.386 +                                 unsigned int *sse1) {  // out: sse of the best point
   1.387 +  uint8_t *z = x->plane[0].src.buf;
   1.388 +  const int src_stride = x->plane[0].src.stride;
   1.389 +  MACROBLOCKD *xd = &x->e_mbd;
   1.390 +  unsigned int besterr = INT_MAX;
   1.391 +  unsigned int sse;
   1.392 +  unsigned int whichdir;
   1.393 +  int thismse;
   1.394 +  unsigned int halfiters = iters_per_step;
   1.395 +  unsigned int quarteriters = iters_per_step;
   1.396 +  unsigned int eighthiters = iters_per_step;
   1.397 +
   1.398 +  const int y_stride = xd->plane[0].pre[0].stride;
   1.399 +  const int offset = bestmv->row * y_stride + bestmv->col;  // bestmv is still full-pel here
   1.400 +  uint8_t *y = xd->plane[0].pre[0].buf + offset;
   1.401 +
   1.402 +  int rr = ref_mv->row;
   1.403 +  int rc = ref_mv->col;
   1.404 +  int br = bestmv->row * 8;
   1.405 +  int bc = bestmv->col * 8;
   1.406 +  int hstep = 4;  // half-pel step in the scaled-by-8 units
   1.407 +  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
   1.408 +  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
   1.409 +  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
   1.410 +  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
   1.411 +
   1.412 +  int tr = br;
   1.413 +  int tc = bc;
   1.414 +
   1.415 +  // central mv
   1.416 +  bestmv->row *= 8;
   1.417 +  bestmv->col *= 8;
   1.418 +
   1.419 +  // calculate central point error
   1.420 +  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
   1.421 +  *distortion = besterr;
   1.422 +  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   1.423 +
   1.424 +  // 1/2 pel
   1.425 +  FIRST_LEVEL_CHECKS;
   1.426 +  if (halfiters > 1) {
   1.427 +    SECOND_LEVEL_CHECKS;
   1.428 +  }
   1.429 +  tr = br;  // the next stage probes around the best point so far
   1.430 +  tc = bc;
   1.431 +
   1.432 +  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
   1.433 +  if (forced_stop != 2) {
   1.434 +    hstep >>= 1;  // 1/4 pel
   1.435 +    FIRST_LEVEL_CHECKS;
   1.436 +    if (quarteriters > 1) {
   1.437 +      SECOND_LEVEL_CHECKS;
   1.438 +    }
   1.439 +    tr = br;
   1.440 +    tc = bc;
   1.441 +  }
   1.442 +
   1.443 +  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
   1.444 +    hstep >>= 1;  // 1/8 pel
   1.445 +    FIRST_LEVEL_CHECKS;
   1.446 +    if (eighthiters > 1) {
   1.447 +      SECOND_LEVEL_CHECKS;
   1.448 +    }
   1.449 +    tr = br;
   1.450 +    tc = bc;
   1.451 +  }
   1.452 +
   1.453 +  bestmv->row = br;
   1.454 +  bestmv->col = bc;
   1.455 +
   1.456 +  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||  // result too far from ref_mv:
   1.457 +      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
   1.458 +    return INT_MAX;  // report it as unusable
   1.459 +
   1.460 +  return besterr;  // best vf/svf value plus its mv cost
   1.461 +}
   1.462 +
   1.463 +#undef DIST
   1.464 +/* Redefinition of DIST for the functions below: calls vfp->svaf, which additionally receives second_pred. */
   1.465 +#define DIST(r, c) \
   1.466 +    vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
   1.467 +              z, src_stride, &sse, second_pred)
   1.468 +
   1.469 +int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,  // sub-pel refinement scored together with second_pred
   1.470 +                                           MV *bestmv, const MV *ref_mv,  // bestmv: in full-pel, out scaled by 8; ref_mv: cost reference
   1.471 +                                           int allow_hp,  // permits the third (hstep == 1) stage
   1.472 +                                           int error_per_bit,  // weight of the mv cost term
   1.473 +                                           const vp9_variance_fn_ptr_t *vfp,  // provides vf and svaf
   1.474 +                                           int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
   1.475 +                                           int iters_per_step,  // max probing passes per stage
   1.476 +                                           int *mvjcost, int *mvcost[2],  // cost tables; mvcost may be NULL
   1.477 +                                           int *distortion,  // out: vf/svaf value of the best point
   1.478 +                                           unsigned int *sse1,  // out: sse of the best point
   1.479 +                                           const uint8_t *second_pred,  // other predictor, given to comp_avg_pred and svaf
   1.480 +                                           int w, int h) {  // dimensions given to comp_avg_pred; w is also comp_pred's stride
   1.481 +  uint8_t *const z = x->plane[0].src.buf;
   1.482 +  const int src_stride = x->plane[0].src.stride;
   1.483 +  MACROBLOCKD *const xd = &x->e_mbd;
   1.484 +
   1.485 +  unsigned int besterr = INT_MAX;
   1.486 +  unsigned int sse;
   1.487 +  unsigned int whichdir;
   1.488 +  unsigned int halfiters = iters_per_step;
   1.489 +  unsigned int quarteriters = iters_per_step;
   1.490 +  unsigned int eighthiters = iters_per_step;
   1.491 +  int thismse;
   1.492 +
   1.493 +  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);  // scratch for the centre-point prediction
   1.494 +  const int y_stride = xd->plane[0].pre[0].stride;
   1.495 +  const int offset = bestmv->row * y_stride + bestmv->col;  // bestmv is still full-pel here
   1.496 +  uint8_t *const y = xd->plane[0].pre[0].buf + offset;
   1.497 +
   1.498 +  int rr = ref_mv->row;
   1.499 +  int rc = ref_mv->col;
   1.500 +  int br = bestmv->row * 8;
   1.501 +  int bc = bestmv->col * 8;
   1.502 +  int hstep = 4;  // half-pel step in the scaled-by-8 units
   1.503 +  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
   1.504 +  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
   1.505 +  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
   1.506 +  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
   1.507 +
   1.508 +  int tr = br;
   1.509 +  int tc = bc;
   1.510 +
   1.511 +  // central mv
   1.512 +  bestmv->row *= 8;
   1.513 +  bestmv->col *= 8;
   1.514 +
   1.515 +  // calculate central point error
   1.516 +  // TODO(yunqingwang): central pointer error was already calculated in full-
   1.517 +  // pixel search, and can be passed in this function.
   1.518 +  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);  // presumably averages second_pred with y - confirm
   1.519 +  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
   1.520 +  *distortion = besterr;
   1.521 +  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   1.522 +
   1.523 +  // Each subsequent iteration checks at least one point in
   1.524 +  // common with the last iteration could be 2 ( if diag selected)
   1.525 +  while (halfiters--) {
   1.526 +    // 1/2 pel
   1.527 +    FIRST_LEVEL_CHECKS;
   1.528 +    // no reason to check the same one again.
   1.529 +    if (tr == br && tc == bc)
   1.530 +      break;
   1.531 +    tr = br;
   1.532 +    tc = bc;
   1.533 +  }
   1.534 +
   1.535 +  // Each subsequent iteration checks at least one point in common with
   1.536 +  // the last iteration could be 2 ( if diag selected) 1/4 pel
   1.537 +
   1.538 +  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
   1.539 +  if (forced_stop != 2) {
   1.540 +    hstep >>= 1;  // 1/4 pel
   1.541 +    while (quarteriters--) {
   1.542 +      FIRST_LEVEL_CHECKS;
   1.543 +      // no reason to check the same one again.
   1.544 +      if (tr == br && tc == bc)
   1.545 +        break;
   1.546 +      tr = br;
   1.547 +      tc = bc;
   1.548 +    }
   1.549 +  }
   1.550 +
   1.551 +  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
   1.552 +    hstep >>= 1;  // 1/8 pel
   1.553 +    while (eighthiters--) {
   1.554 +      FIRST_LEVEL_CHECKS;
   1.555 +      // no reason to check the same one again.
   1.556 +      if (tr == br && tc == bc)
   1.557 +        break;
   1.558 +      tr = br;
   1.559 +      tc = bc;
   1.560 +    }
   1.561 +  }
   1.562 +  bestmv->row = br;
   1.563 +  bestmv->col = bc;
   1.564 +
   1.565 +  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||  // result too far from ref_mv:
   1.566 +      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
   1.567 +    return INT_MAX;  // report it as unusable
   1.568 +
   1.569 +  return besterr;  // best vf/svaf value plus its mv cost
   1.570 +}
   1.571 +
   1.572 +int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,  // one-pass-per-stage sub-pel refinement scored with second_pred
   1.573 +                                      MV *bestmv, const MV *ref_mv,  // bestmv: in full-pel, out scaled by 8; ref_mv: cost reference
   1.574 +                                      int allow_hp,  // permits the third (hstep == 1) stage
   1.575 +                                      int error_per_bit,  // weight of the mv cost term
   1.576 +                                      const vp9_variance_fn_ptr_t *vfp,  // provides vf and svaf
   1.577 +                                      int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
   1.578 +                                      int iters_per_step,  // values above 1 enable SECOND_LEVEL_CHECKS
   1.579 +                                      int *mvjcost, int *mvcost[2],  // cost tables; mvcost may be NULL
   1.580 +                                      int *distortion,  // out: vf/svaf value of the best point
   1.581 +                                      unsigned int *sse1,  // out: sse of the best point
   1.582 +                                      const uint8_t *second_pred,  // other predictor, given to comp_avg_pred and svaf
   1.583 +                                      int w, int h) {  // dimensions given to comp_avg_pred; w is also comp_pred's stride
   1.584 +  uint8_t *z = x->plane[0].src.buf;
   1.585 +  const int src_stride = x->plane[0].src.stride;
   1.586 +  MACROBLOCKD *xd = &x->e_mbd;
   1.587 +  unsigned int besterr = INT_MAX;
   1.588 +  unsigned int sse;
   1.589 +  unsigned int whichdir;
   1.590 +  int thismse;
   1.591 +  unsigned int halfiters = iters_per_step;
   1.592 +  unsigned int quarteriters = iters_per_step;
   1.593 +  unsigned int eighthiters = iters_per_step;
   1.594 +
   1.595 +  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);  // scratch for the centre-point prediction
   1.596 +  const int y_stride = xd->plane[0].pre[0].stride;
   1.597 +  const int offset = bestmv->row * y_stride + bestmv->col;  // bestmv is still full-pel here
   1.598 +  uint8_t *y = xd->plane[0].pre[0].buf + offset;
   1.599 +
   1.600 +  int rr = ref_mv->row;
   1.601 +  int rc = ref_mv->col;
   1.602 +  int br = bestmv->row * 8;
   1.603 +  int bc = bestmv->col * 8;
   1.604 +  int hstep = 4;  // half-pel step in the scaled-by-8 units
   1.605 +  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
   1.606 +  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
   1.607 +  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
   1.608 +  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
   1.609 +
   1.610 +  int tr = br;
   1.611 +  int tc = bc;
   1.612 +
   1.613 +  // central mv
   1.614 +  bestmv->row *= 8;
   1.615 +  bestmv->col *= 8;
   1.616 +
   1.617 +  // calculate central point error
   1.618 +  // TODO(yunqingwang): central pointer error was already calculated in full-
   1.619 +  // pixel search, and can be passed in this function.
   1.620 +  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);  // presumably averages second_pred with y - confirm
   1.621 +  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
   1.622 +  *distortion = besterr;
   1.623 +  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   1.624 +
   1.625 +  // Each subsequent iteration checks at least one point in
   1.626 +  // common with the last iteration could be 2 ( if diag selected)
   1.627 +  // 1/2 pel
   1.628 +  FIRST_LEVEL_CHECKS;
   1.629 +  if (halfiters > 1) {
   1.630 +    SECOND_LEVEL_CHECKS;
   1.631 +  }
   1.632 +  tr = br;  // the next stage probes around the best point so far
   1.633 +  tc = bc;
   1.634 +
   1.635 +  // Each subsequent iteration checks at least one point in common with
   1.636 +  // the last iteration could be 2 ( if diag selected) 1/4 pel
   1.637 +
   1.638 +  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
   1.639 +  if (forced_stop != 2) {
   1.640 +    hstep >>= 1;  // 1/4 pel
   1.641 +    FIRST_LEVEL_CHECKS;
   1.642 +    if (quarteriters > 1) {
   1.643 +      SECOND_LEVEL_CHECKS;
   1.644 +    }
   1.645 +    tr = br;
   1.646 +    tc = bc;
   1.647 +  }
   1.648 +
   1.649 +  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
   1.650 +    hstep >>= 1;  // 1/8 pel
   1.651 +    FIRST_LEVEL_CHECKS;
   1.652 +    if (eighthiters > 1) {
   1.653 +      SECOND_LEVEL_CHECKS;
   1.654 +    }
   1.655 +    tr = br;
   1.656 +    tc = bc;
   1.657 +  }
   1.658 +  bestmv->row = br;
   1.659 +  bestmv->col = bc;
   1.660 +
   1.661 +  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||  // result too far from ref_mv:
   1.662 +      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
   1.663 +    return INT_MAX;  // report it as unusable
   1.664 +
   1.665 +  return besterr;  // best vf/svaf value plus its mv cost
   1.666 +}
   1.667 +
   1.668 +#undef MVC
   1.669 +#undef PRE
   1.670 +#undef DIST
   1.671 +#undef IFMVCV
   1.672 +#undef CHECK_BETTER
   1.673 +#undef SP
   1.674 +
   1.675 +#define CHECK_BOUNDS(range)  /* all_in = 1 iff (br, bc) +/- range stays within x's mv limits */ \
   1.676 +  {\
   1.677 +    all_in = 1;\
   1.678 +    all_in &= ((br-range) >= x->mv_row_min);\
   1.679 +    all_in &= ((br+range) <= x->mv_row_max);\
   1.680 +    all_in &= ((bc-range) >= x->mv_col_min);\
   1.681 +    all_in &= ((bc+range) <= x->mv_col_max);\
   1.682 +  }
   1.683 +
   1.684 +#define CHECK_POINT  /* 'continue's the enclosing loop when this_mv is outside x's mv limits */ \
   1.685 +  {\
   1.686 +    if (this_mv.col < x->mv_col_min) continue;\
   1.687 +    if (this_mv.col > x->mv_col_max) continue;\
   1.688 +    if (this_mv.row < x->mv_row_min) continue;\
   1.689 +    if (this_mv.row > x->mv_row_max) continue;\
   1.690 +  }
   1.691 +
   1.692 +#define CHECK_BETTER  /* adopts thissad / site i as the best when it beats bestsad */ \
   1.693 +  {\
   1.694 +    if (thissad < bestsad)  /* add the mv cost only if the raw value already wins */ \
   1.695 +    {\
   1.696 +      if (use_mvcost) \
   1.697 +        thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \
   1.698 +                                  mvjsadcost, mvsadcost, \
   1.699 +                                  sad_per_bit);\
   1.700 +      if (thissad < bestsad)  /* compare again with the cost included */ \
   1.701 +      {\
   1.702 +        bestsad = thissad;\
   1.703 +        best_site = i;\
   1.704 +      }\
   1.705 +    }\
   1.706 +  }
   1.707 +
   1.708 +#define get_next_chkpts(list, i, n)  /* list[0..2] = i-1, i, i+1, wrapping within 0..n-1 */ \
   1.709 +    list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);  \
   1.710 +    list[1] = (i);                             \
   1.711 +    list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);  /* NOTE: expands to three statements, not one block */
   1.712 +
   1.713 +#define MAX_PATTERN_SCALES         11  // number of scales in a search pattern
   1.714 +#define MAX_PATTERN_CANDIDATES      8  // max number of candidates per scale
   1.715 +#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates
   1.716 +
   1.717 +// Generic pattern search function that searches over multiple scales.
   1.718 +// Each scale can have a different number of candidates and shape of
   1.719 +// candidates as indicated in the num_candidates and candidates arrays
   1.720 +// passed into this function
          //
          // Parameters:
          //   x               encoder macroblock; supplies the source plane, the
          //                   reference plane (via e_mbd), the MV window limits and
          //                   the MV cost tables.
          //   ref_mv          start position; clamped IN PLACE to the MV window. It
          //                   is used directly as a pixel offset into the reference.
          //   search_param    index into search_param_to_steps; 0 selects the
          //                   largest starting scale (10), 10 the smallest (0).
          //   sad_per_bit     weight handed to mvsad_err_cost().
          //   do_init_search  when non-zero, every scale from 0 up to the starting
          //                   scale is probed around the start point first and the
          //                   search continues from the best point found.
          //   do_refine       when non-zero, finish with up to 16 rounds of a
          //                   4-neighbour (1-away) refinement.
          //   vfp             provides sdf (SAD) and vf (variance) callbacks.
          //   use_mvcost      when non-zero, MV cost is added to candidate SADs and
          //                   to the returned value.
          //   center_mv       MV that costs are measured against; it is shifted
          //                   right by 3 for the SAD-domain cost and compared with
          //                   best_mv * 8 for the returned cost.
          //   best_mv         output: best position found, same units as ref_mv.
          //   num_candidates  number of valid entries per scale in candidates.
          //   candidates      per-scale offsets; the refinement step assumes that
          //                   consecutive entries are neighbours on a closed ring.
          //
          // Returns INT_MAX if the best SAD is still INT_MAX, otherwise the
          // variance at the best position plus (if use_mvcost) its MV cost.
   1.721 +static int vp9_pattern_search(MACROBLOCK *x,
   1.722 +                              MV *ref_mv,
   1.723 +                              int search_param,
   1.724 +                              int sad_per_bit,
   1.725 +                              int do_init_search,
   1.726 +                              int do_refine,
   1.727 +                              const vp9_variance_fn_ptr_t *vfp,
   1.728 +                              int use_mvcost,
   1.729 +                              const MV *center_mv, MV *best_mv,
   1.730 +                              const int num_candidates[MAX_PATTERN_SCALES],
   1.731 +                              const MV candidates[MAX_PATTERN_SCALES]
   1.732 +                                                 [MAX_PATTERN_CANDIDATES]) {
   1.733 +  const MACROBLOCKD* const xd = &x->e_mbd;
   1.734 +  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
   1.735 +    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
   1.736 +  };
   1.737 +  int i, j, s, t;
   1.738 +  uint8_t *what = x->plane[0].src.buf;
   1.739 +  int what_stride = x->plane[0].src.stride;
   1.740 +  int in_what_stride = xd->plane[0].pre[0].stride;
   1.741 +  int br, bc;
   1.742 +  MV this_mv;
   1.743 +  int bestsad = INT_MAX;
   1.744 +  int thissad;
   1.745 +  uint8_t *base_offset;
   1.746 +  uint8_t *this_offset;
   1.747 +  int k = -1;
   1.748 +  int all_in;
   1.749 +  int best_site = -1;
   1.750 +  int_mv fcenter_mv;
   1.751 +  int best_init_s = search_param_to_steps[search_param];
   1.752 +  int *mvjsadcost = x->nmvjointsadcost;
   1.753 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1.754 +
   1.755 +  fcenter_mv.as_mv.row = center_mv->row >> 3;
   1.756 +  fcenter_mv.as_mv.col = center_mv->col >> 3;
   1.757 +
   1.758 +  // adjust ref_mv to make sure it is within MV range
   1.759 +  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1.760 +  br = ref_mv->row;
   1.761 +  bc = ref_mv->col;
   1.762 +
   1.763 +  // Work out the start point for the search
   1.764 +  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
   1.765 +  this_offset = base_offset + (br * in_what_stride) + bc;
   1.766 +  this_mv.row = br;
   1.767 +  this_mv.col = bc;
            // Note: the start point's MV cost is always included here, regardless
            // of use_mvcost.
   1.768 +  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
   1.769 +                + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv,
   1.770 +                                 mvjsadcost, mvsadcost, sad_per_bit);
   1.771 +
   1.772 +  // Search all possible scales up to the search param around the center point
   1.773 +  // pick the scale of the point that is best as the starting scale of
   1.774 +  // further steps around it.
   1.775 +  if (do_init_search) {
   1.776 +    s = best_init_s;
   1.777 +    best_init_s = -1;
   1.778 +    for (t = 0; t <= s; ++t) {
   1.779 +      best_site = -1;
   1.780 +      CHECK_BOUNDS((1 << t))
   1.781 +      if (all_in) {
   1.782 +        for (i = 0; i < num_candidates[t]; i++) {
   1.783 +          this_mv.row = br + candidates[t][i].row;
   1.784 +          this_mv.col = bc + candidates[t][i].col;
   1.785 +          this_offset = base_offset + (this_mv.row * in_what_stride) +
   1.786 +                                       this_mv.col;
   1.787 +          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.788 +                             bestsad);
   1.789 +          CHECK_BETTER
   1.790 +        }
   1.791 +      } else {
   1.792 +        for (i = 0; i < num_candidates[t]; i++) {
   1.793 +          this_mv.row = br + candidates[t][i].row;
   1.794 +          this_mv.col = bc + candidates[t][i].col;
   1.795 +          CHECK_POINT
   1.796 +          this_offset = base_offset + (this_mv.row * in_what_stride) +
   1.797 +                                       this_mv.col;
   1.798 +          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.799 +                             bestsad);
   1.800 +          CHECK_BETTER
   1.801 +        }
   1.802 +      }
                // bestsad carries over between scales, so the last scale that
                // produced an improvement ends up in best_init_s / k.
   1.803 +      if (best_site == -1) {
   1.804 +        continue;
   1.805 +      } else {
   1.806 +        best_init_s = t;
   1.807 +        k = best_site;
   1.808 +      }
   1.809 +    }
   1.810 +    if (best_init_s != -1) {
   1.811 +      br += candidates[best_init_s][k].row;
   1.812 +      bc += candidates[best_init_s][k].col;
   1.813 +    }
   1.814 +  }
   1.815 +
   1.816 +  // If the center point is still the best, just skip this and move to
   1.817 +  // the refinement step.
   1.818 +  if (best_init_s != -1) {
   1.819 +    s = best_init_s;
   1.820 +    best_site = -1;
   1.821 +    do {
   1.822 +      // No need to search all 6 points the 1st time if initial search was used
   1.823 +      if (!do_init_search || s != best_init_s) {
   1.824 +        CHECK_BOUNDS((1 << s))
   1.825 +        if (all_in) {
   1.826 +          for (i = 0; i < num_candidates[s]; i++) {
   1.827 +            this_mv.row = br + candidates[s][i].row;
   1.828 +            this_mv.col = bc + candidates[s][i].col;
   1.829 +            this_offset = base_offset + (this_mv.row * in_what_stride) +
   1.830 +                                         this_mv.col;
   1.831 +            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.832 +                               bestsad);
   1.833 +            CHECK_BETTER
   1.834 +          }
   1.835 +        } else {
   1.836 +          for (i = 0; i < num_candidates[s]; i++) {
   1.837 +            this_mv.row = br + candidates[s][i].row;
   1.838 +            this_mv.col = bc + candidates[s][i].col;
   1.839 +            CHECK_POINT
   1.840 +            this_offset = base_offset + (this_mv.row * in_what_stride) +
   1.841 +                                         this_mv.col;
   1.842 +            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.843 +                               bestsad);
   1.844 +            CHECK_BETTER
   1.845 +          }
   1.846 +        }
   1.847 +
                  // `continue` in a do/while jumps to the `while (s--)` test, i.e.
                  // nothing improved at this scale so drop to the next smaller one.
   1.848 +        if (best_site == -1) {
   1.849 +          continue;
   1.850 +        } else {
   1.851 +          br += candidates[s][best_site].row;
   1.852 +          bc += candidates[s][best_site].col;
   1.853 +          k = best_site;
   1.854 +        }
   1.855 +      }
   1.856 +
                // Keep stepping at this scale, each time testing only the three
                // candidates around the direction (index k) of the last move.
   1.857 +      do {
   1.858 +        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
   1.859 +        best_site = -1;
   1.860 +        CHECK_BOUNDS((1 << s))
   1.861 +
   1.862 +        get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
   1.863 +        if (all_in) {
   1.864 +          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1.865 +            this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
   1.866 +            this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
   1.867 +            this_offset = base_offset + (this_mv.row * (in_what_stride)) +
   1.868 +                                         this_mv.col;
   1.869 +            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.870 +                               bestsad);
   1.871 +            CHECK_BETTER
   1.872 +          }
   1.873 +        } else {
   1.874 +          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1.875 +            this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
   1.876 +            this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
   1.877 +            CHECK_POINT
   1.878 +            this_offset = base_offset + (this_mv.row * (in_what_stride)) +
   1.879 +                                         this_mv.col;
   1.880 +            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.881 +                               bestsad);
   1.882 +            CHECK_BETTER
   1.883 +          }
   1.884 +        }
   1.885 +
   1.886 +        if (best_site != -1) {
   1.887 +          k = next_chkpts_indices[best_site];
   1.888 +          br += candidates[s][k].row;
   1.889 +          bc += candidates[s][k].col;
   1.890 +        }
   1.891 +      } while (best_site != -1);
   1.892 +    } while (s--);
   1.893 +  }
   1.894 +
   1.895 +  // Check 4 1-away neighbors if do_refine is true.
   1.896 +  // For most well-designed schemes do_refine will not be necessary.
   1.897 +  if (do_refine) {
   1.898 +    static const MV neighbors[4] = {
   1.899 +      {0, -1}, { -1, 0}, {1, 0}, {0, 1},
   1.900 +    };
   1.901 +    for (j = 0; j < 16; j++) {
   1.902 +      best_site = -1;
   1.903 +      CHECK_BOUNDS(1)
   1.904 +      if (all_in) {
   1.905 +        for (i = 0; i < 4; i++) {
   1.906 +          this_mv.row = br + neighbors[i].row;
   1.907 +          this_mv.col = bc + neighbors[i].col;
   1.908 +          this_offset = base_offset + (this_mv.row * (in_what_stride)) +
   1.909 +                                       this_mv.col;
   1.910 +          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.911 +                             bestsad);
   1.912 +          CHECK_BETTER
   1.913 +        }
   1.914 +      } else {
   1.915 +        for (i = 0; i < 4; i++) {
   1.916 +          this_mv.row = br + neighbors[i].row;
   1.917 +          this_mv.col = bc + neighbors[i].col;
   1.918 +          CHECK_POINT
   1.919 +          this_offset = base_offset + (this_mv.row * (in_what_stride)) +
   1.920 +                                       this_mv.col;
   1.921 +          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
   1.922 +                             bestsad);
   1.923 +          CHECK_BETTER
   1.924 +        }
   1.925 +      }
   1.926 +
   1.927 +      if (best_site == -1) {
   1.928 +        break;
   1.929 +      } else {
   1.930 +        br += neighbors[best_site].row;
   1.931 +        bc += neighbors[best_site].col;
   1.932 +      }
   1.933 +    }
   1.934 +  }
   1.935 +
   1.936 +  best_mv->row = br;
   1.937 +  best_mv->col = bc;
   1.938 +
   1.939 +  this_offset = base_offset + (best_mv->row * in_what_stride) +
   1.940 +                               best_mv->col;
   1.941 +  this_mv.row = best_mv->row * 8;
   1.942 +  this_mv.col = best_mv->col * 8;
   1.943 +  if (bestsad == INT_MAX)
   1.944 +    return INT_MAX;
   1.945 +
            // The conditional must be parenthesised: `a + b ? c : d` parses as
            // `(a + b) ? c : d`, which would discard the variance entirely and
            // return only the MV cost (or 0).
   1.946 +  return vfp->vf(what, what_stride, this_offset, in_what_stride,
   1.947 +                 (unsigned int *)&bestsad) +
   1.948 +         (use_mvcost ? mv_err_cost(&this_mv, center_mv,
   1.949 +                                   x->nmvjointcost, x->mvcost, x->errorperbit)
   1.950 +                     : 0);
   1.951 +}
   1.952 +
   1.953 +
   1.954 +int vp9_hex_search(MACROBLOCK *x,
   1.955 +                   MV *ref_mv,
   1.956 +                   int search_param,
   1.957 +                   int sad_per_bit,
   1.958 +                   int do_init_search,
   1.959 +                   const vp9_variance_fn_ptr_t *vfp,
   1.960 +                   int use_mvcost,
   1.961 +                   const MV *center_mv, MV *best_mv) {
            // Hexagon-shaped instance of vp9_pattern_search(): forwards every
            // argument unchanged, passes 0 for do_refine, and returns the pattern
            // search's result. Rows with only six initialisers leave their last
            // two entries zero-initialised; the matching count of 6 keeps them
            // from being visited. Entry order matters: the refinement step picks
            // index k-1, k, k+1 cyclically, so neighbours in the array must be
            // neighbours on the hexagon.
   1.962 +  // First scale has 8-closest points, the rest have 6 points in hex shape
   1.963 +  // at increasing scales
   1.964 +  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
   1.965 +    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
   1.966 +  };
   1.967 +  // Note that the largest candidate step at each scale is 2^scale
   1.968 +  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1.969 +    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
   1.970 +    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
   1.971 +    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
   1.972 +    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
   1.973 +    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
   1.974 +    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
   1.975 +    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
   1.976 +    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
   1.977 +    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
   1.978 +    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
   1.979 +    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
   1.980 +      { -1024, 0}},
   1.981 +  };
   1.982 +  return
   1.983 +      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
   1.984 +                         do_init_search, 0, vfp, use_mvcost,
   1.985 +                         center_mv, best_mv,
   1.986 +                         hex_num_candidates, hex_candidates);
   1.987 +}
   1.988 +
   1.989 +int vp9_bigdia_search(MACROBLOCK *x,
   1.990 +                      MV *ref_mv,
   1.991 +                      int search_param,
   1.992 +                      int sad_per_bit,
   1.993 +                      int do_init_search,
   1.994 +                      const vp9_variance_fn_ptr_t *vfp,
   1.995 +                      int use_mvcost,
   1.996 +                      const MV *center_mv,
   1.997 +                      MV *best_mv) {
            // Big-diamond instance of vp9_pattern_search(): forwards every
            // argument unchanged, passes 0 for do_refine, and returns the pattern
            // search's result. Scale 0 lists only four points; its remaining four
            // table entries are zero-initialised and skipped because the matching
            // count is 4. Entries are listed in ring order, which the cyclic
            // k-1/k/k+1 refinement in the pattern search depends on.
   1.998 +  // First scale has 4-closest points, the rest have 8 points in diamond
   1.999 +  // shape at increasing scales
  1.1000 +  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
  1.1001 +    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1.1002 +  };
  1.1003 +  // Note that the largest candidate step at each scale is 2^scale
  1.1004 +  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
  1.1005 +                                   [MAX_PATTERN_CANDIDATES] = {
  1.1006 +    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
  1.1007 +    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
  1.1008 +    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
  1.1009 +    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
  1.1010 +    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
  1.1011 +    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
  1.1012 +      {-16, 16}, {-32, 0}},
  1.1013 +    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
  1.1014 +      {-32, 32}, {-64, 0}},
  1.1015 +    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
  1.1016 +      {-64, 64}, {-128, 0}},
  1.1017 +    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
  1.1018 +      {-128, 128}, {-256, 0}},
  1.1019 +    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
  1.1020 +      {-256, 256}, {-512, 0}},
  1.1021 +    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
  1.1022 +      {-512, 512}, {-1024, 0}},
  1.1023 +  };
  1.1024 +  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
  1.1025 +                            do_init_search, 0, vfp, use_mvcost,
  1.1026 +                            center_mv, best_mv,
  1.1027 +                            bigdia_num_candidates, bigdia_candidates);
  1.1028 +}
  1.1029 +
  1.1030 +int vp9_square_search(MACROBLOCK *x,
  1.1031 +                      MV *ref_mv,
  1.1032 +                      int search_param,
  1.1033 +                      int sad_per_bit,
  1.1034 +                      int do_init_search,
  1.1035 +                      const vp9_variance_fn_ptr_t *vfp,
  1.1036 +                      int use_mvcost,
  1.1037 +                      const MV *center_mv,
  1.1038 +                      MV *best_mv) {
            // Square-shaped instance of vp9_pattern_search(): forwards every
            // argument unchanged, passes 0 for do_refine, and returns the pattern
            // search's result. Every scale uses all eight table entries, listed
            // in ring order around the square (the pattern search's cyclic
            // k-1/k/k+1 refinement depends on that order).
  1.1039 +  // All scales have 8 closest points in square shape
  1.1040 +  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
  1.1041 +    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1.1042 +  };
  1.1043 +  // Note that the largest candidate step at each scale is 2^scale
  1.1044 +  static const MV square_candidates[MAX_PATTERN_SCALES]
  1.1045 +                                   [MAX_PATTERN_CANDIDATES] = {
  1.1046 +    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
  1.1047 +    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
  1.1048 +    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
  1.1049 +    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
  1.1050 +    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
  1.1051 +      {-16, 16}, {-16, 0}},
  1.1052 +    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
  1.1053 +      {-32, 32}, {-32, 0}},
  1.1054 +    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
  1.1055 +      {-64, 64}, {-64, 0}},
  1.1056 +    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
  1.1057 +      {-128, 128}, {-128, 0}},
  1.1058 +    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
  1.1059 +      {-256, 256}, {-256, 0}},
  1.1060 +    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
  1.1061 +      {-512, 512}, {-512, 0}},
  1.1062 +    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
  1.1063 +      {0, 1024}, {-1024, 1024}, {-1024, 0}},
  1.1064 +  };
  1.1065 +  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
  1.1066 +                            do_init_search, 0, vfp, use_mvcost,
  1.1067 +                            center_mv, best_mv,
  1.1068 +                            square_num_candidates, square_candidates);
  1.1069 +}
  1.1070 +
  1.1071 +#undef CHECK_BOUNDS  // helper macros used only by the pattern search above;
  1.1072 +#undef CHECK_POINT   // note that get_next_chkpts is NOT undefined here and
  1.1073 +#undef CHECK_BETTER  // therefore stays visible to the rest of the file
  1.1074 +
  1.1075 +int vp9_diamond_search_sad_c(MACROBLOCK *x,
  1.1076 +                             int_mv *ref_mv, int_mv *best_mv,
  1.1077 +                             int search_param, int sad_per_bit, int *num00,
  1.1078 +                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
  1.1079 +                             int *mvcost[2], int_mv *center_mv) {
            // Step-wise diamond search that evaluates one SAD per candidate.
            //
            //   ref_mv        start position; clamped IN PLACE to the MV window and
            //                 used directly as a pixel offset into the reference.
            //   best_mv       output: best position found (starts as ref_mv).
            //   search_param  number of leading steps of x->ss to skip; each step
            //                 holds x->searches_per_step sites.
            //   sad_per_bit   weight handed to mvsad_err_cost().
            //   num00         output: reset to 0, then incremented for every step
            //                 that found no better site while the best position was
            //                 still the start point.
            //   fn_ptr        provides sdf (SAD) and vf (variance) callbacks.
            //   mvjcost,
            //   mvcost        cost tables used only for the returned value; the
            //                 search itself uses x->nmvjointsadcost / x->nmvsadcost.
            //   center_mv     MV that costs are measured against (shifted right by
            //                 3 for the SAD-domain cost).
            //
            // Returns INT_MAX if the best SAD is still INT_MAX, otherwise the
            // variance at the best position plus its MV cost.
  1.1080 +  int i, j, step;
  1.1081 +
  1.1082 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1083 +  uint8_t *what = x->plane[0].src.buf;
  1.1084 +  int what_stride = x->plane[0].src.stride;
  1.1085 +  uint8_t *in_what;
  1.1086 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1087 +  uint8_t *best_address;
  1.1088 +
  1.1089 +  int tot_steps;
  1.1090 +  int_mv this_mv;
  1.1091 +
  1.1092 +  int bestsad = INT_MAX;
  1.1093 +  int best_site = 0;
  1.1094 +  int last_site = 0;
  1.1095 +
  1.1096 +  int ref_row, ref_col;
  1.1097 +  int this_row_offset, this_col_offset;
  1.1098 +  search_site *ss;
  1.1099 +
  1.1100 +  uint8_t *check_here;
  1.1101 +  int thissad;
  1.1102 +  int_mv fcenter_mv;
  1.1103 +
  1.1104 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1105 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1106 +
  1.1107 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1108 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1109 +
  1.1110 +  clamp_mv(&ref_mv->as_mv,
  1.1111 +           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1.1112 +  ref_row = ref_mv->as_mv.row;
  1.1113 +  ref_col = ref_mv->as_mv.col;
  1.1114 +  *num00 = 0;
  1.1115 +  best_mv->as_mv.row = ref_row;
  1.1116 +  best_mv->as_mv.col = ref_col;
  1.1117 +
  1.1118 +  // Work out the start point for the search
  1.1119 +  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
  1.1120 +                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
  1.1121 +  best_address = in_what;
  1.1122 +
  1.1123 +  // Check the starting position
  1.1124 +  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
  1.1125 +                + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
  1.1126 +                                 mvjsadcost, mvsadcost, sad_per_bit);
  1.1127 +
  1.1128 +  // search_param determines the length of the initial step and hence the number
  1.1129 +  // of iterations
  1.1130 +  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
  1.1131 +  // (MAX_FIRST_STEP/4) pel... etc.
  1.1132 +  ss = &x->ss[search_param * x->searches_per_step];
  1.1133 +  tot_steps = (x->ss_count / x->searches_per_step) - search_param;
  1.1134 +
            // i runs continuously over the site list across all steps and starts
            // at 1, so ss[0] is never tested (presumably the centre site, which
            // was already evaluated above -- TODO confirm against the site setup).
  1.1135 +  i = 1;
  1.1136 +
  1.1137 +  for (step = 0; step < tot_steps; step++) {
  1.1138 +    for (j = 0; j < x->searches_per_step; j++) {
  1.1139 +      // Trap illegal vectors
  1.1140 +      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
  1.1141 +      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
  1.1142 +
                // Strict comparisons: candidates exactly on the window edge are
                // rejected here.
  1.1143 +      if ((this_col_offset > x->mv_col_min) &&
  1.1144 +          (this_col_offset < x->mv_col_max) &&
  1.1145 +          (this_row_offset > x->mv_row_min) &&
  1.1146 +          (this_row_offset < x->mv_row_max)) {
  1.1147 +        check_here = ss[i].offset + best_address;
  1.1148 +        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1149 +                              bestsad);
  1.1150 +
                  // Only pay for the MV cost lookup when the raw SAD already wins.
  1.1151 +        if (thissad < bestsad) {
  1.1152 +          this_mv.as_mv.row = this_row_offset;
  1.1153 +          this_mv.as_mv.col = this_col_offset;
  1.1154 +          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1155 +                                    mvjsadcost, mvsadcost, sad_per_bit);
  1.1156 +
  1.1157 +          if (thissad < bestsad) {
  1.1158 +            bestsad = thissad;
  1.1159 +            best_site = i;
  1.1160 +          }
  1.1161 +        }
  1.1162 +      }
  1.1163 +
  1.1164 +      i++;
  1.1165 +    }
  1.1166 +
              // best_site only changes when this step found an improvement; in that
              // case re-centre on the winning site before the next step.
  1.1167 +    if (best_site != last_site) {
  1.1168 +      best_mv->as_mv.row += ss[best_site].mv.row;
  1.1169 +      best_mv->as_mv.col += ss[best_site].mv.col;
  1.1170 +      best_address += ss[best_site].offset;
  1.1171 +      last_site = best_site;
  1.1172 +#if defined(NEW_DIAMOND_SEARCH)
                // Optional variant: keep moving in the same direction for as long
                // as that keeps improving the cost.
  1.1173 +      while (1) {
  1.1174 +        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
  1.1175 +        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
  1.1176 +        if ((this_col_offset > x->mv_col_min) &&
  1.1177 +            (this_col_offset < x->mv_col_max) &&
  1.1178 +            (this_row_offset > x->mv_row_min) &&
  1.1179 +            (this_row_offset < x->mv_row_max)) {
  1.1180 +          check_here = ss[best_site].offset + best_address;
  1.1181 +          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1182 +                                bestsad);
  1.1183 +          if (thissad < bestsad) {
  1.1184 +            this_mv.as_mv.row = this_row_offset;
  1.1185 +            this_mv.as_mv.col = this_col_offset;
  1.1186 +            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1187 +                                      mvjsadcost, mvsadcost, sad_per_bit);
  1.1188 +            if (thissad < bestsad) {
  1.1189 +              bestsad = thissad;
  1.1190 +              best_mv->as_mv.row += ss[best_site].mv.row;
  1.1191 +              best_mv->as_mv.col += ss[best_site].mv.col;
  1.1192 +              best_address += ss[best_site].offset;
  1.1193 +              continue;
  1.1194 +            }
  1.1195 +          }
  1.1196 +        }
  1.1197 +        break;
  1.1198 +      };
  1.1199 +#endif
  1.1200 +    } else if (best_address == in_what) {
  1.1201 +      (*num00)++;
  1.1202 +    }
  1.1203 +  }
  1.1204 +
  1.1205 +  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1206 +  this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1207 +
  1.1208 +  if (bestsad == INT_MAX)
  1.1209 +    return INT_MAX;
  1.1210 +
            // thissad is reused here purely as the output slot for vf()'s second
            // result; its value is not read afterwards.
  1.1211 +  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
  1.1212 +                    (unsigned int *)(&thissad)) +
  1.1213 +                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.1214 +                                   mvjcost, mvcost, x->errorperbit);
  1.1215 +}
  1.1216 +
  1.1217 +int vp9_diamond_search_sadx4(MACROBLOCK *x,
  1.1218 +                             int_mv *ref_mv, int_mv *best_mv, int search_param,
  1.1219 +                             int sad_per_bit, int *num00,
  1.1220 +                             vp9_variance_fn_ptr_t *fn_ptr,
  1.1221 +                             int *mvjcost, int *mvcost[2], int_mv *center_mv) {
            // Step-wise diamond search that evaluates four SADs per call
            // (fn_ptr->sdx4df) whenever a whole step lies inside the MV window,
            // and falls back to one bounds-checked SAD per candidate otherwise.
            //
            //   ref_mv        start position; clamped IN PLACE to the MV window and
            //                 used directly as a pixel offset into the reference.
            //   best_mv       output: best position found (starts as ref_mv).
            //   search_param  number of leading steps of x->ss to skip; each step
            //                 holds x->searches_per_step sites.
            //   sad_per_bit   weight handed to mvsad_err_cost().
            //   num00         output: reset to 0, then incremented for every step
            //                 that found no better site while the best position was
            //                 still the start point.
            //   fn_ptr        provides sdf, sdx4df (SAD) and vf (variance).
            //   mvjcost,
            //   mvcost        cost tables used only for the returned value.
            //   center_mv     MV that costs are measured against (shifted right by
            //                 3 for the SAD-domain cost).
            //
            // Returns INT_MAX if the best SAD is still INT_MAX, otherwise the
            // variance at the best position plus its MV cost.
  1.1222 +  int i, j, step;
  1.1223 +
  1.1224 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1225 +  uint8_t *what = x->plane[0].src.buf;
  1.1226 +  int what_stride = x->plane[0].src.stride;
  1.1227 +  uint8_t *in_what;
  1.1228 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1229 +  uint8_t *best_address;
  1.1230 +
  1.1231 +  int tot_steps;
  1.1232 +  int_mv this_mv;
  1.1233 +
  1.1234 +  unsigned int bestsad = INT_MAX;
  1.1235 +  int best_site = 0;
  1.1236 +  int last_site = 0;
  1.1237 +
  1.1238 +  int ref_row;
  1.1239 +  int ref_col;
  1.1240 +  int this_row_offset;
  1.1241 +  int this_col_offset;
  1.1242 +  search_site *ss;
  1.1243 +
  1.1244 +  uint8_t *check_here;
  1.1245 +  unsigned int thissad;
  1.1246 +  int_mv fcenter_mv;
  1.1247 +
  1.1248 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1249 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1250 +
  1.1251 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1252 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1253 +
  1.1254 +  clamp_mv(&ref_mv->as_mv,
  1.1255 +           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1.1256 +  ref_row = ref_mv->as_mv.row;
  1.1257 +  ref_col = ref_mv->as_mv.col;
  1.1258 +  *num00 = 0;
  1.1259 +  best_mv->as_mv.row = ref_row;
  1.1260 +  best_mv->as_mv.col = ref_col;
  1.1261 +
  1.1262 +  // Work out the start point for the search
  1.1263 +  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
  1.1264 +                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
  1.1265 +  best_address = in_what;
  1.1266 +
  1.1267 +  // Check the starting position
  1.1268 +  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
  1.1269 +                + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
  1.1270 +                                 mvjsadcost, mvsadcost, sad_per_bit);
  1.1271 +
  1.1272 +  // search_param determines the length of the initial step and hence the number
  1.1273 +  // of iterations.
  1.1274 +  // 0 = initial step (MAX_FIRST_STEP) pel
  1.1275 +  // 1 = (MAX_FIRST_STEP/2) pel,
  1.1276 +  // 2 = (MAX_FIRST_STEP/4) pel...
  1.1277 +  ss = &x->ss[search_param * x->searches_per_step];
  1.1278 +  tot_steps = (x->ss_count / x->searches_per_step) - search_param;
  1.1279 +
            // i runs continuously over the site list across all steps and starts
            // at 1, so ss[0] is never tested (presumably the centre site, which
            // was already evaluated above -- TODO confirm against the site setup).
  1.1280 +  i = 1;
  1.1281 +
  1.1282 +  for (step = 0; step < tot_steps; step++) {
  1.1283 +    int all_in = 1, t;
  1.1284 +
  1.1285 +    // All_in is true if every one of the points we are checking are within
  1.1286 +    // the bounds of the image.
              // NOTE(review): only the first four sites of the step are tested
              // (row of ss[i], ss[i+1]; column of ss[i+2], ss[i+3]); this assumes
              // they are the step's extreme up/down/left/right points -- confirm
              // against the code that builds x->ss.
  1.1287 +    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
  1.1288 +    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
  1.1289 +    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
  1.1290 +    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
  1.1291 +
  1.1292 +    // If all the pixels are within the bounds we don't check whether the
  1.1293 +    // search point is valid in this loop,  otherwise we check each point
  1.1294 +    // for validity..
  1.1295 +    if (all_in) {
  1.1296 +      unsigned int sad_array[4];
  1.1297 +
                // Sites are consumed four at a time; i advances inside the inner
                // t-loop. NOTE(review): assumes searches_per_step is a multiple
                // of 4 -- otherwise i would overshoot the step.
  1.1298 +      for (j = 0; j < x->searches_per_step; j += 4) {
  1.1299 +        unsigned char const *block_offset[4];
  1.1300 +
  1.1301 +        for (t = 0; t < 4; t++)
  1.1302 +          block_offset[t] = ss[i + t].offset + best_address;
  1.1303 +
  1.1304 +        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1.1305 +                       sad_array);
  1.1306 +
  1.1307 +        for (t = 0; t < 4; t++, i++) {
  1.1308 +          if (sad_array[t] < bestsad) {
  1.1309 +            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
  1.1310 +            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
  1.1311 +            sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1312 +                                           mvjsadcost, mvsadcost, sad_per_bit);
  1.1313 +
  1.1314 +            if (sad_array[t] < bestsad) {
  1.1315 +              bestsad = sad_array[t];
  1.1316 +              best_site = i;
  1.1317 +            }
  1.1318 +          }
  1.1319 +        }
  1.1320 +      }
  1.1321 +    } else {
  1.1322 +      for (j = 0; j < x->searches_per_step; j++) {
  1.1323 +        // Trap illegal vectors
  1.1324 +        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
  1.1325 +        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
  1.1326 +
                  // Strict comparisons: candidates exactly on the window edge are
                  // rejected here.
  1.1327 +        if ((this_col_offset > x->mv_col_min) &&
  1.1328 +            (this_col_offset < x->mv_col_max) &&
  1.1329 +            (this_row_offset > x->mv_row_min) &&
  1.1330 +            (this_row_offset < x->mv_row_max)) {
  1.1331 +          check_here = ss[i].offset + best_address;
  1.1332 +          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1333 +                                bestsad);
  1.1334 +
  1.1335 +          if (thissad < bestsad) {
  1.1336 +            this_mv.as_mv.row = this_row_offset;
  1.1337 +            this_mv.as_mv.col = this_col_offset;
  1.1338 +            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1339 +                                      mvjsadcost, mvsadcost, sad_per_bit);
  1.1340 +
  1.1341 +            if (thissad < bestsad) {
  1.1342 +              bestsad = thissad;
  1.1343 +              best_site = i;
  1.1344 +            }
  1.1345 +          }
  1.1346 +        }
  1.1347 +        i++;
  1.1348 +      }
  1.1349 +    }
              // best_site only changes when this step found an improvement; in that
              // case re-centre on the winning site before the next step.
  1.1350 +    if (best_site != last_site) {
  1.1351 +      best_mv->as_mv.row += ss[best_site].mv.row;
  1.1352 +      best_mv->as_mv.col += ss[best_site].mv.col;
  1.1353 +      best_address += ss[best_site].offset;
  1.1354 +      last_site = best_site;
  1.1355 +#if defined(NEW_DIAMOND_SEARCH)
                // Optional variant: keep moving in the same direction for as long
                // as that keeps improving the cost.
  1.1356 +      while (1) {
  1.1357 +        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
  1.1358 +        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
  1.1359 +        if ((this_col_offset > x->mv_col_min) &&
  1.1360 +            (this_col_offset < x->mv_col_max) &&
  1.1361 +            (this_row_offset > x->mv_row_min) &&
  1.1362 +            (this_row_offset < x->mv_row_max)) {
  1.1363 +          check_here = ss[best_site].offset + best_address;
  1.1364 +          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1365 +                                bestsad);
  1.1366 +          if (thissad < bestsad) {
  1.1367 +            this_mv.as_mv.row = this_row_offset;
  1.1368 +            this_mv.as_mv.col = this_col_offset;
  1.1369 +            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1370 +                                      mvjsadcost, mvsadcost, sad_per_bit);
  1.1371 +            if (thissad < bestsad) {
  1.1372 +              bestsad = thissad;
  1.1373 +              best_mv->as_mv.row += ss[best_site].mv.row;
  1.1374 +              best_mv->as_mv.col += ss[best_site].mv.col;
  1.1375 +              best_address += ss[best_site].offset;
  1.1376 +              continue;
  1.1377 +            }
  1.1378 +          }
  1.1379 +        }
  1.1380 +        break;
  1.1381 +      };
  1.1382 +#endif
  1.1383 +    } else if (best_address == in_what) {
  1.1384 +      (*num00)++;
  1.1385 +    }
  1.1386 +  }
  1.1387 +
  1.1388 +  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1389 +  this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1390 +
  1.1391 +  if (bestsad == INT_MAX)
  1.1392 +    return INT_MAX;
  1.1393 +
            // thissad is reused here purely as the output slot for vf()'s second
            // result; its value is not read afterwards.
  1.1394 +  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
  1.1395 +                    (unsigned int *)(&thissad)) +
  1.1396 +                    mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.1397 +                                mvjcost, mvcost, x->errorperbit);
  1.1398 +}
  1.1399 +
  1.1400 +/* do_refine: If last step (1-away) of n-step search doesn't pick the center
  1.1401 +              point as the best match, we will do a final 1-away diamond
  1.1402 +              refining search  */
  1.1403 +
  1.1404 +int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
  1.1405 +                           int_mv *mvp_full, int step_param,
  1.1406 +                           int sadpb, int further_steps,
  1.1407 +                           int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
  1.1408 +                           int_mv *ref_mv, int_mv *dst_mv) {
  1.1409 +  int_mv temp_mv;
  1.1410 +  int thissme, n, num00;
  1.1411 +  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
  1.1412 +                                        step_param, sadpb, &num00,
  1.1413 +                                        fn_ptr, x->nmvjointcost,
  1.1414 +                                        x->mvcost, ref_mv);
  1.1415 +  dst_mv->as_int = temp_mv.as_int;
  1.1416 +
  1.1417 +  n = num00;
  1.1418 +  num00 = 0;
  1.1419 +
  1.1420 +  /* If there won't be more n-step search, check to see if refining search is
  1.1421 +   * needed. */
  1.1422 +  if (n > further_steps)
  1.1423 +    do_refine = 0;
  1.1424 +
  1.1425 +  while (n < further_steps) {
  1.1426 +    n++;
  1.1427 +
  1.1428 +    if (num00) {
  1.1429 +      num00--;
  1.1430 +    } else {
  1.1431 +      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
  1.1432 +                                        step_param + n, sadpb, &num00,
  1.1433 +                                        fn_ptr, x->nmvjointcost, x->mvcost,
  1.1434 +                                        ref_mv);
  1.1435 +
  1.1436 +      /* check to see if refining search is needed. */
  1.1437 +      if (num00 > (further_steps - n))
  1.1438 +        do_refine = 0;
  1.1439 +
  1.1440 +      if (thissme < bestsme) {
  1.1441 +        bestsme = thissme;
  1.1442 +        dst_mv->as_int = temp_mv.as_int;
  1.1443 +      }
  1.1444 +    }
  1.1445 +  }
  1.1446 +
  1.1447 +  /* final 1-away diamond refining search */
  1.1448 +  if (do_refine == 1) {
  1.1449 +    int search_range = 8;
  1.1450 +    int_mv best_mv;
  1.1451 +    best_mv.as_int = dst_mv->as_int;
  1.1452 +    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
  1.1453 +                                       fn_ptr, x->nmvjointcost, x->mvcost,
  1.1454 +                                       ref_mv);
  1.1455 +
  1.1456 +    if (thissme < bestsme) {
  1.1457 +      bestsme = thissme;
  1.1458 +      dst_mv->as_int = best_mv.as_int;
  1.1459 +    }
  1.1460 +  }
  1.1461 +  return bestsme;
  1.1462 +}
  1.1463 +
  1.1464 +int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
  1.1465 +                          int sad_per_bit, int distance,
  1.1466 +                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
  1.1467 +                          int *mvcost[2],
  1.1468 +                          int_mv *center_mv, int n) {
  1.1469 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1470 +  uint8_t *what = x->plane[0].src.buf;
  1.1471 +  int what_stride = x->plane[0].src.stride;
  1.1472 +  uint8_t *in_what;
  1.1473 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1474 +  int mv_stride = xd->plane[0].pre[0].stride;
  1.1475 +  uint8_t *bestaddress;
  1.1476 +  int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
  1.1477 +  int_mv this_mv;
  1.1478 +  int bestsad = INT_MAX;
  1.1479 +  int r, c;
  1.1480 +
  1.1481 +  uint8_t *check_here;
  1.1482 +  int thissad;
  1.1483 +
  1.1484 +  int ref_row = ref_mv->as_mv.row;
  1.1485 +  int ref_col = ref_mv->as_mv.col;
  1.1486 +
  1.1487 +  int row_min = ref_row - distance;
  1.1488 +  int row_max = ref_row + distance;
  1.1489 +  int col_min = ref_col - distance;
  1.1490 +  int col_max = ref_col + distance;
  1.1491 +  int_mv fcenter_mv;
  1.1492 +
  1.1493 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1494 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1495 +
  1.1496 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1497 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1498 +
  1.1499 +  // Work out the mid point for the search
  1.1500 +  in_what = xd->plane[0].pre[0].buf;
  1.1501 +  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
  1.1502 +
  1.1503 +  best_mv->as_mv.row = ref_row;
  1.1504 +  best_mv->as_mv.col = ref_col;
  1.1505 +
  1.1506 +  // Baseline value at the centre
  1.1507 +  bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
  1.1508 +                        in_what_stride, 0x7fffffff)
  1.1509 +                           + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
  1.1510 +                                            mvjsadcost, mvsadcost, sad_per_bit);
  1.1511 +
  1.1512 +  // Apply further limits to prevent us looking using vectors that stretch
  1.1513 +  // beyond the UMV border
  1.1514 +  col_min = MAX(col_min, x->mv_col_min);
  1.1515 +  col_max = MIN(col_max, x->mv_col_max);
  1.1516 +  row_min = MAX(row_min, x->mv_row_min);
  1.1517 +  row_max = MIN(row_max, x->mv_row_max);
  1.1518 +
  1.1519 +  for (r = row_min; r < row_max; r++) {
  1.1520 +    this_mv.as_mv.row = r;
  1.1521 +    check_here = r * mv_stride + in_what + col_min;
  1.1522 +
  1.1523 +    for (c = col_min; c < col_max; c++) {
  1.1524 +      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1525 +                            bestsad);
  1.1526 +
  1.1527 +      this_mv.as_mv.col = c;
  1.1528 +      thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1529 +                                mvjsadcost, mvsadcost, sad_per_bit);
  1.1530 +
  1.1531 +      if (thissad < bestsad) {
  1.1532 +        bestsad = thissad;
  1.1533 +        best_mv->as_mv.row = r;
  1.1534 +        best_mv->as_mv.col = c;
  1.1535 +        bestaddress = check_here;
  1.1536 +      }
  1.1537 +
  1.1538 +      check_here++;
  1.1539 +    }
  1.1540 +  }
  1.1541 +
  1.1542 +  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1543 +  this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1544 +
  1.1545 +  if (bestsad < INT_MAX)
  1.1546 +    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
  1.1547 +                      (unsigned int *)(&thissad)) +
  1.1548 +                      mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.1549 +                                  mvjcost, mvcost, x->errorperbit);
  1.1550 +  else
  1.1551 +    return INT_MAX;
  1.1552 +}
  1.1553 +
  1.1554 +int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
  1.1555 +                          int sad_per_bit, int distance,
  1.1556 +                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
  1.1557 +                          int *mvcost[2], int_mv *center_mv, int n) {
  1.1558 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1559 +  uint8_t *what = x->plane[0].src.buf;
  1.1560 +  int what_stride = x->plane[0].src.stride;
  1.1561 +  uint8_t *in_what;
  1.1562 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1563 +  int mv_stride = xd->plane[0].pre[0].stride;
  1.1564 +  uint8_t *bestaddress;
  1.1565 +  int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
  1.1566 +  int_mv this_mv;
  1.1567 +  unsigned int bestsad = INT_MAX;
  1.1568 +  int r, c;
  1.1569 +
  1.1570 +  uint8_t *check_here;
  1.1571 +  unsigned int thissad;
  1.1572 +
  1.1573 +  int ref_row = ref_mv->as_mv.row;
  1.1574 +  int ref_col = ref_mv->as_mv.col;
  1.1575 +
  1.1576 +  int row_min = ref_row - distance;
  1.1577 +  int row_max = ref_row + distance;
  1.1578 +  int col_min = ref_col - distance;
  1.1579 +  int col_max = ref_col + distance;
  1.1580 +
  1.1581 +  unsigned int sad_array[3];
  1.1582 +  int_mv fcenter_mv;
  1.1583 +
  1.1584 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1585 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1586 +
  1.1587 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1588 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1589 +
  1.1590 +  // Work out the mid point for the search
  1.1591 +  in_what = xd->plane[0].pre[0].buf;
  1.1592 +  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
  1.1593 +
  1.1594 +  best_mv->as_mv.row = ref_row;
  1.1595 +  best_mv->as_mv.col = ref_col;
  1.1596 +
  1.1597 +  // Baseline value at the centre
  1.1598 +  bestsad = fn_ptr->sdf(what, what_stride,
  1.1599 +                        bestaddress, in_what_stride, 0x7fffffff)
  1.1600 +            + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
  1.1601 +                             mvjsadcost, mvsadcost, sad_per_bit);
  1.1602 +
  1.1603 +  // Apply further limits to prevent us looking using vectors that stretch
  1.1604 +  // beyond the UMV border
  1.1605 +  col_min = MAX(col_min, x->mv_col_min);
  1.1606 +  col_max = MIN(col_max, x->mv_col_max);
  1.1607 +  row_min = MAX(row_min, x->mv_row_min);
  1.1608 +  row_max = MIN(row_max, x->mv_row_max);
  1.1609 +
  1.1610 +  for (r = row_min; r < row_max; r++) {
  1.1611 +    this_mv.as_mv.row = r;
  1.1612 +    check_here = r * mv_stride + in_what + col_min;
  1.1613 +    c = col_min;
  1.1614 +
  1.1615 +    while ((c + 2) < col_max) {
  1.1616 +      int i;
  1.1617 +
  1.1618 +      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
  1.1619 +
  1.1620 +      for (i = 0; i < 3; i++) {
  1.1621 +        thissad = sad_array[i];
  1.1622 +
  1.1623 +        if (thissad < bestsad) {
  1.1624 +          this_mv.as_mv.col = c;
  1.1625 +          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1626 +                                    mvjsadcost, mvsadcost, sad_per_bit);
  1.1627 +
  1.1628 +          if (thissad < bestsad) {
  1.1629 +            bestsad = thissad;
  1.1630 +            best_mv->as_mv.row = r;
  1.1631 +            best_mv->as_mv.col = c;
  1.1632 +            bestaddress = check_here;
  1.1633 +          }
  1.1634 +        }
  1.1635 +
  1.1636 +        check_here++;
  1.1637 +        c++;
  1.1638 +      }
  1.1639 +    }
  1.1640 +
  1.1641 +    while (c < col_max) {
  1.1642 +      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1643 +                            bestsad);
  1.1644 +
  1.1645 +      if (thissad < bestsad) {
  1.1646 +        this_mv.as_mv.col = c;
  1.1647 +        thissad  += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1648 +                                   mvjsadcost, mvsadcost, sad_per_bit);
  1.1649 +
  1.1650 +        if (thissad < bestsad) {
  1.1651 +          bestsad = thissad;
  1.1652 +          best_mv->as_mv.row = r;
  1.1653 +          best_mv->as_mv.col = c;
  1.1654 +          bestaddress = check_here;
  1.1655 +        }
  1.1656 +      }
  1.1657 +
  1.1658 +      check_here++;
  1.1659 +      c++;
  1.1660 +    }
  1.1661 +  }
  1.1662 +
  1.1663 +  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1664 +  this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1665 +
  1.1666 +  if (bestsad < INT_MAX)
  1.1667 +    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
  1.1668 +                      (unsigned int *)(&thissad)) +
  1.1669 +                      mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.1670 +                                  mvjcost, mvcost, x->errorperbit);
  1.1671 +  else
  1.1672 +    return INT_MAX;
  1.1673 +}
  1.1674 +
  1.1675 +int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
  1.1676 +                          int sad_per_bit, int distance,
  1.1677 +                          vp9_variance_fn_ptr_t *fn_ptr,
  1.1678 +                          int *mvjcost, int *mvcost[2],
  1.1679 +                          int_mv *center_mv, int n) {
  1.1680 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1681 +  uint8_t *what = x->plane[0].src.buf;
  1.1682 +  int what_stride = x->plane[0].src.stride;
  1.1683 +  uint8_t *in_what;
  1.1684 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1685 +  int mv_stride = xd->plane[0].pre[0].stride;
  1.1686 +  uint8_t *bestaddress;
  1.1687 +  int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
  1.1688 +  int_mv this_mv;
  1.1689 +  unsigned int bestsad = INT_MAX;
  1.1690 +  int r, c;
  1.1691 +
  1.1692 +  uint8_t *check_here;
  1.1693 +  unsigned int thissad;
  1.1694 +
  1.1695 +  int ref_row = ref_mv->as_mv.row;
  1.1696 +  int ref_col = ref_mv->as_mv.col;
  1.1697 +
  1.1698 +  int row_min = ref_row - distance;
  1.1699 +  int row_max = ref_row + distance;
  1.1700 +  int col_min = ref_col - distance;
  1.1701 +  int col_max = ref_col + distance;
  1.1702 +
  1.1703 +  DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
  1.1704 +  unsigned int sad_array[3];
  1.1705 +  int_mv fcenter_mv;
  1.1706 +
  1.1707 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1708 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1709 +
  1.1710 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1711 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1712 +
  1.1713 +  // Work out the mid point for the search
  1.1714 +  in_what = xd->plane[0].pre[0].buf;
  1.1715 +  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
  1.1716 +
  1.1717 +  best_mv->as_mv.row = ref_row;
  1.1718 +  best_mv->as_mv.col = ref_col;
  1.1719 +
  1.1720 +  // Baseline value at the centre
  1.1721 +  bestsad = fn_ptr->sdf(what, what_stride,
  1.1722 +                        bestaddress, in_what_stride, 0x7fffffff)
  1.1723 +            + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
  1.1724 +                             mvjsadcost, mvsadcost, sad_per_bit);
  1.1725 +
  1.1726 +  // Apply further limits to prevent us looking using vectors that stretch
  1.1727 +  // beyond the UMV border
  1.1728 +  col_min = MAX(col_min, x->mv_col_min);
  1.1729 +  col_max = MIN(col_max, x->mv_col_max);
  1.1730 +  row_min = MAX(row_min, x->mv_row_min);
  1.1731 +  row_max = MIN(row_max, x->mv_row_max);
  1.1732 +
  1.1733 +  for (r = row_min; r < row_max; r++) {
  1.1734 +    this_mv.as_mv.row = r;
  1.1735 +    check_here = r * mv_stride + in_what + col_min;
  1.1736 +    c = col_min;
  1.1737 +
  1.1738 +    while ((c + 7) < col_max) {
  1.1739 +      int i;
  1.1740 +
  1.1741 +      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
  1.1742 +
  1.1743 +      for (i = 0; i < 8; i++) {
  1.1744 +        thissad = (unsigned int)sad_array8[i];
  1.1745 +
  1.1746 +        if (thissad < bestsad) {
  1.1747 +          this_mv.as_mv.col = c;
  1.1748 +          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1749 +                                    mvjsadcost, mvsadcost, sad_per_bit);
  1.1750 +
  1.1751 +          if (thissad < bestsad) {
  1.1752 +            bestsad = thissad;
  1.1753 +            best_mv->as_mv.row = r;
  1.1754 +            best_mv->as_mv.col = c;
  1.1755 +            bestaddress = check_here;
  1.1756 +          }
  1.1757 +        }
  1.1758 +
  1.1759 +        check_here++;
  1.1760 +        c++;
  1.1761 +      }
  1.1762 +    }
  1.1763 +
  1.1764 +    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
  1.1765 +      int i;
  1.1766 +
  1.1767 +      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
  1.1768 +
  1.1769 +      for (i = 0; i < 3; i++) {
  1.1770 +        thissad = sad_array[i];
  1.1771 +
  1.1772 +        if (thissad < bestsad) {
  1.1773 +          this_mv.as_mv.col = c;
  1.1774 +          thissad  += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1775 +                                     mvjsadcost, mvsadcost, sad_per_bit);
  1.1776 +
  1.1777 +          if (thissad < bestsad) {
  1.1778 +            bestsad = thissad;
  1.1779 +            best_mv->as_mv.row = r;
  1.1780 +            best_mv->as_mv.col = c;
  1.1781 +            bestaddress = check_here;
  1.1782 +          }
  1.1783 +        }
  1.1784 +
  1.1785 +        check_here++;
  1.1786 +        c++;
  1.1787 +      }
  1.1788 +    }
  1.1789 +
  1.1790 +    while (c < col_max) {
  1.1791 +      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1792 +                            bestsad);
  1.1793 +
  1.1794 +      if (thissad < bestsad) {
  1.1795 +        this_mv.as_mv.col = c;
  1.1796 +        thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1797 +                                  mvjsadcost, mvsadcost, sad_per_bit);
  1.1798 +
  1.1799 +        if (thissad < bestsad) {
  1.1800 +          bestsad = thissad;
  1.1801 +          best_mv->as_mv.row = r;
  1.1802 +          best_mv->as_mv.col = c;
  1.1803 +          bestaddress = check_here;
  1.1804 +        }
  1.1805 +      }
  1.1806 +
  1.1807 +      check_here++;
  1.1808 +      c++;
  1.1809 +    }
  1.1810 +  }
  1.1811 +
  1.1812 +  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1813 +  this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1814 +
  1.1815 +  if (bestsad < INT_MAX)
  1.1816 +    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
  1.1817 +                      (unsigned int *)(&thissad)) +
  1.1818 +                      mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.1819 +                                  mvjcost, mvcost, x->errorperbit);
  1.1820 +  else
  1.1821 +    return INT_MAX;
  1.1822 +}
  1.1823 +int vp9_refining_search_sad_c(MACROBLOCK *x,
  1.1824 +                              int_mv *ref_mv, int error_per_bit,
  1.1825 +                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
  1.1826 +                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
  1.1827 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1828 +  MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
  1.1829 +  int i, j;
  1.1830 +  int this_row_offset, this_col_offset;
  1.1831 +
  1.1832 +  int what_stride = x->plane[0].src.stride;
  1.1833 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1834 +  uint8_t *what = x->plane[0].src.buf;
  1.1835 +  uint8_t *best_address = xd->plane[0].pre[0].buf +
  1.1836 +                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
  1.1837 +                          ref_mv->as_mv.col;
  1.1838 +  uint8_t *check_here;
  1.1839 +  unsigned int thissad;
  1.1840 +  int_mv this_mv;
  1.1841 +  unsigned int bestsad = INT_MAX;
  1.1842 +  int_mv fcenter_mv;
  1.1843 +
  1.1844 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1845 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1846 +
  1.1847 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1848 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1849 +
  1.1850 +  bestsad = fn_ptr->sdf(what, what_stride, best_address,
  1.1851 +                        in_what_stride, 0x7fffffff) +
  1.1852 +                        mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
  1.1853 +                                       mvjsadcost, mvsadcost, error_per_bit);
  1.1854 +
  1.1855 +  for (i = 0; i < search_range; i++) {
  1.1856 +    int best_site = -1;
  1.1857 +
  1.1858 +    for (j = 0; j < 4; j++) {
  1.1859 +      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1.1860 +      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1.1861 +
  1.1862 +      if ((this_col_offset > x->mv_col_min) &&
  1.1863 +          (this_col_offset < x->mv_col_max) &&
  1.1864 +          (this_row_offset > x->mv_row_min) &&
  1.1865 +          (this_row_offset < x->mv_row_max)) {
  1.1866 +        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
  1.1867 +                     best_address;
  1.1868 +        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1869 +                              bestsad);
  1.1870 +
  1.1871 +        if (thissad < bestsad) {
  1.1872 +          this_mv.as_mv.row = this_row_offset;
  1.1873 +          this_mv.as_mv.col = this_col_offset;
  1.1874 +          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1875 +                                    mvjsadcost, mvsadcost, error_per_bit);
  1.1876 +
  1.1877 +          if (thissad < bestsad) {
  1.1878 +            bestsad = thissad;
  1.1879 +            best_site = j;
  1.1880 +          }
  1.1881 +        }
  1.1882 +      }
  1.1883 +    }
  1.1884 +
  1.1885 +    if (best_site == -1) {
  1.1886 +      break;
  1.1887 +    } else {
  1.1888 +      ref_mv->as_mv.row += neighbors[best_site].row;
  1.1889 +      ref_mv->as_mv.col += neighbors[best_site].col;
  1.1890 +      best_address += (neighbors[best_site].row) * in_what_stride +
  1.1891 +                      neighbors[best_site].col;
  1.1892 +    }
  1.1893 +  }
  1.1894 +
  1.1895 +  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  1.1896 +  this_mv.as_mv.col = ref_mv->as_mv.col * 8;
  1.1897 +
  1.1898 +  if (bestsad < INT_MAX)
  1.1899 +    return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
  1.1900 +                      (unsigned int *)(&thissad)) +
  1.1901 +                      mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.1902 +                                  mvjcost, mvcost, x->errorperbit);
  1.1903 +  else
  1.1904 +    return INT_MAX;
  1.1905 +}
  1.1906 +
  1.1907 +int vp9_refining_search_sadx4(MACROBLOCK *x,
  1.1908 +                              int_mv *ref_mv, int error_per_bit,
  1.1909 +                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
  1.1910 +                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
  1.1911 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.1912 +  MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
  1.1913 +  int i, j;
  1.1914 +  int this_row_offset, this_col_offset;
  1.1915 +
  1.1916 +  int what_stride = x->plane[0].src.stride;
  1.1917 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.1918 +  uint8_t *what = x->plane[0].src.buf;
  1.1919 +  uint8_t *best_address = xd->plane[0].pre[0].buf +
  1.1920 +                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
  1.1921 +                          ref_mv->as_mv.col;
  1.1922 +  uint8_t *check_here;
  1.1923 +  unsigned int thissad;
  1.1924 +  int_mv this_mv;
  1.1925 +  unsigned int bestsad = INT_MAX;
  1.1926 +  int_mv fcenter_mv;
  1.1927 +
  1.1928 +  int *mvjsadcost = x->nmvjointsadcost;
  1.1929 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.1930 +
  1.1931 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1932 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1933 +
  1.1934 +  bestsad = fn_ptr->sdf(what, what_stride, best_address,
  1.1935 +                        in_what_stride, 0x7fffffff) +
  1.1936 +      mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
  1.1937 +                     mvjsadcost, mvsadcost, error_per_bit);
  1.1938 +
  1.1939 +  for (i = 0; i < search_range; i++) {
  1.1940 +    int best_site = -1;
  1.1941 +    int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
  1.1942 +                 ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
  1.1943 +                 ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
  1.1944 +                 ((ref_mv->as_mv.col + 1) < x->mv_col_max);
  1.1945 +
  1.1946 +    if (all_in) {
  1.1947 +      unsigned int sad_array[4];
  1.1948 +      unsigned char const *block_offset[4];
  1.1949 +      block_offset[0] = best_address - in_what_stride;
  1.1950 +      block_offset[1] = best_address - 1;
  1.1951 +      block_offset[2] = best_address + 1;
  1.1952 +      block_offset[3] = best_address + in_what_stride;
  1.1953 +
  1.1954 +      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1.1955 +                     sad_array);
  1.1956 +
  1.1957 +      for (j = 0; j < 4; j++) {
  1.1958 +        if (sad_array[j] < bestsad) {
  1.1959 +          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
  1.1960 +          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
  1.1961 +          sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1962 +                                         mvjsadcost, mvsadcost, error_per_bit);
  1.1963 +
  1.1964 +          if (sad_array[j] < bestsad) {
  1.1965 +            bestsad = sad_array[j];
  1.1966 +            best_site = j;
  1.1967 +          }
  1.1968 +        }
  1.1969 +      }
  1.1970 +    } else {
  1.1971 +      for (j = 0; j < 4; j++) {
  1.1972 +        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1.1973 +        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1.1974 +
  1.1975 +        if ((this_col_offset > x->mv_col_min) &&
  1.1976 +            (this_col_offset < x->mv_col_max) &&
  1.1977 +            (this_row_offset > x->mv_row_min) &&
  1.1978 +            (this_row_offset < x->mv_row_max)) {
  1.1979 +          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
  1.1980 +                       best_address;
  1.1981 +          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
  1.1982 +                                bestsad);
  1.1983 +
  1.1984 +          if (thissad < bestsad) {
  1.1985 +            this_mv.as_mv.row = this_row_offset;
  1.1986 +            this_mv.as_mv.col = this_col_offset;
  1.1987 +            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.1988 +                                      mvjsadcost, mvsadcost, error_per_bit);
  1.1989 +
  1.1990 +            if (thissad < bestsad) {
  1.1991 +              bestsad = thissad;
  1.1992 +              best_site = j;
  1.1993 +            }
  1.1994 +          }
  1.1995 +        }
  1.1996 +      }
  1.1997 +    }
  1.1998 +
  1.1999 +    if (best_site == -1) {
  1.2000 +      break;
  1.2001 +    } else {
  1.2002 +      ref_mv->as_mv.row += neighbors[best_site].row;
  1.2003 +      ref_mv->as_mv.col += neighbors[best_site].col;
  1.2004 +      best_address += (neighbors[best_site].row) * in_what_stride +
  1.2005 +                      neighbors[best_site].col;
  1.2006 +    }
  1.2007 +  }
  1.2008 +
  1.2009 +  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  1.2010 +  this_mv.as_mv.col = ref_mv->as_mv.col * 8;
  1.2011 +
  1.2012 +  if (bestsad < INT_MAX)
  1.2013 +    return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
  1.2014 +                      (unsigned int *)(&thissad)) +
  1.2015 +                      mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.2016 +                                  mvjcost, mvcost, x->errorperbit);
  1.2017 +  else
  1.2018 +    return INT_MAX;
  1.2019 +}
  1.2020 +
  1.2021 +/* This function is called when we do joint motion search in comp_inter_inter
  1.2022 + * mode.
  1.2023 + */
  1.2024 +int vp9_refining_search_8p_c(MACROBLOCK *x,
  1.2025 +                             int_mv *ref_mv, int error_per_bit,
  1.2026 +                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
  1.2027 +                             int *mvjcost, int *mvcost[2], int_mv *center_mv,
  1.2028 +                             const uint8_t *second_pred, int w, int h) {
  1.2029 +  const MACROBLOCKD* const xd = &x->e_mbd;
  1.2030 +  MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
  1.2031 +      {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
  1.2032 +  int i, j;
  1.2033 +  int this_row_offset, this_col_offset;
  1.2034 +
  1.2035 +  int what_stride = x->plane[0].src.stride;
  1.2036 +  int in_what_stride = xd->plane[0].pre[0].stride;
  1.2037 +  uint8_t *what = x->plane[0].src.buf;
  1.2038 +  uint8_t *best_address = xd->plane[0].pre[0].buf +
  1.2039 +                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
  1.2040 +                          ref_mv->as_mv.col;
  1.2041 +  uint8_t *check_here;
  1.2042 +  unsigned int thissad;
  1.2043 +  int_mv this_mv;
  1.2044 +  unsigned int bestsad = INT_MAX;
  1.2045 +  int_mv fcenter_mv;
  1.2046 +
  1.2047 +  int *mvjsadcost = x->nmvjointsadcost;
  1.2048 +  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
  1.2049 +
  1.2050 +  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.2051 +  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.2052 +
  1.2053 +  /* Get compound pred by averaging two pred blocks. */
  1.2054 +  bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
  1.2055 +                         second_pred, 0x7fffffff) +
  1.2056 +      mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
  1.2057 +                     mvjsadcost, mvsadcost, error_per_bit);
  1.2058 +
  1.2059 +  for (i = 0; i < search_range; i++) {
  1.2060 +    int best_site = -1;
  1.2061 +
  1.2062 +    for (j = 0; j < 8; j++) {
  1.2063 +      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1.2064 +      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1.2065 +
  1.2066 +      if ((this_col_offset > x->mv_col_min) &&
  1.2067 +          (this_col_offset < x->mv_col_max) &&
  1.2068 +          (this_row_offset > x->mv_row_min) &&
  1.2069 +          (this_row_offset < x->mv_row_max)) {
  1.2070 +        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
  1.2071 +            best_address;
  1.2072 +
  1.2073 +        /* Get compound block and use it to calculate SAD. */
  1.2074 +        thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
  1.2075 +                               second_pred, bestsad);
  1.2076 +
  1.2077 +        if (thissad < bestsad) {
  1.2078 +          this_mv.as_mv.row = this_row_offset;
  1.2079 +          this_mv.as_mv.col = this_col_offset;
  1.2080 +          thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
  1.2081 +                                    mvjsadcost, mvsadcost, error_per_bit);
  1.2082 +          if (thissad < bestsad) {
  1.2083 +            bestsad = thissad;
  1.2084 +            best_site = j;
  1.2085 +          }
  1.2086 +        }
  1.2087 +      }
  1.2088 +    }
  1.2089 +
  1.2090 +    if (best_site == -1) {
  1.2091 +      break;
  1.2092 +    } else {
  1.2093 +      ref_mv->as_mv.row += neighbors[best_site].row;
  1.2094 +      ref_mv->as_mv.col += neighbors[best_site].col;
  1.2095 +      best_address += (neighbors[best_site].row) * in_what_stride +
  1.2096 +          neighbors[best_site].col;
  1.2097 +    }
  1.2098 +  }
  1.2099 +
  1.2100 +  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  1.2101 +  this_mv.as_mv.col = ref_mv->as_mv.col * 8;
  1.2102 +
  1.2103 +  if (bestsad < INT_MAX) {
  1.2104 +    // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
  1.2105 +    // so we don't have to use the subpixel with xoff=0,yoff=0 here.
  1.2106 +    return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride,
  1.2107 +                        (unsigned int *)(&thissad), second_pred) +
  1.2108 +                        mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
  1.2109 +                                    mvjcost, mvcost, x->errorperbit);
  1.2110 +  } else {
  1.2111 +    return INT_MAX;
  1.2112 +  }
  1.2113 +}

mercurial